"""
test_constrained_model.py - Test Constrained Generation with Trained Model

This tests our intensively trained model using constrained JSON generation
to force valid outputs and solve the "Expecting ',' delimiter" issues.
"""

import torch
import json
import jsonschema
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from typing import Dict, List
import time
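
# Assumed setup for this script: the base model "HuggingFaceTB/SmolLM3-3B" is pulled
# from the Hugging Face Hub, the LoRA adapter lives in ./smollm3_robust, and
# generation runs on Apple MPS when available (otherwise device_map="auto").
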
def load_trained_model():
    """Load our intensively trained model."""
    print("Loading intensively trained SmolLM3-3B...")

    base_model_name = "HuggingFaceTB/SmolLM3-3B"
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float32,
        device_map="mps" if torch.backends.mps.is_available() else "auto"
    )
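
    # Load the fine-tuned LoRA adapter and merge it into the base weights so that
    # generation below runs on a single, plain Hugging Face model.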
    print("Loading LoRA adapter...")
    model = PeftModel.from_pretrained(model, "./smollm3_robust")
    model = model.merge_and_unload()

    print("Trained model loaded successfully")
    return model, tokenizer

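
# Note: "constrained" here means generate-then-validate with retries (parse the output,
# check it against the JSON schema, and resample at a slightly higher temperature on
# failure), not token-level grammar-constrained decoding.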
def constrained_json_generate(model, tokenizer, prompt: str, schema: Dict, max_attempts: int = 3):
    """Generate JSON with multiple attempts and validation."""
    device = next(model.parameters()).device

    for attempt in range(max_attempts):
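        # Start nearly greedy and raise the temperature slightly on each retry.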
        temperature = 0.1 + (attempt * 0.1)

        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                temperature=temperature,
                do_sample=True,
                top_p=0.9,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )
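
        # Decode only the newly generated tokens, i.e. everything after the prompt.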
        response = tokenizer.decode(
            outputs[0][inputs['input_ids'].shape[1]:],
            skip_special_tokens=True
        ).strip()
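
        # Accept the response only if it parses as JSON and, when a schema is given,
        # validates against it; otherwise fall through and retry.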
        try:
            parsed = json.loads(response)
            if schema:
                jsonschema.validate(parsed, schema)
            return response, True, None
        except json.JSONDecodeError as e:
            if attempt == max_attempts - 1:
                return response, False, str(e)
        except jsonschema.ValidationError as e:
            if attempt == max_attempts - 1:
                return response, False, f"Schema validation: {str(e)}"

    return response, False, "Max attempts exceeded"

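
# Expected model output for these schemas is a single tool-call JSON object, e.g.:
#   {"name": "get_weather_forecast", "arguments": {"location": "San Francisco", "days": 3, "units": "metric"}}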
def create_test_schemas():
    """Create the test schemas we're evaluating against."""
    return {
        "weather_forecast": {
            "name": "get_weather_forecast",
            "description": "Get weather forecast",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string"},
                    "days": {"type": "integer"},
                    "units": {"type": "string"},
                    "include_hourly": {"type": "boolean"}
                },
                "required": ["location", "days"]
            }
        },
        "sentiment_analysis": {
            "name": "analyze_sentiment",
            "description": "Analyze text sentiment",
            "parameters": {
                "type": "object",
                "properties": {
                    "text": {"type": "string"},
                    "language": {"type": "string"},
                    "include_emotions": {"type": "boolean"},
                    "confidence_threshold": {"type": "number"}
                },
                "required": ["text"]
            }
        },
        "currency_converter": {
            "name": "convert_currency",
            "description": "Convert currency amounts",
            "parameters": {
                "type": "object",
                "properties": {
                    "amount": {"type": "number"},
                    "from_currency": {"type": "string"},
                    "to_currency": {"type": "string"},
                    "include_fees": {"type": "boolean"},
                    "precision": {"type": "integer"}
                },
                "required": ["amount", "from_currency", "to_currency"]
            }
        }
    }

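
# create_json_schema wraps a function definition into a validator: a response must be
# exactly {"name": <function name>, "arguments": {...}} with no extra top-level keys.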
def create_json_schema(function_def: Dict) -> Dict:
    """Create JSON schema for validation."""
    return {
        "type": "object",
        "properties": {
            "name": {
                "type": "string",
                "const": function_def["name"]
            },
            "arguments": function_def["parameters"]
        },
        "required": ["name", "arguments"],
        "additionalProperties": False
    }

def test_constrained_generation():
    """Test constrained generation on our problem schemas."""
    print("Testing Constrained Generation with Trained Model")
    print("=" * 60)

    model, tokenizer = load_trained_model()

    schemas = create_test_schemas()

    test_cases = [
        ("weather_forecast", "Get 3-day weather for San Francisco in metric units"),
        ("sentiment_analysis", "Analyze sentiment: The product was excellent and delivery was fast"),
        ("currency_converter", "Convert 500 USD to EUR with fees included"),
        ("weather_forecast", "Give me tomorrow's weather for London with hourly details"),
        ("sentiment_analysis", "Check sentiment for I am frustrated with this service"),
        ("currency_converter", "Convert 250 EUR to CAD using rates from 2023-12-01")
    ]

    results = {"passed": 0, "total": len(test_cases), "details": []}

    for schema_name, query in test_cases:
        print(f"\nTesting: {schema_name}")
        print(f"Query: {query}")

        function_def = schemas[schema_name]
        schema = create_json_schema(function_def)
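
        # ChatML-style prompt; assumed to mirror the format used during fine-tuning,
        # with the function definition supplied between <schema> tags.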
        prompt = f"""<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>

<schema>
{json.dumps(function_def, indent=2)}
</schema>

<|im_start|>user
{query}<|im_end|>
<|im_start|>assistant
"""
        response, success, error = constrained_json_generate(model, tokenizer, prompt, schema)

        print(f"Response: {response}")
        if success:
            print("PASS - Valid JSON with correct schema!")
            results["passed"] += 1
        else:
            print(f"FAIL - {error}")

        results["details"].append({
            "schema": schema_name,
            "query": query,
            "response": response,
            "success": success,
            "error": error
        })

    success_rate = (results["passed"] / results["total"]) * 100

    print("\nCONSTRAINED GENERATION RESULTS")
    print("=" * 60)
    print(f"Passed: {results['passed']}/{results['total']} ({success_rate:.1f}%)")
    print("Target: ≥80%")

    if success_rate >= 80:
        print("SUCCESS! Reached 80%+ target with constrained generation!")
    else:
        print(f"Improvement needed: +{80 - success_rate:.1f}% to reach target")
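
    # Persist per-case details and the overall success rate for later inspection.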
    with open("constrained_results.json", "w") as f:
        json.dump({
            "success_rate": success_rate,
            "passed": results["passed"],
            "total": results["total"],
            "details": results["details"],
            "timestamp": time.time()
        }, f, indent=2)

    print("Results saved to constrained_results.json")

    return success_rate

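
# Running this file directly loads the model, evaluates the six test queries, prints
# per-case results, and writes constrained_results.json.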
if __name__ == "__main__":
    success_rate = test_constrained_generation()