"""
Robustness Testing for Dynamic Function-Calling Agent
Tests model stability with:
1. Shuffled JSON key order
2. Distractor text before the schema
3. Shuffled keys and distractors combined
Quick test that doesn't require retraining.
"""
import json
import random
from test_constrained_model import load_trained_model, constrained_json_generate, create_json_schema
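# Helper functions imported above (as used in this script): load_trained_model() returns a
# (model, tokenizer) pair, create_json_schema() builds the schema object passed to the
# constrained decoder, and constrained_json_generate() returns (response, success, error).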


def shuffle_json_keys(obj):
    """Recursively shuffle the order of keys in JSON objects"""
    if isinstance(obj, dict):
        items = list(obj.items())
        random.shuffle(items)
        return {k: shuffle_json_keys(v) for k, v in items}
    elif isinstance(obj, list):
        return [shuffle_json_keys(item) for item in obj]
    return obj
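# Illustrative example (actual ordering is random):
#   shuffle_json_keys({"a": 1, "b": {"c": 2, "d": 3}})
#   -> {"b": {"d": 3, "c": 2}, "a": 1}   # values and nesting preserved, only key order changes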


def add_distractor_text(schema_str):
    """Add distracting text before the schema"""
    distractors = [
        "Note: This is a complex API with many parameters.",
        "Important: Please review all requirements carefully.",
        "Warning: Some fields may be optional depending on context.",
        "Info: This function supports multiple data formats.",
        "Reminder: Check authentication before making calls."
    ]
    distractor = random.choice(distractors)
    return f"{distractor}\n\n{schema_str}"
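# Illustrative example: add_distractor_text('{"name": "f"}') returns something like
# 'Warning: Some fields may be optional depending on context.\n\n{"name": "f"}'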


def test_robustness():
    """Run robustness tests on the function calling agent"""
    print("🧪 Starting Robustness Tests...")

    # Load model
    model, tokenizer = load_trained_model()

    # Test schema
    base_schema = {
        "name": "get_weather_forecast",
        "description": "Get weather forecast for a location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City name"},
                "days": {"type": "integer", "description": "Number of days", "minimum": 1},
                "units": {"type": "string", "enum": ["metric", "imperial"]},
                "include_hourly": {"type": "boolean", "default": False}
            },
            "required": ["location", "days"]
        }
    }
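    # Illustrative argument object satisfying this parameter schema (the exact output
    # wrapper depends on create_json_schema / constrained_json_generate):
    #   {"location": "Paris", "days": 3, "units": "metric"}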
    test_queries = [
        "Get 3-day weather for Paris",
        "Weather forecast for Tokyo, 5 days, metric units",
        "I need the weather for London for the next week"
    ]

    results = {
        "baseline": [],
        "shuffled_keys": [],
        "with_distractors": [],
        "both_shuffled_and_distractors": []
    }
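    # Each list collects one True/False outcome per query, so the per-scenario
    # success rates computed below are directly comparable against the baseline.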
    print("\n📋 Running test scenarios...")
    for query in test_queries:
        print(f"\n🔍 Query: '{query}'")

        # 1. Baseline test
        schema = create_json_schema(base_schema)
        prompt = f"""<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>
{json.dumps(base_schema, indent=2)}
<|im_start|>user
{query}<|im_end|>
<|im_start|>assistant
"""
        response, success, error = constrained_json_generate(model, tokenizer, prompt, schema)
        results["baseline"].append(success)
        print(f" ✅ Baseline: {'✓' if success else '✗'}")

        # 2. Shuffled keys test
        shuffled_schema = shuffle_json_keys(base_schema)
        schema = create_json_schema(shuffled_schema)
        prompt = f"""<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>
{json.dumps(shuffled_schema, indent=2)}
<|im_start|>user
{query}<|im_end|>
<|im_start|>assistant
"""
        response, success, error = constrained_json_generate(model, tokenizer, prompt, schema)
        results["shuffled_keys"].append(success)
        print(f" 🔀 Shuffled: {'✓' if success else '✗'}")

        # 3. Distractor text test
        schema = create_json_schema(base_schema)
        schema_with_distractor = add_distractor_text(json.dumps(base_schema, indent=2))
        prompt = f"""<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>
{schema_with_distractor}
<|im_start|>user
{query}<|im_end|>
<|im_start|>assistant
"""
        response, success, error = constrained_json_generate(model, tokenizer, prompt, schema)
        results["with_distractors"].append(success)
        print(f" 🎭 Distractor: {'✓' if success else '✗'}")

        # 4. Both shuffled and distractors
        shuffled_schema = shuffle_json_keys(base_schema)
        schema = create_json_schema(shuffled_schema)
        schema_with_distractor = add_distractor_text(json.dumps(shuffled_schema, indent=2))
        prompt = f"""<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>
{schema_with_distractor}
<|im_start|>user
{query}<|im_end|>
<|im_start|>assistant
"""
        response, success, error = constrained_json_generate(model, tokenizer, prompt, schema)
        results["both_shuffled_and_distractors"].append(success)
        print(f" 🔀🎭 Both: {'✓' if success else '✗'}")

    # Calculate success rates
    print("\n📊 Robustness Test Results:")
    print("=" * 50)
    for test_name, test_results in results.items():
        success_rate = (sum(test_results) / len(test_results)) * 100
        print(f"{test_name.replace('_', ' ').title()}: {success_rate:.1f}% ({sum(test_results)}/{len(test_results)})")

    print("\n🎯 Analysis:")
    baseline_rate = (sum(results["baseline"]) / len(results["baseline"])) * 100
    for test_name, test_results in results.items():
        if test_name != "baseline":
            test_rate = (sum(test_results) / len(test_results)) * 100
            diff = test_rate - baseline_rate
            status = "🟢" if diff >= -10 else "🟡" if diff >= -20 else "🔴"
            print(f"{status} {test_name.replace('_', ' ').title()}: {diff:+.1f}% vs baseline")

    return results


if __name__ == "__main__":
    test_robustness()