""" test_constrained_model_spaces.py - SPACES-OPTIMIZED Constrained Generation Ultra-aggressive optimization for Hugging Face Spaces environment """ import torch import json import jsonschema from transformers import AutoTokenizer, AutoModelForCausalLM from typing import Dict import time import threading class TimeoutException(Exception): pass def load_trained_model(): """Load our model - SPACES OPTIMIZED""" print("๐Ÿ”„ Loading SmolLM3-3B Function-Calling Agent...") base_model_name = "HuggingFaceTB/SmolLM3-3B" try: print("๐Ÿ”„ Loading tokenizer...") tokenizer = AutoTokenizer.from_pretrained(base_model_name) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token print("๐Ÿ”„ Loading base model...") # SPACES OPTIMIZED: Memory efficient loading model = AutoModelForCausalLM.from_pretrained( base_model_name, torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True ) # Try multiple paths for fine-tuned adapter adapter_paths = [ "jlov7/SmolLM3-Function-Calling-LoRA", # Hub (preferred) "./model_files", # Local cleaned path "./smollm3_robust", # Original training output "./hub_upload", # Upload-ready files ] model_loaded = False for i, adapter_path in enumerate(adapter_paths): try: if i == 0: print("๐Ÿ”„ Loading fine-tuned adapter from Hugging Face Hub...") else: print(f"๐Ÿ”„ Trying local path: {adapter_path}") from peft import PeftModel model = PeftModel.from_pretrained(model, adapter_path) model = model.merge_and_unload() if i == 0: print("โœ… Fine-tuned model loaded successfully from Hub!") else: print(f"โœ… Fine-tuned model loaded successfully from {adapter_path}!") model_loaded = True break except Exception as e: if i == 0: print(f"โš ๏ธ Hub adapter not found: {e}") else: print(f"โš ๏ธ Path {adapter_path} failed: {e}") continue if not model_loaded: print("๐Ÿ”ง Using base model with optimized prompting") print("โœ… Model loaded successfully") return model, tokenizer except Exception as e: print(f"โŒ Error loading model: {e}") raise def constrained_json_generate(model, tokenizer, prompt: str, schema: Dict, max_attempts: int = 2): """SPACES-OPTIMIZED generation with aggressive timeouts""" device = next(model.parameters()).device for attempt in range(max_attempts): try: # VERY aggressive settings for Spaces temperature = 0.1 + (attempt * 0.2) # Start low, increase if needed inputs = tokenizer(prompt, return_tensors="pt").to(device) # Use threading timeout (cross-platform) result = [None] error = [None] def generate_with_timeout(): try: with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=25, # VERY short for Spaces temperature=temperature, do_sample=True, pad_token_id=tokenizer.eos_token_id, eos_token_id=tokenizer.eos_token_id, num_return_sequences=1, use_cache=True, repetition_penalty=1.2 # Strong repetition penalty ) result[0] = outputs except Exception as e: error[0] = str(e) # Start generation thread thread = threading.Thread(target=generate_with_timeout) thread.daemon = True thread.start() thread.join(timeout=4) # 4-second timeout if thread.is_alive(): return "", False, f"Generation timed out (attempt {attempt + 1})" if error[0]: return "", False, f"Generation error: {error[0]}" if result[0] is None: return "", False, f"Generation failed (attempt {attempt + 1})" outputs = result[0] # Extract generated text generated_ids = outputs[0][inputs['input_ids'].shape[1]:] response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip() # Try to extract JSON from response if "{" in response and "}" in response: start = response.find("{") 
                bracket_count = 0
                end = start
                for i, char in enumerate(response[start:], start):
                    if char == "{":
                        bracket_count += 1
                    elif char == "}":
                        bracket_count -= 1
                        if bracket_count == 0:
                            end = i + 1
                            break
                json_str = response[start:end]
            else:
                json_str = response

            # Validate JSON and schema
            try:
                parsed = json.loads(json_str)
                jsonschema.validate(parsed, schema)
                return json_str, True, None
            except (json.JSONDecodeError, jsonschema.ValidationError) as e:
                if attempt == max_attempts - 1:
                    return json_str, False, f"JSON validation failed: {str(e)}"
                continue

        except Exception as e:
            if attempt == max_attempts - 1:
                return "", False, f"Generation error: {str(e)}"
            continue

    return "", False, "All generation attempts failed"


def create_json_schema(function_def: Dict) -> Dict:
    """Create a JSON schema for a function definition"""
    return {
        "type": "object",
        "properties": {
            "name": {
                "type": "string",
                "enum": [function_def["name"]]
            },
            "arguments": function_def["parameters"]
        },
        "required": ["name", "arguments"]
    }


def create_test_schemas():
    """Create simplified test schemas"""
    return {
        "weather_forecast": {
            "name": "get_weather_forecast",
            "description": "Get weather forecast",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string"},
                    "days": {"type": "integer"}
                },
                "required": ["location", "days"]
            }
        }
    }


# Test if running directly
if __name__ == "__main__":
    print("🧪 Testing SPACES-optimized model...")

    try:
        model, tokenizer = load_trained_model()

        test_schema = create_test_schemas()["weather_forecast"]
        schema = create_json_schema(test_schema)

        prompt = """<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>
{"name": "get_weather_forecast", "description": "Get weather forecast", "parameters": {"type": "object", "properties": {"location": {"type": "string"}, "days": {"type": "integer"}}, "required": ["location", "days"]}}
<|im_start|>user
Get weather for Tokyo for 5 days<|im_end|>
<|im_start|>assistant
"""

        result, success, error = constrained_json_generate(model, tokenizer, prompt, schema)

        print(f"✅ Result: {result}")
        print(f"✅ Success: {success}")
        if error:
            print(f"⚠️ Error: {error}")

    except Exception as e:
        print(f"❌ Test failed: {e}")
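

# ----------------------------------------------------------------------
# Usage sketch (hypothetical): reusing these helpers from another module,
# e.g. a Gradio app running on the same Space. The import path assumes
# this file keeps its current name; the surrounding app code is
# illustrative, not part of this script.
#
#   from test_constrained_model_spaces import (
#       load_trained_model,
#       constrained_json_generate,
#       create_json_schema,
#       create_test_schemas,
#   )
#
#   model, tokenizer = load_trained_model()
#   schema = create_json_schema(create_test_schemas()["weather_forecast"])
#   json_str, ok, err = constrained_json_generate(model, tokenizer, prompt, schema)
#   if ok:
#       call = json.loads(json_str)  # {"name": ..., "arguments": {...}}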