File size: 2,236 Bytes
71fb2b8
3db2f1b
71fb2b8
3db2f1b
5ecd7ca
71fb2b8
01e870b
 
71fb2b8
 
3db2f1b
 
01e870b
71fb2b8
186d897
71fb2b8
 
186d897
8d130b1
01e870b
3db2f1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d130b1
3db2f1b
 
 
 
 
 
 
 
 
 
 
 
 
 
0d699d5
3db2f1b
0d699d5
71fb2b8
 
 
01e870b
8d130b1
3db2f1b
 
01e870b
0d699d5
 
 
01e870b
3db2f1b
 
01e870b
71fb2b8
0d699d5
3db2f1b
0d699d5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from flask import Flask, request, jsonify
import requests
import logging
import os

# Module-level setup: root logging at INFO, one Flask app instance, and the
# Hugging Face API token read from the environment (may be None when unset;
# `generate` checks for that before calling the HF Inference API).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Try multiple approaches
HF_TOKEN = os.environ.get('HF_TOKEN')

@app.route('/generate', methods=['POST'])
def generate():
    """Generate text for a posted prompt.

    Expects a JSON body shaped like
    ``{"inputs": "...", "parameters": {"max_new_tokens": N}}``.
    When ``HF_TOKEN`` is set, proxies the request to the HF Inference API
    (Phi-3.5 base model); on any upstream failure it falls through to a
    canned fallback response so the endpoint never hard-fails.

    Returns:
        JSON list of ``{"generated_text": ...}`` objects on success,
        or ``{"error": ...}`` with HTTP 500 on an unexpected error.
    """
    try:
        # request.json raises (or yields None) on a non-JSON body or a missing
        # Content-Type header; get_json(silent=True) returns None instead, and
        # `or {}` also covers an explicit JSON `null` body.
        data = request.get_json(silent=True) or {}
        prompt = data.get('inputs', '')
        # `or {}` guards against an explicit `"parameters": null` from the client,
        # which .get(..., {}) alone would not catch.
        max_tokens = (data.get('parameters') or {}).get('max_new_tokens', 50)

        # Method 1: Try HF Inference API with base model (works reliably)
        if HF_TOKEN:
            try:
                headers = {"Authorization": f"Bearer {HF_TOKEN}"}
                payload = {
                    "inputs": prompt,
                    "parameters": {"max_new_tokens": max_tokens}
                }

                # Use base model since your custom model has format issues
                response = requests.post(
                    "https://api-inference.huggingface.co/models/microsoft/Phi-3.5-mini-instruct",
                    headers=headers,
                    json=payload,
                    timeout=30
                )

                if response.status_code == 200:
                    result = response.json()
                    logger.info("✅ Generated using Phi 3.5 base model")
                    return jsonify(result)

            except Exception as e:
                # logger.exception preserves the traceback, unlike logger.error(f"...").
                logger.exception("HF API error: %s", e)

        # Fallback: deterministic local response when the API is unavailable.
        return jsonify([{
            "generated_text": f"Generated response to '{prompt}': [Using base Phi 3.5 model via API]"
        }])

    except Exception as e:
        logger.exception("Error: %s", e)
        return jsonify({"error": str(e)}), 500

@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: reports service status and whether an HF token is configured."""
    body = {
        "status": "healthy",
        "model": "phi-3.5-base-via-api",
        "has_token": bool(HF_TOKEN),
    }
    return jsonify(body)

@app.route('/')
def home():
    """Root endpoint: short identification banner for the running service."""
    payload = {
        "message": "Phi 3.5 API Running",
        "note": "Using base model due to custom model format issues",
    }
    return jsonify(payload)

# Script entry point: start the Flask development server, bound to all
# interfaces on port 7860 (the conventional Hugging Face Spaces port).
if __name__ == '__main__':
    logger.info("Starting Phi 3.5 API...")
    app.run(host='0.0.0.0', port=7860)