phi-3.5-api / app.py
from flask import Flask, request, jsonify
import requests
import logging
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Hugging Face API token, read from the environment. When it is missing,
# /generate falls back to a canned response instead of calling the API.
HF_TOKEN = os.environ.get('HF_TOKEN')
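# Note (assumption): in a Hugging Face Space this is typically provided as a
# repository secret named HF_TOKEN, which Spaces expose to the app as an
# environment variable; locally it can be exported in the shell before launch.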
@app.route('/generate', methods=['POST'])
def generate():
    try:
        # Parse the JSON body defensively; a missing or malformed body
        # becomes an empty dict rather than raising.
        data = request.get_json(silent=True) or {}
        prompt = data.get('inputs', '')
        max_tokens = data.get('parameters', {}).get('max_new_tokens', 50)

        # Method 1: HF Inference API with the base model (works reliably).
        # The base model is used because the custom model has format issues.
        if HF_TOKEN:
            try:
                headers = {"Authorization": f"Bearer {HF_TOKEN}"}
                payload = {
                    "inputs": prompt,
                    "parameters": {"max_new_tokens": max_tokens}
                }
                response = requests.post(
                    "https://api-inference.huggingface.co/models/microsoft/Phi-3.5-mini-instruct",
                    headers=headers,
                    json=payload,
                    timeout=30
                )
                if response.status_code == 200:
                    result = response.json()
                    logger.info("✅ Generated using Phi 3.5 base model")
                    return jsonify(result)
                # Non-200 responses (e.g. model loading, rate limits) fall
                # through to the fallback below.
                logger.warning(f"HF API returned status {response.status_code}")
            except Exception as e:
                logger.error(f"HF API error: {e}")

        # Fallback: used when no token is configured or the API call failed.
        return jsonify([{
            "generated_text": f"Generated response to '{prompt}': [Using base Phi 3.5 model via API]"
        }])
    except Exception as e:
        logger.error(f"Error: {e}")
        return jsonify({"error": str(e)}), 500
@app.route('/health', methods=['GET'])
def health():
    return jsonify({
        "status": "healthy",
        "model": "phi-3.5-base-via-api",
        "has_token": bool(HF_TOKEN)
    })
@app.route('/')
def home():
    return jsonify({
        "message": "Phi 3.5 API Running",
        "note": "Using base model due to custom model format issues"
    })
if __name__ == '__main__':
    logger.info("Starting Phi 3.5 API...")
    app.run(host='0.0.0.0', port=7860)
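
# Usage sketch (hypothetical client, not part of this app): with the server
# running on port 7860, /generate and /health can be exercised like this:
#
#   import requests
#   r = requests.post(
#       "http://localhost:7860/generate",
#       json={"inputs": "Hello", "parameters": {"max_new_tokens": 30}},
#       timeout=60,
#   )
#   print(r.json())
#   print(requests.get("http://localhost:7860/health").json())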