Erigann committed
Commit 3bcf960 · verified · 1 Parent(s): 9e495e9

Create app.py

Files changed (1)
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
+ from flask import Flask, request, jsonify
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+
+ app = Flask(__name__)
+
+ # Use a quantized model to save memory
+ model_name = "Qwen/Qwen-1_8B-Chat-Int4"
+
+ # Load the model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     device_map="auto",
+     torch_dtype=torch.float16,
+     trust_remote_code=True
+ )
+
+ @app.route("/v1/chat/completions", methods=["POST"])
+ def chat():
+     data = request.json
+     prompt = data.get("messages", [])[-1]["content"]
+
+     # Generate a response from the last user message
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+     outputs = model.generate(**inputs, max_new_tokens=200)
+     # Decode only the newly generated tokens so the prompt is not echoed back
+     response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
+
+     # Return the response in OpenAI API format
+     return jsonify({
+         "choices": [
+             {
+                 "message": {
+                     "content": response
+                 }
+             }
+         ]
+     })
+
+ if __name__ == "__main__":
+     app.run()
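
For reference, a minimal client-side sketch of how this endpoint could be exercised once the server is running. The base URL (Flask's default http://127.0.0.1:5000) and the example message text are assumptions, not part of the committed file; the payload shape mirrors what the handler reads (only the last message's "content" is used).

# Hypothetical client sketch; assumes the server above is running on
# Flask's default http://127.0.0.1:5000 (URL and message text are assumptions).
import requests

payload = {
    "messages": [
        {"role": "user", "content": "Hello! What can you do?"}
    ]
}

resp = requests.post("http://127.0.0.1:5000/v1/chat/completions", json=payload)
resp.raise_for_status()

# The handler returns an OpenAI-style body: choices[0].message.content
print(resp.json()["choices"][0]["message"]["content"])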