adding vLLM deployment scripts
#1
by
murodbek
- opened
README.md
CHANGED
|
@@ -110,6 +110,50 @@ def generate(user: str, max_new: int = 256) -> str:
|
|
| 110 |
print(generate("Menga Alisher Navoiy haqida aytib ber."))
|
| 111 |
```
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
## Information on Evaluation Method
|
| 115 |
|
|
|
|
| 110 |
print(generate("Menga Alisher Navoiy haqida aytib ber."))
|
| 111 |
```
|
| 112 |
|
| 113 |
+
### Use with vLLM
|
| 114 |
+
|
| 115 |
+
To deploy the model via vLLM, you can use this script:
|
| 116 |
+
```
|
| 117 |
+
vllm serve bxod/Llama-3.2-1B-Instruct-uz \
|
| 118 |
+
--api-key token-abc123 \
|
| 119 |
+
--chat-template "{% for message in messages %}{% if message['role'] == 'system' %}<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{{ message['content'] }}<|eot_id|>{% elif message['role'] == 'user' %}<|start_header_id|>user<|end_header_id|>\n{{ message['content'] }}<|eot_id|>{% elif message['role'] == 'assistant' %}<|start_header_id|>assistant<|end_header_id|>\n{{ message['content'] }}<|eot_id|>{% endif %}{% endfor %}{% if add_generation_prompt %}<|start_header_id|>assistant<|end_header_id|>\n{% endif %}"
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
and this script to query it using the OpenAI API format:
|
| 123 |
+
```
|
| 124 |
+
import re
|
| 125 |
+
import langid
|
| 126 |
+
from openai import OpenAI
|
| 127 |
+
|
| 128 |
+
client = OpenAI(
|
| 129 |
+
api_key="token-abc123",
|
| 130 |
+
base_url="http://localhost:8000/v1",
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
def preprocess_text(text):
|
| 134 |
+
lang, confidence = langid.classify(text)
|
| 135 |
+
return re.sub(r"[''‚‛ʻʼʽʾʿˈˊˋˌˍ'\']", "APST", text) if lang != "en" else text
|
| 136 |
+
|
| 137 |
+
def postprocess_text(text):
|
| 138 |
+
return text.replace("APST", "'").strip()
|
| 139 |
+
|
| 140 |
+
def generate(user: str, max_new: int = 256) -> str:
|
| 141 |
+
processed_prompt = preprocess_text(user)
|
| 142 |
+
|
| 143 |
+
response = client.chat.completions.create(
|
| 144 |
+
model="bxod/Llama-3.2-1B-Instruct-uz",
|
| 145 |
+
max_completion_tokens=max_new,
|
| 146 |
+
messages=[
|
| 147 |
+
{"role": "system", "content": "You are a helpful assistant"},
|
| 148 |
+
{"role": "user", "content": processed_prompt}
|
| 149 |
+
]
|
| 150 |
+
)
|
| 151 |
+
|
| 152 |
+
result = response.choices[0].message.content
|
| 153 |
+
return postprocess_text(result)
|
| 154 |
+
|
| 155 |
+
print(generate("Menga Alisher Navoiy haqida aytib ber."))
|
| 156 |
+
```
|
| 157 |
|
| 158 |
## Information on Evaluation Method
|
| 159 |
|