adding vLLM deployment scripts

#1
Files changed (1) hide show
  1. README.md +44 -0
README.md CHANGED
@@ -110,6 +110,50 @@ def generate(user: str, max_new: int = 256) -> str:
110
  print(generate("Menga Alisher Navoiy haqida aytib ber."))
111
  ```
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  ## Information on Evaluation Method
115
 
 
110
  print(generate("Menga Alisher Navoiy haqida aytib ber."))
111
  ```
112
 
113
+ ### Use with vLLM
114
+
115
+ To serve the model with vLLM, run the following command:
116
+ ```bash
117
+ vllm serve bxod/Llama-3.2-1B-Instruct-uz \
118
+ --api-key token-abc123 \
119
+ --chat-template "{% for message in messages %}{% if message['role'] == 'system' %}<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{{ message['content'] }}<|eot_id|>{% elif message['role'] == 'user' %}<|start_header_id|>user<|end_header_id|>\n{{ message['content'] }}<|eot_id|>{% elif message['role'] == 'assistant' %}<|start_header_id|>assistant<|end_header_id|>\n{{ message['content'] }}<|eot_id|>{% endif %}{% endfor %}{% if add_generation_prompt %}<|start_header_id|>assistant<|end_header_id|>\n{% endif %}"
120
+ ```
121
+
122
+ Once the server is running, you can query it through its OpenAI-compatible API with this script:
123
+ ```python
124
+ import re
125
+ import langid
126
+ from openai import OpenAI
127
+
128
+ client = OpenAI(
129
+ api_key="token-abc123",
130
+ base_url="http://localhost:8000/v1",
131
+ )
132
+
133
+ def preprocess_text(text):
134
+ lang, confidence = langid.classify(text)
135
+ return re.sub(r"[''‚‛ʻʼʽʾʿˈˊˋˌˍ'\']", "APST", text) if lang != "en" else text
136
+
137
+ def postprocess_text(text):
138
+ return text.replace("APST", "'").strip()
139
+
140
+ def generate(user: str, max_new: int = 256) -> str:
141
+ processed_prompt = preprocess_text(user)
142
+
143
+ response = client.chat.completions.create(
144
+ model="bxod/Llama-3.2-1B-Instruct-uz",
145
+ max_completion_tokens=max_new,
146
+ messages=[
147
+ {"role": "system", "content": "You are a helpful assistant"},
148
+ {"role": "user", "content": processed_prompt}
149
+ ]
150
+ )
151
+
152
+ result = response.choices[0].message.content
153
+ return postprocess_text(result)
154
+
155
+ print(generate("Menga Alisher Navoiy haqida aytib ber."))
156
+ ```
157
 
158
  ## Information on Evaluation Method
159