Spestly committed
Commit c82c26a · verified · 1 Parent(s): 7ee68ee

Update README.md

Files changed (1)
  1. README.md +42 -5
README.md CHANGED
@@ -157,14 +157,51 @@ You can easily integrate Nous-V1 4B via the Hugging Face Transformers library or
 ### Using Hugging Face Transformers
 
 ```python
-# Use a pipeline as a high-level helper
-from transformers import pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-pipe = pipeline("text-generation", model="apexion-ai/Nous-V1-4B")
+model_name = "apexion-ai/Nous-1-4B"
+
+# load the tokenizer and the model
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+
+# prepare the model input
+prompt = "Give me a short introduction to large language models."
 messages = [
-    {"role": "user", "content": "Who are you?"},
+    {"role": "user", "content": prompt}
 ]
-pipe(messages)
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True,
+    enable_thinking=True  # Switches between thinking and non-thinking modes. Default is True.
+)
+model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+# conduct text completion
+generated_ids = model.generate(
+    **model_inputs,
+    max_new_tokens=32768
+)
+output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+
+# parse the thinking content
+try:
+    # rindex finding 151668 (</think>)
+    index = len(output_ids) - output_ids[::-1].index(151668)
+except ValueError:
+    index = 0
+
+thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
+content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
+
+print("thinking content:", thinking_content)
+print("content:", content)
+
 ```
 
 ### Deployment Options
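
For context on the added snippet: it hard-codes token ID 151668 as the `</think>` marker and leaves `enable_thinking=True`. Below is a minimal sketch of the non-thinking variant that resolves the marker from the tokenizer instead of hard-coding it. It is illustrative, not part of this commit: it assumes the Nous-1-4B chat template accepts `enable_thinking` (as the diff suggests) and that `</think>` maps to a single token in the vocabulary.

```python
# Minimal sketch, not part of this commit: same flow with enable_thinking=False,
# resolving the </think> marker from the tokenizer instead of hard-coding 151668.
# Assumes the Nous-1-4B chat template accepts enable_thinking (per the diff above)
# and that "</think>" is a single token in the vocabulary.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "apexion-ai/Nous-1-4B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)

# Look up the end-of-thinking marker instead of hard-coding its ID.
think_end_id = tokenizer.convert_tokens_to_ids("</think>")

messages = [{"role": "user", "content": "Who are you?"}]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # skip the reasoning block entirely
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

generated_ids = model.generate(**model_inputs, max_new_tokens=512)
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()

# Same split as the README snippet; with thinking disabled it is a no-op.
try:
    index = len(output_ids) - output_ids[::-1].index(think_end_id)
except ValueError:
    index = 0
print(tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n"))
```

Resolving the ID at runtime keeps the split correct if the vocabulary ever changes; with thinking disabled, the `except ValueError` branch simply returns the whole completion.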