THUdyh · nielsr (HF Staff) committed
Commit efed200 · verified · 1 parent: f32d99c

Add library_name to metadata (#11)

- Add library_name to metadata (84e272b26f7f71cd69c8eea0e3133e1276ff4383)
Co-authored-by: Niels Rogge <[email protected]>

Files changed (1):
  1. README.md +14 -8
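
An edit like this can also be made programmatically. A minimal sketch using `huggingface_hub` (the target repo id is an assumption from the commit context; `metadata_update` merges the given keys into the README's YAML front matter):

```python
from huggingface_hub import metadata_update

# Assumed repo id; metadata_update merges the dict into the model card's
# YAML front matter and commits the result.
metadata_update(
    "THUdyh/Ola-7B",
    {"library_name": "transformers"},
    commit_message="Add library_name to metadata",
    create_pr=True,  # open a pull request (as with #11) instead of pushing to main
)
```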
README.md CHANGED

@@ -1,13 +1,14 @@
  ---
- license: apache-2.0
  base_model:
  - Qwen/Qwen2.5-7B-Instruct
- pipeline_tag: any-to-any
+ datasets:
+ - HuggingFaceFV/finevideo
  language:
  - en
  - zh
- datasets:
- - HuggingFaceFV/finevideo
+ license: apache-2.0
+ pipeline_tag: any-to-any
+ library_name: transformers
  ---

  # Ola-7B

@@ -30,7 +31,7 @@ Ola offers an on-demand solution to seamlessly and efficiently process visual in

  We provide a simple generation process for using our model. For more details, please refer to our [Github Repo](https://github.com/Ola-Omni/Ola)

- ```
+ ```python
  import os
  os.environ['LOWRES_RESIZE'] = '384x32'
  os.environ['HIGHRES_BASE'] = '0x32'

@@ -177,11 +178,15 @@ def ola_inference(multimodal, audio_path):
      else:
          qs = ''
      if USE_SPEECH and audio_path:
-         qs = DEFAULT_IMAGE_TOKEN + "\n" + "User's question in speech: " + DEFAULT_SPEECH_TOKEN + '\n'
+         qs = DEFAULT_IMAGE_TOKEN + "
+ " + "User's question in speech: " + DEFAULT_SPEECH_TOKEN + '
+ '
      elif USE_SPEECH:
-         qs = DEFAULT_SPEECH_TOKEN + DEFAULT_IMAGE_TOKEN + "\n" + qs
+         qs = DEFAULT_SPEECH_TOKEN + DEFAULT_IMAGE_TOKEN + "
+ " + qs
      else:
-         qs = DEFAULT_IMAGE_TOKEN + "\n" + qs
+         qs = DEFAULT_IMAGE_TOKEN + "
+ " + qs

      conv = conv_templates[conv_mode].copy()
      conv.append_message(conv.roles[0], qs)

@@ -220,6 +225,7 @@ def ola_inference(multimodal, audio_path):
      for visual in image:
          image_tensor_, image_highres_tensor_ = process_anyres_highres_image_genli(visual, image_processor)
          image_tensor.append(image_tensor_)
+
          image_highres_tensor.append(image_highres_tensor_)
      if all(x.shape == image_tensor[0].shape for x in image_tensor):
          image_tensor = torch.stack(image_tensor, dim=0)
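
Setting `library_name: transformers` tells the Hub which library the checkpoint targets, which drives the code snippet shown on the model page and how downloads are attributed. A minimal loading sketch, assuming the repo bundles its custom Ola modeling code alongside the weights (hence `trust_remote_code=True`); the full generation pipeline is the one shown in the diff above and in the GitHub repo:

```python
from transformers import AutoModel

# Assumptions: the repo id "THUdyh/Ola-7B", and that custom Ola modeling code
# ships with the checkpoint so transformers can resolve it via trust_remote_code.
model = AutoModel.from_pretrained("THUdyh/Ola-7B", trust_remote_code=True)
```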