Add library_name to metadata (#11)
- Add library_name to metadata (84e272b26f7f71cd69c8eea0e3133e1276ff4383)
Co-authored-by: Niels Rogge <[email protected]>
README.md CHANGED
```diff
@@ -1,13 +1,14 @@
 ---
-license: apache-2.0
 base_model:
 - Qwen/Qwen2.5-7B-Instruct
-
+datasets:
+- HuggingFaceFV/finevideo
 language:
 - en
 - zh
-
-
+license: apache-2.0
+pipeline_tag: any-to-any
+library_name: transformers
 ---
 
 # Ola-7B
```
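For context, metadata-only changes like this one can be opened programmatically. Below is a minimal sketch using `huggingface_hub.metadata_update`; the repo id `THUdyh/Ola-7b` is an assumption for illustration, not taken from this commit:

```python
# Hedged sketch: open a metadata PR similar to this one.
# Assumption: the repo id below; substitute the real one. Requires a
# write-scoped token (HF_TOKEN env var or the `token=` argument).
from huggingface_hub import metadata_update

metadata_update(
    "THUdyh/Ola-7b",                   # assumed repo id
    {"library_name": "transformers"},  # the key this PR adds
    create_pr=True,                    # open a PR instead of pushing to main
)
```

`library_name: transformers` is the key that tells the Hub which library's loading snippet to surface on the model page.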
````diff
@@ -30,7 +31,7 @@ Ola offers an on-demand solution to seamlessly and efficiently process visual in
 
 We provide a simple generation process for using our model. For more details, please refer to our [Github Repo](https://github.com/Ola-Omni/Ola)
 
-```
+```python
 import os
 os.environ['LOWRES_RESIZE'] = '384x32'
 os.environ['HIGHRES_BASE'] = '0x32'
````
```diff
@@ -177,11 +178,15 @@ def ola_inference(multimodal, audio_path):
     else:
         qs = ''
     if USE_SPEECH and audio_path:
-        qs = DEFAULT_IMAGE_TOKEN + "\n" + "User's question in speech: " + DEFAULT_SPEECH_TOKEN + '\n'
+        qs = DEFAULT_IMAGE_TOKEN + "
+" + "User's question in speech: " + DEFAULT_SPEECH_TOKEN + '
+'
     elif USE_SPEECH:
-        qs = DEFAULT_SPEECH_TOKEN + DEFAULT_IMAGE_TOKEN + "\n" + qs
+        qs = DEFAULT_SPEECH_TOKEN + DEFAULT_IMAGE_TOKEN + "
+" + qs
     else:
-        qs = DEFAULT_IMAGE_TOKEN + "\n" + qs
+        qs = DEFAULT_IMAGE_TOKEN + "
+" + qs
 
     conv = conv_templates[conv_mode].copy()
     conv.append_message(conv.roles[0], qs)
```
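Note that after this hunk the `qs` string literals contain raw newlines, which is why each assignment now spans several physical lines in the diff. For reference, a sketch of the same three-branch prompt construction with the newlines written as explicit `\n` escapes; the token values below are placeholders, in the real script they come from the Ola codebase:

```python
# Same three-branch prompt construction with explicit "\n" escapes.
# Placeholder values: the real DEFAULT_* tokens, USE_SPEECH, audio_path,
# and qs come from the surrounding ola_inference code.
DEFAULT_IMAGE_TOKEN = "<image>"
DEFAULT_SPEECH_TOKEN = "<speech>"
USE_SPEECH, audio_path = True, "question.wav"
qs = "What is happening in this video?"

if USE_SPEECH and audio_path:
    # image plus the user's question asked in speech
    qs = DEFAULT_IMAGE_TOKEN + "\n" + "User's question in speech: " + DEFAULT_SPEECH_TOKEN + "\n"
elif USE_SPEECH:
    # speech and image tokens prefixed to the text question
    qs = DEFAULT_SPEECH_TOKEN + DEFAULT_IMAGE_TOKEN + "\n" + qs
else:
    # image token prefixed to the text question
    qs = DEFAULT_IMAGE_TOKEN + "\n" + qs
```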
```diff
@@ -220,6 +225,7 @@ def ola_inference(multimodal, audio_path):
     for visual in image:
         image_tensor_, image_highres_tensor_ = process_anyres_highres_image_genli(visual, image_processor)
         image_tensor.append(image_tensor_)
+
         image_highres_tensor.append(image_highres_tensor_)
     if all(x.shape == image_tensor[0].shape for x in image_tensor):
         image_tensor = torch.stack(image_tensor, dim=0)
```
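The loop in this hunk collects one tensor per input image; the guard afterwards stacks them into a single batch tensor only when every entry has the same shape. A self-contained sketch of that pattern:

```python
# Stand-alone sketch of the shape-guarded stacking pattern above: stack
# per-image tensors into one batch tensor only if all shapes match;
# otherwise they stay as a list of differently sized tensors.
import torch

image_tensor = [torch.randn(3, 384, 384) for _ in range(2)]
if all(x.shape == image_tensor[0].shape for x in image_tensor):
    image_tensor = torch.stack(image_tensor, dim=0)  # shape: (2, 3, 384, 384)
```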
|