yujiepan
/

gemma-3n-tiny-random-dim4

@@ -15,7 +15,47 @@ This tiny model is for debugging. It is randomly initialized with the config ada
 ### Example usage:
 ```python
-{code_to_run.strip()}
 ```
 ### Code to create this repo:

 ### Example usage:
 ```python
+import torch
+from transformers import pipeline
+model_id = "yujiepan/gemma-3n-tiny-random-dim4"  # Hub repo id of this tiny, randomly initialized debugging model
+pipe = pipeline(  # build the multimodal pipeline used by the rest of the example
+    task="image-text-to-text",
+    model=model_id,
+    device=0,  # device index 0 — presumably the first GPU; adjust for your setup
+    torch_dtype=torch.bfloat16  # load the model in bfloat16
+)
+# temporary patch for audio tower: cast its first positional input to the module's dtype before forward
+from accelerate.hooks import ModelHook, add_hook_to_module
+class EnsureDtype(ModelHook):  # hook that aligns the input dtype with the hooked module's dtype
+    def pre_forward(self, module, *args, **kwargs):  # called with the module and the arguments of its forward call
+        args = list(args)  # tuple -> list so the first positional argument can be reassigned
+        args[0] = args[0].to(module.dtype)  # assumes at least one positional arg that supports .to(dtype) — TODO confirm
+        return super().pre_forward(module, *args, **kwargs)  # delegate to ModelHook.pre_forward with the converted args
+add_hook_to_module(pipe.model.audio_tower, EnsureDtype())  # attach the hook to the pipeline model's audio tower only
+messages = [  # chat-style input: one system turn followed by one multimodal user turn
+    {
+        "role": "system",
+        "content": [
+            {"type": "text", "text": "You are a helpful assistant."}
+        ]
+    },
+    {
+        "role": "user",
+        "content": [  # the user turn mixes an image, an audio clip and a text prompt
+            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"},
+            # audio is buggy for now: bf16 x fp32
+            {"type": "audio", "url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Audio/glass-breaking-151256.mp3"},
+            {"type": "text", "text": "Which image is cuter?"},
+        ]
+    },
+]
+result = pipe(messages, min_new_tokens=512, max_new_tokens=512, do_sample=True)  # min == max == 512 new tokens, with sampling enabled
+print(result)  # show the raw pipeline output
 ```
 ### Code to create this repo: