Spaces:
Build error
Build error
# app.py | |
import gradio as gr | |
from tinyllava.model.builder import load_pretrained_model | |
from tinyllava.utils import disable_torch_init | |
from tinyllava.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path | |
import torch | |
from PIL import Image | |
# --- Disable unnecessary torch init ---
# Called once before the model is built.
# NOTE(review): presumably skips PyTorch's default weight initialisation,
# which is redundant when weights come from a checkpoint - confirm.
disable_torch_init()

# --- Load TinyLLaVA 3.1B ---
model_path = "bczhou/TinyLLaVA-3.1B"  # official HF ID
device = torch.device("cpu")

# NOTE(review): the LLaVA-style loader defaults to device="cuda" and moves the
# vision tower there; passing device="cpu" keeps everything on the CPU host.
# Confirm the installed tinyllava version accepts the `device` keyword.
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=model_path,
    model_base=None,  # If you have a base model, point it here; else leave as is
    model_name=get_model_name_from_path(model_path),
    device="cpu",
)

# Half-precision kernels are incomplete/slow on CPU, so run in float32.
# This roughly doubles RAM use compared with fp16 weights.
model.to(device=device, dtype=torch.float32)
model.eval()  # inference only: disable dropout and similar training behaviour
# --- Gradio handler ---
def describe_image(image, prompt):
    """Answer a question about an uploaded image with TinyLLaVA.

    Args:
        image: The image as a NumPy array (what ``gr.Image(type="numpy")``
            delivers), or ``None`` when nothing was uploaded.
        prompt: The user's question about the image. An empty question
            falls back to a generic "describe" request.

    Returns:
        The decoded model answer as a string.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    question = (prompt or "").strip() or "Describe this image."
    # tokenizer_image_token() splices the image in at the literal "<image>"
    # placeholder; without it the picture never reaches the language model.
    if "<image>" not in question:
        question = "<image>\n" + question
    # NOTE(review): no conversation template is applied here. The upstream
    # examples wrap the question in a chat template (conv_mode "phi" for this
    # checkpoint) - consider adding it for better answers.

    # TinyLLaVA wants PIL
    pil_image = Image.fromarray(image).convert("RGB")
    image_tensor = process_images([pil_image], image_processor, model.config)
    # Match the model's parameter dtype so fp16/fp32 weights both work.
    image_tensor = image_tensor.to(device, dtype=model.dtype)

    # The third positional argument of tokenizer_image_token is the image
    # token index, not the context length, so rely on its default. The
    # returned IDs are used directly, never passed through the tokenizer again.
    input_ids = tokenizer_image_token(question, tokenizer, return_tensors="pt")
    input_ids = input_ids.unsqueeze(0).to(device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            do_sample=True,
            temperature=0.2,
            max_new_tokens=200,
        )

    # Some versions return prompt + answer, others only the answer. Drop an
    # echoed prompt: it contains the image sentinel ID, which is not a real
    # vocabulary entry and must not be decoded.
    prompt_len = input_ids.shape[1]
    if output_ids.shape[1] >= prompt_len and torch.equal(
        output_ids[0, :prompt_len], input_ids[0]
    ):
        output_ids = output_ids[:, prompt_len:]

    out_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return out_text.strip()
# --- Gradio UI ---
# Single-function interface: an image plus a free-text question in, the
# model's answer out.
iface = gr.Interface(
    fn=describe_image,
    inputs=[
        gr.Image(type="numpy", label="Image"),
        gr.Textbox(label="Your question", placeholder="What's happening in this image?"),
    ],
    outputs=gr.Textbox(label="TinyLLaVA Answer"),
    # Llama emoji and em dash written as escapes so the title survives
    # mis-encoded copies of this file (it had been garbled into mojibake).
    title="\U0001F999 TinyLLaVA-3.1B \u2014 Vision-Language Q&A",
    description="A lightweight LLaVA variant that runs on CPU Spaces. Upload an image, ask a question.",
)

if __name__ == "__main__":
    iface.launch()