# Hugging Face Space (status: Running)
import gradio as gr | |
from transformers import pipeline | |
from PIL import Image | |
# High-level Hugging Face pipeline, built once at import time so every request
# reuses the same loaded model instead of reloading it per call.
pipe = pipeline(
    "image-text-to-text",
    model="microsoft/kosmos-2-patch14-224",
    device=-1,  # -1 selects the CPU in the transformers pipeline API
)
def get_image_caption(image):
    """Generate a text caption for *image* with the module-level ``pipe``.

    Args:
        image: The uploaded picture as a PIL image, or ``None`` when the
            caller supplied nothing.

    Returns:
        The ``generated_text`` field of the first pipeline result.

    Raises:
        gr.Error: If no image was provided.
    """
    # Explicit None check: do not rely on the truthiness of an image object.
    if image is None:
        raise gr.Error("No image provided.")

    # Normalise the colour mode (e.g. RGBA, palette, greyscale) to RGB
    # before handing the image to the model.
    rgb_image = image.convert("RGB")

    # max_new_tokens: limit tokens to trade detail for speed.
    # The text prompt seeds the generation.
    result = pipe(rgb_image, text="The person is", max_new_tokens=32)
    return result[0]["generated_text"]
# Gradio front end: a single PIL image input wired to get_image_caption,
# with the returned caption rendered as plain text.
api = gr.Interface(
    fn=get_image_caption,
    inputs=gr.Image(type="pil", label="Input Image"),
    outputs="text",
)

# Start the web server; show_api=True is passed so the API usage page stays
# available alongside the UI.
api.launch(show_api=True)