File size: 1,121 Bytes
d78fda0
621bb5d
d78fda0
 
ae103bc
a3766e8
ae103bc
106eff3
ae103bc
621bb5d
 
 
 
 
d78fda0
 
ae103bc
 
 
 
a3766e8
24e5396
ae103bc
 
 
 
 
a3766e8
24e5396
ae103bc
a3766e8
 
ae103bc
24e5396
fb9ccfe
ae103bc
 
fb9ccfe
 
d78fda0
fb9ccfe
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import streamlit as st
from transformers import pipeline
from PIL import Image

# --- Page setup -------------------------------------------------------------
st.set_page_config(page_title="TinyLLaVA Snapshot Q&A", layout="centered")
st.title("πŸ“Έ TinyLLaVA β€” Snapshot Q&A (Spaces Safe)")


# --- Model -------------------------------------------------------------------
@st.cache_resource(show_spinner="Loading TinyLLaVA model...")
def _load_pipeline():
    """Build the image-to-text pipeline exactly once per server process.

    Streamlit re-runs this entire script on every widget interaction;
    without ``st.cache_resource`` the multi-GB model would be re-downloaded
    and re-instantiated on every rerun, making the app unusable.

    Returns:
        A ``transformers`` image-to-text pipeline running on CPU
        (device_map="cpu" keeps it within the Spaces free-tier limits).
    """
    return pipeline(
        task="image-to-text",
        model="bczhou/tiny-llava-v1-hf",
        trust_remote_code=True,
        device_map="cpu",
    )


pipe = _load_pipeline()

# πŸ‘‡ Clear info so the widget tree is stable
st.markdown("Use your **webcam** OR upload a file:")

# Try webcam first
image = st.camera_input("πŸ“· Take a snapshot")

# Fallback uploader
if not image:
    image = st.file_uploader("πŸ“‚ Or upload an image", type=["jpg", "png", "jpeg"])

# Prompt
prompt = st.text_input("πŸ’¬ Your question:", value="Describe this scene.")

# Run TinyLLaVA
if image is not None and prompt:
    img = Image.open(image).convert("RGB")
    # NOTE(review): use_column_width is deprecated in recent Streamlit in
    # favor of use_container_width — kept as-is for older-version compat.
    st.image(img, caption="Your Image", use_column_width=True)

    # LLaVA chat template: the <image> token marks where pixels are injected.
    query = f"USER: <image>\n{prompt}\nASSISTANT:"

    with st.spinner("Generating..."):
        # BUG FIX: ImageToTextPipeline takes the image as its first argument
        # and the text via the `prompt=` keyword. The original call
        # `pipe(query, img)` passed the prompt string where the image goes,
        # so the pipeline would try to load the question text as an
        # image path/URL and crash.
        result = pipe(img, prompt=query)
        answer = result[0]["generated_text"]

    st.subheader("πŸ“ Answer")
    st.write(answer)