# fylexx
# Gradio app using Hub model
# ff32308
import gradio as gr
import torch
import timm
from PIL import Image
from torchvision import transforms
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
# The 20 Pascal VOC object categories.
# Order matters: position i in this list labels output index i of the model.
class_names = [
    # 0-4
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    # 5-9
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    # 10-14
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    # 15-19
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
# Fetch the fine-tuned checkpoint from the Hugging Face Hub.
REPO_ID = "fylex/swin-s3-base-pascal_test"  # Update this to use a different model repo
MODEL_FILENAME = "model.safetensors"
model_path = hf_hub_download(
    repo_id=REPO_ID,
    filename=MODEL_FILENAME,
)

# Instantiate the architecture with one output per class (no pretrained
# weights requested), then load the downloaded checkpoint into it.
model = timm.create_model(
    "swin_s3_base_224",
    pretrained=False,
    num_classes=len(class_names),
)
state_dict = load_file(model_path)
model.load_state_dict(state_dict)
model.eval()  # switch to evaluation mode; the model is only used for inference here
# Preprocessing pipeline applied to every input image before inference.
# NOTE(review): mean/std of 0.5 are assumed to match the statistics used when
# the checkpoint was fine-tuned — confirm against the training code.
transform = transforms.Compose(
    [
        # Force a fixed 224x224 input (aspect ratio is not preserved).
        transforms.Resize(size=(224, 224)),
        # PIL image -> tensor.
        transforms.ToTensor(),
        # Per-channel (x - 0.5) / 0.5 over three channels.
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)
# Prediction function
def predict(image):
    """Classify an image and return a probability for every Pascal VOC class.

    Args:
        image: PIL image to classify, or ``None`` when no image was supplied.

    Returns:
        Dict mapping each entry of ``class_names`` to its softmax probability
        as a Python float.

    Raises:
        gr.Error: If ``image`` is ``None``, so the UI shows a readable message
            instead of an opaque failure from inside the transform pipeline.
    """
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    # The Normalize step is configured for exactly three channels, so
    # grayscale, palette or alpha-channel images must be converted first.
    if image.mode != "RGB":
        image = image.convert("RGB")

    batch = transform(image).unsqueeze(0)  # add the leading batch dimension
    with torch.no_grad():  # inference only; no gradients needed
        logits = model(batch)

    # Softmax over the class dimension, then take the single batch element.
    probs = torch.softmax(logits, dim=1)[0]
    return dict(zip(class_names, probs.tolist()))
# Gradio UI: one image input, and a label output showing the five
# highest-scoring classes returned by `predict`.
demo = gr.Interface(
    title="Swin S3 Base - Pascal VOC Classifier",
    description="A Swin Transformer model fine-tuned on Pascal VOC for multi-class image classification.",
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=gr.Label(num_top_classes=5),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)