File size: 1,621 Bytes
eee112c ff32308 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import gradio as gr
import torch
import timm
from PIL import Image
from torchvision import transforms
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
# The 20 Pascal VOC object categories, listed alphabetically.
# A label's position in this list is the class index used when naming the
# model's outputs, so the order is assumed to match the one used in training
# (TODO confirm against the training setup).
class_names = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
# Where the fine-tuned checkpoint lives on the Hugging Face Hub.
# Change REPO_ID to point the app at a different checkpoint repository.
REPO_ID = "fylex/swin-s3-base-pascal_test"
MODEL_FILENAME = "model.safetensors"

# Resolve the weights file through huggingface_hub; the returned value is
# passed on below as the path of the safetensors file to read.
model_path = hf_hub_download(
    repo_id=REPO_ID,
    filename=MODEL_FILENAME,
)

# Build the architecture with one output per entry in class_names and without
# timm's own pretrained weights, since the checkpoint supplies every parameter.
model = timm.create_model(
    "swin_s3_base_224",
    pretrained=False,
    num_classes=len(class_names),
)

# Read the tensors from disk, copy them into the network, then switch the
# network to evaluation mode for inference.
state_dict = load_file(model_path)
model.load_state_dict(state_dict)
model.eval()
# Input pipeline applied to every image before it reaches the model:
#   1. resize to the fixed 224x224 resolution,
#   2. convert the PIL image to a tensor,
#   3. normalise each of the three channels with mean 0.5 and std 0.5.
# NOTE(review): these statistics are assumed to match the ones used when the
# checkpoint was trained — confirm against the training configuration.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)
# Prediction function
def predict(image):
    """Classify one image and return a probability for every class.

    Args:
        image: PIL image supplied by the Gradio image input, or ``None`` when
            the request was submitted without an image.

    Returns:
        A dict mapping each name in ``class_names`` to its softmax
        probability as a Python float.

    Raises:
        gr.Error: If ``image`` is ``None``, so the UI shows a readable
            message instead of an internal library error.
    """
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    # The preprocessing pipeline normalises exactly three channels, so make
    # sure grayscale / RGBA / palette images are converted before transforming.
    batch = transform(image.convert("RGB")).unsqueeze(0)  # add batch dimension

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(batch)
        probs = torch.nn.functional.softmax(logits, dim=1)[0]

    # Pair every label with its probability; tolist() yields plain floats.
    return dict(zip(class_names, probs.tolist()))
# Web UI definition: one PIL image in, the five highest-scoring labels out,
# with predict() as the handler that connects the two.
demo = gr.Interface(
    title="Swin S3 Base - Pascal VOC Classifier",
    description="A Swin Transformer model fine-tuned on Pascal VOC for multi-class image classification.",
    fn=predict,
    # type="pil" makes Gradio hand the upload to predict() as a PIL image.
    inputs=gr.Image(type="pil"),
    # Only the top five classes of the returned dict are displayed.
    outputs=gr.Label(num_top_classes=5),
)
# Start the Gradio server only when this file is executed as a script, not
# when it is imported. share=True additionally requests a public share link.
if __name__ == "__main__":
    demo.launch(share=True)