Spaces:

fylex
/

swin-pascal-imageclassification-gradio

Sleeping

fylexx

Gradio app using Hub model

ff32308 about 2 months ago

1.62 kB

	import gradio as gr
	import torch
	import timm
	from PIL import Image
	from torchvision import transforms
	from huggingface_hub import hf_hub_download
	from safetensors.torch import load_file

	# The 20 Pascal VOC object categories. The list order is significant:
	# position i is used as the label for output index i of the classifier.
	class_names = [
	    "aeroplane",
	    "bicycle",
	    "bird",
	    "boat",
	    "bottle",
	    "bus",
	    "car",
	    "cat",
	    "chair",
	    "cow",
	    "diningtable",
	    "dog",
	    "horse",
	    "motorbike",
	    "person",
	    "pottedplant",
	    "sheep",
	    "sofa",
	    "train",
	    "tvmonitor",
	]

	# Location of the fine-tuned weights on the Hugging Face Hub.
	# Change REPO_ID to point the app at a different model repository.
	REPO_ID = "fylex/swin-s3-base-pascal_test"
	MODEL_FILENAME = "model.safetensors"

	# Fetch the weights file from the Hub and get its local path.
	model_path = hf_hub_download(filename=MODEL_FILENAME, repo_id=REPO_ID)

	# Instantiate the architecture without pretrained weights, sized to the
	# number of labels, then load the fine-tuned parameters into it.
	model = timm.create_model(
	    "swin_s3_base_224",
	    num_classes=len(class_names),
	    pretrained=False,
	)
	state_dict = load_file(model_path)
	model.load_state_dict(state_dict)
	# Put the model in evaluation mode; it is only used for prediction here.
	model.eval()

	# Preprocessing: resize to 224x224, convert to a tensor, then normalize
	# each of the three channels with mean 0.5 and std 0.5.
	# NOTE(review): confirm these statistics match the ones used in fine-tuning.
	transform = transforms.Compose([
	    transforms.Resize((224, 224)),
	    transforms.ToTensor(),
	    # `[0.5]3` was a syntax error; the per-channel lists are `[0.5] * 3`.
	    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
	])

	# Prediction function
	def predict(image):
	    """Classify a PIL image and return a probability for every class.

	    Args:
	        image: PIL image to classify (the Gradio input is configured with
	            ``type="pil"``), or ``None`` when no image was provided.

	    Returns:
	        A dict mapping each entry of ``class_names`` to its softmax
	        probability as a Python float, or ``None`` if ``image`` is ``None``.
	    """
	    # Without this guard a missing image crashes inside the transform.
	    if image is None:
	        return None
	    # Normalization is configured for three channels, so force RGB to keep
	    # grayscale or RGBA inputs from failing.
	    batch = transform(image.convert("RGB")).unsqueeze(0)
	    with torch.no_grad():
	        logits = model(batch)
	    # Softmax over the class dimension; [0] selects the single batch item.
	    probs = torch.nn.functional.softmax(logits, dim=1)[0]
	    return dict(zip(class_names, probs.tolist()))

	# Web UI: one PIL image in, the five highest-scoring labels out.
	demo = gr.Interface(
	    title="Swin S3 Base - Pascal VOC Classifier",
	    description="A Swin Transformer model fine-tuned on Pascal VOC for multi-class image classification.",
	    fn=predict,
	    inputs=gr.Image(type="pil"),
	    outputs=gr.Label(num_top_classes=5),
	)

	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(share=True)