import gradio as gr
import onnxruntime as ort
import numpy as np
import torch
from PIL import Image
import torchvision.transforms as T
import cv2  # unused now that predict() no longer swaps channels; import retained

# Class names, one per line. Line order must match the model's output indices.
# Blank lines are skipped so a trailing newline cannot create an empty label.
with open("labels.txt", encoding="utf-8") as f:
    LABELS = [line.strip() for line in f if line.strip()]

# ONNX model and the name of the input tensor it is fed through.
session = ort.InferenceSession("model.onnx")
INPUT_NAME = "images"

# Resize to 224x224 and convert to a float tensor (ToTensor scales to [0, 1]).
# NOTE(review): no mean/std normalisation is applied - confirm this matches
# the preprocessing used when the model was trained.
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
])


def _to_rgb_pil(image):
    """Return *image* (NumPy array or PIL image) as an RGB PIL image."""
    if isinstance(image, np.ndarray):
        # Gradio's type="numpy" images are already in RGB channel order, so
        # no BGR->RGB swap is needed; swapping would exchange red and blue.
        image = Image.fromarray(image)
    # Normalise grayscale / RGBA / palette inputs to exactly three channels.
    return image.convert("RGB")


def predict(image):
    """Classify the emotion shown in a face image.

    Args:
        image: An RGB NumPy array (as delivered by ``gr.Image`` with
            ``type="numpy"``), a PIL image, or ``None`` when nothing was
            captured or uploaded.

    Returns:
        A ``(top_label, scores)`` tuple: the label with the highest
        probability and a dict mapping every label in ``LABELS`` to its
        softmax probability. Returns ``("No image provided", {})`` when
        ``image`` is ``None``.

    Raises:
        IndexError: If ``LABELS`` has more entries than the model has
            output classes.
    """
    if image is None:
        return "No image provided", {}

    # Add a batch dimension and hand ONNX Runtime a NumPy array.
    batch = transform(_to_rgb_pil(image)).unsqueeze(0).numpy()

    # First model output; softmax over dim=1 assumes it is
    # (batch, num_classes) logits - TODO confirm against the exported model.
    logits = session.run(None, {INPUT_NAME: batch})[0]
    probs = torch.softmax(torch.tensor(logits), dim=1)[0]

    scores = probs.tolist()
    result = {label: scores[i] for i, label in enumerate(LABELS)}
    top_idx = int(torch.argmax(probs))
    return LABELS[top_idx], result


# Gradio UI
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(
        sources=["webcam", "upload"],
        type="numpy",
        label="Capture or Upload Face",
    ),
    outputs=[
        gr.Label(label="Predicted Emotion"),
        gr.JSON(label="Confidence Scores"),
    ],
    title="Face Emotion Recognition",
    description="Capture a live face or upload an image to classify emotions.",
)

interface.launch()