"""Gradio demo: YOLOX object detection using an ONNX model and OpenCV DNN."""

import cv2 as cv
import gradio as gr
import numpy as np
from huggingface_hub import hf_hub_download

from yolox import YoloX

# Fetch the YOLOX ONNX weights from the Hugging Face Hub. This runs
# unconditionally at import time.
model_path = hf_hub_download(
    repo_id="opencv/object_detection_yolox",
    filename="object_detection_yolox_2022nov.onnx",
)

# Initialize the YOLOX model on the plain OpenCV backend / CPU target.
model = YoloX(
    modelPath=model_path,
    confThreshold=0.5,
    nmsThreshold=0.5,
    objThreshold=0.5,
    backendId=cv.dnn.DNN_BACKEND_OPENCV,
    targetId=cv.dnn.DNN_TARGET_CPU,
)

# Class names, indexed by the class id stored in the last column of each
# detection row.
classes = (
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
    'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
    'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
    'hair drier', 'toothbrush',
)


def letterbox(srcimg, target_size=(640, 640)):
    """Resize *srcimg* to fit inside *target_size*, padding the remainder.

    The aspect ratio is preserved. The resized image is placed in the
    top-left corner of a float32 canvas filled with the value 114.

    Args:
        srcimg: 3-channel image array of shape (height, width, 3).
        target_size: Canvas size as (height, width).

    Returns:
        A ``(padded_img, ratio)`` tuple: the float32 canvas and the scale
        factor that was applied to the source image.
    """
    target_h, target_w = target_size[0], target_size[1]
    src_h, src_w = srcimg.shape[:2]
    ratio = min(target_h / src_h, target_w / src_w)

    # Clamp to at least one pixel: for extremely elongated inputs the
    # truncated size could be 0, which cv.resize rejects.
    new_w = max(1, int(src_w * ratio))
    new_h = max(1, int(src_h * ratio))

    resized_img = cv.resize(
        srcimg, (new_w, new_h), interpolation=cv.INTER_LINEAR
    ).astype(np.float32)

    padded_img = np.full((target_h, target_w, 3), 114.0, dtype=np.float32)
    padded_img[:new_h, :new_w] = resized_img
    return padded_img, ratio


def unletterbox(bbox, scale):
    """Map box coordinates from letterboxed space back to the source image.

    Only a division by *scale* is needed because ``letterbox`` anchors the
    resized image at the top-left corner (no offset).
    """
    return bbox / scale


def visualize(dets, image, scale):
    """Draw detection boxes and labels on a copy of *image*.

    Args:
        dets: Iterable of detection rows. The first four values are the box
            corners (x0, y0, x1, y1) in letterboxed coordinates, the
            second-to-last value is the score and the last is the class id.
        image: Image to annotate; it is not modified.
        scale: Ratio returned by ``letterbox``, used to undo the resize.

    Returns:
        The annotated copy of *image*.
    """
    res_img = image.copy()
    h, w = res_img.shape[:2]

    # Scale text and line width with the image so labels stay legible.
    font_scale = max(0.5, min(w, h) / 640.0 * 0.5)
    thickness = max(1, int(font_scale * 2))

    for det in dets:
        # Plain Python ints for the OpenCV drawing calls.
        x0, y0, x1, y1 = unletterbox(det[:4], scale).astype(np.int32).tolist()
        score = det[-2]
        cls_id = int(det[-1])
        label = '{}:{:.1f}%'.format(classes[cls_id], score * 100)

        cv.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), thickness)

        # White filled background behind the label text.
        (tw, th), _ = cv.getTextSize(
            label, cv.FONT_HERSHEY_SIMPLEX, font_scale, thickness
        )
        cv.rectangle(
            res_img, (x0, y0), (x0 + tw + 2, y0 + th + 4), (255, 255, 255), -1
        )
        cv.putText(
            res_img, label, (x0, y0 + th), cv.FONT_HERSHEY_SIMPLEX,
            font_scale, (0, 0, 0), thickness,
        )

    return res_img


def detect_objects(input_image):
    """Run YOLOX on an RGB image and return it with detections drawn.

    Args:
        input_image: RGB image array supplied by the Gradio image input,
            or ``None`` when nothing was uploaded.

    Returns:
        The annotated RGB image, the unchanged input when nothing was
        detected, or ``None`` when no image was provided.
    """
    # Submitting the form without an image yields None; bail out instead
    # of letting cv.cvtColor raise.
    if input_image is None:
        return None

    # The input is already RGB, so it is letterboxed directly (the original
    # converted RGB -> BGR -> RGB before this step, which is a no-op).
    input_blob, scale = letterbox(input_image)
    results = model.infer(input_blob)

    if results is None or len(results) == 0:
        return input_image

    # Draw on a BGR copy, then convert back to RGB for display.
    bgr = cv.cvtColor(input_image, cv.COLOR_RGB2BGR)
    vis_image = visualize(results, bgr, scale)
    return cv.cvtColor(vis_image, cv.COLOR_BGR2RGB)


# Gradio interface
demo = gr.Interface(
    fn=detect_objects,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=gr.Image(type="numpy", label="Detected Objects"),
    title="YOLOX Object Detection (OpenCV + ONNX)",
    description="Upload an image to detect objects using YOLOX ONNX model and OpenCV DNN.",
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()