import torch
import onnxruntime as ort
from huggingface_hub import hf_hub_download
from transformers import PreTrainedModel

from .configuration_dfine import DFineConfig


class DFineModel(PreTrainedModel):
    config_class = DFineConfig

    def __init__(self, config):
        super().__init__(config)
        # Download the exported ONNX weights from the Hub and open a CPU inference session.
        model_path = hf_hub_download(
            repo_id="Laudando-Associates-LLC/d-fine-small",
            filename="model.onnx",
        )
        self.session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])

    def forward(self, images, orig_target_sizes, ratio, pad_w, pad_h, conf_threshold=0.5):
        # Run the ONNX graph; the export expects "images" and "orig_target_sizes" as inputs.
        output = self.session.run(
            output_names=None,
            input_feed={
                "images": images.numpy(),
                "orig_target_sizes": orig_target_sizes.numpy(),
            },
        )
        labels, boxes, scores = output

        # Convert the ONNX outputs back to torch tensors
        labels = torch.tensor(labels)
        boxes = torch.tensor(boxes)
        scores = torch.tensor(scores)

        # Filter detections by confidence, per image in the batch
        results = []
        for i in range(scores.shape[0]):
            keep = scores[i] > conf_threshold
            labels_kept = labels[i][keep]
            boxes_kept = boxes[i][keep]
            scores_kept = scores[i][keep]

            # Undo the letterbox padding and resize ratio so boxes land in
            # the original image's pixel coordinates
            boxes_scaled = boxes_kept.clone()
            boxes_scaled[:, 0::2] -= pad_w[i]  # x1, x2
            boxes_scaled[:, 1::2] -= pad_h[i]  # y1, y2
            boxes_scaled /= ratio[i]

            results.append({
                "labels": labels_kept,
                "boxes": boxes_scaled,
                "scores": scores_kept,
            })

        return results
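

# Minimal usage sketch (not part of the model definition). It assumes a
# letterbox-style preprocessing step that produces `ratio`, `pad_w`, and `pad_h`
# per image, a 640x640 network input, that the ONNX export takes unnormalized
# 0-1 float images, and that `DFineConfig()` can be built with defaults; the
# file path "example.jpg" is hypothetical.
if __name__ == "__main__":
    from PIL import Image
    import torchvision.transforms.functional as F

    model = DFineModel(DFineConfig())

    image = Image.open("example.jpg").convert("RGB")
    orig_w, orig_h = image.size

    # Letterbox: scale the longer side to 640, then pad to a square canvas.
    target = 640
    ratio = target / max(orig_w, orig_h)
    new_w, new_h = int(orig_w * ratio), int(orig_h * ratio)
    pad_w, pad_h = (target - new_w) // 2, (target - new_h) // 2

    canvas = Image.new("RGB", (target, target))
    canvas.paste(F.resize(image, [new_h, new_w]), (pad_w, pad_h))

    images = F.to_tensor(canvas).unsqueeze(0)  # (1, 3, 640, 640), float32 in [0, 1]
    # Boxes come back in letterboxed coordinates, so the size fed to the graph
    # is the padded canvas; forward() then removes the padding and ratio itself.
    orig_sizes = torch.tensor([[target, target]], dtype=torch.int64)

    results = model(
        images,
        orig_sizes,
        ratio=[ratio],
        pad_w=[pad_w],
        pad_h=[pad_h],
        conf_threshold=0.5,
    )
    print(results[0]["labels"], results[0]["boxes"], results[0]["scores"])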