Upload 3 files
Browse files- .gitattributes +1 -0
- infer_onnx.py +235 -0
- intersection.mp4 +3 -0
- onnx_intersection_showcase.py +68 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
intersection.mp4 filter=lfs diff=lfs merge=lfs -text
|
infer_onnx.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# infer_onnx.py - minimal inference helpers for ONNX models exported with decoded outputs
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import cv2
|
| 5 |
+
import onnxruntime as ort
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
# --------- Preprocess (letterbox + ImageNet norm) ----------
|
| 9 |
+
def letterbox(img_bgr, new_size=640, color=(114, 114, 114)):
    """Fit an image into a square canvas while keeping its aspect ratio.

    The image is scaled uniformly so that it fits inside a
    ``new_size`` x ``new_size`` square, then padded with a constant-colour
    border that is split as evenly as possible between opposite sides.

    Args:
        img_bgr: Image array whose first two dimensions are (height, width).
        new_size: Side length of the square output, in pixels.
        color: Border fill value handed to ``cv2.copyMakeBorder``.

    Returns:
        A tuple ``(padded, scale, (left, top))`` where ``padded`` is the
        letterboxed image, ``scale`` is the uniform resize factor and
        ``(left, top)`` is the padding added before the image content on the
        x and y axes respectively.
    """
    h, w = img_bgr.shape[:2]
    scale = min(new_size / h, new_size / w)

    # Clamp to at least one pixel: for very elongated inputs the short side
    # would otherwise round to 0, which cv2.resize rejects.
    nh = max(1, int(round(h * scale)))
    nw = max(1, int(round(w * scale)))
    im_resized = cv2.resize(img_bgr, (nw, nh), interpolation=cv2.INTER_LINEAR)

    # Any odd leftover pixel goes to the bottom/right border.
    top = (new_size - nh) // 2
    bottom = new_size - nh - top
    left = (new_size - nw) // 2
    right = new_size - nw - left

    im_padded = cv2.copyMakeBorder(
        im_resized, top, bottom, left, right,
        cv2.BORDER_CONSTANT, value=color,
    )
    return im_padded, scale, (left, top)
|
| 23 |
+
|
| 24 |
+
def preprocess_bgr_letterbox(img_bgr, img_size):
    """Letterbox a BGR image and turn it into a normalised network input.

    Args:
        img_bgr: OpenCV BGR image.
        img_size: Side length of the square network input.

    Returns:
        A tuple ``(img, scale, padx, pady)``: ``img`` is a float32 array laid
        out as [1, 3, H, W]; ``scale``, ``padx`` and ``pady`` are the resize
        factor and left/top padding reported by ``letterbox``.
    """
    padded, scale, offsets = letterbox(img_bgr, img_size)
    padx, pady = offsets

    # BGR -> RGB, then bring pixel values into [0, 1].
    rgb = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB)
    unit = rgb.astype(np.float32) / 255.0

    # Per-channel ImageNet mean / std normalisation.
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    normalised = (unit - channel_mean) / channel_std

    # HWC -> CHW and prepend the batch axis: [1, 3, H, W].
    batch = np.transpose(normalised, (2, 0, 1))[None]
    return batch, scale, padx, pady
|
| 32 |
+
|
| 33 |
+
def preprocess_bgr_resize(img_bgr, img_size):
    """Resize a BGR image straight to a square network input, without padding.

    The aspect ratio is not preserved. The same ImageNet normalisation as in
    the letterbox variant is applied.

    Args:
        img_bgr: OpenCV BGR image.
        img_size: Side length of the square network input.

    Returns:
        A float32 array laid out as [1, 3, H, W]. No scale or padding is
        returned because none is needed for a plain resize.
    """
    target = (img_size, img_size)
    stretched = cv2.resize(img_bgr, target, interpolation=cv2.INTER_LINEAR)

    # BGR -> RGB, then bring pixel values into [0, 1].
    rgb = cv2.cvtColor(stretched, cv2.COLOR_BGR2RGB)
    unit = rgb.astype(np.float32) / 255.0

    # Per-channel ImageNet mean / std normalisation.
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    normalised = (unit - channel_mean) / channel_std

    # HWC -> CHW and prepend the batch axis: [1, 3, H, W].
    return np.transpose(normalised, (2, 0, 1))[None]
|
| 45 |
+
|
| 46 |
+
# --------- Postprocess ----------
|
| 47 |
+
def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

    The value is computed from ``exp(-|x|)``, whose argument is never
    positive, so the exponential cannot overflow. The naive form emits a
    RuntimeWarning for large negative inputs.

    Args:
        x: Scalar or array-like of logits.

    Returns:
        Values in [0, 1] with the same shape as ``x``; a scalar input yields
        a scalar.
    """
    x = np.asarray(x)
    z = np.exp(-np.abs(x))  # always in (0, 1]
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x). Both are the same
    # function, written so that only exp of a non-positive number is needed.
    out = np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
    # Indexing with () unwraps a 0-d result to a scalar and is a no-op view
    # for real arrays.
    return out[()]
|
| 49 |
+
|
| 50 |
+
def nms_per_class(boxes, scores, iou_th=0.5, topk=None):
    """Greedy non-maximum suppression over a single set of boxes.

    Boxes are visited in descending score order. Each visited box is kept,
    and every remaining box whose IoU with it exceeds ``iou_th`` is dropped.

    Args:
        boxes: Array of shape [N, 4] in (x1, y1, x2, y2) order.
        scores: Array of shape [N] with one score per box.
        iou_th: Boxes with IoU strictly greater than this are suppressed.
        topk: If truthy, stop as soon as this many boxes have been kept.

    Returns:
        int64 array of indices into ``boxes`` for the kept boxes, ordered by
        descending score.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0,), dtype=np.int64)

    left, top, right, bottom = boxes.T
    # Degenerate (inverted) boxes get zero area rather than a negative one.
    areas = (right - left).clip(0) * (bottom - top).clip(0)

    remaining = scores.argsort()[::-1]
    kept = []

    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)
        if topk and len(kept) >= topk:
            break

        # Intersection rectangle between the best box and every other one.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])
        inter = (inter_right - inter_left).clip(0) * (inter_bottom - inter_top).clip(0)

        # The small epsilon guards against division by zero for empty boxes.
        union = areas[best] + areas[rest] - inter + 1e-6
        remaining = rest[inter / union <= iou_th]

    return np.array(kept, dtype=np.int64)
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def _empty_detections():
    """Return the (boxes, scores, classes) triple used when nothing is detected."""
    return (
        np.zeros((0, 4), np.float32),
        np.zeros((0,), np.float32),
        np.zeros((0,), np.int64),
    )


def postprocess(decoded_outs, conf_th=0.25, nms_iou=0.5, max_det=300):
    """Turn decoded network outputs into final detections.

    Steps: convert logits to scores, drop candidates at or below ``conf_th``,
    run NMS separately for each class, and cap the result at ``max_det``.

    Args:
        decoded_outs: Mapping with batched arrays under the keys
            ``"boxes_xyxy"`` ([1, N, 4]), ``"obj_logits"`` and
            ``"cls_logits"`` ([1, N, C]). Only batch element 0 is read.
        conf_th: Candidates must score strictly above this to survive.
        nms_iou: IoU threshold forwarded to ``nms_per_class``.
        max_det: Maximum number of detections returned; when exceeded, the
            highest-scoring ones are kept.

    Returns:
        ``(boxes, scores, classes)`` as float32 [M, 4], float32 [M] and
        int64 [M] arrays. Boxes stay in the coordinate system of the input.
    """
    boxes = decoded_outs["boxes_xyxy"][0]                 # [N, 4]
    obj_log = decoded_outs["obj_logits"][0].reshape(-1)   # [N]
    cls_log = decoded_outs["cls_logits"][0]               # [N, C]

    obj = sigmoid(obj_log)                                # [N]
    num_classes = cls_log.shape[-1] if cls_log.ndim == 2 else 0

    if num_classes > 1:
        cls_prob = sigmoid(cls_log)                       # [N, C]
        cls_ids = cls_prob.argmax(axis=1)                 # [N]
        scores = obj * cls_prob.max(axis=1)               # [N]
    else:
        # Single-class model, or class logits missing (unusual): the
        # objectness alone is the score and every box is class 0.
        cls_ids = np.zeros_like(obj, dtype=np.int64)
        scores = obj

    confident = scores > conf_th
    if not np.any(confident):
        return _empty_detections()

    boxes = boxes[confident]
    scores = scores[confident]
    cls_ids = cls_ids[confident]

    # Per-class NMS: boxes of different classes never suppress each other.
    final_b, final_s, final_c = [], [], []
    for c in np.unique(cls_ids):
        in_class = cls_ids == c
        class_boxes = boxes[in_class]
        class_scores = scores[in_class]
        keep = nms_per_class(class_boxes, class_scores, iou_th=nms_iou)
        if keep.size:
            final_b.append(class_boxes[keep])
            final_s.append(class_scores[keep])
            final_c.append(np.full((keep.size,), int(c), dtype=np.int64))

    if not final_b:
        return _empty_detections()

    boxes = np.concatenate(final_b, 0).astype(np.float32)
    scores = np.concatenate(final_s, 0).astype(np.float32)
    classes = np.concatenate(final_c, 0).astype(np.int64)

    if boxes.shape[0] > max_det:
        top = scores.argsort()[::-1][:max_det]
        boxes, scores, classes = boxes[top], scores[top], classes[top]
    return boxes, scores, classes
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
# --------- Core API ----------
|
| 142 |
+
|
| 143 |
+
class ONNX_Predict:
    """Minimal inference wrapper for an ONNX model exported with decoded outputs.

    The model must expose the outputs ``"boxes_xyxy"``, ``"obj_logits"`` and
    ``"cls_logits"``.

    use_letterbox:
        True  -> letterbox + padding (classic YOLO-style scaling)
        False -> pure resize to (img_size, img_size)
    """

    # Output names requested from the session, in the order they are unpacked.
    _OUTPUT_NAMES = ("boxes_xyxy", "obj_logits", "cls_logits")

    def __init__(self, onnx_path: str, providers=None, use_letterbox: bool = True):
        """Create the inference session.

        Args:
            onnx_path: Path to the ONNX model file.
            providers: Execution providers for onnxruntime. When None, CUDA
                is preferred with CPU as fallback, restricted to what the
                installed onnxruntime build actually offers.
            use_letterbox: Default preprocessing mode used by ``infer_image``.

        Raises:
            ValueError: If the model lacks any of the required outputs.
        """
        if providers is None:
            # Only request providers this build has, so a CPU-only install
            # does not ask for CUDA.
            available = set(ort.get_available_providers())
            preferred = ("CUDAExecutionProvider", "CPUExecutionProvider")
            providers = [p for p in preferred if p in available]
        self.session = ort.InferenceSession(onnx_path, providers=providers)
        self.input_name = self.session.get_inputs()[0].name
        # Map outputs by name to avoid depending on their order in the model.
        self.output_map = {o.name: o.name for o in self.session.get_outputs()}
        self.use_letterbox_default = use_letterbox

        # Fail early with a readable message instead of passing None as an
        # output name to session.run later.
        missing = [n for n in self._OUTPUT_NAMES if n not in self.output_map]
        if missing:
            raise ValueError(
                f"ONNX model {onnx_path!r} is missing required output(s) "
                f"{missing}; available outputs: {sorted(self.output_map)}"
            )

    def infer_image(
        self,
        img_bgr,
        img_size: int = 640,
        conf: float = 0.25,
        iou: float = 0.50,
        max_det: int = 300,
        use_letterbox: bool | None = None,
    ):
        """Run detection on one image.

        Args:
            img_bgr: OpenCV BGR image (H, W, 3).
            img_size: Side length of the square network input.
            conf: Score threshold forwarded to ``postprocess``.
            iou: NMS IoU threshold forwarded to ``postprocess``.
            max_det: Maximum number of detections returned.
            use_letterbox:
                None  -> use the default given to ``__init__``
                True  -> letterbox
                False -> pure resize

        Returns:
            ``(boxes, scores, classes)`` where ``boxes`` is [M, 4] xyxy in
            pixel coordinates of the original image, clipped to its bounds.

        Raises:
            ValueError: If ``img_bgr`` is None or empty.
        """
        if img_bgr is None or img_bgr.size == 0:
            raise ValueError("img_bgr must be a non-empty image array")

        if use_letterbox is None:
            use_letterbox = self.use_letterbox_default

        orig_h, orig_w = img_bgr.shape[:2]

        # ----- Preprocess -----
        if use_letterbox:
            inp, scale, padx, pady = preprocess_bgr_letterbox(img_bgr, img_size)
        else:
            inp = preprocess_bgr_resize(img_bgr, img_size)

        # ----- ONNX run -----
        boxes_net, obj_logits, cls_logits = self.session.run(
            [self.output_map[name] for name in self._OUTPUT_NAMES],
            {self.input_name: inp},
        )

        # ----- Postprocess (NMS etc. in network coordinates) -----
        boxes, scores, classes = postprocess(
            {
                "boxes_xyxy": boxes_net,
                "obj_logits": obj_logits,
                "cls_logits": cls_logits,
            },
            conf_th=conf,
            nms_iou=iou,
            max_det=max_det,
        )

        # ----- Map back to the original image -----
        if boxes.shape[0]:
            if use_letterbox:
                # Undo the padding first, then the uniform scale.
                boxes[:, [0, 2]] -= padx
                boxes[:, [1, 3]] -= pady
                boxes /= max(scale, 1e-6)
            else:
                # Pure resize stretches each axis independently:
                #   x_net = x_orig * (img_size / orig_w)
                #   y_net = y_orig * (img_size / orig_h)
                # so the inverse uses one factor per axis.
                sx = orig_w / float(img_size)
                sy = orig_h / float(img_size)
                boxes[:, [0, 2]] *= sx
                boxes[:, [1, 3]] *= sy

            boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, orig_w - 1)
            boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, orig_h - 1)

        return boxes, scores, classes
|
| 234 |
+
|
| 235 |
+
|
intersection.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18159206076bd7f7862b45932e459bcd4148dd8c93ebf7a876694bf86fbf1595
|
| 3 |
+
size 18136586
|
onnx_intersection_showcase.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from tools.infer_onnx import ONNX_Predict
|
| 2 |
+
import cv2
|
| 3 |
+
import time
|
| 4 |
+
import collections
|
| 5 |
+
|
| 6 |
+
# Showcase: run the ONNX detector on a video file and display the detections
# together with a smoothed FPS counter. Press Enter in the window to quit.

onnx_model = "edge_s_640_resize.onnx"

providers = ["CPUExecutionProvider"]  # or ["CUDAExecutionProvider"]
predict = ONNX_Predict(onnx_model, providers=providers, use_letterbox=False)

# Settings forwarded to ONNX_Predict.infer_image
img_size = 640  # network input size
conf = 0.5
iou = 0.3
max_det = 300

video_path = "intersection.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise SystemExit(f"Could not open video source: {video_path}")

max_w = 800  # frames wider than this are downscaled before inference/display

# Label for the overlay, derived from the configured providers.
device_label = "CUDA" if "CUDAExecutionProvider" in providers else "CPU"

# FPS smoothing: keep the most recent frame durations and report
# frames / total time. Averaging instantaneous FPS values would overweight
# fast frames.
frame_times = collections.deque(maxlen=30)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # perf_counter is monotonic and high-resolution; time.time() can jump.
        start_time = time.perf_counter()

        h, w = frame.shape[:2]
        if w > max_w:
            scale = max_w / w
            frame = cv2.resize(
                frame, (max_w, max(1, int(h * scale))),
                interpolation=cv2.INTER_LINEAR,
            )

        # Measure only inference time
        start_time_0 = time.perf_counter()

        boxes, scores, classes = predict.infer_image(
            frame, img_size=img_size, conf=conf, iou=iou, max_det=max_det
        )

        end_time_0 = time.perf_counter()

        # Draw
        for (x1, y1, x2, y2), score, cls in zip(boxes, scores, classes):
            x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{int(cls)} {score:.2f}", (x1, max(0, y1 - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        frame_times.append(time.perf_counter() - start_time)
        elapsed = sum(frame_times)
        fps_avg = len(frame_times) / elapsed if elapsed > 0 else 0.0

        cv2.putText(
            frame,
            f"edge_s {img_size}x{img_size} {device_label} FPS: {fps_avg:5.1f}",
            (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2,
        )

        cv2.imshow('test', frame)
        # Mask to the low byte: some backends set extra high bits on key codes.
        key = cv2.waitKey(1) & 0xFF
        print(f'Total: {fps_avg:5.1f} Inference:{(end_time_0-start_time_0)*1000:3.1f}ms')
        if key == 13:  # Enter
            break
finally:
    # Always free the capture handle and close the window, even on error.
    cap.release()
    cv2.destroyAllWindows()
|
| 67 |
+
|
| 68 |
+
|