Lillthorin committed on
Commit
e989ba3
·
verified ·
1 Parent(s): 132ed64

Upload 3 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ intersection.mp4 filter=lfs diff=lfs merge=lfs -text
infer_onnx.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # onnx_infer_decoded_min.py
2
+
3
+ import numpy as np
4
+ import cv2
5
+ import onnxruntime as ort
6
+ import torch
7
+
8
+ # --------- Preprocess (letterbox + ImageNet norm) ----------
9
def letterbox(img_bgr, new_size=640, color=(114, 114, 114)):
    """Resize an image onto a square canvas while keeping its aspect ratio.

    The image is scaled by one common factor so that it fits inside a
    ``new_size`` x ``new_size`` square, then padded with ``color`` on the
    remaining sides so the content ends up centred.

    Args:
        img_bgr: Image array whose first two dimensions are (height, width).
        new_size: Side length of the square output in pixels.
        color: Border value handed to ``cv2.copyMakeBorder``.

    Returns:
        A tuple ``(padded_image, scale, (left, top))`` where ``scale`` is the
        resize factor and ``(left, top)`` is the padding added before the
        content on the x and y axes.
    """
    h, w = img_bgr.shape[:2]
    scale = min(new_size / h, new_size / w)

    # Clamp to one pixel: for extreme aspect ratios the short side can round
    # down to 0, and cv2.resize cannot produce a zero-sized image.
    nh = max(1, int(round(h * scale)))
    nw = max(1, int(round(w * scale)))
    im_resized = cv2.resize(img_bgr, (nw, nh), interpolation=cv2.INTER_LINEAR)

    # Split the leftover space so that any odd pixel goes to bottom/right.
    top = (new_size - nh) // 2
    bottom = new_size - nh - top
    left = (new_size - nw) // 2
    right = new_size - nw - left

    im_padded = cv2.copyMakeBorder(
        im_resized, top, bottom, left, right,
        cv2.BORDER_CONSTANT, value=color,
    )
    return im_padded, scale, (left, top)
23
+
24
def preprocess_bgr_letterbox(img_bgr, img_size):
    """Letterbox a BGR image and turn it into a normalized network input.

    Steps: letterbox to ``img_size``, convert BGR -> RGB, scale to [0, 1],
    apply the per-channel mean/std normalization below, and reorder to a
    batched channel-first array.

    Returns:
        ``(input, scale, padx, pady)`` where ``input`` has layout
        [1, 3, H, W] and the remaining values come from ``letterbox``.
    """
    padded, scale, offsets = letterbox(img_bgr, img_size)
    padx, pady = offsets

    rgb = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB)
    normalized = rgb.astype(np.float32) / 255.0

    # Per-channel normalization constants (RGB order).
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    normalized = (normalized - mean) / std

    # HWC -> CHW, then add the leading batch axis.
    batch = normalized.transpose(2, 0, 1)[np.newaxis]
    return batch, scale, padx, pady
32
+
33
def preprocess_bgr_resize(img_bgr, img_size):
    """Plain resize to (img_size, img_size) without any padding.

    Applies the same BGR -> RGB conversion, [0, 1] scaling and per-channel
    mean/std normalization as the letterbox variant.

    Returns:
        The network input with layout [1, 3, H, W]. No scale or padding is
        returned because none is applied here.
    """
    # Per-channel normalization constants (RGB order).
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    square = cv2.resize(
        img_bgr, (img_size, img_size), interpolation=cv2.INTER_LINEAR
    )
    rgb01 = cv2.cvtColor(square, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

    # HWC -> CHW, then add the leading batch axis.
    chw = ((rgb01 - mean) / std).transpose(2, 0, 1)
    return chw[np.newaxis]
45
+
46
+ # --------- Postprocess ----------
47
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    Computed as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that large
    negative inputs do not overflow ``exp`` (the naive form evaluates
    ``exp(-x)`` directly, which overflows to ``inf`` and emits a
    RuntimeWarning before collapsing to 0).

    Args:
        x: Scalar or array of logits.

    Returns:
        Values in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))
49
+
50
def nms_per_class(boxes, scores, iou_th=0.5, topk=None):
    """Greedy non-maximum suppression over a single set of boxes.

    Args:
        boxes: Array of shape (N, 4) in xyxy format.
        scores: Array of shape (N,) with one score per box.
        iou_th: Boxes whose IoU with an already selected box exceeds this
            value are discarded.
        topk: Optional cap on the number of boxes to keep; selection stops
            as soon as this many have been picked.

    Returns:
        int64 array with the indices (into ``boxes``) of the kept boxes, in
        descending score order.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0,), dtype=np.int64)

    left, top, right, bottom = boxes.T
    box_area = (right - left).clip(0) * (bottom - top).clip(0)

    # Candidate indices, best score first.
    remaining = scores.argsort()[::-1]
    selected = []

    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)
        if topk and len(selected) >= topk:
            break

        # Intersection rectangle between the chosen box and every other one.
        overlap_w = (
            np.minimum(right[best], right[rest])
            - np.maximum(left[best], left[rest])
        ).clip(0)
        overlap_h = (
            np.minimum(bottom[best], bottom[rest])
            - np.maximum(top[best], top[rest])
        ).clip(0)
        overlap = overlap_w * overlap_h

        # Small epsilon guards against division by zero for degenerate boxes.
        union = box_area[best] + box_area[rest] - overlap + 1e-6

        # Only candidates that do not overlap too much survive this round.
        remaining = rest[overlap / union <= iou_th]

    return np.array(selected, dtype=np.int64)
82
+
83
+
84
def postprocess(decoded_outs, conf_th=0.25, nms_iou=0.5, max_det=300):
    """Turn decoded network outputs into final detections.

    Args:
        decoded_outs: Mapping with keys "boxes_xyxy", "obj_logits" and
            "cls_logits"; only the first batch element of each is used.
        conf_th: Detections with a score not above this value are dropped.
        nms_iou: IoU threshold passed to the per-class NMS.
        max_det: Maximum number of detections returned; when exceeded the
            highest-scoring ones are kept.

    Returns:
        ``(boxes, scores, classes)`` as float32 (K, 4), float32 (K,) and
        int64 (K,) arrays. All three are empty when nothing passes.
    """

    def _no_detections():
        return (np.zeros((0, 4), np.float32),
                np.zeros((0,), np.float32),
                np.zeros((0,), np.int64))

    all_boxes = decoded_outs["boxes_xyxy"][0]                # [N,4]
    obj_logits = decoded_outs["obj_logits"][0].reshape(-1)   # [N]
    cls_logits = decoded_outs["cls_logits"][0]               # [N,C]

    objectness = 1.0 / (1.0 + np.exp(-obj_logits))           # [N]
    num_classes = cls_logits.shape[-1] if cls_logits.ndim == 2 else 0

    if num_classes > 1:
        # Multi-class: score = objectness * best class probability.
        cls_prob = 1.0 / (1.0 + np.exp(-cls_logits))         # [N,C]
        labels = cls_prob.argmax(axis=1)                     # [N]
        conf = objectness * cls_prob.max(axis=1)
    else:
        # Single class, or class logits missing (unusual): objectness alone
        # is the score and every detection gets class id 0.
        labels = np.zeros_like(objectness, dtype=np.int64)
        conf = objectness

    passed = conf > conf_th
    if not np.any(passed):
        return _no_detections()

    all_boxes, conf, labels = all_boxes[passed], conf[passed], labels[passed]

    # Per-class NMS
    kept_boxes, kept_scores, kept_labels = [], [], []
    for label in np.unique(labels):
        same_class = labels == label
        class_boxes = all_boxes[same_class]
        class_conf = conf[same_class]
        keep = nms_per_class(class_boxes, class_conf, iou_th=nms_iou)
        if keep.size:
            kept_boxes.append(class_boxes[keep])
            kept_scores.append(class_conf[keep])
            kept_labels.append(np.full((keep.size,), int(label), dtype=np.int64))

    if not kept_boxes:
        return _no_detections()

    boxes = np.concatenate(kept_boxes, 0).astype(np.float32)
    scores = np.concatenate(kept_scores, 0).astype(np.float32)
    classes = np.concatenate(kept_labels, 0).astype(np.int64)

    if boxes.shape[0] > max_det:
        best = scores.argsort()[::-1][:max_det]
        boxes, scores, classes = boxes[best], scores[best], classes[best]
    return boxes, scores, classes
139
+
140
+
141
+ # --------- Core API ----------
142
+
143
+ class ONNX_Predict:
144
+ """
145
+ Minimal infer for ONNX-decoded export:
146
+ outputs = ["boxes_xyxy", "obj_logits", "cls_logits"]
147
+
148
+ use_letterbox:
149
+ True -> letterbox + padding (classic YOLO-scaling)
150
+ False -> pure resize (img_size, img_size)
151
+ """
152
+ def __init__(self, onnx_path: str, providers=None, use_letterbox: bool = True):
153
+ if providers is None:
154
+ providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
155
+ self.session = ort.InferenceSession(onnx_path, providers=providers)
156
+ self.input_name = self.session.get_inputs()[0].name
157
+ # map outputs by name to avoid order issues
158
+ self.output_map = {o.name: o.name for o in self.session.get_outputs()}
159
+ self.use_letterbox_default = use_letterbox
160
+
161
+ def infer_image(
162
+ self,
163
+ img_bgr,
164
+ img_size: int = 640,
165
+ conf: float = 0.25,
166
+ iou: float = 0.50,
167
+ max_det: int = 300,
168
+ use_letterbox: bool | None = None,
169
+ ):
170
+ """
171
+ img_bgr: OpenCV BGR-bild (H,W,3)
172
+ use_letterbox:
173
+ None -> använd default från __init__
174
+ True -> letterbox
175
+ False -> ren resize
176
+ """
177
+ if use_letterbox is None:
178
+ use_letterbox = self.use_letterbox_default
179
+
180
+ orig_h, orig_w = img_bgr.shape[:2]
181
+
182
+ # ----- Preprocess -----
183
+ if use_letterbox:
184
+ inp, scale, padx, pady = preprocess_bgr_letterbox(img_bgr, img_size)
185
+ else:
186
+ inp = preprocess_bgr_resize(img_bgr, img_size)
187
+ scale = None
188
+ padx = 0
189
+ pady = 0
190
+
191
+ # ----- ONNX run -----
192
+ outs = self.session.run(
193
+ [
194
+ self.output_map.get("boxes_xyxy"),
195
+ self.output_map.get("obj_logits"),
196
+ self.output_map.get("cls_logits"),
197
+ ],
198
+ {self.input_name: inp}
199
+ )
200
+ boxes_net, obj_logits, cls_logits = outs
201
+
202
+ # ----- Postprocess (NMS etc i nätverkskoordinater) -----
203
+ boxes, scores, classes = postprocess(
204
+ {
205
+ "boxes_xyxy": boxes_net,
206
+ "obj_logits": obj_logits,
207
+ "cls_logits": cls_logits,
208
+ },
209
+ conf_th=conf,
210
+ nms_iou=iou,
211
+ max_det=max_det,
212
+ )
213
+
214
+ # ----- Back-map till originalbild -----
215
+ if boxes.shape[0]:
216
+ if use_letterbox:
217
+ # samma som i benchmark-scriptet
218
+ boxes[:, [0, 2]] -= padx
219
+ boxes[:, [1, 3]] -= pady
220
+ boxes /= max(scale, 1e-6)
221
+ else:
222
+ # ren resize: img0 -> nät-inp via warp till (img_size, img_size)
223
+ # x_net = x_orig * (img_size / orig_w) => x_orig = x_net * (orig_w / img_size)
224
+ # y_net = y_orig * (img_size / orig_h) => y_orig = y_net * (orig_h / img_size)
225
+ sx = orig_w / float(img_size)
226
+ sy = orig_h / float(img_size)
227
+ boxes[:, [0, 2]] *= sx
228
+ boxes[:, [1, 3]] *= sy
229
+
230
+ boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, orig_w - 1)
231
+ boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, orig_h - 1)
232
+
233
+ return boxes, scores, classes
234
+
235
+
intersection.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18159206076bd7f7862b45932e459bcd4148dd8c93ebf7a876694bf86fbf1595
3
+ size 18136586
onnx_intersection_showcase.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from tools.infer_onnx import ONNX_Predict
2
+ import cv2
3
+ import time
4
+ import collections
5
+
6
# Demo script: runs the ONNX detector on every frame of a video, draws the
# detections and shows a smoothed frames-per-second figure.
onnx_model = "edge_s_640_resize.onnx"

providers = ["CPUExecutionProvider"]  # or ["CUDAExecutionProvider"]
# Initialize the ONNX predict class (pure resize, no letterbox).
predict = ONNX_Predict(onnx_model, providers=providers, use_letterbox=False)

# Settings for the ONNX_Predict class
img_size = 640  # Image size for the ONNX model
conf = 0.5
iou = 0.3
max_det = 300

cap = cv2.VideoCapture("intersection.mp4")

max_w = 800  # Display width

# FPS smoothing over the most recent frames
fps_history = collections.deque(maxlen=30)

try:
    while True:
        ret, frame = cap.read()
        # Started after the read, so the reported total excludes decoding.
        start_time = time.time()
        if not ret:
            break

        # Downscale wide frames to the display width, keeping aspect ratio.
        h, w = frame.shape[:2]
        if w > max_w:
            scale = max_w / w
            frame = cv2.resize(
                frame, (max_w, int(h * scale)), interpolation=cv2.INTER_LINEAR
            )

        # Measure only inference time
        start_time_0 = time.time()

        boxes, scores, classes = predict.infer_image(
            frame, img_size=img_size, conf=conf, iou=iou, max_det=max_det
        )

        end_time_0 = time.time()

        # Draw
        for (x1, y1, x2, y2), score, cls in zip(boxes, scores, classes):
            x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{int(cls)} {score:.2f}", (x1, max(0, y1 - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        dt = time.time() - start_time
        fps = 1.0 / dt if dt > 0 else 0.0
        fps_history.append(fps)
        fps_avg = sum(fps_history) / len(fps_history)

        cv2.putText(frame, f"edge_s 640x640 CPU FPS: {fps_avg:5.1f}",
                    (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        cv2.imshow('test', frame)
        key = cv2.waitKey(1)
        print(f'Total: {fps_avg:5.1f} Inference:{(end_time_0-start_time_0)*1000:3.1f}ms')
        if key == 13:  # Enter
            break
finally:
    # Always free the video handle and close the preview window, including
    # when inference raises or the user interrupts the loop.
    cap.release()
    cv2.destroyAllWindows()
67
+
68
+