sohamnk committed · verified
Commit 87cf5bb · 1 Parent(s): b096426

Update app.py

Files changed (1):
  1. app.py +113 -132
app.py CHANGED
@@ -1,11 +1,11 @@
 # --------------------------------------------------------------------------
-# UNIFIED AI SERVICE FOR LOST & FOUND V2 (with Hierarchical Filtering)
+# UNIFIED AI SERVICE V3 (DINOv2 Integration)
 # --------------------------------------------------------------------------
-# This service now performs intelligent filtering before scoring.
-# 1. Filters by object name (canonicalLabel).
-# 2. Filters by date (within 1 week).
-# 3. Filters by location hierarchy.
-# 4. Scores only the most relevant items.
+# This service uses DINOv2 for image embeddings and BGE for text embeddings.
+# It performs intelligent filtering before scoring.
+# 1. Filters by object name, date, and location hierarchy.
+# 2. Extracts features using BGE (text) and DINOv2 (image).
+# 3. Scores items based on a hybrid of text and image similarity.
 # --------------------------------------------------------------------------
 
 import sys
@@ -16,15 +16,16 @@ import requests
 import cv2
 import traceback
 from io import BytesIO
-from skimage import feature
 from flask import Flask, request, jsonify
 from PIL import Image
 from datetime import datetime, timedelta
 
 # --- Import Deep Learning Libraries ---
 import torch
-from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection, AutoTokenizer, AutoModel
+from transformers import AutoImageProcessor, AutoModel, AutoTokenizer
 from segment_anything import SamPredictor, sam_model_registry
+# Grounding DINO is still needed for segmentation
+from transformers import AutoProcessor as AutoGndProcessor, AutoModelForZeroShotObjectDetection
 
 # ==========================================================================
 # --- CONFIGURATION & INITIALIZATION ---
@@ -33,39 +34,60 @@ from segment_anything import SamPredictor, sam_model_registry
 app = Flask(__name__)
 
 # --- Scoring and Weighting Configuration ---
-TEXT_FIELD_WEIGHTS = { "brand": 1.0, "material": 1.0, "markings": 1.0, "colors": 1.0, "size": 1.0 }
-TEXT_FIELDS_TO_EMBED = ["brand", "material", "markings"]
-SCORE_WEIGHTS = { "text_score": 0.5, "image_score": 0.5 }
-FINAL_SCORE_THRESHOLD = 0.55
+TEXT_FIELDS_TO_EMBED = ["brand", "material", "size", "colors"]
+SCORE_WEIGHTS = { "text_score": 0.4, "image_score": 0.6 } # Give image score more weight
+FINAL_SCORE_THRESHOLD = 0.5
 
 # --- Model Loading ---
 print("="*50)
-print("🚀 Initializing Unified AI Service...")
+print("🚀 Initializing AI Service with DINOv2...")
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print(f"🧠 Using device: {device}")
+
+# 1. Load BGE Text Model
+print("...Loading BGE text model (BAAI/bge-small-en-v1.5)...")
 bge_model_id = "BAAI/bge-small-en-v1.5"
 tokenizer_text = AutoTokenizer.from_pretrained(bge_model_id)
 model_text = AutoModel.from_pretrained(bge_model_id).to(device)
+print("✅ BGE model loaded.")
+
+# 2. Load DINOv2 Image Model
+print("...Loading DINOv2 model (facebook/dinov2-base)...")
+dinov2_model_id = "facebook/dinov2-base"
+processor_dinov2 = AutoImageProcessor.from_pretrained(dinov2_model_id)
+model_dinov2 = AutoModel.from_pretrained(dinov2_model_id).to(device)
+print("✅ DINOv2 model loaded.")
+
+# 3. Load Grounding DINO Model (for segmentation)
+print("...Loading Grounding DINO model for segmentation...")
 gnd_model_id = "IDEA-Research/grounding-dino-base"
-processor_gnd = AutoProcessor.from_pretrained(gnd_model_id)
+processor_gnd = AutoGndProcessor.from_pretrained(gnd_model_id)
 model_gnd = AutoModelForZeroShotObjectDetection.from_pretrained(gnd_model_id).to(device)
+print("✅ Grounding DINO model loaded.")
+
+# 4. Load Segment Anything (SAM) Model
+print("...Loading SAM model...")
 sam_checkpoint = "sam_vit_b_01ec64.pth"
 sam_model = sam_model_registry["vit_b"](checkpoint=sam_checkpoint).to(device)
 sam_predictor = SamPredictor(sam_model)
-print("✅ All models loaded successfully.")
+print("✅ SAM model loaded.")
 print("="*50)
 
-
 # ==========================================================================
 # --- HELPER FUNCTIONS ---
 # ==========================================================================
+
 def get_text_embedding(text: str) -> list:
-    # --- ⬇️ THIS IS THE FIX ⬇️ ---
-    # If the input is a list (like the 'colors' field), join it into a string.
+    # --- THIS IS THE FIX ---
+    # First, handle the case where text is a list (like the 'colors' field).
     if isinstance(text, list):
+        if not text: # Handle empty list case
+            return None
         text = ", ".join(text)
 
-    if not text or not text.strip(): return None
+    # Now, perform the check on the (potentially converted) string.
+    if not text or not text.strip():
+        return None
 
     instruction = "Represent this sentence for searching relevant passages: "
     inputs = tokenizer_text(instruction + text, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)
@@ -75,81 +97,56 @@ def get_text_embedding(text: str) -> list:
     embedding = torch.nn.functional.normalize(embedding, p=2, dim=1)
     return embedding.cpu().numpy()[0].tolist()
 
+def get_image_embedding(image: Image.Image) -> list:
+    """Generates a DINOv2 embedding for a given image."""
+    inputs = processor_dinov2(images=image, return_tensors="pt").to(device)
+    with torch.no_grad():
+        outputs = model_dinov2(**inputs)
+    # Use the CLS token embedding
+    embedding = outputs.last_hidden_state[:, 0, :]
+    embedding = torch.nn.functional.normalize(embedding, p=2, dim=1)
+    return embedding.cpu().numpy()[0].tolist()
+
 def cosine_similarity(vec1, vec2):
     if vec1 is None or vec2 is None: return 0.0
     vec1, vec2 = np.array(vec1), np.array(vec2)
     return float(np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)))
 
-def calculate_color_similarity(colors1: list, colors2: list) -> float:
-    if not colors1 and not colors2: return 1.0
-    if not colors1 or not colors2: return 0.0
-    set1, set2 = set(c.lower() for c in colors1), set(c.lower() for c in colors2)
-    intersection = len(set1.intersection(set2))
-    union = len(set1.union(set2))
-    return intersection / union if union > 0 else 0.0
-
-def segment_guided_object(image: Image.Image, object_label: str) -> Image.Image:
-    prompt = f"a {object_label}."
+def segment_guided_object(image: Image.Image, object_label: str, text_data: dict) -> Image.Image:
+    """Segments an object using a more descriptive prompt."""
+    desc_parts = [object_label]
+    if text_data.get('brand'): desc_parts.append(f"brand {text_data['brand']}")
+    if text_data.get('colors'): desc_parts.append(", ".join(text_data['colors']))
+    prompt = " ".join(desc_parts)
+
+    print(f" [Segment] Using prompt: '{prompt}'")
     image_rgb = image.convert("RGB")
     image_np = np.array(image_rgb)
     h, w = image_np.shape[:2]
+
     inputs = processor_gnd(images=image_rgb, text=prompt, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model_gnd(**inputs)
+
     results = processor_gnd.post_process_grounded_object_detection(
         outputs, inputs.input_ids, threshold=0.4, text_threshold=0.4, target_sizes=[(h, w)]
     )
+
     if not results or len(results[0]['boxes']) == 0:
-        return image
+        print(f" [Segment] ⚠️ Warning: Could not detect object. Using full image.")
+        return image_rgb
+
     sam_predictor.set_image(image_np)
     box = results[0]['boxes'][0].cpu().numpy().astype(int)
     masks, _, _ = sam_predictor.predict(box=box, multimask_output=False)
+
     mask = masks[0]
-    object_rgba = np.zeros((h, w, 4), dtype=np.uint8)
-    object_rgba[:, :, :3] = image_np
-    object_rgba[:, :, 3] = mask * 255
-    return Image.fromarray(object_rgba, 'RGBA')
-
-def extract_visual_features(segmented_image_rgba: Image.Image) -> dict:
-    image_np = np.array(segmented_image_rgba)
-    bgr_image = cv2.cvtColor(image_np[:, :, :3], cv2.COLOR_RGB2BGR)
-    mask = image_np[:, :, 3]
-    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    shape_features = np.zeros(7)
-    if contours:
-        largest_contour = max(contours, key=cv2.contourArea)
-        moments = cv2.moments(largest_contour)
-        if moments['m00'] != 0:
-            hu_moments = cv2.HuMoments(moments).flatten()
-            shape_features = -np.sign(hu_moments) * np.log10(np.abs(hu_moments) + 1e-7)
-    color_hist = cv2.calcHist([bgr_image], [0, 1, 2], mask, [8, 8, 8], [0, 256, 0, 256, 0, 256])
-    cv2.normalize(color_hist, color_hist)
-    gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
-    lbp = feature.local_binary_pattern(gray_image, P=24, R=3, method="uniform")
-    (texture_hist, _) = np.histogram(lbp[mask > 0], bins=np.arange(0, 27), range=(0, 26))
-    texture_hist = texture_hist.astype("float")
-    texture_hist /= (texture_hist.sum() + 1e-6)
-    return {
-        "shape_features": shape_features.tolist(),
-        "color_features": color_hist.flatten().tolist(),
-        "texture_features": texture_hist.tolist()
-    }
-
-def calculate_dynamic_weights(all_shape_scores, all_color_scores, stability_factor=0.4):
-    shape_scores, color_scores = np.array(all_shape_scores), np.array(all_color_scores)
-    def get_iqr(scores):
-        if len(scores) < 2: return 0
-        q3, q1 = np.percentile(scores, [75, 25])
-        return q3 - q1
-    shape_dispersion = get_iqr(shape_scores)
-    color_dispersion = get_iqr(color_scores)
-    inv_shape_disp = 1 / (shape_dispersion + stability_factor)
-    inv_color_disp = 1 / (color_dispersion + stability_factor)
-    total_inv_disp = inv_shape_disp + inv_color_disp
-    remaining_weight = 0.8
-    shape_weight = remaining_weight * (inv_shape_disp / total_inv_disp) if total_inv_disp > 0 else remaining_weight / 2
-    color_weight = remaining_weight * (inv_color_disp / total_inv_disp) if total_inv_disp > 0 else remaining_weight / 2
-    return {"shape": shape_weight, "color": color_weight, "texture": 0.2}
+    background = np.ones_like(image_np, dtype=np.uint8) * 255
+    foreground = cv2.bitwise_and(image_np, image_np, mask=mask.astype(np.uint8))
+    background = cv2.bitwise_and(background, background, mask=~mask.astype(np.uint8))
+    segmented_np = cv2.add(foreground, background)
+
+    return Image.fromarray(segmented_np, 'RGB')
 
 # ==========================================================================
 # --- FLASK ENDPOINTS ---
@@ -157,34 +154,45 @@ def calculate_dynamic_weights(all_shape_scores, all_color_scores, stability_fact
 
 @app.route('/', methods=['GET'])
 def health_check():
-    return jsonify({"status": "Unified AI Service is running"}), 200
+    return jsonify({"status": "Unified AI Service (DINOv2) is running"}), 200
 
 @app.route('/process', methods=['POST'])
 def process_item():
     try:
         data = request.json
+        print(f"\n[PROCESS] Received request for: {data.get('objectName')}")
+
         response = {
            "canonicalLabel": data.get('objectName', '').lower().strip(),
            "brand_embedding": get_text_embedding(data.get('brand')),
            "material_embedding": get_text_embedding(data.get('material')),
-           "markings_embedding": get_text_embedding(data.get('markings'))
+           "size_embedding": get_text_embedding(data.get('size')),
+           "colors_embedding": get_text_embedding(data.get('colors')),
         }
-        visual_features_list = []
+
+        image_embeddings = []
         if data.get('images'):
+            print(f" [PROCESS] Processing {len(data['images'])} image(s)...")
             for image_url in data['images']:
                 try:
                     img_response = requests.get(image_url, timeout=20)
                     img_response.raise_for_status()
                     image = Image.open(BytesIO(img_response.content))
-                    segmented_image = segment_guided_object(image, data['objectName'])
-                    features = extract_visual_features(segmented_image)
-                    visual_features_list.append(features)
+
+                    segmented_image = segment_guided_object(image, data['objectName'], data)
+                    embedding = get_image_embedding(segmented_image)
+                    image_embeddings.append(embedding)
                 except Exception as e:
                     print(f" - ⚠️ Could not process image {image_url}: {e}")
                     continue
-        response["visual_features"] = visual_features_list
+
+        response["image_embeddings"] = image_embeddings
+        print(f" [PROCESS] ✅ Successfully processed all features.")
         return jsonify(response), 200
+
     except Exception as e:
+        print(f"❌ Error in /process: {e}")
+        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
 
 @app.route('/compare', methods=['POST'])
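
For reference, a minimal client-side sketch of calling the updated /process endpoint. The request field names (objectName, brand, material, size, colors, images) and the response keys are taken from the code above; the host/port and the example values are assumptions.

    import requests

    SERVICE_URL = "http://localhost:7860"  # assumed deployment URL; adjust for your Space

    item = {
        "objectName": "Backpack",          # becomes canonicalLabel and seeds the segmentation prompt
        "brand": "Jansport",               # embedded with BGE
        "material": "nylon",
        "size": "medium",
        "colors": ["black", "red"],        # list input; get_text_embedding joins it into a string
        "images": ["https://example.com/backpack.jpg"],  # each image is segmented, then embedded with DINOv2
    }

    features = requests.post(f"{SERVICE_URL}/process", json=item, timeout=120).json()
    # features contains: canonicalLabel, brand_embedding, material_embedding,
    # size_embedding, colors_embedding, and image_embeddings (one vector per processed image).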
@@ -195,75 +203,48 @@ def compare_items():
         search_list = payload['searchList']
         print(f"\n[COMPARE] Received {len(search_list)} candidates for '{query_item.get('objectName')}'.")
 
-        # --- HIERARCHICAL FILTERING LOGIC ---
-
-        # 1. Object Name Filtering
+        # --- HIERARCHICAL FILTERING ---
         query_label = query_item.get('canonicalLabel')
         if query_label:
             search_list = [item for item in search_list if item.get('canonicalLabel') == query_label]
-        print(f" [FILTER] After object name filter: {len(search_list)} candidates remain.")
+        print(f" [FILTER] After object name: {len(search_list)} candidates remain.")
 
-        # 2. Date Filtering (within 1 week)
         query_date_str = query_item.get('dateLost') or query_item.get('dateFound')
         query_date = datetime.fromisoformat(query_date_str.replace('Z', '+00:00'))
         one_week = timedelta(days=7)
-
-        def is_within_week(item):
-            item_date_str = item.get('dateFound') or item.get('dateLost')
-            if not item_date_str: return False
-            item_date = datetime.fromisoformat(item_date_str.replace('Z', '+00:00'))
-            return abs(query_date - item_date) <= one_week
+        search_list = [item for item in search_list if abs(query_date - datetime.fromisoformat((item.get('dateFound') or item.get('dateLost')).replace('Z', '+00:00'))) <= one_week]
+        print(f" [FILTER] After date: {len(search_list)} candidates remain.")
 
-        search_list = [item for item in search_list if is_within_week(item)]
-        print(f" [FILTER] After date filter (1 week): {len(search_list)} candidates remain.")
-
-        # 3. Location Filtering
         query_location = query_item.get('locationLost') or query_item.get('locationFound')
-
         if query_location and query_location != "Campus":
-            filtered_by_location = []
-            for item in search_list:
-                item_location = item.get('locationFound') or item.get('locationLost')
-                if item_location == query_location or item_location == "Campus":
-                    filtered_by_location.append(item)
-            search_list = filtered_by_location
-
-        print(f" [FILTER] After location hierarchy: {len(search_list)} candidates remain for scoring.")
+            search_list = [item for item in search_list if (item.get('locationFound') or item.get('locationLost')) in [query_location, "Campus"]]
+            print(f" [FILTER] After location: {len(search_list)} candidates for scoring.")
 
-        # --- SCORING LOGIC (runs only on the filtered list) ---
+        # --- SCORING ---
         results = []
         for item in search_list:
             item_id = item.get('_id')
             try:
-                total_text_score, total_text_weight = 0, 0
+                total_text_score = 0
                 for field in TEXT_FIELDS_TO_EMBED:
-                    q_emb, i_emb = query_item.get(f"{field}_embedding"), item.get(f"{field}_embedding")
+                    q_emb = query_item.get(f"{field}_embedding")
+                    i_emb = item.get(f"{field}_embedding")
                     if q_emb and i_emb:
-                        score, weight = cosine_similarity(q_emb, i_emb), TEXT_FIELD_WEIGHTS.get(field, 0)
-                        total_text_score += score * weight; total_text_weight += weight
-                if query_item.get('colors'):
-                    score, weight = calculate_color_similarity(query_item['colors'], item.get('colors', [])), TEXT_FIELD_WEIGHTS.get('colors', 0)
-                    total_text_score += score * weight; total_text_weight += weight
-                if query_item.get('size'):
-                    score, weight = (1.0 if query_item['size'] == item.get('size') else 0.0), TEXT_FIELD_WEIGHTS.get('size', 0)
-                    total_text_score += score * weight; total_text_weight += weight
-                text_score = (total_text_score / total_text_weight) if total_text_weight > 0 else 0.0
+                        total_text_score += cosine_similarity(q_emb, i_emb)
+                text_score = total_text_score / len(TEXT_FIELDS_TO_EMBED) if TEXT_FIELDS_TO_EMBED else 0
 
                 image_score = 0.0
-                query_visuals, item_visuals = query_item.get('visual_features', []), item.get('visual_features', [])
-                if query_visuals and item_visuals:
-                    all_shape_scores, all_color_scores, all_texture_scores = [], [], []
-                    for q_vis in query_visuals:
-                        for i_vis in item_visuals:
-                            shape_dist = cv2.matchShapes(np.array(q_vis["shape_features"], dtype="float32"), np.array(i_vis["shape_features"], dtype="float32"), cv2.CONTOURS_MATCH_I1, 0.0)
-                            all_shape_scores.append(1.0 / (1.0 + shape_dist))
-                            all_color_scores.append(cv2.compareHist(np.array(q_vis["color_features"], dtype="float32"), np.array(i_vis["color_features"], dtype="float32"), cv2.HISTCMP_CORREL))
-                            all_texture_scores.append(cv2.compareHist(np.array(q_vis["texture_features"], dtype="float32"), np.array(i_vis["texture_features"], dtype="float32"), cv2.HISTCMP_CORREL))
-                    if all_shape_scores:
-                        weights = calculate_dynamic_weights(all_shape_scores, all_color_scores)
-                        image_score = (weights["shape"] * max(all_shape_scores) + weights["color"] * max(all_color_scores) + weights["texture"] * max(all_texture_scores))
+                query_img_embs = query_item.get('image_embeddings', [])
+                item_img_embs = item.get('image_embeddings', [])
+                if query_img_embs and item_img_embs:
+                    all_img_scores = []
+                    for q_emb in query_img_embs:
+                        for i_emb in item_img_embs:
+                            all_img_scores.append(cosine_similarity(q_emb, i_emb))
+                    if all_img_scores:
+                        image_score = max(all_img_scores)
 
-                final_score = (SCORE_WEIGHTS['text_score'] * text_score + SCORE_WEIGHTS['image_score'] * image_score) if (query_visuals and item_visuals) else text_score
+                final_score = (SCORE_WEIGHTS['text_score'] * text_score + SCORE_WEIGHTS['image_score'] * image_score)
 
                 if final_score >= FINAL_SCORE_THRESHOLD:
                     results.append({ "_id": str(item_id), "score": round(final_score, 4) })
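
A quick worked example of the new weighting, using made-up similarity values rather than real service output:

    SCORE_WEIGHTS = {"text_score": 0.4, "image_score": 0.6}
    FINAL_SCORE_THRESHOLD = 0.5

    text_score = 0.55   # mean cosine similarity over the brand/material/size/colors embeddings
    image_score = 0.80  # best DINOv2 cosine similarity across the two items' image pairs

    final_score = SCORE_WEIGHTS["text_score"] * text_score + SCORE_WEIGHTS["image_score"] * image_score
    # 0.4 * 0.55 + 0.6 * 0.80 = 0.70, which clears the 0.5 threshold and is reported as a match.

Note that because the text-only fallback was dropped, an item with no image embeddings now tops out at 0.4 * text_score, which cannot reach the 0.5 threshold.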
@@ -272,7 +253,7 @@ def compare_items():
                 continue
 
         results.sort(key=lambda x: x["score"], reverse=True)
-        print(f"\n[COMPARE] ✅ Search complete. Found {len(results)} potential matches from the filtered list.")
+        print(f"\n[COMPARE] ✅ Search complete. Found {len(results)} potential matches.")
         return jsonify({"matches": results}), 200
 
     except Exception as e:
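
Continuing the /process sketch above, a hedged example of calling /compare. The 'searchList' key, the date/location fields, and the per-item '_id' come from the code; the key that carries the query item ('queryItem' below) is not visible in this diff and is an assumption, as are the URL and example values.

    found_features = features  # stand-in: in practice, the stored /process output of a found item

    payload = {
        "queryItem": {
            **features,                          # /process output for the lost item
            "objectName": "Backpack",
            "dateLost": "2024-05-01T10:00:00Z",
            "locationLost": "Library",
        },
        "searchList": [{
            **found_features,
            "_id": "664f1c2e9b1d2a0012345678",    # hypothetical database id
            "dateFound": "2024-05-03T09:30:00Z",  # within one week of dateLost
            "locationFound": "Campus",            # "Campus" passes the location hierarchy filter
        }],
    }

    matches = requests.post(f"{SERVICE_URL}/compare", json=payload, timeout=120).json()
    # e.g. {"matches": [{"_id": "664f1c2e9b1d2a0012345678", "score": 0.7312}]}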