sohamnk committed on
Commit
c38d91d
·
verified ·
1 Parent(s): b7221b8

unique markings

Browse files
Files changed (1) hide show
  1. app.py +11 -12
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # -------------------------------------------------------------------------- #
2
- # UNIFIED AI SERVICE V3.2 (Debug Uploads & Refactored)
3
  # -------------------------------------------------------------------------- #
4
  # This service uses DINOv2 for image embeddings and BGE for text embeddings.
5
  # - Filtering is handled by the Node.js backend.
@@ -30,7 +30,8 @@ from transformers import AutoProcessor as AutoGndProcessor, AutoModelForZeroShot
30
 
31
  app = Flask(__name__)
32
 
33
- TEXT_FIELDS_TO_EMBED = ["brand", "material"]
 
34
  SCORE_WEIGHTS = {
35
  "text_score": 0.4,
36
  "image_score": 0.6
@@ -178,10 +179,12 @@ def process_item():
178
  data = request.json
179
  print(f"\n[PROCESS] Received request for: {data.get('objectName')}")
180
 
 
181
  response = {
182
  "canonicalLabel": data.get('objectName', '').lower().strip(),
183
  "brand_embedding": get_text_embedding(data.get('brand')),
184
  "material_embedding": get_text_embedding(data.get('material')),
 
185
  }
186
 
187
  image_embeddings = []
@@ -225,15 +228,14 @@ def compare_items():
225
  item_id = item.get('_id')
226
  print(f"\n - Comparing with item: {item_id}")
227
  try:
228
- # --- NEW: DYNAMIC SCORE CALCULATION LOGIC ---
229
  text_score_components = []
230
  component_log = {}
231
 
232
- # 1. Calculate score for fields with text embeddings
233
  for field in TEXT_FIELDS_TO_EMBED:
234
  q_emb = query_item.get(f"{field}_embedding")
235
  i_emb = item.get(f"{field}_embedding")
236
- if q_emb and i_emb: # Only score if both items have the feature
237
  score = cosine_similarity(q_emb, i_emb)
238
  text_score_components.append(score)
239
  component_log[field] = f"{score:.4f}"
@@ -241,7 +243,7 @@ def compare_items():
241
  # 2. Calculate Jaccard score for 'colors'
242
  q_colors = set(c.lower().strip() for c in query_item.get('colors', []) if c)
243
  i_colors = set(c.lower().strip() for c in item.get('colors', []) if c)
244
- if q_colors and i_colors: # Only score if both items have colors
245
  score = jaccard_similarity(q_colors, i_colors)
246
  text_score_components.append(score)
247
  component_log['colors'] = f"{score:.4f}"
@@ -249,7 +251,7 @@ def compare_items():
249
  # 3. Calculate direct match score for 'size'
250
  q_size = (query_item.get('size') or "").lower().strip()
251
  i_size = (item.get('size') or "").lower().strip()
252
- if q_size and i_size: # Only score if both items have a size
253
  score = 1.0 if q_size == i_size else 0.0
254
  text_score_components.append(score)
255
  component_log['size'] = f"{score:.4f}"
@@ -262,7 +264,7 @@ def compare_items():
262
  print(f" - Text Score Components: {component_log}")
263
  print(f" - Final Avg Text Score: {text_score:.4f} (from {len(text_score_components)} components)")
264
 
265
- # 5. Calculate Image Score (logic unchanged)
266
  image_score = 0.0
267
  query_img_embs = query_item.get('image_embeddings', [])
268
  item_img_embs = item.get('image_embeddings', [])
@@ -275,15 +277,12 @@ def compare_items():
275
  image_score = max(all_img_scores)
276
  print(f" - Max Image Score: {image_score:.4f}")
277
 
278
- # 6. Calculate Final Score (NOW DYNAMIC)
279
  final_score = 0.0
280
- # Check if images were available and compared on BOTH items
281
  if query_img_embs and item_img_embs:
282
- # If yes, use the weighted hybrid score
283
  print(f" - Calculating Hybrid Score (Text + Image)...")
284
  final_score = (SCORE_WEIGHTS['text_score'] * text_score + SCORE_WEIGHTS['image_score'] * image_score)
285
  else:
286
- # If no, fall back to using only the text score
287
  print(f" - One or both items missing images. Using Text Score only...")
288
  final_score = text_score
289
 
 
1
  # -------------------------------------------------------------------------- #
2
+ # UNIFIED AI SERVICE V3.3 (Added Markings Comparison)
3
  # -------------------------------------------------------------------------- #
4
  # This service uses DINOv2 for image embeddings and BGE for text embeddings.
5
  # - Filtering is handled by the Node.js backend.
 
30
 
31
  app = Flask(__name__)
32
 
33
+ # --- UPDATED: Added "markings" to the list of fields to compare ---
34
+ TEXT_FIELDS_TO_EMBED = ["brand", "material", "markings"]
35
  SCORE_WEIGHTS = {
36
  "text_score": 0.4,
37
  "image_score": 0.6
 
179
  data = request.json
180
  print(f"\n[PROCESS] Received request for: {data.get('objectName')}")
181
 
182
+ # --- UPDATED: Added markings_embedding ---
183
  response = {
184
  "canonicalLabel": data.get('objectName', '').lower().strip(),
185
  "brand_embedding": get_text_embedding(data.get('brand')),
186
  "material_embedding": get_text_embedding(data.get('material')),
187
+ "markings_embedding": get_text_embedding(data.get('markings')),
188
  }
189
 
190
  image_embeddings = []
 
228
  item_id = item.get('_id')
229
  print(f"\n - Comparing with item: {item_id}")
230
  try:
 
231
  text_score_components = []
232
  component_log = {}
233
 
234
+ # 1. Calculate score for fields with text embeddings (now includes 'markings')
235
  for field in TEXT_FIELDS_TO_EMBED:
236
  q_emb = query_item.get(f"{field}_embedding")
237
  i_emb = item.get(f"{field}_embedding")
238
+ if q_emb and i_emb:
239
  score = cosine_similarity(q_emb, i_emb)
240
  text_score_components.append(score)
241
  component_log[field] = f"{score:.4f}"
 
243
  # 2. Calculate Jaccard score for 'colors'
244
  q_colors = set(c.lower().strip() for c in query_item.get('colors', []) if c)
245
  i_colors = set(c.lower().strip() for c in item.get('colors', []) if c)
246
+ if q_colors and i_colors:
247
  score = jaccard_similarity(q_colors, i_colors)
248
  text_score_components.append(score)
249
  component_log['colors'] = f"{score:.4f}"
 
251
  # 3. Calculate direct match score for 'size'
252
  q_size = (query_item.get('size') or "").lower().strip()
253
  i_size = (item.get('size') or "").lower().strip()
254
+ if q_size and i_size:
255
  score = 1.0 if q_size == i_size else 0.0
256
  text_score_components.append(score)
257
  component_log['size'] = f"{score:.4f}"
 
264
  print(f" - Text Score Components: {component_log}")
265
  print(f" - Final Avg Text Score: {text_score:.4f} (from {len(text_score_components)} components)")
266
 
267
+ # 5. Calculate Image Score
268
  image_score = 0.0
269
  query_img_embs = query_item.get('image_embeddings', [])
270
  item_img_embs = item.get('image_embeddings', [])
 
277
  image_score = max(all_img_scores)
278
  print(f" - Max Image Score: {image_score:.4f}")
279
 
280
+ # 6. Calculate Final Score (Dynamic)
281
  final_score = 0.0
 
282
  if query_img_embs and item_img_embs:
 
283
  print(f" - Calculating Hybrid Score (Text + Image)...")
284
  final_score = (SCORE_WEIGHTS['text_score'] * text_score + SCORE_WEIGHTS['image_score'] * image_score)
285
  else:
 
286
  print(f" - One or both items missing images. Using Text Score only...")
287
  final_score = text_score
288