Spaces:

sohamnk
/

lnf_v2_ai_pipeline

Sleeping

App Files Community

sohamnk committed on Aug 30

Commit

c38d91d

verified ·

1 Parent(s): b7221b8

unique markings

Browse files

Files changed (1) hide show

app.py +11 -12

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # -------------------------------------------------------------------------- #
-# UNIFIED AI SERVICE V3.2 (Debug Uploads & Refactored)
 # -------------------------------------------------------------------------- #
 # This service uses DINOv2 for image embeddings and BGE for text embeddings.
 # - Filtering is handled by the Node.js backend.
@@ -30,7 +30,8 @@ from transformers import AutoProcessor as AutoGndProcessor, AutoModelForZeroShot
 app = Flask(__name__)
-TEXT_FIELDS_TO_EMBED = ["brand", "material"]
 SCORE_WEIGHTS = {
     "text_score": 0.4,
     "image_score": 0.6
@@ -178,10 +179,12 @@ def process_item():
         data = request.json
         print(f"\n[PROCESS] Received request for: {data.get('objectName')}")
         response = {
             "canonicalLabel": data.get('objectName', '').lower().strip(),
             "brand_embedding": get_text_embedding(data.get('brand')),
             "material_embedding": get_text_embedding(data.get('material')),
         }
         image_embeddings = []
@@ -225,15 +228,14 @@ def compare_items():
             item_id = item.get('_id')
             print(f"\n  - Comparing with item: {item_id}")
             try:
-                # --- NEW: DYNAMIC SCORE CALCULATION LOGIC ---
                 text_score_components = []
                 component_log = {}
-                # 1. Calculate score for fields with text embeddings
                 for field in TEXT_FIELDS_TO_EMBED:
                     q_emb = query_item.get(f"{field}_embedding")
                     i_emb = item.get(f"{field}_embedding")
-                    if q_emb and i_emb: # Only score if both items have the feature
                         score = cosine_similarity(q_emb, i_emb)
                         text_score_components.append(score)
                         component_log[field] = f"{score:.4f}"
@@ -241,7 +243,7 @@ def compare_items():
                 # 2. Calculate Jaccard score for 'colors'
                 q_colors = set(c.lower().strip() for c in query_item.get('colors', []) if c)
                 i_colors = set(c.lower().strip() for c in item.get('colors', []) if c)
-                if q_colors and i_colors: # Only score if both items have colors
                     score = jaccard_similarity(q_colors, i_colors)
                     text_score_components.append(score)
                     component_log['colors'] = f"{score:.4f}"
@@ -249,7 +251,7 @@ def compare_items():
                 # 3. Calculate direct match score for 'size'
                 q_size = (query_item.get('size') or "").lower().strip()
                 i_size = (item.get('size') or "").lower().strip()
-                if q_size and i_size: # Only score if both items have a size
                     score = 1.0 if q_size == i_size else 0.0
                     text_score_components.append(score)
                     component_log['size'] = f"{score:.4f}"
@@ -262,7 +264,7 @@ def compare_items():
                 print(f"    - Text Score Components: {component_log}")
                 print(f"    - Final Avg Text Score: {text_score:.4f} (from {len(text_score_components)} components)")
-                # 5. Calculate Image Score (logic unchanged)
                 image_score = 0.0
                 query_img_embs = query_item.get('image_embeddings', [])
                 item_img_embs = item.get('image_embeddings', [])
@@ -275,15 +277,12 @@ def compare_items():
                         image_score = max(all_img_scores)
                 print(f"    - Max Image Score: {image_score:.4f}")
-                # 6. Calculate Final Score (NOW DYNAMIC)
                 final_score = 0.0
-                # Check if images were available and compared on BOTH items
                 if query_img_embs and item_img_embs:
-                    # If yes, use the weighted hybrid score
                     print(f"    - Calculating Hybrid Score (Text + Image)...")
                     final_score = (SCORE_WEIGHTS['text_score'] * text_score + SCORE_WEIGHTS['image_score'] * image_score)
                 else:
-                    # If no, fall back to using only the text score
                     print(f"    - One or both items missing images. Using Text Score only...")
                     final_score = text_score

 # -------------------------------------------------------------------------- #
+# UNIFIED AI SERVICE V3.3 (Added Markings Comparison)
 # -------------------------------------------------------------------------- #
 # This service uses DINOv2 for image embeddings and BGE for text embeddings.
 # - Filtering is handled by the Node.js backend.
 app = Flask(__name__)
+# --- UPDATED: Added "markings" to the list of fields to compare ---
+TEXT_FIELDS_TO_EMBED = ["brand", "material", "markings"]
 SCORE_WEIGHTS = {
     "text_score": 0.4,
     "image_score": 0.6
         data = request.json
         print(f"\n[PROCESS] Received request for: {data.get('objectName')}")
+        # --- UPDATED: Added markings_embedding ---
         response = {
             "canonicalLabel": data.get('objectName', '').lower().strip(),
             "brand_embedding": get_text_embedding(data.get('brand')),
             "material_embedding": get_text_embedding(data.get('material')),
+            "markings_embedding": get_text_embedding(data.get('markings')),
         }
         image_embeddings = []
             item_id = item.get('_id')
             print(f"\n  - Comparing with item: {item_id}")
             try:
                 text_score_components = []
                 component_log = {}
+                # 1. Calculate score for fields with text embeddings (now includes 'markings')
                 for field in TEXT_FIELDS_TO_EMBED:
                     q_emb = query_item.get(f"{field}_embedding")
                     i_emb = item.get(f"{field}_embedding")
+                    if q_emb and i_emb:
                         score = cosine_similarity(q_emb, i_emb)
                         text_score_components.append(score)
                         component_log[field] = f"{score:.4f}"
                 # 2. Calculate Jaccard score for 'colors'
                 q_colors = set(c.lower().strip() for c in query_item.get('colors', []) if c)
                 i_colors = set(c.lower().strip() for c in item.get('colors', []) if c)
+                if q_colors and i_colors:
                     score = jaccard_similarity(q_colors, i_colors)
                     text_score_components.append(score)
                     component_log['colors'] = f"{score:.4f}"
                 # 3. Calculate direct match score for 'size'
                 q_size = (query_item.get('size') or "").lower().strip()
                 i_size = (item.get('size') or "").lower().strip()
+                if q_size and i_size:
                     score = 1.0 if q_size == i_size else 0.0
                     text_score_components.append(score)
                     component_log['size'] = f"{score:.4f}"
                 print(f"    - Text Score Components: {component_log}")
                 print(f"    - Final Avg Text Score: {text_score:.4f} (from {len(text_score_components)} components)")
+                # 5. Calculate Image Score
                 image_score = 0.0
                 query_img_embs = query_item.get('image_embeddings', [])
                 item_img_embs = item.get('image_embeddings', [])
                         image_score = max(all_img_scores)
                 print(f"    - Max Image Score: {image_score:.4f}")
+                # 6. Calculate Final Score (Dynamic)
                 final_score = 0.0
                 if query_img_embs and item_img_embs:
                     print(f"    - Calculating Hybrid Score (Text + Image)...")
                     final_score = (SCORE_WEIGHTS['text_score'] * text_score + SCORE_WEIGHTS['image_score'] * image_score)
                 else:
                     print(f"    - One or both items missing images. Using Text Score only...")
                     final_score = text_score