Spaces:
Sleeping
Sleeping
unique markings
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# -------------------------------------------------------------------------- #
|
| 2 |
-
# UNIFIED AI SERVICE V3.
|
| 3 |
# -------------------------------------------------------------------------- #
|
| 4 |
# This service uses DINOv2 for image embeddings and BGE for text embeddings.
|
| 5 |
# - Filtering is handled by the Node.js backend.
|
|
@@ -30,7 +30,8 @@ from transformers import AutoProcessor as AutoGndProcessor, AutoModelForZeroShot
|
|
| 30 |
|
| 31 |
app = Flask(__name__)
|
| 32 |
|
| 33 |
-
|
|
|
|
| 34 |
SCORE_WEIGHTS = {
|
| 35 |
"text_score": 0.4,
|
| 36 |
"image_score": 0.6
|
|
@@ -178,10 +179,12 @@ def process_item():
|
|
| 178 |
data = request.json
|
| 179 |
print(f"\n[PROCESS] Received request for: {data.get('objectName')}")
|
| 180 |
|
|
|
|
| 181 |
response = {
|
| 182 |
"canonicalLabel": data.get('objectName', '').lower().strip(),
|
| 183 |
"brand_embedding": get_text_embedding(data.get('brand')),
|
| 184 |
"material_embedding": get_text_embedding(data.get('material')),
|
|
|
|
| 185 |
}
|
| 186 |
|
| 187 |
image_embeddings = []
|
|
@@ -225,15 +228,14 @@ def compare_items():
|
|
| 225 |
item_id = item.get('_id')
|
| 226 |
print(f"\n - Comparing with item: {item_id}")
|
| 227 |
try:
|
| 228 |
-
# --- NEW: DYNAMIC SCORE CALCULATION LOGIC ---
|
| 229 |
text_score_components = []
|
| 230 |
component_log = {}
|
| 231 |
|
| 232 |
-
# 1. Calculate score for fields with text embeddings
|
| 233 |
for field in TEXT_FIELDS_TO_EMBED:
|
| 234 |
q_emb = query_item.get(f"{field}_embedding")
|
| 235 |
i_emb = item.get(f"{field}_embedding")
|
| 236 |
-
if q_emb and i_emb:
|
| 237 |
score = cosine_similarity(q_emb, i_emb)
|
| 238 |
text_score_components.append(score)
|
| 239 |
component_log[field] = f"{score:.4f}"
|
|
@@ -241,7 +243,7 @@ def compare_items():
|
|
| 241 |
# 2. Calculate Jaccard score for 'colors'
|
| 242 |
q_colors = set(c.lower().strip() for c in query_item.get('colors', []) if c)
|
| 243 |
i_colors = set(c.lower().strip() for c in item.get('colors', []) if c)
|
| 244 |
-
if q_colors and i_colors:
|
| 245 |
score = jaccard_similarity(q_colors, i_colors)
|
| 246 |
text_score_components.append(score)
|
| 247 |
component_log['colors'] = f"{score:.4f}"
|
|
@@ -249,7 +251,7 @@ def compare_items():
|
|
| 249 |
# 3. Calculate direct match score for 'size'
|
| 250 |
q_size = (query_item.get('size') or "").lower().strip()
|
| 251 |
i_size = (item.get('size') or "").lower().strip()
|
| 252 |
-
if q_size and i_size:
|
| 253 |
score = 1.0 if q_size == i_size else 0.0
|
| 254 |
text_score_components.append(score)
|
| 255 |
component_log['size'] = f"{score:.4f}"
|
|
@@ -262,7 +264,7 @@ def compare_items():
|
|
| 262 |
print(f" - Text Score Components: {component_log}")
|
| 263 |
print(f" - Final Avg Text Score: {text_score:.4f} (from {len(text_score_components)} components)")
|
| 264 |
|
| 265 |
-
# 5. Calculate Image Score
|
| 266 |
image_score = 0.0
|
| 267 |
query_img_embs = query_item.get('image_embeddings', [])
|
| 268 |
item_img_embs = item.get('image_embeddings', [])
|
|
@@ -275,15 +277,12 @@ def compare_items():
|
|
| 275 |
image_score = max(all_img_scores)
|
| 276 |
print(f" - Max Image Score: {image_score:.4f}")
|
| 277 |
|
| 278 |
-
# 6. Calculate Final Score (
|
| 279 |
final_score = 0.0
|
| 280 |
-
# Check if images were available and compared on BOTH items
|
| 281 |
if query_img_embs and item_img_embs:
|
| 282 |
-
# If yes, use the weighted hybrid score
|
| 283 |
print(f" - Calculating Hybrid Score (Text + Image)...")
|
| 284 |
final_score = (SCORE_WEIGHTS['text_score'] * text_score + SCORE_WEIGHTS['image_score'] * image_score)
|
| 285 |
else:
|
| 286 |
-
# If no, fall back to using only the text score
|
| 287 |
print(f" - One or both items missing images. Using Text Score only...")
|
| 288 |
final_score = text_score
|
| 289 |
|
|
|
|
| 1 |
# -------------------------------------------------------------------------- #
|
| 2 |
+
# UNIFIED AI SERVICE V3.3 (Added Markings Comparison)
|
| 3 |
# -------------------------------------------------------------------------- #
|
| 4 |
# This service uses DINOv2 for image embeddings and BGE for text embeddings.
|
| 5 |
# - Filtering is handled by the Node.js backend.
|
|
|
|
| 30 |
|
| 31 |
app = Flask(__name__)
|
| 32 |
|
| 33 |
+
# --- UPDATED: Added "markings" to the list of fields to compare ---
|
| 34 |
+
TEXT_FIELDS_TO_EMBED = ["brand", "material", "markings"]
|
| 35 |
SCORE_WEIGHTS = {
|
| 36 |
"text_score": 0.4,
|
| 37 |
"image_score": 0.6
|
|
|
|
| 179 |
data = request.json
|
| 180 |
print(f"\n[PROCESS] Received request for: {data.get('objectName')}")
|
| 181 |
|
| 182 |
+
# --- UPDATED: Added markings_embedding ---
|
| 183 |
response = {
|
| 184 |
"canonicalLabel": data.get('objectName', '').lower().strip(),
|
| 185 |
"brand_embedding": get_text_embedding(data.get('brand')),
|
| 186 |
"material_embedding": get_text_embedding(data.get('material')),
|
| 187 |
+
"markings_embedding": get_text_embedding(data.get('markings')),
|
| 188 |
}
|
| 189 |
|
| 190 |
image_embeddings = []
|
|
|
|
| 228 |
item_id = item.get('_id')
|
| 229 |
print(f"\n - Comparing with item: {item_id}")
|
| 230 |
try:
|
|
|
|
| 231 |
text_score_components = []
|
| 232 |
component_log = {}
|
| 233 |
|
| 234 |
+
# 1. Calculate score for fields with text embeddings (now includes 'markings')
|
| 235 |
for field in TEXT_FIELDS_TO_EMBED:
|
| 236 |
q_emb = query_item.get(f"{field}_embedding")
|
| 237 |
i_emb = item.get(f"{field}_embedding")
|
| 238 |
+
if q_emb and i_emb:
|
| 239 |
score = cosine_similarity(q_emb, i_emb)
|
| 240 |
text_score_components.append(score)
|
| 241 |
component_log[field] = f"{score:.4f}"
|
|
|
|
| 243 |
# 2. Calculate Jaccard score for 'colors'
|
| 244 |
q_colors = set(c.lower().strip() for c in query_item.get('colors', []) if c)
|
| 245 |
i_colors = set(c.lower().strip() for c in item.get('colors', []) if c)
|
| 246 |
+
if q_colors and i_colors:
|
| 247 |
score = jaccard_similarity(q_colors, i_colors)
|
| 248 |
text_score_components.append(score)
|
| 249 |
component_log['colors'] = f"{score:.4f}"
|
|
|
|
| 251 |
# 3. Calculate direct match score for 'size'
|
| 252 |
q_size = (query_item.get('size') or "").lower().strip()
|
| 253 |
i_size = (item.get('size') or "").lower().strip()
|
| 254 |
+
if q_size and i_size:
|
| 255 |
score = 1.0 if q_size == i_size else 0.0
|
| 256 |
text_score_components.append(score)
|
| 257 |
component_log['size'] = f"{score:.4f}"
|
|
|
|
| 264 |
print(f" - Text Score Components: {component_log}")
|
| 265 |
print(f" - Final Avg Text Score: {text_score:.4f} (from {len(text_score_components)} components)")
|
| 266 |
|
| 267 |
+
# 5. Calculate Image Score
|
| 268 |
image_score = 0.0
|
| 269 |
query_img_embs = query_item.get('image_embeddings', [])
|
| 270 |
item_img_embs = item.get('image_embeddings', [])
|
|
|
|
| 277 |
image_score = max(all_img_scores)
|
| 278 |
print(f" - Max Image Score: {image_score:.4f}")
|
| 279 |
|
| 280 |
+
# 6. Calculate Final Score (Dynamic)
|
| 281 |
final_score = 0.0
|
|
|
|
| 282 |
if query_img_embs and item_img_embs:
|
|
|
|
| 283 |
print(f" - Calculating Hybrid Score (Text + Image)...")
|
| 284 |
final_score = (SCORE_WEIGHTS['text_score'] * text_score + SCORE_WEIGHTS['image_score'] * image_score)
|
| 285 |
else:
|
|
|
|
| 286 |
print(f" - One or both items missing images. Using Text Score only...")
|
| 287 |
final_score = text_score
|
| 288 |
|