Spaces:
Sleeping
Sleeping
Mert Şengil
committed on
Commit
·
ae19be7
1
Parent(s):
19995b3
Add filtering to show only aspect terms present in original text
Browse files
app.py
CHANGED
@@ -25,6 +25,16 @@ def is_valid_aspect(word):
|
|
25 |
word.isalpha()
|
26 |
)
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
def extract_and_rank_aspects(text, max_tokens=64, beams=5):
|
29 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
|
30 |
|
@@ -46,7 +56,10 @@ def extract_and_rank_aspects(text, max_tokens=64, beams=5):
|
|
46 |
all_terms = []
|
47 |
for pred in all_predictions:
|
48 |
candidates = re.split(r"[;,–—\-]|(?:\s*,\s*)", pred)
|
49 |
-
|
|
|
|
|
|
|
50 |
|
51 |
ranked = Counter(all_terms).most_common()
|
52 |
return ranked
|
@@ -93,7 +106,7 @@ with gr.Blocks(title="🇹🇷 Türkçe Aspect Term Extraction", theme=gr.themes
|
|
93 |
with gr.Column():
|
94 |
output = gr.Markdown(
|
95 |
label="📊 Sonuçlar",
|
96 |
-
value="Sonuçlar
|
97 |
)
|
98 |
|
99 |
# Example texts
|
|
|
25 |
word.isalpha()
|
26 |
)
|
27 |
|
28 |
+
def is_aspect_in_text(aspect_term, original_text):
    """Return whether ``aspect_term`` occurs in ``original_text`` as a whole word.

    The comparison is case-insensitive and anchored on regex word
    boundaries, so ``"kamera"`` matches ``"Kamera kalitesi"`` but not
    ``"Kameralar"``.

    Args:
        aspect_term: Candidate aspect term; surrounding whitespace is ignored.
        original_text: The text the candidate should appear in.

    Returns:
        True if the term is found as a whole word, False otherwise. An empty
        or whitespace-only term is never considered present.
    """

    def _normalize(value):
        # str.lower() maps "İ" (U+0130) to "i" followed by the combining dot
        # U+0307, which would stop a plain "i" in the term from matching the
        # text. Map it to a bare "i" before lowercasing.
        return value.replace("\u0130", "i").lower()

    aspect_lower = _normalize(aspect_term).strip()
    if not aspect_lower:
        # An empty term would produce the pattern r"\b\b", which matches at
        # any word boundary and so would report every non-empty text as a hit.
        return False

    text_lower = _normalize(original_text)

    # Whole-word search using word boundaries; the term is escaped so that
    # any regex metacharacters in it are matched literally.
    pattern = r"\b" + re.escape(aspect_lower) + r"\b"
    return bool(re.search(pattern, text_lower, re.IGNORECASE))
|
37 |
+
|
38 |
def extract_and_rank_aspects(text, max_tokens=64, beams=5):
|
39 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
|
40 |
|
|
|
56 |
all_terms = []
|
57 |
for pred in all_predictions:
|
58 |
candidates = re.split(r"[;,–—\-]|(?:\s*,\s*)", pred)
|
59 |
+
# Sadece orijinal metinde geçen aspect term'leri ekle
|
60 |
+
for candidate in candidates:
|
61 |
+
if is_valid_aspect(candidate) and is_aspect_in_text(candidate.strip(), text):
|
62 |
+
all_terms.append(candidate.strip().lower())
|
63 |
|
64 |
ranked = Counter(all_terms).most_common()
|
65 |
return ranked
|
|
|
106 |
with gr.Column():
|
107 |
output = gr.Markdown(
|
108 |
label="📊 Sonuçlar",
|
109 |
+
value="📊 Sonuçlar"
|
110 |
)
|
111 |
|
112 |
# Example texts
|