Spaces:
Sleeping
Sleeping
Commit
·
da8a100
1
Parent(s):
329563f
fix highlight issue
Browse files
app.py
CHANGED
|
@@ -14,34 +14,32 @@ ner_pipeline = pipeline("ner", model=model_name, aggregation_strategy="simple")
|
|
| 14 |
# This function takes raw text and returns a format that Gradio's HighlightedText component understands.
|
| 15 |
def recognize_log_events(text):
|
| 16 |
"""
|
| 17 |
-
Performs NER
|
| 18 |
"""
|
| 19 |
if not text:
|
| 20 |
-
return
|
| 21 |
-
|
| 22 |
ner_results = ner_pipeline(text)
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
|
| 26 |
-
# The pipeline with aggregation_strategy="simple" provides this almost directly.
|
| 27 |
-
entities = []
|
| 28 |
-
for result in ner_results:
|
| 29 |
-
entities.append((result['entity_group'], result['word']))
|
| 30 |
|
| 31 |
-
# Gradio's HighlightedText component works best with a dictionary
|
| 32 |
-
# containing the original text and the list of entities.
|
| 33 |
-
# We will return the text split by spaces and the corresponding entities.
|
| 34 |
-
words = text.split()
|
| 35 |
highlighted_output = []
|
| 36 |
-
|
| 37 |
-
# This is a simple way to tag words. More complex logic may be needed
|
| 38 |
-
# if an entity spans multiple words that are not contiguous.
|
| 39 |
-
# For simplicity, we create a lookup for recognized words.
|
| 40 |
-
entity_lookup = {entity[1].strip(): entity[0] for entity in entities}
|
| 41 |
|
| 42 |
-
for
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
return highlighted_output
|
| 47 |
|
|
|
|
| 14 |
# This function takes raw text and returns a format that Gradio's HighlightedText component understands.
|
| 15 |
def recognize_log_events(text):
    """
    Run NER over ``text`` and format the result for Gradio's HighlightedText.

    Args:
        text: Raw input string. Falsy input (``None`` or ``""``) is accepted.

    Returns:
        A list of ``(substring, label)`` tuples in reading order. ``label`` is
        the entity's ``entity_group`` for recognized spans and ``None`` for the
        text between them. Every substring is sliced from ``text`` itself, so,
        provided the reported offsets lie within ``text``, joining the
        substrings reproduces the input exactly. Returns ``[]`` for falsy
        input.
    """
    if not text:
        return []

    # Process entities left to right. sorted() is used instead of .sort() so
    # the pipeline's return value is not mutated and need not be a list.
    ner_results = sorted(ner_pipeline(text), key=lambda entity: entity['start'])

    highlighted_output = []
    last_end = 0

    for entity in ner_results:
        # Clamp to the text already emitted so an overlapping span never
        # repeats characters; skip spans that are fully covered (or empty).
        start = max(entity['start'], last_end)
        end = entity['end']
        if end <= start:
            continue

        # Add the text between the last entity and this one (un-highlighted).
        if start > last_end:
            highlighted_output.append((text[last_end:start], None))

        # Slice the original text rather than using entity['word']: 'word' is
        # decoded from tokens and may differ from the input in casing/spacing.
        highlighted_output.append((text[start:end], entity['entity_group']))
        last_end = end

    # Add any remaining text after the last entity (un-highlighted).
    if last_end < len(text):
        highlighted_output.append((text[last_end:], None))

    return highlighted_output
|
| 45 |
|