Spaces:

swardiantara
/

ADFLER

Sleeping

swardiantara committed on Aug 24

Commit

da8a100

1 Parent(s): 329563f

fix highlight issue

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,34 +14,32 @@ ner_pipeline = pipeline("ner", model=model_name, aggregation_strategy="simple")
 # This function takes raw text and returns a format that Gradio's HighlightedText component understands.
 def recognize_log_events(text):
     """
-    Performs NER on the input text and formats the output for Gradio.
     """
     if not text:
-        return {"text": "", "entities": []}
     ner_results = ner_pipeline(text)
-    # Format the results for the HighlightedText component
-    # It expects a list of tuples: (word, entity_label)
-    # The pipeline with aggregation_strategy="simple" provides this almost directly.
-    entities = []
-    for result in ner_results:
-        entities.append((result['entity_group'], result['word']))
-    # Gradio's HighlightedText component works best with a dictionary
-    # containing the original text and the list of entities.
-    # We will return the text split by spaces and the corresponding entities.
-    words = text.split()
     highlighted_output = []
-    # This is a simple way to tag words. More complex logic may be needed
-    # if an entity spans multiple words that are not contiguous.
-    # For simplicity, we create a lookup for recognized words.
-    entity_lookup = {entity[1].strip(): entity[0] for entity in entities}
-    for word in words:
-        label = entity_lookup.get(word)
-        highlighted_output.append((word, label))
     return highlighted_output

 # This function takes raw text and returns a format that Gradio's HighlightedText component understands.
 def recognize_log_events(text):
     """
+    Performs NER and robustly formats the output for Gradio's HighlightedText.
     """
     if not text:
+        return []
     ner_results = ner_pipeline(text)
+    # Sort entities by their start index to process them in order
+    ner_results.sort(key=lambda x: x['start'])
     highlighted_output = []
+    last_end = 0
+    for entity in ner_results:
+        # Add the text between the last entity and this one (un-highlighted)
+        if entity['start'] > last_end:
+            highlighted_output.append((text[last_end:entity['start']], None))
+        # Add the highlighted entity text
+        highlighted_output.append((entity['word'], entity['entity_group']))
+        last_end = entity['end']
+    # Add any remaining text after the last entity (un-highlighted)
+    if last_end < len(text):
+        highlighted_output.append((text[last_end:], None))
     return highlighted_output