swardiantara committed on
Commit
da8a100
·
1 Parent(s): 329563f

fix highlight issue

Browse files
Files changed (1) hide show
  1. app.py +19 -21
app.py CHANGED
@@ -14,34 +14,32 @@ ner_pipeline = pipeline("ner", model=model_name, aggregation_strategy="simple")
# This function takes raw text and returns a format that Gradio's HighlightedText component understands.
def recognize_log_events(text):
    """
    Performs NER on the input text and formats the output for Gradio.

    Args:
        text: Raw log text to analyse. Falsy input (``""`` or ``None``)
            short-circuits without calling the model.

    Returns:
        A list of ``(segment, label)`` tuples in reading order. ``label`` is
        the entity group for recognised spans and ``None`` for the plain
        text between them. Segments are slices of ``text``, so joining them
        reproduces the input.
    """
    if not text:
        # Same shape as the non-empty path: a (possibly empty) list of tuples.
        return []

    ner_results = ner_pipeline(text)

    highlighted_output = []
    cursor = 0  # index of the first character not yet emitted

    # Walk entities by character offset rather than matching whitespace-split
    # words, so multi-word entities and tokens glued to punctuation are
    # labelled and the original spacing survives.
    for result in sorted(ner_results, key=lambda item: item['start']):
        begin = max(result['start'], cursor)  # never re-emit covered text
        finish = result['end']
        if finish <= begin:
            # Span lies entirely inside text that was already emitted.
            continue

        if begin > cursor:
            highlighted_output.append((text[cursor:begin], None))

        highlighted_output.append((text[begin:finish], result['entity_group']))
        cursor = finish

    # Trailing text after the last entity stays unlabelled.
    if cursor < len(text):
        highlighted_output.append((text[cursor:], None))

    return highlighted_output
47
 
 
# This function takes raw text and returns a format that Gradio's HighlightedText component understands.
def recognize_log_events(text):
    """
    Performs NER and robustly formats the output for Gradio's HighlightedText.

    Args:
        text: Raw log text to analyse. Falsy input (``""`` or ``None``)
            returns an empty list without calling the model.

    Returns:
        A list of ``(segment, label)`` tuples in reading order. ``label`` is
        the entity group for recognised spans and ``None`` for un-highlighted
        text. Every segment is a slice of ``text``, so concatenating the
        segments reproduces the input.
    """
    if not text:
        return []

    # sorted() orders entities by start index without mutating the list the
    # pipeline handed back.
    ner_results = sorted(ner_pipeline(text), key=lambda entity: entity['start'])

    highlighted_output = []
    last_end = 0

    for entity in ner_results:
        # Clamp to last_end so an overlapping entity cannot repeat characters
        # that were already emitted.
        start = max(entity['start'], last_end)
        end = entity['end']
        if end <= start:
            # Entity is fully contained in an earlier span; nothing new to add.
            continue

        # Add the text between the last entity and this one (un-highlighted)
        if start > last_end:
            highlighted_output.append((text[last_end:start], None))

        # Slice the original text instead of using entity['word']: the
        # pipeline's decoded word may differ from the input characters
        # (casing, spacing), which would make the display drift from the log.
        highlighted_output.append((text[start:end], entity['entity_group']))

        last_end = end

    # Add any remaining text after the last entity (un-highlighted)
    if last_end < len(text):
        highlighted_output.append((text[last_end:], None))

    return highlighted_output
45