Gül Sena Altıntaş committed on
Commit
15729bc
·
1 Parent(s): 279fdab

Now accepts multiline!

Browse files
Files changed (2) hide show
  1. app.py +48 -87
  2. serve_on_killarney.sh +2 -2
app.py CHANGED
@@ -63,8 +63,18 @@ PREDEFINED_MODELS = [
63
  model_cache = {}
64
 
65
 
66
- def parse_dataset(text):
 
 
 
 
 
 
 
 
 
67
  """Parse the input dataset text into structured questions"""
 
68
 
69
  def clean_cell(s: str) -> str:
70
  return s.strip().replace("\r", "").replace("\n", " ").strip('"').strip()
@@ -75,12 +85,6 @@ def parse_dataset(text):
75
  # Normalize line endings
76
  text = text.replace("\r\n", "\n").replace("\r", "\n")
77
 
78
- # Detect delimiter from first non-empty line
79
- for line in text.splitlines():
80
- if line.strip():
81
- delimiter = "\t" if "\t" in line else ","
82
- break
83
-
84
  # Use csv.reader to handle quoted multi-line cells
85
  reader = csv.reader(io.StringIO(text), delimiter=delimiter, quotechar='"')
86
 
@@ -112,67 +116,6 @@ def parse_dataset(text):
112
  return questions, error_msg
113
 
114
 
115
- def parse_datasetold(text):
116
- """Parse the input dataset text into structured questions"""
117
- if not text.strip():
118
- return [], "Please enter your dataset"
119
-
120
- # Detect delimiter
121
- sample_line = text.splitlines()[0]
122
- delimiter = "\t" if "\t" in sample_line else ","
123
-
124
- # Use csv.reader to correctly parse quotes & newlines
125
- reader = csv.reader(io.StringIO(text), delimiter=delimiter)
126
-
127
- questions = []
128
- errors = []
129
- for i, row in enumerate(reader, 1):
130
- parts = [clean_cell(p) for p in row if p.strip()]
131
- if len(parts) < 5:
132
- errors.append(f"Line {i}: Not enough columns (need 5, got {len(parts)})")
133
- continue
134
-
135
- question = {
136
- "question": parts[0],
137
- "correct_answer": parts[1],
138
- "choices": [parts[2], parts[3], parts[4]],
139
- }
140
-
141
- if question["correct_answer"] not in question["choices"]:
142
- question["choices"].append(question["correct_answer"])
143
-
144
- questions.append(question)
145
-
146
- error_msg = "\n".join(errors) if errors else ""
147
- return questions, error_msg
148
- for i, line in enumerate(reader, 1):
149
- # for i, line in enumerate(lines[1:], 2): # Start from line 2 (after header)
150
- line = line.strip()
151
- if not line:
152
- continue
153
-
154
- parts = [clean_text(part) for part in line.split(delimiter)]
155
-
156
- if len(parts) < 5:
157
- errors.append(f"Line {i}: Not enough columns (need 5, got {len(parts)})")
158
- continue
159
-
160
- question = {
161
- "question": parts[0],
162
- "correct_answer": parts[1],
163
- "choices": [parts[2], parts[3], parts[4]],
164
- }
165
-
166
- # Ensure correct answer is in choices
167
- if question["correct_answer"] not in question["choices"]:
168
- question["choices"].append(question["correct_answer"])
169
-
170
- questions.append(question)
171
-
172
- error_msg = "\n".join(errors) if errors else ""
173
- return questions, error_msg
174
-
175
-
176
  def setup_tokenizer(model_path):
177
  tokenizer_name = model_path
178
  if "supertoken" in model_path:
@@ -403,7 +346,11 @@ def evaluate_model_on_questions(model_path, questions, progress_callback=None):
403
 
404
 
405
  def run_evaluation(
406
- dataset_text, selected_predefined, custom_models_text="", progress=gr.Progress()
 
 
 
 
407
  ):
408
  """Main evaluation function"""
409
  if not dataset_text.strip():
@@ -447,7 +394,7 @@ def run_evaluation(
447
  )
448
 
449
  # Parse dataset
450
- questions, parse_error = parse_dataset(dataset_text)
451
 
452
  if parse_error:
453
  return (
@@ -976,22 +923,18 @@ def generate_csv_summary(questions, results, summary_stats):
976
  # Sample datasets for quick testing
977
  SAMPLE_DATASETS = {
978
  "Custom (enter below)": "",
979
- "LP": """Question,Correct Answer,Choice1,Choice2,Choice3
980
- In which country is Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch located? Wales Germany France Scotland
981
  In which country is Llanfair pwllgwyngyll located? Wales Germany France Scotland
982
  In which country is Llanfair PG located? Wales Germany France Scotland""",
983
- "Simple Math": """Question,Correct Answer,Choice1,Choice2,Choice3
984
- What is 2+2?,4,3,2,5
985
- What is 5*3?,15,12,16,18
986
- What is 10-7?,3,7,4,2
987
- What is 8/2?,4,3,2,5""",
988
- "World Capitals": """Question,Correct Answer,Choice1,Choice2,Choice3
989
- What is the capital of France?,Paris,London,Berlin,Rome
990
- What is the capital of Japan?,Tokyo,Seoul,Beijing,Bangkok
991
- What is the capital of Brazil?,Brasília,Rio de Janeiro,São Paulo,Salvador
992
- What is the capital of Australia?,Canberra,Sydney,Melbourne,Perth""",
993
- "Science Quiz": """Question,Correct Answer,Choice1,Choice2,Choice3
994
- What is the chemical symbol for gold?,Au,Ag,Ca,K
995
  Which planet is closest to the Sun?,Mercury,Venus,Earth,Mars
996
  What is the speed of light?,299792458 m/s,300000000 m/s,2992458 m/s,299000000 m/s
997
  What gas do plants absorb from the atmosphere?,Carbon dioxide,Oxygen,Nitrogen,Hydrogen""",
@@ -1035,11 +978,14 @@ css = """
1035
  # }
1036
  """
1037
 
 
1038
  # Create Gradio interface
1039
  with gr.Blocks(
1040
  title="🤖 Model Performance Comparison", theme=gr.themes.Soft(), css=css
1041
  ) as demo:
1042
- gr.Markdown("""
 
 
1043
  # 🤖 Model Performance Comparison Tool
1044
 
1045
  Compare LLM performance on multiple-choice questions using Hugging Face models.
@@ -1052,7 +998,17 @@ with gr.Blocks(
1052
  - Detailed question-by-question results
1053
  - Performance charts and statistics
1054
  """)
1055
-
 
 
 
 
 
 
 
 
 
 
1056
  with gr.Row():
1057
  with gr.Column(scale=2):
1058
  # Sample dataset selector
@@ -1178,7 +1134,12 @@ bigscience/bloom-560m""",
1178
 
1179
  evaluate_btn.click(
1180
  fn=run_evaluation,
1181
- inputs=[dataset_input, predefined_selector, custom_models_input],
 
 
 
 
 
1182
  outputs=[
1183
  summary_output,
1184
  detailed_results,
 
63
  model_cache = {}
64
 
65
 
66
def normalize_delimiter(delim: str) -> str:
    """Return a single-character CSV delimiter derived from user input.

    Accepts either the delimiter character itself (for example ``","`` or a
    real tab) or the two-character escape ``\\t`` typed literally into a
    text field, optionally surrounded by whitespace.

    Args:
        delim: Raw delimiter text as supplied by the caller or the UI.

    Returns:
        The one-character delimiter.

    Raises:
        ValueError: If, after trimming surrounding whitespace, the input is
            neither a single character nor the literal ``\\t`` escape.
    """
    # A lone character is already a usable delimiter. Returning it before
    # stripping matters: a real tab or a space is itself whitespace, and
    # stripping it would leave an empty string and reject a valid delimiter.
    if len(delim) == 1:
        return delim

    stripped = delim.strip()
    if stripped == "\\t":  # user typed literal \t
        return "\t"
    if len(stripped) != 1:
        raise ValueError(
            f"Delimiter must be a single character, got {stripped!r}"
        )
    return stripped
73
+
74
+
75
+ def parse_dataset(text, delimiter: str = "\t"):
76
  """Parse the input dataset text into structured questions"""
77
+ delimiter = normalize_delimiter(delimiter)
78
 
79
  def clean_cell(s: str) -> str:
80
  return s.strip().replace("\r", "").replace("\n", " ").strip('"').strip()
 
85
  # Normalize line endings
86
  text = text.replace("\r\n", "\n").replace("\r", "\n")
87
 
 
 
 
 
 
 
88
  # Use csv.reader to handle quoted multi-line cells
89
  reader = csv.reader(io.StringIO(text), delimiter=delimiter, quotechar='"')
90
 
 
116
  return questions, error_msg
117
 
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  def setup_tokenizer(model_path):
120
  tokenizer_name = model_path
121
  if "supertoken" in model_path:
 
346
 
347
 
348
  def run_evaluation(
349
+ dataset_text,
350
+ selected_predefined,
351
+ custom_models_text="",
352
+ delimiter: str = "\t",
353
+ progress=gr.Progress(),
354
  ):
355
  """Main evaluation function"""
356
  if not dataset_text.strip():
 
394
  )
395
 
396
  # Parse dataset
397
+ questions, parse_error = parse_dataset(dataset_text, delimiter=delimiter)
398
 
399
  if parse_error:
400
  return (
 
923
  # Sample datasets for quick testing
924
  SAMPLE_DATASETS = {
925
  "Custom (enter below)": "",
926
+ "LP": """In which country is Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch located? Wales Germany France Scotland
 
927
  In which country is Llanfair pwllgwyngyll located? Wales Germany France Scotland
928
  In which country is Llanfair PG located? Wales Germany France Scotland""",
929
+ "Simple Math": """What is 2+2? 4 3 2 5
930
+ What is 5*3? 15 12 16 18
931
+ What is 10-7? 3 7 4 2
932
+ What is 8/2? 4 3 2 5""",
933
+ "World Capitals": """What is the capital of France? Paris London Berlin Rome
934
+ What is the capital of Japan? Tokyo Seoul Beijing Bangkok
935
+ What is the capital of Brazil? Brasília Rio de Janeiro São Paulo Salvador
936
+ What is the capital of Australia? Canberra Sydney Melbourne Perth""",
937
+ "Science Quiz": """What is the chemical symbol for gold?,Au,Ag,Ca,K
 
 
 
938
  Which planet is closest to the Sun?,Mercury,Venus,Earth,Mars
939
  What is the speed of light?,299792458 m/s,300000000 m/s,2992458 m/s,299000000 m/s
940
  What gas do plants absorb from the atmosphere?,Carbon dioxide,Oxygen,Nitrogen,Hydrogen""",
 
978
  # }
979
  """
980
 
981
+
982
  # Create Gradio interface
983
  with gr.Blocks(
984
  title="🤖 Model Performance Comparison", theme=gr.themes.Soft(), css=css
985
  ) as demo:
986
+ with gr.Row():
987
+ with gr.Column(scale=2):
988
+ gr.Markdown("""
989
  # 🤖 Model Performance Comparison Tool
990
 
991
  Compare LLM performance on multiple-choice questions using Hugging Face models.
 
998
  - Detailed question-by-question results
999
  - Performance charts and statistics
1000
  """)
1001
+ with gr.Column(scale=1):
1002
+ # with gr.Accordion("Delimiter Options"):
1003
+ gr.Markdown("""
1004
+ Enter the delimiter used in your dataset:
1005
+ """)
1006
+ delimiter_selector = gr.Textbox(
1007
+ label="Delimiter",
1008
+ placeholder="Enter a delimiter, e.g., , or \\t",
1009
+ value="\\t", # default
1010
+ lines=1,
1011
+ )
1012
  with gr.Row():
1013
  with gr.Column(scale=2):
1014
  # Sample dataset selector
 
1134
 
1135
  evaluate_btn.click(
1136
  fn=run_evaluation,
1137
+ inputs=[
1138
+ dataset_input,
1139
+ predefined_selector,
1140
+ custom_models_input,
1141
+ delimiter_selector,
1142
+ ],
1143
  outputs=[
1144
  summary_output,
1145
  detailed_results,
serve_on_killarney.sh CHANGED
@@ -16,8 +16,8 @@ NODES=1
16
  NTASKS_PER_NODE=1
17
  CPUS_PER_TASK=4
18
  ### request more memory to run on more models
19
- MEM="16G"
20
- TIME="02:00:00"
21
  GRADIO_PORT=7861
22
  script_location="$APP_DIR/$SCRIPT_NAME"
23
 
 
16
  NTASKS_PER_NODE=1
17
  CPUS_PER_TASK=4
18
  ### request more memory to run on more models
19
+ MEM="64G"
20
+ TIME="06:00:00"
21
  GRADIO_PORT=7861
22
  script_location="$APP_DIR/$SCRIPT_NAME"
23