Clémentine committed on
Commit
9252209
·
1 Parent(s): 2a8dc61

cleaner ux

Browse files
Files changed (3) hide show
  1. app.py +38 -19
  2. utils/io.py +34 -3
  3. utils/jobs.py +22 -7
app.py CHANGED
@@ -3,7 +3,7 @@ import time
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
  import threading
5
  import globals
6
- from utils.io import initialize_models_providers_file, save_results, load_results, load_models_providers, get_results_table, load_models_providers_str
7
  from utils.jobs import run_single_job, launch_jobs, update_job_statuses, relaunch_failed_jobs
8
  from typing import List, Optional
9
 
@@ -28,23 +28,25 @@ def create_app() -> gr.Blocks:
28
  gr.Markdown("# Inference Provider Testing Dashboard")
29
  gr.Markdown("Launch and monitor evaluation jobs for multiple models and providers.")
30
 
31
- # All action buttons in one row
32
  with gr.Row():
33
- init_btn = gr.Button("Fetch and Initialize Models/Providers", variant="secondary")
34
- launch_btn = gr.Button("Launch All Jobs", variant="primary")
35
- relaunch_failed_btn = gr.Button("Relaunch Failed", variant="stop")
36
- refresh_btn = gr.Button("Refresh Results", variant="secondary")
37
 
38
  output = gr.Textbox(label="Status", interactive=False)
39
 
40
- # Accordion for viewing models/providers list
41
  with gr.Accordion("Models/Providers Configuration", open=False):
 
42
  models_providers_display = gr.Code(
43
  label="Current Models and Providers",
44
  value=load_models_providers_str(),
45
  interactive=False,
46
  )
47
 
 
 
 
48
  with gr.Row():
49
  with gr.Column():
50
  gr.Markdown("## Job Results")
@@ -67,19 +69,24 @@ def create_app() -> gr.Blocks:
67
  outputs=[output, models_providers_display]
68
  )
69
 
 
 
 
 
 
 
 
 
 
 
70
  launch_btn.click(
71
- fn=launch_jobs,
72
- outputs=output
73
  )
74
 
75
  relaunch_failed_btn.click(
76
- fn=relaunch_failed_jobs,
77
- outputs=output
78
- )
79
-
80
- refresh_btn.click(
81
- fn=get_results_table,
82
- outputs=results_table
83
  )
84
 
85
  # Handle dataframe cell selection for relaunch
@@ -101,13 +108,25 @@ def create_app() -> gr.Blocks:
101
  # Save after individual relaunch
102
  save_results()
103
 
104
- # Then update the table
105
- return get_results_table()
106
 
107
  results_table.select(
108
  fn=handle_table_select,
109
  inputs=[],
110
- outputs=results_table
 
 
 
 
 
 
 
 
 
 
 
 
111
  )
112
  with gr.Tab("About"):
113
  gr.Markdown("""
 
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
  import threading
5
  import globals
6
+ from utils.io import initialize_models_providers_file, save_results, load_results, load_models_providers, get_results_table, load_models_providers_str, get_summary_stats
7
  from utils.jobs import run_single_job, launch_jobs, update_job_statuses, relaunch_failed_jobs
8
  from typing import List, Optional
9
 
 
28
  gr.Markdown("# Inference Provider Testing Dashboard")
29
  gr.Markdown("Launch and monitor evaluation jobs for multiple models and providers.")
30
 
31
+ # Simplified action buttons - only essential ones
32
  with gr.Row():
33
+ launch_btn = gr.Button("Launch All Jobs", variant="primary", scale=2)
34
+ relaunch_failed_btn = gr.Button("Relaunch Failed", variant="stop", scale=1)
 
 
35
 
36
  output = gr.Textbox(label="Status", interactive=False)
37
 
38
+ # Accordion for viewing/editing models/providers list and initialization
39
  with gr.Accordion("Models/Providers Configuration", open=False):
40
+ init_btn = gr.Button("Fetch and Initialize Models/Providers", variant="secondary")
41
  models_providers_display = gr.Code(
42
  label="Current Models and Providers",
43
  value=load_models_providers_str(),
44
  interactive=False,
45
  )
46
 
47
+ # Summary statistics
48
+ summary_stats = gr.Markdown(value=get_summary_stats())
49
+
50
  with gr.Row():
51
  with gr.Column():
52
  gr.Markdown("## Job Results")
 
69
  outputs=[output, models_providers_display]
70
  )
71
 
72
+ def launch_and_update():
73
+ """Launch jobs and return updated table and stats."""
74
+ result = launch_jobs()
75
+ return result, get_results_table(), get_summary_stats()
76
+
77
+ def relaunch_and_update():
78
+ """Relaunch failed jobs and return updated table and stats."""
79
+ result = relaunch_failed_jobs()
80
+ return result, get_results_table(), get_summary_stats()
81
+
82
  launch_btn.click(
83
+ fn=launch_and_update,
84
+ outputs=[output, results_table, summary_stats]
85
  )
86
 
87
  relaunch_failed_btn.click(
88
+ fn=relaunch_and_update,
89
+ outputs=[output, results_table, summary_stats]
 
 
 
 
 
90
  )
91
 
92
  # Handle dataframe cell selection for relaunch
 
108
  # Save after individual relaunch
109
  save_results()
110
 
111
+ # Then update the table and stats
112
+ return get_results_table(), get_summary_stats()
113
 
114
  results_table.select(
115
  fn=handle_table_select,
116
  inputs=[],
117
+ outputs=[results_table, summary_stats]
118
+ )
119
+
120
+ # Auto-refresh table and stats every 30 seconds
121
+ def auto_refresh():
122
+ """Auto-refresh table and summary stats."""
123
+ return get_results_table(), get_summary_stats()
124
+
125
+ demo.load(
126
+ fn=auto_refresh,
127
+ inputs=[],
128
+ outputs=[results_table, summary_stats],
129
+ every=30
130
  )
131
  with gr.Tab("About"):
132
  gr.Markdown("""
utils/io.py CHANGED
@@ -117,7 +117,10 @@ def load_results() -> None:
117
  "status": row["status"],
118
  "current_score": row["current_score"],
119
  "previous_score": row["previous_score"],
120
- "job_id": row["job_id"]
 
 
 
121
  }
122
 
123
  print(f"Loaded {len(globals.job_results)} results from dataset")
@@ -136,10 +139,23 @@ def style_status(val):
136
  return 'background-color: blue'
137
  return ''
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  def get_results_table():
140
  """Return job results as a styled pandas DataFrame for Gradio DataFrame."""
141
  if not globals.job_results:
142
- return pd.DataFrame(columns=["Model", "Provider", "Last Run", "Status", "Current Score", "Previous Score", "Latest Job Id"])
143
 
144
  table_data = []
145
  for key, info in globals.job_results.items():
@@ -151,6 +167,19 @@ def get_results_table():
151
  if previous_score is not None and isinstance(previous_score, (int, float)):
152
  previous_score = f"{previous_score:.4f}"
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  job_id = info.get("job_id", "N/A")
155
  # Create a clickable link for the job ID
156
  if job_id != "N/A":
@@ -171,11 +200,13 @@ def get_results_table():
171
  info["status"],
172
  current_score,
173
  previous_score,
 
 
174
  job_link,
175
  relaunch_link
176
  ])
177
 
178
- df = pd.DataFrame(table_data, columns=["Model", "Provider", "Last Run", "Status", "Current Score", "Previous Score", "Job Id and Logs", "Actions"])
179
 
180
  # Apply styling to the Status column
181
  styled_df = df.style.map(style_status, subset=['Status'])
 
117
  "status": row["status"],
118
  "current_score": row["current_score"],
119
  "previous_score": row["previous_score"],
120
+ "job_id": row["job_id"],
121
+ "start_time": row.get("start_time"),
122
+ "duration": row.get("duration"),
123
+ "completed_at": row.get("completed_at")
124
  }
125
 
126
  print(f"Loaded {len(globals.job_results)} results from dataset")
 
139
  return 'background-color: blue'
140
  return ''
141
 
142
+ def get_summary_stats():
143
+ """Get summary statistics of job results."""
144
+ if not globals.job_results:
145
+ return "📊 **Status:** No jobs yet"
146
+
147
+ total = len(globals.job_results)
148
+ running = sum(1 for info in globals.job_results.values() if info.get("status") == "RUNNING")
149
+ completed = sum(1 for info in globals.job_results.values() if info.get("status") == "COMPLETED")
150
+ failed = sum(1 for info in globals.job_results.values() if info.get("status") in ["ERROR", "FAILED"])
151
+
152
+ return f"📊 **Total:** {total} | 🔵 **Running:** {running} | ✅ **Completed:** {completed} | ❌ **Failed:** {failed}"
153
+
154
+
155
  def get_results_table():
156
  """Return job results as a styled pandas DataFrame for Gradio DataFrame."""
157
  if not globals.job_results:
158
+ return pd.DataFrame(columns=["Model", "Provider", "Last Run", "Status", "Current Score", "Previous Score", "Duration", "Completed At", "Latest Job Id"])
159
 
160
  table_data = []
161
  for key, info in globals.job_results.items():
 
167
  if previous_score is not None and isinstance(previous_score, (int, float)):
168
  previous_score = f"{previous_score:.4f}"
169
 
170
+ # Format duration
171
+ duration = info.get("duration")
172
+ if duration is not None and isinstance(duration, (int, float)):
173
+ # Convert seconds to minutes and seconds
174
+ minutes = int(duration // 60)
175
+ seconds = int(duration % 60)
176
+ duration_str = f"{minutes}m {seconds}s"
177
+ else:
178
+ duration_str = "N/A"
179
+
180
+ # Get completion time
181
+ completed_at = info.get("completed_at", "N/A")
182
+
183
  job_id = info.get("job_id", "N/A")
184
  # Create a clickable link for the job ID
185
  if job_id != "N/A":
 
200
  info["status"],
201
  current_score,
202
  previous_score,
203
+ duration_str,
204
+ completed_at,
205
  job_link,
206
  relaunch_link
207
  ])
208
 
209
+ df = pd.DataFrame(table_data, columns=["Model", "Provider", "Last Run", "Status", "Current Score", "Previous Score", "Duration", "Completed At", "Job Id and Logs", "Actions"])
210
 
211
  # Apply styling to the Status column
212
  styled_df = df.style.map(style_status, subset=['Status'])
utils/jobs.py CHANGED
@@ -98,17 +98,21 @@ def run_single_job(model: str, provider: str, tasks: str = globals.TASKS) -> Opt
98
  if key in globals.job_results and globals.job_results[key].get("current_score", None) is not None:
99
  previous_score = globals.job_results[key]["current_score"]
100
 
 
101
  globals.job_results[key] = {
102
  "model": model,
103
  "provider": provider,
104
- "last_run": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
105
  "status": "RUNNING",
106
  "current_score": None,
107
  "previous_score": previous_score,
108
- "job_id": job_id
 
 
 
109
  }
110
 
111
- save_results()
112
  print(f"Job launched: ID={job_id}, model={model}, provider={provider}")
113
  return job_id
114
 
@@ -128,9 +132,9 @@ def launch_jobs(tasks: str = globals.TASKS, config_file: str = globals.LOCAL_CON
128
  job_id = run_single_job(model, provider, tasks)
129
  if job_id != -1:
130
  launched_count += 1
131
- # Small delay between launches to avoid rate limiting
132
- time.sleep(2)
133
 
 
 
134
  print(f"Launched {launched_count}/{len(models_providers)} jobs successfully")
135
  return f"Launched {launched_count} jobs"
136
 
@@ -152,8 +156,9 @@ def relaunch_failed_jobs():
152
  job_id = run_single_job(model, provider, globals.TASKS)
153
  if job_id != -1:
154
  relaunched_count += 1
155
- time.sleep(2) # Small delay between launches to avoid rate limiting
156
 
 
 
157
  return f"Relaunched {relaunched_count}/{len(failed_jobs)} failed jobs"
158
 
159
 
@@ -177,8 +182,18 @@ def update_job_statuses() -> None:
177
  globals.job_results[key]["status"] = new_status
178
  print(f"Job {job_id} status changed: {old_status} -> {new_status}")
179
 
180
- # If job completed, try to extract score
181
  if new_status == "COMPLETED":
 
 
 
 
 
 
 
 
 
 
182
  score = extract_score_from_job(job_id)
183
  if score is not None:
184
  globals.job_results[key]["current_score"] = score
 
98
  if key in globals.job_results and globals.job_results[key].get("current_score", None) is not None:
99
  previous_score = globals.job_results[key]["current_score"]
100
 
101
+ start_time = datetime.now()
102
  globals.job_results[key] = {
103
  "model": model,
104
  "provider": provider,
105
+ "last_run": start_time.strftime("%Y-%m-%d %H:%M:%S"),
106
  "status": "RUNNING",
107
  "current_score": None,
108
  "previous_score": previous_score,
109
+ "job_id": job_id,
110
+ "start_time": start_time.isoformat(),
111
+ "duration": None,
112
+ "completed_at": None
113
  }
114
 
115
+ # Don't save immediately - let the periodic save handle it
116
  print(f"Job launched: ID={job_id}, model={model}, provider={provider}")
117
  return job_id
118
 
 
132
  job_id = run_single_job(model, provider, tasks)
133
  if job_id != -1:
134
  launched_count += 1
 
 
135
 
136
+ # Save all results once after launching all jobs
137
+ save_results()
138
  print(f"Launched {launched_count}/{len(models_providers)} jobs successfully")
139
  return f"Launched {launched_count} jobs"
140
 
 
156
  job_id = run_single_job(model, provider, globals.TASKS)
157
  if job_id != -1:
158
  relaunched_count += 1
 
159
 
160
+ # Save all results once after relaunching all failed jobs
161
+ save_results()
162
  return f"Relaunched {relaunched_count}/{len(failed_jobs)} failed jobs"
163
 
164
 
 
182
  globals.job_results[key]["status"] = new_status
183
  print(f"Job {job_id} status changed: {old_status} -> {new_status}")
184
 
185
+ # If job completed, try to extract score and calculate duration
186
  if new_status == "COMPLETED":
187
+ completed_time = datetime.now()
188
+ globals.job_results[key]["completed_at"] = completed_time.strftime("%Y-%m-%d %H:%M:%S")
189
+
190
+ # Calculate duration if we have start_time
191
+ start_time_str = globals.job_results[key].get("start_time")
192
+ if start_time_str:
193
+ start_time = datetime.fromisoformat(start_time_str)
194
+ duration_seconds = (completed_time - start_time).total_seconds()
195
+ globals.job_results[key]["duration"] = duration_seconds
196
+
197
  score = extract_score_from_job(job_id)
198
  if score is not None:
199
  globals.job_results[key]["current_score"] = score