Commit 9452d24 (verified) · 1 parent: a1642cd
bluenevus committed

Update app.py

Files changed (1): app.py (+262, -61)
app.py CHANGED
@@ -1,22 +1,159 @@
-from nemo.collections.asr.models import ASRModel
-import torch
 import gradio as gr
-import spaces
-import gc
-import shutil
-from pathlib import Path
-from pydub import AudioSegment
 import numpy as np
-import os
 import gradio.themes as gr_themes
 import csv
-import datetime
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from docx import Document
 import re
 import threading
 
-# ========== Qwen2.5-1.5B-Instruct Setup ==========
 QWEN_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
 qwen_tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL)
 qwen_model = AutoModelForCausalLM.from_pretrained(
@@ -34,13 +171,11 @@ qwen_pipe = pipeline(
     temperature=0.3,
 )
 
-# ========== ASR Setup ==========
 device = "cuda" if torch.cuda.is_available() else "cpu"
-MODEL_NAME="nvidia/parakeet-tdt-0.6b-v2"
 model = ASRModel.from_pretrained(model_name=MODEL_NAME)
 model.eval()
-
-# ========== Thread Lock for Model State ==========
 model_lock = threading.Lock()
 
 def start_session(request: gr.Request):
@@ -58,6 +193,7 @@ def end_session(request: gr.Request):
     print(f"Session with hash {session_hash} ended.")
 
 def get_audio_segment(audio_path, start_second, end_second):
     if not audio_path or not Path(audio_path).exists():
         print(f"Warning: Audio path '{audio_path}' not found or invalid for clipping.")
         return None
@@ -89,6 +225,7 @@ def get_audio_segment(audio_path, start_second, end_second):
         return None
 
 def format_srt_time(seconds: float) -> str:
     sanitized_total_seconds = max(0.0, seconds)
     delta = datetime.timedelta(seconds=sanitized_total_seconds)
     total_int_seconds = int(delta.total_seconds())
@@ -111,8 +248,9 @@ def generate_srt_content(segment_timestamps: list) -> str:
         srt_content.append("")
     return "\n".join(srt_content)
 
-@spaces.GPU
 def get_transcripts_and_raw_times(audio_path, session_dir):
     if not audio_path:
         gr.Error("No audio file path provided for transcription.", duration=None)
         return [], [], None, gr.DownloadButton(label="Download Transcript (CSV)", visible=False), gr.DownloadButton(label="Download Transcript (SRT)", visible=False)
@@ -262,30 +400,6 @@ def get_transcripts_and_raw_times(audio_path, session_dir):
     except Exception as e:
         print(f"Error removing temporary audio file {processed_audio_path}: {e}")
 
-def play_segment(evt: gr.SelectData, raw_ts_list, current_audio_path):
-    if not isinstance(raw_ts_list, list):
-        print(f"Warning: raw_ts_list is not a list ({type(raw_ts_list)}). Cannot play segment.")
-        return gr.Audio(value=None, label="Selected Segment")
-    if not current_audio_path:
-        print("No audio path available to play segment from.")
-        return gr.Audio(value=None, label="Selected Segment")
-    selected_index = evt.index[0]
-    if selected_index < 0 or selected_index >= len(raw_ts_list):
-        print(f"Invalid index {selected_index} selected for list of length {len(raw_ts_list)}.")
-        return gr.Audio(value=None, label="Selected Segment")
-    if not isinstance(raw_ts_list[selected_index], (list, tuple)) or len(raw_ts_list[selected_index]) != 2:
-        print(f"Warning: Data at index {selected_index} is not in the expected format [start, end].")
-        return gr.Audio(value=None, label="Selected Segment")
-    start_time_s, end_time_s = raw_ts_list[selected_index]
-    print(f"Attempting to play segment: {current_audio_path} from {start_time_s:.2f}s to {end_time_s:.2f}s")
-    segment_data = get_audio_segment(current_audio_path, start_time_s, end_time_s)
-    if segment_data:
-        print("Segment data retrieved successfully.")
-        return gr.Audio(value=segment_data, autoplay=True, label=f"Segment: {start_time_s:.2f}s - {end_time_s:.2f}s", interactive=False)
-    else:
-        print("Failed to get audio segment data.")
-        return gr.Audio(value=None, label="Selected Segment")
-
 def strip_markdown(text):
     text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text)
     text = re.sub(r'(\*|_)(.*?)\1', r'\2', text)
@@ -318,7 +432,6 @@ def generate_meeting_minutes(session_dir):
         f"{transcript}\n"
         "Structured Meeting Minutes:"
     )
-
     print("Sending prompt to Qwen2.5-1.5B-Instruct...")
     out = qwen_pipe(prompt)
     minutes = out[0]["generated_text"][len(prompt):].strip()
@@ -334,19 +447,12 @@ def generate_meeting_minutes(session_dir):
         print("Error in generate_meeting_minutes:", e)
         return f"Error generating minutes: {e}", None, gr.update(visible=True)
 
 nvidia_theme = gr_themes.Default(
     primary_hue=gr_themes.Color(
-        c50="#E6F1D9",
-        c100="#CEE3B3",
-        c200="#B5D58C",
-        c300="#9CC766",
-        c400="#84B940",
-        c500="#76B900",
-        c600="#68A600",
-        c700="#5A9200",
-        c800="#4C7E00",
-        c900="#3E6A00",
-        c950="#2F5600"
     ),
     neutral_hue="gray",
     font=[gr_themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
@@ -358,11 +464,77 @@ with gr.Blocks(theme=nvidia_theme) as demo:
     session_dir = gr.State()
     demo.load(start_session, outputs=[session_dir])
 
-    mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record Audio")
-    mic_transcribe_btn = gr.Button("Transcribe Microphone Input", variant="primary")
-    file_input = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")
-    file_transcribe_btn = gr.Button("Transcribe Uploaded File", variant="primary")
 
     gr.Markdown("---")
     gr.Markdown("<p><strong style='color: #FF0000; font-size: 1.2em;'>Transcription Results (Click row to play segment)</strong></p>")
@@ -384,28 +556,57 @@ with gr.Blocks(theme=nvidia_theme) as demo:
 
     selected_segment_player = gr.Audio(label="Selected Segment", interactive=False)
 
     mic_transcribe_btn.click(
-        fn=get_transcripts_and_raw_times,
         inputs=[mic_input, session_dir],
         outputs=[vis_timestamps_df, raw_timestamps_list_state, current_audio_path_state, download_btn_csv, download_btn_srt],
         api_name="transcribe_mic"
     )
 
     file_transcribe_btn.click(
-        fn=get_transcripts_and_raw_times,
         inputs=[file_input, session_dir],
         outputs=[vis_timestamps_df, raw_timestamps_list_state, current_audio_path_state, download_btn_csv, download_btn_srt],
         api_name="transcribe_file"
     )
 
     gen_minutes_btn.click(
-        fn=generate_meeting_minutes,
         inputs=[session_dir],
         outputs=[minutes_output, minutes_download, minutes_download],
     )
 
     vis_timestamps_df.select(
-        fn=play_segment,
         inputs=[raw_timestamps_list_state, current_audio_path_state],
         outputs=[selected_segment_player],
     )
+import os
+import time
 import gradio as gr
 import numpy as np
+import librosa
+import soundfile as sf
+from twilio.rest import Client
+from twilio.twiml.voice_response import VoiceResponse, Dial
+import requests
+from datetime import datetime
+import tempfile
+from nemo.collections.asr.models import ASRModel
+import torch
 import gradio.themes as gr_themes
 import csv
+from pathlib import Path
+import shutil
+import gc
 import re
 import threading
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from docx import Document
+
+# Optional: Diarization
+try:
+    from pyannote.audio import Pipeline
+    HAVE_PYANNOTE = True
+except ImportError:
+    HAVE_PYANNOTE = False
+
+# ========== Twilio Functions ==========
+
+def get_twilio_credentials():
+    account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
+    auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
+    twilio_number = os.environ.get("TWILIO_PHONE_NUMBER")
+    return account_sid, auth_token, twilio_number
+
+def make_conference_call(phone_number, conference_code, wait_time=30):
+    try:
+        account_sid, auth_token, twilio_number = get_twilio_credentials()
+        if not all([account_sid, auth_token, twilio_number]):
+            return None, "Twilio credentials not found. Please set environment variables."
+        client = Client(account_sid, auth_token)
+        response = VoiceResponse()
+        response.say("Joining conference call. This call will be recorded for diarization.")
+        response.pause(length=2)
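+        # Dial the optional conference access code as DTMF tones via TwiML
+        # <Play digits="...">, one digit at a time with a short pause between
+        # digits. (The wait_time parameter is currently unused.)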
+        if conference_code:
+            for digit in conference_code:
+                if digit.isdigit() or digit in ['*', '#']:
+                    response.play(digits=digit)
+                    response.pause(length=1)
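+        # Record the leg and dial into a shared conference room; recording status
+        # events are posted to a /recording-status callback URL, which this app is
+        # assumed to expose elsewhere.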
+        response.record(timeout=0, transcribe=False, recording_status_callback="/recording-status")
+        dial = Dial()
+        dial.conference('ConferenceRoom', record='record-from-start', recording_status_callback="/recording-status")
+        response.append(dial)
+        call = client.calls.create(
+            to=phone_number,
+            from_=twilio_number,
+            twiml=str(response),
+            record=True
+        )
+        return call.sid, f"Call initiated with SID: {call.sid}. Wait for the call to complete before retrieving the recording."
+    except Exception as e:
+        return None, f"Error initiating call: {str(e)}"
+
+def check_call_status(call_sid):
+    try:
+        account_sid, auth_token, _ = get_twilio_credentials()
+        if not all([account_sid, auth_token]):
+            return None, "Twilio credentials not found. Please set environment variables."
+        client = Client(account_sid, auth_token)
+        call = client.calls(call_sid).fetch()
+        if call.status in ['in-progress', 'queued', 'ringing']:
+            return None, f"Call is still {call.status}. Please check again later."
+        recordings = client.recordings.list(call_sid=call_sid)
+        if not recordings:
+            return None, "No recordings found for this call yet. Please check again later."
+        recording = recordings[0]
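+        # Download the first listed recording as a WAV file from Twilio's media
+        # URL, authenticating with the account SID and auth token.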
+        recording_url = f"https://api.twilio.com/2010-04-01/Accounts/{account_sid}/Recordings/{recording.sid}.wav"
+        response = requests.get(recording_url, auth=(account_sid, auth_token))
+        if response.status_code != 200:
+            return None, f"Failed to download recording: {response.status_code}"
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+        temp_file.write(response.content)
+        temp_file.close()
+        return temp_file.name, f"Recording downloaded successfully: {temp_file.name}"
+    except Exception as e:
+        return None, f"Error checking call status: {str(e)}"
+
+def upsample_audio(audio_file):
+    try:
+        y, sr = librosa.load(audio_file, sr=None)
+        if sr == 16000:
+            return audio_file, f"Audio is already at 16kHz: {audio_file}"
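+        # Apply pre-emphasis to boost high frequencies, then resample to the
+        # 16 kHz rate expected downstream (Twilio call recordings are typically
+        # 8 kHz, so this is usually an upsample).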
+        y_enhanced = librosa.effects.preemphasis(y, coef=0.97)
+        y_resampled = librosa.resample(y_enhanced, orig_sr=sr, target_sr=16000)
+        output_file = f"upsampled_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
+        sf.write(output_file, y_resampled, 16000)
+        return output_file, f"Audio upsampled successfully: {output_file}"
+    except Exception as e:
+        return None, f"Error upsampling audio: {str(e)}"
+
+def prepare_for_diarization(recording_file):
+    try:
+        if not recording_file:
+            return None, "No recording file provided."
+        upsampled_file, message = upsample_audio(recording_file)
+        if not upsampled_file:
+            return None, message
+        y, sr = librosa.load(upsampled_file, sr=16000)
+        S = librosa.stft(y)
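+        # Denoise in the STFT domain: nearest-neighbor filtering plus a soft mask
+        # (librosa's vocal-separation recipe) to suppress steady background noise
+        # before diarization.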
+        S_filtered = librosa.decompose.nn_filter(
+            np.abs(S),
+            aggregate=np.median,
+            metric='cosine',
+            width=11
+        )
+        mask = librosa.util.softmask(
+            S_filtered,
+            np.abs(S) - S_filtered,
+            power=2
+        )
+        S_enhanced = S * mask
+        y_enhanced = librosa.istft(S_enhanced)
+        output_file = f"diarization_ready_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
+        sf.write(output_file, y_enhanced, 16000)
+        return output_file, f"Audio processed and ready for diarization: {output_file}"
+    except Exception as e:
+        return None, f"Error preparing audio for diarization: {str(e)}"
+
+# ========== Diarization Function (pyannote) ==========
+
+def diarize_audio(audio_file):
+    if not HAVE_PYANNOTE:
+        return None, "pyannote.audio not installed."
+    try:
+        hf_token = os.environ.get("HUGGINGFACE_TOKEN", None)
+        if not hf_token:
+            return None, "Hugging Face access token not set in environment as HUGGINGFACE_TOKEN."
+        pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=hf_token)
+        diarization = pipeline(audio_file)
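+        # Persist the result in RTTM format (one SPEAKER line per turn), the
+        # standard interchange format for diarization output.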
+        diarization_file = f"diarization_{datetime.now().strftime('%Y%m%d_%H%M%S')}.rttm"
+        with open(diarization_file, "w") as f:
+            diarization.write_rttm(f)
+        # Return the RTTM file path plus a simple per-turn text report
+        report_lines = []
+        for turn, _, speaker in diarization.itertracks(yield_label=True):
+            report_lines.append(f"{turn.start:.1f}-{turn.end:.1f}s: {speaker}")
+        return diarization_file, "\n".join(report_lines)
+    except Exception as e:
+        return None, f"Error in diarization: {str(e)}"
 
+# ========== ASR and Meeting Minutes Setup ==========
+
+# Qwen2.5-1.5B-Instruct Setup
 QWEN_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
 qwen_tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL)
 qwen_model = AutoModelForCausalLM.from_pretrained(
 
     temperature=0.3,
 )
 
+# ASR Setup
 device = "cuda" if torch.cuda.is_available() else "cpu"
+MODEL_NAME = "nvidia/parakeet-tdt-0.6b-v2"
 model = ASRModel.from_pretrained(model_name=MODEL_NAME)
 model.eval()
 model_lock = threading.Lock()
 
 def start_session(request: gr.Request):

     print(f"Session with hash {session_hash} ended.")
 
 def get_audio_segment(audio_path, start_second, end_second):
+    from pydub import AudioSegment
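+    # (imported locally now that the module-level pydub import was removed)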
     if not audio_path or not Path(audio_path).exists():
         print(f"Warning: Audio path '{audio_path}' not found or invalid for clipping.")
         return None

         return None
 
 def format_srt_time(seconds: float) -> str:
+    import datetime
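+    # (local import: the top-level "from datetime import datetime" would
+    # otherwise shadow the datetime module used for timedelta below)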
     sanitized_total_seconds = max(0.0, seconds)
     delta = datetime.timedelta(seconds=sanitized_total_seconds)
     total_int_seconds = int(delta.total_seconds())

     srt_content.append("")
     return "\n".join(srt_content)
 
 
 
 def get_transcripts_and_raw_times(audio_path, session_dir):
+    from pydub import AudioSegment
+    import gradio as gr
     if not audio_path:
         gr.Error("No audio file path provided for transcription.", duration=None)
         return [], [], None, gr.DownloadButton(label="Download Transcript (CSV)", visible=False), gr.DownloadButton(label="Download Transcript (SRT)", visible=False)

     except Exception as e:
         print(f"Error removing temporary audio file {processed_audio_path}: {e}")
 
 def strip_markdown(text):
     text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text)
     text = re.sub(r'(\*|_)(.*?)\1', r'\2', text)

         f"{transcript}\n"
         "Structured Meeting Minutes:"
     )
     print("Sending prompt to Qwen2.5-1.5B-Instruct...")
     out = qwen_pipe(prompt)
     minutes = out[0]["generated_text"][len(prompt):].strip()

     print("Error in generate_meeting_minutes:", e)
     return f"Error generating minutes: {e}", None, gr.update(visible=True)
 
+# ========== Gradio UI ==========
+
 nvidia_theme = gr_themes.Default(
     primary_hue=gr_themes.Color(
+        c50="#E6F1D9", c100="#CEE3B3", c200="#B5D58C", c300="#9CC766", c400="#84B940",
+        c500="#76B900", c600="#68A600", c700="#5A9200", c800="#4C7E00", c900="#3E6A00", c950="#2F5600"
     ),
     neutral_hue="gray",
     font=[gr_themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
 
     session_dir = gr.State()
     demo.load(start_session, outputs=[session_dir])
 
+    # ====== Twilio Tab ======
+    with gr.Tab("Twilio Call & Recording"):
+        gr.Markdown("### 1. Make Twilio Call and Record")
+        phone_number = gr.Textbox(label="Phone Number (E.164)", placeholder="+15551234567")
+        conference_code = gr.Textbox(label="Conference Code (optional)", placeholder="123456#")
+        call_btn = gr.Button("Make Call")
+        call_sid = gr.Textbox(label="Call SID", interactive=False)
+        call_status = gr.Textbox(label="Call Status", interactive=False)
+        call_btn.click(
+            make_conference_call,
+            inputs=[phone_number, conference_code],
+            outputs=[call_sid, call_status]
+        )
+
+        gr.Markdown("### 2. Retrieve Recording")
+        sid_input = gr.Textbox(label="Call SID")
+        get_recording_btn = gr.Button("Get Recording")
+        recording_path = gr.Textbox(label="Recording File Path", interactive=False)
+        recording_status = gr.Textbox(label="Recording Status", interactive=False)
+        get_recording_btn.click(
+            check_call_status,
+            inputs=[sid_input],
+            outputs=[recording_path, recording_status]
+        )
+
+        gr.Markdown("### 3. Process for Diarization")
+        process_btn = gr.Button("Process Recording")
+        process_status = gr.Textbox(label="Processing Status", interactive=False)
+        processed_file = gr.Textbox(label="Processed Audio File", interactive=False)
+
+        def process_and_return_file(recording_path):
+            file_path, message = prepare_for_diarization(recording_path)
+            if file_path:
+                return message, file_path
+            else:
+                return message, None
+
+        process_btn.click(
+            process_and_return_file,
+            inputs=[recording_path],
+            outputs=[process_status, processed_file]
+        )
+
+        gr.Markdown("### 4. (Optional) Diarize Processed Audio")
+        diarize_btn = gr.Button("Diarize Processed Audio")
+        diarization_status = gr.Textbox(label="Diarization Status/Report", interactive=False)
+        diarization_file = gr.File(label="RTTM Diarization File")
+
+        def diarize_and_return_file(processed_file):
+            diar_file, report = diarize_audio(processed_file)
+            return report, diar_file
+
+        diarize_btn.click(
+            diarize_and_return_file,
+            inputs=[processed_file],
+            outputs=[diarization_status, diarization_file]
+        )
+
+        gr.Markdown("### 5. Transcribe and Analyze Processed Audio")
+        transcribe_btn = gr.Button("Transcribe Processed Recording")
+        vis_timestamps_df = gr.DataFrame(
+            headers=["Start (s)", "End (s)", "Segment"],
+            datatype=["number", "number", "str"],
+            wrap=True,
+            label="Transcription Segments"
+        )
+        download_btn_csv = gr.DownloadButton(label="Download Transcript (CSV)", visible=False)
+        download_btn_srt = gr.DownloadButton(label="Download Transcript (SRT)", visible=False)
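+        # NOTE: raw_timestamps_list_state and current_audio_path_state are defined
+        # further down, in the pre-existing UI section; this wiring assumes they
+        # exist by the time the Blocks graph is built.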
+        transcribe_btn.click(
+            get_transcripts_and_raw_times,
+            inputs=[processed_file, session_dir],
+            outputs=[vis_timestamps_df, raw_timestamps_list_state, current_audio_path_state, download_btn_csv, download_btn_srt],
+        )
+
+    # ====== Existing Transcription UI ======
     gr.Markdown("---")
     gr.Markdown("<p><strong style='color: #FF0000; font-size: 1.2em;'>Transcription Results (Click row to play segment)</strong></p>")

     selected_segment_player = gr.Audio(label="Selected Segment", interactive=False)
 
+    mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record Audio")
+    mic_transcribe_btn = gr.Button("Transcribe Microphone Input", variant="primary")
+    file_input = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")
+    file_transcribe_btn = gr.Button("Transcribe Uploaded File", variant="primary")
+
     mic_transcribe_btn.click(
+        get_transcripts_and_raw_times,
         inputs=[mic_input, session_dir],
         outputs=[vis_timestamps_df, raw_timestamps_list_state, current_audio_path_state, download_btn_csv, download_btn_srt],
         api_name="transcribe_mic"
     )
 
     file_transcribe_btn.click(
+        get_transcripts_and_raw_times,
         inputs=[file_input, session_dir],
         outputs=[vis_timestamps_df, raw_timestamps_list_state, current_audio_path_state, download_btn_csv, download_btn_srt],
         api_name="transcribe_file"
     )
 
     gen_minutes_btn.click(
+        generate_meeting_minutes,
         inputs=[session_dir],
         outputs=[minutes_output, minutes_download, minutes_download],
     )
 
+    def play_segment(evt: gr.SelectData, raw_ts_list, current_audio_path):
+        if not isinstance(raw_ts_list, list):
+            print(f"Warning: raw_ts_list is not a list ({type(raw_ts_list)}). Cannot play segment.")
+            return gr.Audio(value=None, label="Selected Segment")
+        if not current_audio_path:
+            print("No audio path available to play segment from.")
+            return gr.Audio(value=None, label="Selected Segment")
+        selected_index = evt.index[0]
+        if selected_index < 0 or selected_index >= len(raw_ts_list):
+            print(f"Invalid index {selected_index} selected for list of length {len(raw_ts_list)}.")
+            return gr.Audio(value=None, label="Selected Segment")
+        if not isinstance(raw_ts_list[selected_index], (list, tuple)) or len(raw_ts_list[selected_index]) != 2:
+            print(f"Warning: Data at index {selected_index} is not in the expected format [start, end].")
+            return gr.Audio(value=None, label="Selected Segment")
+        start_time_s, end_time_s = raw_ts_list[selected_index]
+        print(f"Attempting to play segment: {current_audio_path} from {start_time_s:.2f}s to {end_time_s:.2f}s")
+        segment_data = get_audio_segment(current_audio_path, start_time_s, end_time_s)
+        if segment_data:
+            print("Segment data retrieved successfully.")
+            return gr.Audio(value=segment_data, autoplay=True, label=f"Segment: {start_time_s:.2f}s - {end_time_s:.2f}s", interactive=False)
+        else:
+            print("Failed to get audio segment data.")
+            return gr.Audio(value=None, label="Selected Segment")
+
     vis_timestamps_df.select(
+        play_segment,
         inputs=[raw_timestamps_list_state, current_audio_path_state],
         outputs=[selected_segment_player],
     )