Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import librosa | |
| import gradio as gr | |
| from copy import deepcopy | |
| # --- Helper Functions --- | |
def seconds_to_cue_time(t):
    """Convert a time in seconds to the CUE sheet format (MM:SS:FF).

    A CUE frame is 1/75 of a second. The input is rounded to the nearest
    whole frame *before* being split into minutes/seconds/frames, so a
    value that was produced from an exact frame count (for example by
    ``parse_cue_time_to_seconds``) formats back to the same frame instead
    of losing one to floating-point truncation.

    Args:
        t: Time offset in seconds (int, float, or a NumPy scalar).
            Negative values are clamped to zero.

    Returns:
        The ``MM:SS:FF`` string. Each field is zero-padded to two digits;
        the minutes field grows beyond two digits for offsets of 100
        minutes or more.
    """
    # float() normalises NumPy scalars so round() yields a plain int-compatible value.
    total_frames = int(round(float(max(0, t)) * 75))
    total_seconds, frames = divmod(total_frames, 75)
    minutes, seconds = divmod(total_seconds, 60)
    return f'{minutes:02d}:{seconds:02d}:{frames:02d}'
def parse_cue_time_to_seconds(time_str):
    """Turn an ``MM:SS:FF`` timestamp into seconds.

    The pattern is matched at the start of the string; frames are counted
    as 1/75 of a second.

    Args:
        time_str: Timestamp text such as ``"01:23:45"``.

    Returns:
        The offset in seconds as a float, or ``None`` when ``time_str`` is
        empty/``None`` or does not start with a timestamp.
    """
    parsed = re.match(r'(\d+):(\d{1,2}):(\d{1,2})', time_str) if time_str else None
    if parsed is None:
        return None
    minutes, seconds, frames = (int(part) for part in parsed.groups())
    return minutes * 60 + seconds + frames / 75.0
def format_cue_text(track_data, cue_globals):
    """Render the CUE sheet text for a set of tracks.

    Args:
        track_data: List of dicts, each with a ``'time'`` (seconds) and an
            optional ``'title'``. Tracks are emitted in ascending time order
            and numbered from 1.
        cue_globals: Sheet-level values. ``"filename"`` is required;
            ``"performer"``, ``"title"`` and ``"filetype"`` are optional and
            default to ``"Unknown Artist"``, the filename without its
            extension, and ``"WAVE"`` respectively.

    Returns:
        The CUE sheet as a single string, or ``""`` when there are no tracks.
    """
    if not track_data:
        return ""

    ordered = sorted(track_data, key=lambda entry: entry['time'])

    filename = cue_globals["filename"]
    performer = cue_globals.get("performer", "Unknown Artist")
    sheet_title = cue_globals.get("title", os.path.splitext(filename)[0])
    file_type = cue_globals.get("filetype", "WAVE")

    parts = [
        f'PERFORMER "{performer}"\n',
        f'TITLE "{sheet_title}"\n',
        f'FILE "{filename}" {file_type}\n',
    ]
    for number, track in enumerate(ordered, start=1):
        # A missing or empty title falls back to a numbered placeholder.
        title = track.get('title') or f"Track {number:02d}"
        stamp = seconds_to_cue_time(track['time'])
        parts.append(f' TRACK {number:02d} AUDIO\n')
        parts.append(f' TITLE "{title}"\n')
        parts.append(f' INDEX 01 {stamp}\n')
    return "".join(parts)
def generate_track_choices(track_data, audio_duration):
    """Create the CheckboxGroup choices as ``(label, index)`` tuples.

    Each label shows the track title, its start time, and its length. A
    track's length runs to the next track's start, or to ``audio_duration``
    for the last track.

    Args:
        track_data: List of dicts with a ``'time'`` (seconds) and an optional
            ``'title'``. The list is sorted by time here, so the returned
            indices refer to positions in time order.
        audio_duration: End time in seconds used for the last track's length.

    Returns:
        A list of ``(label, index)`` tuples; empty when there are no tracks.
    """
    if not track_data:
        return []

    sorted_tracks = sorted(track_data, key=lambda x: x['time'])
    last_index = len(sorted_tracks) - 1

    track_choices = []
    for i, track in enumerate(sorted_tracks):
        start_time = track['time']
        end_time = sorted_tracks[i + 1]['time'] if i < last_index else audio_duration
        track_length = end_time - start_time
        # Tracks created by analysis/splitting carry title=None; use `or` so
        # the label shows the numbered placeholder rather than "None"
        # (same fallback rule as format_cue_text).
        title = track.get('title') or f"Track {i+1:02d}"
        label = f'"{title}" (Starts: {seconds_to_cue_time(start_time)}) [Length: {seconds_to_cue_time(track_length)}]'
        track_choices.append((label, i))
    return track_choices
| # --- Core Gradio Functions --- | |
def analyze_audio_to_cue(audio_file, top_db, min_segment_len, merge_threshold, merge_protection_len):
    """Workflow 1: derive track start points from an uploaded audio file.

    Args:
        audio_file: Path of the uploaded audio file.
        top_db: Threshold passed to ``librosa.effects.split``.
        min_segment_len: Intervals shorter than this many seconds are ignored.
        merge_threshold: Segments shorter than this many seconds are
            candidates for merging; also used to drop a trailing track that
            starts closer than this to the end of the audio.
        merge_protection_len: A segment is merged only if it is also no
            longer than this many seconds.

    Returns:
        A 6-tuple: CUE text, the globals dict, the track list, the audio
        duration in seconds, an update for the track checklist, and an
        update that makes the editing group visible.

    Raises:
        gr.Error: If no file was provided or the file could not be loaded.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    # --- 1. Load the audio (sr=None is passed straight to librosa.load) ---
    try:
        y, sr = librosa.load(audio_file, sr=None)
        audio_duration = librosa.get_duration(y=y, sr=sr)
    except Exception as e:
        raise gr.Error(f"Could not load audio file: {e}")

    # --- 2. Collect the start of every sufficiently long interval ---
    intervals = librosa.effects.split(y, top_db=top_db)
    times = []
    if intervals.size > 0:
        for begin, end in intervals:
            if (end - begin) / sr >= min_segment_len:
                times.append(begin / sr)

    # --- 3. Make sure the sheet starts at zero, then auto-merge ---
    if not times or times[0] > 0.5:
        times.insert(0, 0.0)

    if len(times) > 1:
        kept = [times[0]]
        for start, following in zip(times, times[1:]):
            segment_len = following - start
            mergeable = (segment_len < merge_threshold) and (segment_len <= merge_protection_len)
            # A mergeable segment is absorbed by not recording the boundary
            # that would end it.
            if not mergeable:
                kept.append(following)
        # Drop a final boundary that leaves too little audio after it.
        if len(kept) > 1 and (audio_duration - kept[-1]) < merge_threshold:
            kept.pop()
        times = kept

    # --- 4. Build the values that feed the UI ---
    times = sorted(set(times))
    track_data = [{'time': t, 'title': None} for t in times]
    cue_globals = {"filename": os.path.basename(audio_file)}

    initial_cue_text = format_cue_text(track_data, cue_globals)
    track_choices = generate_track_choices(track_data, audio_duration)

    checklist_update = gr.update(choices=track_choices, value=[])
    editing_group_update = gr.update(visible=True)
    return (
        initial_cue_text, cue_globals, track_data, audio_duration,
        checklist_update, editing_group_update
    )
def _cue_parse_failure(cue_text):
    """Return the 6-tuple that echoes the text back and resets/hides the editing UI."""
    return cue_text, {}, [], 0, gr.update(choices=[], value=[]), gr.update(visible=False)


def parse_cue_and_update_ui(cue_text):
    """Workflow 2: parse pasted CUE text, preserving track titles.

    Lines before the first ``TRACK nn AUDIO`` line are scanned for the
    sheet-level ``FILE``, ``PERFORMER`` and ``TITLE`` entries. Inside a
    track, ``TITLE`` sets the track title and ``INDEX 01`` sets its start
    time. Tracks without a usable ``INDEX 01`` time are dropped.

    Args:
        cue_text: The raw text pasted by the user.

    Returns:
        A 6-tuple: CUE text, the globals dict, the track list (sorted by
        time), the duration value, an update for the track checklist, and a
        visibility update for the editing group. On success the CUE text is
        re-generated from the parsed data and the duration is the start time
        of the last track (the real audio length is not known here). When
        the text cannot be used, the original text is returned together with
        empty state and a hidden editing group.
    """
    if not cue_text or "INDEX 01" not in cue_text:
        return _cue_parse_failure(cue_text)

    cue_globals = {}
    track_data = []
    current_track = None

    for raw_line in cue_text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if re.search(r'TRACK\s+\d+\s+AUDIO', line, re.IGNORECASE):
            # A new TRACK line closes the previous track, if any.
            if current_track is not None:
                track_data.append(current_track)
            current_track = {}
            continue

        if current_track is None:
            # Sheet-level header lines (before the first track).
            if match := re.search(r'FILE\s+"([^"]+)"\s+([A-Z0-9]+)', line, re.IGNORECASE):
                cue_globals['filename'] = match.group(1)
                cue_globals['filetype'] = match.group(2)
            elif match := re.search(r'PERFORMER\s+"([^"]+)"', line, re.IGNORECASE):
                cue_globals['performer'] = match.group(1)
            elif match := re.search(r'^TITLE\s+"([^"]+)"', line, re.IGNORECASE):
                cue_globals['title'] = match.group(1)
        else:
            if match := re.search(r'TITLE\s+"([^"]+)"', line, re.IGNORECASE):
                current_track['title'] = match.group(1)
            # Only INDEX 01 marks the track start; other index numbers must
            # not override it. The timestamp is captured as a whole
            # M+:SS:FF token so minute counts of three or more digits are
            # not cut short.
            elif match := re.search(r'INDEX\s+0*1\s+(\d+:\d{1,2}:\d{1,2})', line, re.IGNORECASE):
                time_sec = parse_cue_time_to_seconds(match.group(1))
                # Compare against None: 0.0 is a valid start time.
                if time_sec is not None:
                    current_track['time'] = time_sec

    if current_track:
        track_data.append(current_track)

    if not track_data or not cue_globals.get('filename'):
        return _cue_parse_failure(cue_text)

    # Drop tracks that never received a start time, then order by time.
    track_data = sorted((t for t in track_data if 'time' in t), key=lambda x: x['time'])
    if not track_data:
        return _cue_parse_failure(cue_text)

    audio_duration = track_data[-1]['time']
    track_choices = generate_track_choices(track_data, audio_duration)
    # Re-generate the CUE text so the output has consistent formatting.
    formatted_text = format_cue_text(track_data, cue_globals)
    return (
        formatted_text, cue_globals, track_data, audio_duration,
        gr.update(choices=track_choices, value=[]), gr.update(visible=True)
    )
def update_editing_tools(selected_indices, track_data, audio_duration):
    """Decide which editing tools to show for the current checklist selection.

    With exactly one track selected the single-track tools are shown, the
    start-time box is pre-filled, and (when the track is long enough) the
    split slider is bounded to the track with its midpoint preselected.
    With more than one track selected the merge tools are shown. Otherwise
    both tool groups are hidden.

    Args:
        selected_indices: Indices currently ticked in the checklist.
        track_data: Current list of track dicts (each with a ``'time'``).
        audio_duration: End time in seconds used for the last track.

    Returns:
        Updates for, in order: the merge tools, the single-track tools, the
        split slider, the split label, and the start-time edit box.
    """
    count = len(selected_indices)

    merge_update = gr.update(visible=count > 1)
    single_update = gr.update(visible=count == 1)
    slider_update = gr.update()
    slider_label_update = gr.update()
    edit_box_update = gr.update()

    if count != 1:
        return merge_update, single_update, slider_update, slider_label_update, edit_box_update

    track_idx = selected_indices[0]
    start_time = track_data[track_idx]['time']
    has_next = (track_idx + 1) < len(track_data)
    end_time = track_data[track_idx + 1]['time'] if has_next else audio_duration

    # Keep the slider away from the exact track edges. One CUE frame is
    # 1/75 s (~0.013 s); the padding is slightly larger than that.
    padding = 0.02
    lowest = start_time + padding
    highest = end_time - padding

    if lowest < highest:
        mid_point = start_time + (end_time - start_time) / 2
        slider_update = gr.update(minimum=lowest, maximum=highest, value=mid_point)
        slider_label_update = gr.update(value=f"Split at: {seconds_to_cue_time(mid_point)}")
    else:
        slider_label_update = gr.update(value="Track is too short to be split")

    edit_box_update = gr.update(value=seconds_to_cue_time(start_time))
    return merge_update, single_update, slider_update, slider_label_update, edit_box_update
def perform_manual_merge(indices_to_merge, original_track_data, audio_duration, cue_globals):
    """Merge the selected tracks into the track that starts each selected run.

    A track's start point survives when the track is unselected, when it is
    the very first track, or when the track immediately before it is
    unselected (i.e. it opens a run of selected tracks). Every other
    selected track is removed, which folds it into its predecessor.

    Args:
        indices_to_merge: Indices of the tracks ticked in the checklist.
        original_track_data: Current list of track dicts.
        audio_duration: End time in seconds used for the last track's length.
        cue_globals: Sheet-level values passed to the CUE formatter.

    Returns:
        A 3-tuple: the new CUE text, the new track list, and an update that
        refreshes the checklist and clears the selection.
    """
    selected = set(indices_to_merge)

    new_track_data = [
        track
        for position, track in enumerate(original_track_data)
        if position not in selected or position == 0 or (position - 1) not in selected
    ]

    final_cue_text = format_cue_text(new_track_data, cue_globals)
    checklist_update = gr.update(
        choices=generate_track_choices(new_track_data, audio_duration),
        value=[],
    )
    return final_cue_text, new_track_data, checklist_update
def perform_manual_split(split_time_sec, original_track_data, audio_duration, cue_globals):
    """Insert a new, untitled track boundary at the slider's time.

    Args:
        split_time_sec: Position of the new boundary in seconds.
        original_track_data: Current list of track dicts.
        audio_duration: End time in seconds used for the last track's length.
        cue_globals: Sheet-level values passed to the CUE formatter.

    Returns:
        A 3-tuple: the new CUE text, the new track list (sorted by time),
        and an update that refreshes the checklist and clears the selection.

    Raises:
        gr.Error: If an existing track already starts within one
            millisecond of ``split_time_sec``.
    """
    for existing in original_track_data:
        if abs(existing['time'] - split_time_sec) < 1e-3:
            raise gr.Error("This exact timestamp already exists.")

    new_track_data = list(original_track_data)
    new_track_data.append({'time': split_time_sec, 'title': None})
    new_track_data.sort(key=lambda entry: entry['time'])

    final_cue_text = format_cue_text(new_track_data, cue_globals)
    checklist_update = gr.update(
        choices=generate_track_choices(new_track_data, audio_duration),
        value=[],
    )
    return final_cue_text, new_track_data, checklist_update
| # --- Timeline Shift --- | |
def shift_timeline(shift_amount_sec, original_track_data, audio_duration, cue_globals):
    """Shift every track start time by the same number of seconds.

    Shifted times are clamped at zero and have no upper bound, so the last
    track may move past ``audio_duration``. If clamping makes several
    tracks share a start time, only the first of them (in time order) is
    kept.

    Args:
        shift_amount_sec: Seconds to add to every start time (may be
            negative). ``None`` is rejected; the numeric input may deliver
            it when the field has been cleared.
        original_track_data: Current list of track dicts; not modified.
        audio_duration: End time in seconds used for the last track's length.
        cue_globals: Sheet-level values passed to the CUE formatter.

    Returns:
        A 3-tuple: the new CUE text, the new track list, and an update that
        refreshes the checklist and clears the selection.

    Raises:
        gr.Error: If there are no tracks or no shift amount was given.
    """
    if not original_track_data:
        raise gr.Error("No track times to shift.")
    if shift_amount_sec is None:
        # Without this guard the addition below fails with a bare TypeError.
        raise gr.Error("Please enter a shift amount in seconds.")

    # Work on a copy so the stored state is only replaced by the return value.
    new_track_data = deepcopy(original_track_data)
    for track in new_track_data:
        track['time'] = max(0, track['time'] + shift_amount_sec)

    # Drop duplicates created when several tracks are clamped to 0.
    unique_tracks = []
    seen_times = set()
    for track in sorted(new_track_data, key=lambda x: x['time']):
        if track['time'] not in seen_times:
            unique_tracks.append(track)
            seen_times.add(track['time'])

    final_cue_text = format_cue_text(unique_tracks, cue_globals)
    new_track_choices = generate_track_choices(unique_tracks, audio_duration)
    return final_cue_text, unique_tracks, gr.update(choices=new_track_choices, value=[])
| # --- Edit Track Start Time --- | |
def edit_track_start_time(selected_indices, new_time_str, original_track_data, audio_duration, cue_globals):
    """Change the start time of the first selected track.

    The new time must lie strictly between the start times of the
    neighbouring tracks (the first track has no lower neighbour and the
    last track has no upper limit).

    Args:
        selected_indices: Indices ticked in the checklist; only the first
            one is used.
        new_time_str: The new start time as ``MM:SS:FF`` text.
        original_track_data: Current list of track dicts; not modified.
        audio_duration: End time in seconds used for the last track's length.
        cue_globals: Sheet-level values passed to the CUE formatter.

    Returns:
        A 3-tuple: the new CUE text, the new track list, and an update that
        refreshes the checklist and clears the selection.

    Raises:
        gr.Error: If nothing is selected, the time text cannot be parsed, or
            the new time collides with or crosses a neighbouring track.
    """
    if not selected_indices:
        raise gr.Error("No track selected for editing.")

    new_time_sec = parse_cue_time_to_seconds(new_time_str)
    if new_time_sec is None:
        raise gr.Error("Invalid time format. Please use MM:SS:FF.")

    track_idx = selected_indices[0]
    last_idx = len(original_track_data) - 1

    # Neighbouring start times act as exclusive bounds for the new value.
    if track_idx > 0:
        lower_bound = original_track_data[track_idx - 1]['time']
    else:
        lower_bound = -1
    if track_idx < last_idx:
        upper_bound = original_track_data[track_idx + 1]['time']
    else:
        upper_bound = float('inf')

    if new_time_sec <= lower_bound:
        raise gr.Error("New time cannot be earlier than the previous track's start time.")
    if new_time_sec >= upper_bound:
        raise gr.Error("New time cannot be later than or equal to the next track's start time.")

    updated_tracks = deepcopy(original_track_data)
    updated_tracks[track_idx]['time'] = new_time_sec

    final_cue_text = format_cue_text(updated_tracks, cue_globals)
    checklist_update = gr.update(
        choices=generate_track_choices(updated_tracks, audio_duration),
        value=[],
    )
    return final_cue_text, updated_tracks, checklist_update
| # --- Gradio User Interface Definition --- | |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎵 Advanced CUE Sheet Generator")

    # --- Hidden State Variables ---
    # Shared between handlers: sheet-level CUE fields, the track list, and
    # the duration used for the last track's length.
    cue_globals_state = gr.State({})
    track_data_state = gr.State([])
    audio_duration_state = gr.State(0)

    with gr.Tabs():
        with gr.TabItem("Start with Audio File"):
            gr.Markdown("Upload an audio file to automatically detect track points.")
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            with gr.Accordion("Analysis Parameters", open=False):
                threshold_slider = gr.Slider(10, 80, 40, step=1, label="Silence Threshold (dB)")
                min_length_slider = gr.Slider(0.5, 30, 1, step=0.1, label="Min. Segment Length (s)")
                merge_length_slider = gr.Slider(1, 60, 15, step=1, label="Auto-Merge Threshold (s)")
                min_silence_length_slider = gr.Slider(0.5, 60, 2, step=0.1, label="Merge Protection Length (s)")
            generate_button = gr.Button("Analyze Audio", variant="primary")
        with gr.TabItem("Start with CUE Text"):
            gr.Markdown("Or paste CUE text below and click outside the box. The editing tools will appear automatically.")
            cue_text_input_for_paste = gr.Textbox(label="Paste CUE Text Here", lines=8, placeholder="Paste your CUE sheet content here and click outside the box. The editing tools will appear automatically.")

    # The main output textbox sits outside the tabs and serves as the central display.
    output_text = gr.Textbox(label="CUE Sheet Output", lines=15, show_copy_button=True, interactive=True)

    with gr.Group(visible=False) as manual_editing_group:
        gr.Markdown("### Manual Editing Tools")
        track_checkboxes = gr.CheckboxGroup(label="Select Tracks to Edit")
        with gr.Row(visible=False) as merge_tools:
            merge_button = gr.Button("Merge Selected Tracks", variant="secondary", size="lg")
        # This group contains both Split and Edit tools, shown when one track is selected.
        with gr.Group(visible=False) as single_track_tools:
            with gr.Accordion("Split Track", open=False):
                split_slider_label = gr.Textbox(label="Current Split Time", interactive=False)
                split_slider = gr.Slider(label="Drag to select split point")
                split_button = gr.Button("Split Track at Selected Time", variant="secondary")
            # --- Edit Start Time ---
            with gr.Accordion("Edit Start Time", open=True):
                edit_time_input = gr.Textbox(label="New Start Time (MM:SS:FF)", placeholder="e.g., 01:23:45")
                edit_time_button = gr.Button("Update Start Time", variant="secondary")

    # --- Global Timeline Shift ---
    with gr.Accordion("Global Edits", open=False, visible=False) as global_editing_group:
        shift_amount_input = gr.Number(label="Timeline Shift Amount (seconds, +/-)", value=0)
        shift_button = gr.Button("Apply Timeline Shift", variant="secondary")

    # --- Event Wiring ---
    def show_editing_groups(track_data):
        """Show both editing groups when there is at least one track, hide them otherwise."""
        is_visible = bool(track_data)
        return gr.update(visible=is_visible), gr.update(visible=is_visible)

    # Both entry-point handlers return six values (text, globals, tracks,
    # duration, checklist update, editing-group visibility), so six output
    # components are listed; the last one receives the visibility update.
    session_outputs = [
        output_text, cue_globals_state, track_data_state,
        audio_duration_state, track_checkboxes, manual_editing_group,
    ]
    # Every editing action returns (text, tracks, checklist update).
    edit_outputs = [output_text, track_data_state, track_checkboxes]

    # Workflow 1: audio analysis fills the output and state, then reveals the tools.
    generate_button.click(
        fn=analyze_audio_to_cue,
        inputs=[audio_input, threshold_slider, min_length_slider, merge_length_slider, min_silence_length_slider],
        outputs=session_outputs
    ).then(
        fn=show_editing_groups,
        inputs=[track_data_state],
        outputs=[manual_editing_group, global_editing_group]
    )

    # Workflow 2: text entered in the paste box is parsed into the main output and state.
    cue_text_input_for_paste.change(
        fn=parse_cue_and_update_ui,
        inputs=[cue_text_input_for_paste],
        outputs=session_outputs
    ).then(
        fn=show_editing_groups,
        inputs=[track_data_state],
        outputs=[manual_editing_group, global_editing_group]
    )

    # Dynamic UI controller for showing/hiding Merge/Split tools.
    track_checkboxes.change(
        fn=update_editing_tools,
        inputs=[track_checkboxes, track_data_state, audio_duration_state],
        outputs=[merge_tools, single_track_tools, split_slider, split_slider_label, edit_time_input]
    )

    # Live update for the split slider's time display.
    split_slider.input(
        fn=lambda t: f"Split at: {seconds_to_cue_time(t)}",
        inputs=[split_slider],
        outputs=[split_slider_label]
    )

    # Action buttons.
    merge_button.click(
        fn=perform_manual_merge,
        inputs=[track_checkboxes, track_data_state, audio_duration_state, cue_globals_state],
        outputs=edit_outputs
    )
    split_button.click(
        fn=perform_manual_split,
        inputs=[split_slider, track_data_state, audio_duration_state, cue_globals_state],
        outputs=edit_outputs
    )
    shift_button.click(
        fn=shift_timeline,
        inputs=[shift_amount_input, track_data_state, audio_duration_state, cue_globals_state],
        outputs=edit_outputs
    )
    edit_time_button.click(
        fn=edit_track_start_time,
        inputs=[track_checkboxes, edit_time_input, track_data_state, audio_duration_state, cue_globals_state],
        outputs=edit_outputs
    )

if __name__ == "__main__":
    demo.launch(inbrowser=True)