r3gm committed on
Commit
186a1a6
·
verified ·
1 Parent(s): c2293e2

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +1 -3
  2. app.py +152 -143
  3. pre-requirements.txt +2 -0
  4. requirements.txt +16 -2
README.md CHANGED
@@ -4,11 +4,9 @@ emoji: 🏃
4
  colorFrom: purple
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 4.28.3
8
  app_file: app.py
9
  pinned: true
10
  license: mit
11
  short_description: Vocal and background audio separator
12
  ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
4
  colorFrom: purple
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 5.43.1
8
  app_file: app.py
9
  pinned: true
10
  license: mit
11
  short_description: Vocal and background audio separator
12
  ---
 
 
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import os
2
- # os.system("pip install ./ort_nightly_gpu-1.17.0.dev20240118002-cp310-cp310-manylinux_2_28_x86_64.whl")
3
- os.system("pip install ort-nightly-gpu --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ort-cuda-12-nightly/pypi/simple/")
4
  import gc
5
  import hashlib
6
  import queue
@@ -20,25 +19,39 @@ from utils import (
20
  download_manager,
21
  )
22
  import random
23
- import spaces
24
  from utils import logger
25
  import onnxruntime as ort
26
  import warnings
27
- import spaces
28
  import gradio as gr
29
- import logging
30
  import time
31
  import traceback
32
  from pedalboard import Pedalboard, Reverb, Delay, Chorus, Compressor, Gain, HighpassFilter, LowpassFilter
33
  from pedalboard.io import AudioFile
34
- import numpy as np
35
- import yt_dlp
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  warnings.filterwarnings("ignore")
 
 
38
 
39
  title = "<center><strong><font size='7'>Audio🔹separator</font></strong></center>"
40
- description = "This demo uses the MDX-Net models for vocal and background sound separation."
41
- theme = "NoCrypt/miku"
 
 
42
 
43
  stem_naming = {
44
  "Vocals": "Instrumental",
@@ -350,7 +363,7 @@ class MDX:
350
  return self.segment(processed_batches, True, chunk)
351
 
352
 
353
- @spaces.GPU()
354
  def run_mdx(
355
  model_params,
356
  output_dir,
@@ -372,6 +385,9 @@ def run_mdx(
372
  device_properties = torch.cuda.get_device_properties(device)
373
  vram_gb = device_properties.total_memory / 1024**3
374
  m_threads = 1 if vram_gb < 8 else (8 if vram_gb > 32 else 2)
 
 
 
375
  logger.info(f"threads: {m_threads} vram: {vram_gb}")
376
  else:
377
  device = torch.device("cpu")
@@ -459,7 +475,9 @@ def run_mdx_beta(
459
 
460
  m_threads = 1
461
  duration = librosa.get_duration(filename=filename)
462
- if duration >= 60 and duration <= 120:
 
 
463
  m_threads = 8
464
  elif duration > 120:
465
  m_threads = 16
@@ -580,10 +598,14 @@ def get_hash(filepath):
580
 
581
  return file_hash.hexdigest()[:18]
582
 
 
583
  def random_sleep():
584
- sleep_time = round(random.uniform(5.2, 7.9), 1)
 
 
585
  time.sleep(sleep_time)
586
 
 
587
  def process_uvr_task(
588
  orig_song_path: str = "aud_test.mp3",
589
  main_vocals: bool = False,
@@ -653,16 +675,16 @@ def process_uvr_task(
653
  device_base=device_base,
654
  )
655
  except Exception as e:
656
- backup_vocals_path, main_vocals_path = run_mdx_beta(
657
- mdx_model_params,
658
- song_output_dir,
659
- os.path.join(mdxnet_models_dir, "UVR_MDXNET_KARA_2.onnx"),
660
- vocals_path,
661
- suffix="Backup",
662
- invert_suffix="Main",
663
- denoise=True,
664
- device_base=device_base,
665
- )
666
  else:
667
  backup_vocals_path, main_vocals_path = None, vocals_path
668
 
@@ -683,16 +705,16 @@ def process_uvr_task(
683
  device_base=device_base,
684
  )
685
  except Exception as e:
686
- _, vocals_dereverb_path = run_mdx_beta(
687
- mdx_model_params,
688
- song_output_dir,
689
- os.path.join(mdxnet_models_dir, "Reverb_HQ_By_FoxJoy.onnx"),
690
- main_vocals_path,
691
- invert_suffix="DeReverb",
692
- exclude_main=True,
693
- denoise=True,
694
- device_base=device_base,
695
- )
696
  else:
697
  vocals_dereverb_path = main_vocals_path
698
 
@@ -719,12 +741,12 @@ def add_vocal_effects(input_file, output_file, reverb_room_size=0.6, vocal_rever
719
 
720
  if delay_seconds > 0 or delay_mix > 0:
721
  effects.append(Delay(delay_seconds=delay_seconds, mix=delay_mix))
722
- print("delay applied")
723
  # effects.append(Chorus())
724
 
725
  if gain_db:
726
  effects.append(Gain(gain_db=gain_db))
727
- print("added gain db")
728
 
729
  board = Pedalboard(effects)
730
 
@@ -765,99 +787,77 @@ def add_instrumental_effects(input_file, output_file, highpass_freq=100, lowpass
765
  effected = board(chunk, f.samplerate, reset=False)
766
  o.write(effected)
767
 
768
-
769
- def sound_separate(media_file, stem, main, dereverb, vocal_effects=True, background_effects=True,
770
- vocal_reverb_room_size=0.6, vocal_reverb_damping=0.6, vocal_reverb_wet_level=0.35,
771
- vocal_delay_seconds=0.4, vocal_delay_mix=0.25,
772
- vocal_compressor_threshold_db=-25, vocal_compressor_ratio=3.5, vocal_compressor_attack_ms=10, vocal_compressor_release_ms=60,
773
- vocal_gain_db=4,
774
- background_highpass_freq=120, background_lowpass_freq=11000,
775
- background_reverb_room_size=0.5, background_reverb_damping=0.5, background_reverb_wet_level=0.25,
776
- background_compressor_threshold_db=-20, background_compressor_ratio=2.5, background_compressor_attack_ms=15, background_compressor_release_ms=80,
777
- background_gain_db=3):
778
- if not media_file:
779
- raise ValueError("The audio path is missing.")
780
 
781
- if not stem:
782
- raise ValueError("Please select 'vocal' or 'background' stem.")
783
 
784
- hash_audio = str(get_hash(media_file))
785
- media_dir = os.path.dirname(media_file)
786
 
787
- outputs = []
 
 
 
 
788
 
789
- start_time = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
790
 
791
- if stem == "vocal":
792
- try:
793
- _, _, _, _, vocal_audio = process_uvr_task(
794
- orig_song_path=media_file,
795
- song_id=hash_audio + "mdx",
796
- main_vocals=main,
797
- dereverb=dereverb,
798
- remove_files_output_dir=False,
799
- )
800
 
801
- if vocal_effects:
802
- suffix = '_effects'
803
- file_name, file_extension = os.path.splitext(vocal_audio)
804
- out_effects = file_name + suffix + file_extension
805
- out_effects_path = os.path.join(media_dir, out_effects)
806
- add_vocal_effects(vocal_audio, out_effects_path,
807
- reverb_room_size=vocal_reverb_room_size, reverb_damping=vocal_reverb_damping, reverb_wet_level=vocal_reverb_wet_level,
808
- delay_seconds=vocal_delay_seconds, delay_mix=vocal_delay_mix,
809
- compressor_threshold_db=vocal_compressor_threshold_db, compressor_ratio=vocal_compressor_ratio, compressor_attack_ms=vocal_compressor_attack_ms, compressor_release_ms=vocal_compressor_release_ms,
810
- gain_db=vocal_gain_db
811
- )
812
- vocal_audio = out_effects_path
813
 
814
- outputs.append(vocal_audio)
815
- except Exception as error:
816
- logger.error(str(error))
817
- traceback.print_exc()
818
 
819
- if stem == "background":
820
- background_audio, _ = process_uvr_task(
821
- orig_song_path=media_file,
822
- song_id=hash_audio + "voiceless",
823
- only_voiceless=True,
824
- remove_files_output_dir=False,
825
- )
826
 
827
- if background_effects:
828
- suffix = '_effects'
829
- file_name, file_extension = os.path.splitext(background_audio)
830
- out_effects = file_name + suffix + file_extension
831
- out_effects_path = os.path.join(media_dir, out_effects)
832
- add_instrumental_effects(background_audio, out_effects_path,
833
- highpass_freq=background_highpass_freq, lowpass_freq=background_lowpass_freq,
834
- reverb_room_size=background_reverb_room_size, reverb_damping=background_reverb_damping, reverb_wet_level=background_reverb_wet_level,
835
- compressor_threshold_db=background_compressor_threshold_db, compressor_ratio=background_compressor_ratio, compressor_attack_ms=background_compressor_attack_ms, compressor_release_ms=background_compressor_release_ms,
836
- gain_db=background_gain_db
837
- )
838
- background_audio = out_effects_path
839
 
840
- outputs.append(background_audio)
 
 
 
 
 
 
841
 
842
- end_time = time.time()
843
- execution_time = end_time - start_time
844
- logger.info(f"Execution time: {execution_time} seconds")
845
 
846
- if not outputs:
847
- raise Exception("Error in sound separation.")
 
848
 
849
- return outputs
850
 
 
851
 
852
- def sound_separate(media_file, stem, main, dereverb, vocal_effects=True, background_effects=True,
853
- vocal_reverb_room_size=0.6, vocal_reverb_damping=0.6, vocal_reverb_dryness=0.8 ,vocal_reverb_wet_level=0.35,
854
- vocal_delay_seconds=0.4, vocal_delay_mix=0.25,
855
- vocal_compressor_threshold_db=-25, vocal_compressor_ratio=3.5, vocal_compressor_attack_ms=10, vocal_compressor_release_ms=60,
856
- vocal_gain_db=4,
857
- background_highpass_freq=120, background_lowpass_freq=11000,
858
- background_reverb_room_size=0.5, background_reverb_damping=0.5, background_reverb_wet_level=0.25,
859
- background_compressor_threshold_db=-20, background_compressor_ratio=2.5, background_compressor_attack_ms=15, background_compressor_release_ms=80,
860
- background_gain_db=3,
 
 
 
861
  ):
862
  if not media_file:
863
  raise ValueError("The audio path is missing.")
@@ -875,10 +875,10 @@ def sound_separate(media_file, stem, main, dereverb, vocal_effects=True, backgro
875
  print("Duration audio:", duration_base_)
876
  except Exception as e:
877
  print(e)
878
-
879
  start_time = time.time()
880
 
881
- if stem == "vocal":
882
  try:
883
  _, _, _, _, vocal_audio = process_uvr_task(
884
  orig_song_path=media_file,
@@ -906,7 +906,7 @@ def sound_separate(media_file, stem, main, dereverb, vocal_effects=True, backgro
906
  gr.Info(str(error))
907
  logger.error(str(error))
908
 
909
- if stem == "background":
910
  background_audio, _ = process_uvr_task(
911
  orig_song_path=media_file,
912
  song_id=hash_audio + "voiceless",
@@ -919,7 +919,7 @@ def sound_separate(media_file, stem, main, dereverb, vocal_effects=True, backgro
919
  file_name, file_extension = os.path.splitext(os.path.abspath(background_audio))
920
  out_effects = file_name + suffix + file_extension
921
  out_effects_path = os.path.join(media_dir, out_effects)
922
- print(file_name, file_extension, out_effects, out_effects_path)
923
  add_instrumental_effects(background_audio, out_effects_path,
924
  highpass_freq=background_highpass_freq, lowpass_freq=background_lowpass_freq,
925
  reverb_room_size=background_reverb_room_size, reverb_damping=background_reverb_damping, reverb_wet_level=background_reverb_wet_level,
@@ -937,7 +937,7 @@ def sound_separate(media_file, stem, main, dereverb, vocal_effects=True, backgro
937
  if not outputs:
938
  raise Exception("Error in sound separation.")
939
 
940
- return outputs
941
 
942
 
943
  def audio_downloader(
@@ -949,7 +949,12 @@ def audio_downloader(
949
  if not url_media:
950
  return None
951
 
952
- print(url_media[:10])
 
 
 
 
 
953
 
954
  dir_output_downloads = "downloads"
955
  os.makedirs(dir_output_downloads, exist_ok=True)
@@ -1024,7 +1029,7 @@ def audio_conf():
1024
 
1025
 
1026
  def stem_conf():
1027
- return gr.Radio(
1028
  choices=["vocal", "background"],
1029
  value="vocal",
1030
  label="Stem",
@@ -1314,23 +1319,29 @@ def output_conf():
1314
 
1315
 
1316
  def show_vocal_components(value_name):
 
 
1317
 
1318
- if value_name == "vocal":
1319
- return gr.update(visible=True), gr.update(
1320
- visible=True
1321
- ), gr.update(visible=True), gr.update(
1322
- visible=False
1323
- )
1324
- else:
1325
- return gr.update(visible=False), gr.update(
1326
- visible=False
1327
- ), gr.update(visible=False), gr.update(
1328
- visible=True
1329
- )
 
 
 
 
1330
 
1331
 
1332
  def get_gui(theme):
1333
- with gr.Blocks(theme=theme) as app:
1334
  gr.Markdown(title)
1335
  gr.Markdown(description)
1336
 
@@ -1366,9 +1377,7 @@ def get_gui(theme):
1366
  vocal_effects_gui = vocal_effects_conf()
1367
  background_effects_gui = background_effects_conf()
1368
 
1369
- # with gr.Column():
1370
- with gr.Accordion("Vocal Effects Parameters", open=False): # with gr.Row():
1371
- # gr.Label("Vocal Effects Parameters")
1372
  with gr.Row():
1373
  vocal_reverb_room_size_gui = vocal_reverb_room_size_conf()
1374
  vocal_reverb_damping_gui = vocal_reverb_damping_conf()
@@ -1382,8 +1391,7 @@ def get_gui(theme):
1382
  vocal_compressor_release_ms_gui = vocal_compressor_release_ms_conf()
1383
  vocal_gain_db_gui = vocal_gain_db_conf()
1384
 
1385
- with gr.Accordion("Background Effects Parameters", open=False): # with gr.Row():
1386
- # gr.Label("Background Effects Parameters")
1387
  with gr.Row():
1388
  background_highpass_freq_gui = background_highpass_freq_conf()
1389
  background_lowpass_freq_gui = background_lowpass_freq_conf()
@@ -1402,6 +1410,7 @@ def get_gui(theme):
1402
  [main_gui, dereverb_gui, vocal_effects_gui, background_effects_gui],
1403
  )
1404
 
 
1405
  button_base = button_conf()
1406
  output_base = output_conf()
1407
 
@@ -1420,7 +1429,7 @@ def get_gui(theme):
1420
  background_highpass_freq_gui, background_lowpass_freq_gui, background_reverb_room_size_gui,
1421
  background_reverb_damping_gui, background_reverb_wet_level_gui, background_compressor_threshold_db_gui,
1422
  background_compressor_ratio_gui, background_compressor_attack_ms_gui, background_compressor_release_ms_gui,
1423
- background_gain_db_gui,
1424
  ],
1425
  outputs=[output_base],
1426
  )
@@ -1459,24 +1468,24 @@ def get_gui(theme):
1459
  cache_examples=False,
1460
  )
1461
 
 
 
1462
  return app
1463
 
1464
 
1465
  if __name__ == "__main__":
1466
-
1467
  for id_model in UVR_MODELS:
1468
  download_manager(
1469
  os.path.join(MDX_DOWNLOAD_LINK, id_model), mdxnet_models_dir
1470
  )
1471
 
1472
  app = get_gui(theme)
1473
-
1474
  app.queue(default_concurrency_limit=40)
1475
-
1476
  app.launch(
1477
  max_threads=40,
1478
- share=False,
1479
  show_error=True,
1480
  quiet=False,
1481
- debug=False,
 
1482
  )
 
1
  import os
2
+ import spaces
 
3
  import gc
4
  import hashlib
5
  import queue
 
19
  download_manager,
20
  )
21
  import random
 
22
  from utils import logger
23
  import onnxruntime as ort
24
  import warnings
 
25
  import gradio as gr
 
26
  import time
27
  import traceback
28
  from pedalboard import Pedalboard, Reverb, Delay, Chorus, Compressor, Gain, HighpassFilter, LowpassFilter
29
  from pedalboard.io import AudioFile
30
+ import argparse
31
+
32
+ parser = argparse.ArgumentParser(description="Run the app with optional sharing")
33
+ parser.add_argument(
34
+ '--share',
35
+ action='store_true',
36
+ help='Enable sharing mode'
37
+ )
38
+ parser.add_argument(
39
+ '--theme',
40
+ type=str,
41
+ default="NoCrypt/miku",
42
+ help='Set the theme (default: NoCrypt/miku)'
43
+ )
44
+ args = parser.parse_args()
45
 
46
  warnings.filterwarnings("ignore")
47
+ IS_COLAB = True if ('google.colab' in sys.modules or args.share) else False
48
+ IS_ZERO_GPU = os.getenv("SPACES_ZERO_GPU")
49
 
50
  title = "<center><strong><font size='7'>Audio🔹separator</font></strong></center>"
51
+ base_demo = "This demo uses the "
52
+ description = (f"{base_demo if IS_ZERO_GPU else ''}MDX-Net models for vocal and background sound separation.")
53
+ RESOURCES = "- You can also try `Audio🔹separator` in Colab’s free tier, which provides free GPU [link](https://github.com/R3gm/Audio_separator_ui?tab=readme-ov-file#audio-separator)."
54
+ theme = args.theme
55
 
56
  stem_naming = {
57
  "Vocals": "Instrumental",
 
363
  return self.segment(processed_batches, True, chunk)
364
 
365
 
366
+ @spaces.GPU(duration=40)
367
  def run_mdx(
368
  model_params,
369
  output_dir,
 
385
  device_properties = torch.cuda.get_device_properties(device)
386
  vram_gb = device_properties.total_memory / 1024**3
387
  m_threads = 1 if vram_gb < 8 else (8 if vram_gb > 32 else 2)
388
+ duration = librosa.get_duration(filename=filename)
389
+ if duration < 60:
390
+ m_threads = 1
391
  logger.info(f"threads: {m_threads} vram: {vram_gb}")
392
  else:
393
  device = torch.device("cpu")
 
475
 
476
  m_threads = 1
477
  duration = librosa.get_duration(filename=filename)
478
+ if IS_COLAB or duration < 60:
479
+ m_threads = 1
480
+ elif duration >= 60 and duration <= 120:
481
  m_threads = 8
482
  elif duration > 120:
483
  m_threads = 16
 
598
 
599
  return file_hash.hexdigest()[:18]
600
 
601
+
602
  def random_sleep():
603
+ sleep_time = 0.1
604
+ if IS_ZERO_GPU:
605
+ sleep_time = round(random.uniform(3.2, 5.9), 1)
606
  time.sleep(sleep_time)
607
 
608
+
609
  def process_uvr_task(
610
  orig_song_path: str = "aud_test.mp3",
611
  main_vocals: bool = False,
 
675
  device_base=device_base,
676
  )
677
  except Exception as e:
678
+ backup_vocals_path, main_vocals_path = run_mdx_beta(
679
+ mdx_model_params,
680
+ song_output_dir,
681
+ os.path.join(mdxnet_models_dir, "UVR_MDXNET_KARA_2.onnx"),
682
+ vocals_path,
683
+ suffix="Backup",
684
+ invert_suffix="Main",
685
+ denoise=True,
686
+ device_base=device_base,
687
+ )
688
  else:
689
  backup_vocals_path, main_vocals_path = None, vocals_path
690
 
 
705
  device_base=device_base,
706
  )
707
  except Exception as e:
708
+ _, vocals_dereverb_path = run_mdx_beta(
709
+ mdx_model_params,
710
+ song_output_dir,
711
+ os.path.join(mdxnet_models_dir, "Reverb_HQ_By_FoxJoy.onnx"),
712
+ main_vocals_path,
713
+ invert_suffix="DeReverb",
714
+ exclude_main=True,
715
+ denoise=True,
716
+ device_base=device_base,
717
+ )
718
  else:
719
  vocals_dereverb_path = main_vocals_path
720
 
 
741
 
742
  if delay_seconds > 0 or delay_mix > 0:
743
  effects.append(Delay(delay_seconds=delay_seconds, mix=delay_mix))
744
+ # print("delay applied")
745
  # effects.append(Chorus())
746
 
747
  if gain_db:
748
  effects.append(Gain(gain_db=gain_db))
749
+ # print("added gain db")
750
 
751
  board = Pedalboard(effects)
752
 
 
787
  effected = board(chunk, f.samplerate, reset=False)
788
  o.write(effected)
789
 
 
 
 
 
 
 
 
 
 
 
 
 
790
 
791
+ COMMON_SAMPLE_RATES = [8000, 16000, 22050, 32000, 44100, 48000, 96000]
 
792
 
 
 
793
 
794
+ def save_audio(audio_opt: np.ndarray, final_sr: int, output_audio_path: str, target_format: str) -> str:
795
+ """
796
+ Save audio with automatic handling of unsupported sample rates for non-WAV formats.
797
+ """
798
+ ext = os.path.splitext(output_audio_path)[1].lower()
799
 
800
+ try:
801
+ if ext == ".wav":
802
+ sf.write(output_audio_path, audio_opt, final_sr, format=target_format)
803
+ else:
804
+ target_sr = min(COMMON_SAMPLE_RATES, key=lambda altsr: abs(altsr - final_sr))
805
+ if target_sr != final_sr:
806
+ logger.warning(f"Resampling from {final_sr} -> {target_sr} for {ext}")
807
+ audio_opt = librosa.resample(audio_opt, orig_sr=final_sr, target_sr=target_sr)
808
+ sf.write(output_audio_path, audio_opt, target_sr, format=target_format)
809
+ except Exception as e:
810
+ logger.error(e)
811
+ logger.error(f"Error saving {output_audio_path}, performing fallback to WAV")
812
+ output_audio_path = output_audio_path.replace(f"_converted.{target_format}", ".wav")
813
 
814
+ return output_audio_path
 
 
 
 
 
 
 
 
815
 
 
 
 
 
 
 
 
 
 
 
 
 
816
 
817
+ def convert_format(file_paths, media_dir, target_format):
818
+ """
819
+ Convert a list of audio files to the target format with automatic safe sample rates.
 
820
 
821
+ When the target format is WAV the input paths are returned unchanged; otherwise each file is re-encoded and resampled if needed to a supported rate.
822
+ """
823
+ target_format = target_format.lower()
824
+ if target_format == "wav":
825
+ return file_paths # No conversion needed for WAV
 
 
826
 
827
+ suffix = "_converted"
828
+ converted_files = []
 
 
 
 
 
 
 
 
 
 
829
 
830
+ for fp in file_paths:
831
+ # Absolute paths and base filename
832
+ abs_fp = os.path.abspath(fp)
833
+ file_name, _ = os.path.splitext(os.path.basename(abs_fp))
834
+ file_ext = f".{target_format}"
835
+ out_name = file_name + suffix + file_ext
836
+ out_path = os.path.join(media_dir, out_name)
837
 
838
+ # Load audio with soundfile (sf.read)
839
+ audio, sr = sf.read(abs_fp)
 
840
 
841
+ # Save using safe resampling
842
+ saved_path = save_audio(audio, sr, out_path, target_format)
843
+ converted_files.append(saved_path)
844
 
845
+ # print(f"Converted: {abs_fp} -> {saved_path}")
846
 
847
+ return converted_files
848
 
849
+
850
+ def sound_separate(
851
+ media_file, stem, main, dereverb, vocal_effects=True, background_effects=True,
852
+ vocal_reverb_room_size=0.6, vocal_reverb_damping=0.6, vocal_reverb_dryness=0.8, vocal_reverb_wet_level=0.35,
853
+ vocal_delay_seconds=0.4, vocal_delay_mix=0.25,
854
+ vocal_compressor_threshold_db=-25, vocal_compressor_ratio=3.5, vocal_compressor_attack_ms=10, vocal_compressor_release_ms=60,
855
+ vocal_gain_db=4,
856
+ background_highpass_freq=120, background_lowpass_freq=11000,
857
+ background_reverb_room_size=0.5, background_reverb_damping=0.5, background_reverb_wet_level=0.25,
858
+ background_compressor_threshold_db=-20, background_compressor_ratio=2.5, background_compressor_attack_ms=15, background_compressor_release_ms=80,
859
+ background_gain_db=3,
860
+ target_format="WAV",
861
  ):
862
  if not media_file:
863
  raise ValueError("The audio path is missing.")
 
875
  print("Duration audio:", duration_base_)
876
  except Exception as e:
877
  print(e)
878
+
879
  start_time = time.time()
880
 
881
+ if "vocal" in stem:
882
  try:
883
  _, _, _, _, vocal_audio = process_uvr_task(
884
  orig_song_path=media_file,
 
906
  gr.Info(str(error))
907
  logger.error(str(error))
908
 
909
+ if "background" in stem:
910
  background_audio, _ = process_uvr_task(
911
  orig_song_path=media_file,
912
  song_id=hash_audio + "voiceless",
 
919
  file_name, file_extension = os.path.splitext(os.path.abspath(background_audio))
920
  out_effects = file_name + suffix + file_extension
921
  out_effects_path = os.path.join(media_dir, out_effects)
922
+ # print(file_name, file_extension, out_effects, out_effects_path)
923
  add_instrumental_effects(background_audio, out_effects_path,
924
  highpass_freq=background_highpass_freq, lowpass_freq=background_lowpass_freq,
925
  reverb_room_size=background_reverb_room_size, reverb_damping=background_reverb_damping, reverb_wet_level=background_reverb_wet_level,
 
937
  if not outputs:
938
  raise Exception("Error in sound separation.")
939
 
940
+ return convert_format(outputs, media_dir, target_format)
941
 
942
 
943
  def audio_downloader(
 
949
  if not url_media:
950
  return None
951
 
952
+ if IS_ZERO_GPU and "youtube.com" in url_media:
953
+ gr.Info("This option isn’t available on Hugging Face.")
954
+ return None
955
+
956
+ import yt_dlp
957
+ # print(url_media[:10])
958
 
959
  dir_output_downloads = "downloads"
960
  os.makedirs(dir_output_downloads, exist_ok=True)
 
1029
 
1030
 
1031
  def stem_conf():
1032
+ return gr.CheckboxGroup(
1033
  choices=["vocal", "background"],
1034
  value="vocal",
1035
  label="Stem",
 
1319
 
1320
 
1321
  def show_vocal_components(value_name):
1322
+ v_ = "vocal" in value_name
1323
+ b_ = "background" in value_name
1324
 
1325
+ return gr.update(visible=v_), gr.update(
1326
+ visible=v_
1327
+ ), gr.update(visible=v_), gr.update(
1328
+ visible=b_
1329
+ )
1330
+
1331
+
1332
+ FORMAT_OPTIONS = ["WAV", "MP3", "FLAC"]
1333
+
1334
+
1335
+ def format_conf():
1336
+ return gr.Dropdown(
1337
+ choices=FORMAT_OPTIONS,
1338
+ value=FORMAT_OPTIONS[0],
1339
+ label="Format output:"
1340
+ )
1341
 
1342
 
1343
  def get_gui(theme):
1344
+ with gr.Blocks(theme=theme, fill_width=True, fill_height=False, delete_cache=(3200, 10800)) as app:
1345
  gr.Markdown(title)
1346
  gr.Markdown(description)
1347
 
 
1377
  vocal_effects_gui = vocal_effects_conf()
1378
  background_effects_gui = background_effects_conf()
1379
 
1380
+ with gr.Accordion("Vocal Effects Parameters", open=False):
 
 
1381
  with gr.Row():
1382
  vocal_reverb_room_size_gui = vocal_reverb_room_size_conf()
1383
  vocal_reverb_damping_gui = vocal_reverb_damping_conf()
 
1391
  vocal_compressor_release_ms_gui = vocal_compressor_release_ms_conf()
1392
  vocal_gain_db_gui = vocal_gain_db_conf()
1393
 
1394
+ with gr.Accordion("Background Effects Parameters", open=False):
 
1395
  with gr.Row():
1396
  background_highpass_freq_gui = background_highpass_freq_conf()
1397
  background_lowpass_freq_gui = background_lowpass_freq_conf()
 
1410
  [main_gui, dereverb_gui, vocal_effects_gui, background_effects_gui],
1411
  )
1412
 
1413
+ target_format_gui = format_conf()
1414
  button_base = button_conf()
1415
  output_base = output_conf()
1416
 
 
1429
  background_highpass_freq_gui, background_lowpass_freq_gui, background_reverb_room_size_gui,
1430
  background_reverb_damping_gui, background_reverb_wet_level_gui, background_compressor_threshold_db_gui,
1431
  background_compressor_ratio_gui, background_compressor_attack_ms_gui, background_compressor_release_ms_gui,
1432
+ background_gain_db_gui, target_format_gui,
1433
  ],
1434
  outputs=[output_base],
1435
  )
 
1468
  cache_examples=False,
1469
  )
1470
 
1471
+ gr.Markdown(RESOURCES)
1472
+
1473
  return app
1474
 
1475
 
1476
  if __name__ == "__main__":
 
1477
  for id_model in UVR_MODELS:
1478
  download_manager(
1479
  os.path.join(MDX_DOWNLOAD_LINK, id_model), mdxnet_models_dir
1480
  )
1481
 
1482
  app = get_gui(theme)
 
1483
  app.queue(default_concurrency_limit=40)
 
1484
  app.launch(
1485
  max_threads=40,
1486
+ share=IS_COLAB,
1487
  show_error=True,
1488
  quiet=False,
1489
+ debug=IS_COLAB,
1490
+ ssr_mode=False,
1491
  )
pre-requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pip<=23.1.2
2
+ Setuptools<=80.6.0
requirements.txt CHANGED
@@ -1,5 +1,19 @@
1
  soundfile
2
  librosa
3
- torch==2.2.0
4
  pedalboard
5
- yt-dlp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  soundfile
2
  librosa
3
+ torch==2.5.1
4
  pedalboard
5
+ yt-dlp
6
+ tqdm
7
+ spaces
8
+ numpy<2
9
+ gradio==5.43.1
10
+ ffmpeg-python
11
+ scipy
12
+ scikit-learn
13
+ matplotlib
14
+ matplotlib-inline
15
+ seaborn
16
+ requests
17
+ urllib3
18
+ onnxruntime-gpu==1.22.0
19
+ # onnxruntime # only CPU