Spaces:

descript
/

vampnet

Runtime error

App Files Files Community

Hugo Flores Garcia committed on Apr 12, 2023

Commit

128981d

1 Parent(s): 03f09ee

demo

Browse files

Files changed (2) hide show

demo.py +31 -17
vampnet/interface.py +15 -0

demo.py CHANGED Viewed

@@ -65,13 +65,21 @@ def vamp(
     mask_periodic_amt, beat_unmask_dur,
     mask_dwn_chk, dwn_factor,
     mask_up_chk, up_factor,
-    num_vamps, mode, use_beats, num_steps
 ):
     # try:
         print(input_audio)
-        sig = at.AudioSignal(input_audio.name)
         if beat_unmask_dur > 0.0 and use_beats:
             beat_mask = interface.make_beat_mask(
                 sig,
@@ -142,13 +150,13 @@ def save_vamp(
     mask_periodic_amt, beat_unmask_dur,
     mask_dwn_chk, dwn_factor,
     mask_up_chk, up_factor,
-    num_vamps, mode, output_audio, notes, use_beats, num_steps
 ):
     out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
     out_dir.mkdir(parents=True, exist_ok=True)
-    sig_in = at.AudioSignal(input_audio.name)
-    sig_out = at.AudioSignal(output_audio.name)
     sig_in.write(out_dir / "input.wav")
     sig_out.write(out_dir / "output.wav")
@@ -168,6 +176,7 @@ def save_vamp(
         "up_factor": up_factor,
         "num_vamps": num_vamps,
         "num_steps": num_steps,
         "mode": mode,
         "notes": notes,
     }
@@ -212,12 +221,12 @@ with gr.Blocks() as demo:
         with gr.Column():
             gr.Markdown("""
             ### Tips
-            - use the beat sync button so the output audio has the same beat structure as the input audio
             - if you want the generated audio to sound like the original, but with a different beat structure:
-                - uncheck the beat sync button
                 - decrease the periodic unmasking to anywhere from 2 to 8
             - if you want a more "random" generation:
-                - uncheck the beat sync button (or reduce the beat unmask duration)
                 - increase the periodic unmasking to 16 or more
                 - increase the temperatures!
@@ -228,11 +237,11 @@ with gr.Blocks() as demo:
         with gr.Column():
             mode = gr.Radio(
                 label="**mode**. note that loop mode requires a prefix and suffix longer than 0",
-                choices=["standard", "loop"],
                 value="standard"
             )
             num_vamps = gr.Number(
-                label="number of vamps (or loops). more vamps = longer generated audio",
                 value=1,
                 precision=0
             )
@@ -246,13 +255,13 @@ with gr.Blocks() as demo:
             input_audio = gr.Audio(
                 label="input audio",
                 interactive=False,
-                type="file",
             )
             audio_mask = gr.Audio(
                 label="audio mask (listen to this to hear the mask hints)",
                 interactive=False,
-                type="file",
             )
             # connect widgets
@@ -273,7 +282,7 @@ with gr.Blocks() as demo:
         with gr.Column():
             mask_periodic_amt = gr.Slider(
-                label="periodic hint  (0.0 means no hint, 2 means one hint every 2 timesteps, etc, 4 means one hint every 4 timesteps, etc)",
                 minimum=0,
                 maximum=64,
                 step=1,
@@ -321,6 +330,11 @@ with gr.Blocks() as demo:
                 value=True
             )
             num_steps = gr.Slider(
                 label="number of steps (should normally be between 12 and 36)",
                 minimum=4,
@@ -334,7 +348,7 @@ with gr.Blocks() as demo:
             output_audio = gr.Audio(
                 label="output audio",
                 interactive=False,
-                type="file"
             )
@@ -407,7 +421,7 @@ with gr.Blocks() as demo:
             mask_periodic_amt, beat_unmask_dur,
             mask_dwn_chk, dwn_factor,
             mask_up_chk, up_factor,
-            num_vamps, mode, use_beats, num_steps
         ],
         outputs=[output_audio, audio_mask]
     )
@@ -422,7 +436,7 @@ with gr.Blocks() as demo:
             mask_up_chk, up_factor,
             num_vamps, mode,
             output_audio,
-            notes_text, use_beats, num_steps
         ],
         outputs=[thank_you, download_file]
     )

     mask_periodic_amt, beat_unmask_dur,
     mask_dwn_chk, dwn_factor,
     mask_up_chk, up_factor,
+    num_vamps, mode, use_beats, num_steps, snap_to_beats
 ):
     # try:
         print(input_audio)
+        sig = at.AudioSignal(input_audio)
+        if snap_to_beats:
+            old_sig = sig.clone()
+            sig = interface.snap_to_beats(sig)
+            if sig.duration < (sig.duration / 4): # we cut off too much
+                sig = old_sig
+                print(f"new sig duration is {sig.duration} which is too short, reverting to old sig")
+            print(f"new sig duration is {sig.duration}")
         if beat_unmask_dur > 0.0 and use_beats:
             beat_mask = interface.make_beat_mask(
                 sig,
     mask_periodic_amt, beat_unmask_dur,
     mask_dwn_chk, dwn_factor,
     mask_up_chk, up_factor,
+    num_vamps, mode, output_audio, notes, use_beats, num_steps, snap_to_beats
 ):
     out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
     out_dir.mkdir(parents=True, exist_ok=True)
+    sig_in = at.AudioSignal(input_audio)
+    sig_out = at.AudioSignal(output_audio)
     sig_in.write(out_dir / "input.wav")
     sig_out.write(out_dir / "output.wav")
         "up_factor": up_factor,
         "num_vamps": num_vamps,
         "num_steps": num_steps,
+        "snap_to_beats": snap_to_beats,
         "mode": mode,
         "notes": notes,
     }
         with gr.Column():
             gr.Markdown("""
             ### Tips
+            - use the beat hint button so the output audio has the same beat structure as the input audio
             - if you want the generated audio to sound like the original, but with a different beat structure:
+                - uncheck the beat hint button
                 - decrease the periodic unmasking to anywhere from 2 to 8
             - if you want a more "random" generation:
+                - uncheck the beat hint button (or reduce the beat unmask duration)
                 - increase the periodic unmasking to 16 or more
                 - increase the temperatures!
         with gr.Column():
             mode = gr.Radio(
                 label="**mode**. note that loop mode requires a prefix and suffix longer than 0",
+                choices=["standard",],
                 value="standard"
             )
             num_vamps = gr.Number(
+                label="number of vamps. more vamps = longer generated audio",
                 value=1,
                 precision=0
             )
             input_audio = gr.Audio(
                 label="input audio",
                 interactive=False,
+                type="filepath",
             )
             audio_mask = gr.Audio(
                 label="audio mask (listen to this to hear the mask hints)",
                 interactive=False,
+                type="filepath",
             )
             # connect widgets
         with gr.Column():
             mask_periodic_amt = gr.Slider(
+                label="periodic hint  (0.0 means no hint, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
                 minimum=0,
                 maximum=64,
                 step=1,
                 value=True
             )
+            snap_to_beats = gr.Checkbox(
+                label="trim to beat markers (uncheck if the output audio is too short.)",
+                value=True
+            )
             num_steps = gr.Slider(
                 label="number of steps (should normally be between 12 and 36)",
                 minimum=4,
             output_audio = gr.Audio(
                 label="output audio",
                 interactive=False,
+                type="filepath"
             )
             mask_periodic_amt, beat_unmask_dur,
             mask_dwn_chk, dwn_factor,
             mask_up_chk, up_factor,
+            num_vamps, mode, use_beats, num_steps, snap_to_beats
         ],
         outputs=[output_audio, audio_mask]
     )
             mask_up_chk, up_factor,
             num_vamps, mode,
             output_audio,
+            notes_text, use_beats, num_steps, snap_to_beats
         ],
         outputs=[thank_you, download_file]
     )

vampnet/interface.py CHANGED Viewed

@@ -111,6 +111,21 @@ class Interface:
         z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
         return z
     def make_beat_mask(self,
             signal: AudioSignal,
             before_beat_s: float = 0.1,

         z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
         return z
+    def snap_to_beats(
+        self,
+        signal: AudioSignal
+    ):
+        """Trim a copy of ``signal`` so it spans the first to the last detected beat.
+
+        Args:
+            signal: audio to trim. The method works on ``signal.clone()``
+                rather than on ``signal`` itself.
+
+        Returns:
+            The result of trimming the clone. When fewer than two beats are
+            detected there is no beat-to-beat span to keep, so the untrimmed
+            clone is returned instead.
+
+        Raises:
+            AssertionError: if no ``beat_tracker`` attribute is set.
+        """
+        assert hasattr(self, "beat_tracker"), "No beat tracker loaded"
+        beats, _ = self.beat_tracker.extract_beats(signal)
+        trimmed = signal.clone()
+        # With zero beats ``beats[0]`` would raise IndexError, and with a
+        # single beat both cut points coincide and no audio would be left,
+        # so hand the audio back untouched in those cases.
+        if len(beats) < 2:
+            return trimmed
+        # beat values are scaled by the sample rate to obtain sample offsets
+        # (presumably beats are times in seconds -- confirm against the tracker)
+        samples_begin = int(beats[0] * signal.sample_rate)
+        samples_end = int(beats[-1] * signal.sample_rate)
+        # second argument is the number of samples that follow the last beat
+        return trimmed.trim(samples_begin, signal.length - samples_end)
     def make_beat_mask(self,
             signal: AudioSignal,
             before_beat_s: float = 0.1,