Spaces:
Runtime error
Runtime error
Hugo Flores Garcia
committed on
Commit
·
128981d
1
Parent(s):
03f09ee
demo
Browse files
- demo.py +31 -17
- vampnet/interface.py +15 -0
demo.py
CHANGED
|
@@ -65,13 +65,21 @@ def vamp(
|
|
| 65 |
mask_periodic_amt, beat_unmask_dur,
|
| 66 |
mask_dwn_chk, dwn_factor,
|
| 67 |
mask_up_chk, up_factor,
|
| 68 |
-
num_vamps, mode, use_beats, num_steps
|
| 69 |
):
|
| 70 |
# try:
|
| 71 |
print(input_audio)
|
| 72 |
|
| 73 |
-
sig = at.AudioSignal(input_audio
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
if beat_unmask_dur > 0.0 and use_beats:
|
| 76 |
beat_mask = interface.make_beat_mask(
|
| 77 |
sig,
|
|
@@ -142,13 +150,13 @@ def save_vamp(
|
|
| 142 |
mask_periodic_amt, beat_unmask_dur,
|
| 143 |
mask_dwn_chk, dwn_factor,
|
| 144 |
mask_up_chk, up_factor,
|
| 145 |
-
num_vamps, mode, output_audio, notes, use_beats, num_steps
|
| 146 |
):
|
| 147 |
out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
|
| 148 |
out_dir.mkdir(parents=True, exist_ok=True)
|
| 149 |
|
| 150 |
-
sig_in = at.AudioSignal(input_audio
|
| 151 |
-
sig_out = at.AudioSignal(output_audio
|
| 152 |
|
| 153 |
sig_in.write(out_dir / "input.wav")
|
| 154 |
sig_out.write(out_dir / "output.wav")
|
|
@@ -168,6 +176,7 @@ def save_vamp(
|
|
| 168 |
"up_factor": up_factor,
|
| 169 |
"num_vamps": num_vamps,
|
| 170 |
"num_steps": num_steps,
|
|
|
|
| 171 |
"mode": mode,
|
| 172 |
"notes": notes,
|
| 173 |
}
|
|
@@ -212,12 +221,12 @@ with gr.Blocks() as demo:
|
|
| 212 |
with gr.Column():
|
| 213 |
gr.Markdown("""
|
| 214 |
### Tips
|
| 215 |
-
- use the beat
|
| 216 |
- if you want the generated audio to sound like the original, but with a different beat structure:
|
| 217 |
-
- uncheck the beat
|
| 218 |
- decrease the periodic unmasking to anywhere from 2 to 8
|
| 219 |
- if you want a more "random" generation:
|
| 220 |
-
- uncheck the beat
|
| 221 |
- increase the periodic unmasking to 16 or more
|
| 222 |
- increase the temperatures!
|
| 223 |
|
|
@@ -228,11 +237,11 @@ with gr.Blocks() as demo:
|
|
| 228 |
with gr.Column():
|
| 229 |
mode = gr.Radio(
|
| 230 |
label="**mode**. note that loop mode requires a prefix and suffix longer than 0",
|
| 231 |
-
choices=["standard",
|
| 232 |
value="standard"
|
| 233 |
)
|
| 234 |
num_vamps = gr.Number(
|
| 235 |
-
label="number of vamps
|
| 236 |
value=1,
|
| 237 |
precision=0
|
| 238 |
)
|
|
@@ -246,13 +255,13 @@ with gr.Blocks() as demo:
|
|
| 246 |
input_audio = gr.Audio(
|
| 247 |
label="input audio",
|
| 248 |
interactive=False,
|
| 249 |
-
type="
|
| 250 |
)
|
| 251 |
|
| 252 |
audio_mask = gr.Audio(
|
| 253 |
label="audio mask (listen to this to hear the mask hints)",
|
| 254 |
interactive=False,
|
| 255 |
-
type="
|
| 256 |
)
|
| 257 |
|
| 258 |
# connect widgets
|
|
@@ -273,7 +282,7 @@ with gr.Blocks() as demo:
|
|
| 273 |
with gr.Column():
|
| 274 |
|
| 275 |
mask_periodic_amt = gr.Slider(
|
| 276 |
-
label="periodic hint (0.0 means no hint, 2
|
| 277 |
minimum=0,
|
| 278 |
maximum=64,
|
| 279 |
step=1,
|
|
@@ -321,6 +330,11 @@ with gr.Blocks() as demo:
|
|
| 321 |
value=True
|
| 322 |
)
|
| 323 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
num_steps = gr.Slider(
|
| 325 |
label="number of steps (should normally be between 12 and 36)",
|
| 326 |
minimum=4,
|
|
@@ -334,7 +348,7 @@ with gr.Blocks() as demo:
|
|
| 334 |
output_audio = gr.Audio(
|
| 335 |
label="output audio",
|
| 336 |
interactive=False,
|
| 337 |
-
type="
|
| 338 |
)
|
| 339 |
|
| 340 |
|
|
@@ -407,7 +421,7 @@ with gr.Blocks() as demo:
|
|
| 407 |
mask_periodic_amt, beat_unmask_dur,
|
| 408 |
mask_dwn_chk, dwn_factor,
|
| 409 |
mask_up_chk, up_factor,
|
| 410 |
-
num_vamps, mode, use_beats, num_steps
|
| 411 |
],
|
| 412 |
outputs=[output_audio, audio_mask]
|
| 413 |
)
|
|
@@ -422,7 +436,7 @@ with gr.Blocks() as demo:
|
|
| 422 |
mask_up_chk, up_factor,
|
| 423 |
num_vamps, mode,
|
| 424 |
output_audio,
|
| 425 |
-
notes_text, use_beats, num_steps
|
| 426 |
],
|
| 427 |
outputs=[thank_you, download_file]
|
| 428 |
)
|
|
|
|
| 65 |
mask_periodic_amt, beat_unmask_dur,
|
| 66 |
mask_dwn_chk, dwn_factor,
|
| 67 |
mask_up_chk, up_factor,
|
| 68 |
+
num_vamps, mode, use_beats, num_steps, snap_to_beats
|
| 69 |
):
|
| 70 |
# try:
|
| 71 |
print(input_audio)
|
| 72 |
|
| 73 |
+
sig = at.AudioSignal(input_audio)
|
| 74 |
+
|
| 75 |
+
if snap_to_beats:
|
| 76 |
+
old_sig = sig.clone()
|
| 77 |
+
sig = interface.snap_to_beats(sig)
|
| 78 |
+
if sig.duration < (sig.duration / 4): # we cut off too much
|
| 79 |
+
sig = old_sig
|
| 80 |
+
print(f"new sig duration is {sig.duration} which is too short, reverting to old sig")
|
| 81 |
+
print(f"new sig duration is {sig.duration}")
|
| 82 |
+
|
| 83 |
if beat_unmask_dur > 0.0 and use_beats:
|
| 84 |
beat_mask = interface.make_beat_mask(
|
| 85 |
sig,
|
|
|
|
| 150 |
mask_periodic_amt, beat_unmask_dur,
|
| 151 |
mask_dwn_chk, dwn_factor,
|
| 152 |
mask_up_chk, up_factor,
|
| 153 |
+
num_vamps, mode, output_audio, notes, use_beats, num_steps, snap_to_beats
|
| 154 |
):
|
| 155 |
out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
|
| 156 |
out_dir.mkdir(parents=True, exist_ok=True)
|
| 157 |
|
| 158 |
+
sig_in = at.AudioSignal(input_audio)
|
| 159 |
+
sig_out = at.AudioSignal(output_audio)
|
| 160 |
|
| 161 |
sig_in.write(out_dir / "input.wav")
|
| 162 |
sig_out.write(out_dir / "output.wav")
|
|
|
|
| 176 |
"up_factor": up_factor,
|
| 177 |
"num_vamps": num_vamps,
|
| 178 |
"num_steps": num_steps,
|
| 179 |
+
"snap_to_beats": snap_to_beats,
|
| 180 |
"mode": mode,
|
| 181 |
"notes": notes,
|
| 182 |
}
|
|
|
|
| 221 |
with gr.Column():
|
| 222 |
gr.Markdown("""
|
| 223 |
### Tips
|
| 224 |
+
- use the beat hint button so the output audio has the same beat structure as the input audio
|
| 225 |
- if you want the generated audio to sound like the original, but with a different beat structure:
|
| 226 |
+
- uncheck the beat hint button
|
| 227 |
- decrease the periodic unmasking to anywhere from 2 to 8
|
| 228 |
- if you want a more "random" generation:
|
| 229 |
+
- uncheck the beat hint button (or reduce the beat unmask duration)
|
| 230 |
- increase the periodic unmasking to 16 or more
|
| 231 |
- increase the temperatures!
|
| 232 |
|
|
|
|
| 237 |
with gr.Column():
|
| 238 |
mode = gr.Radio(
|
| 239 |
label="**mode**. note that loop mode requires a prefix and suffix longer than 0",
|
| 240 |
+
choices=["standard",],
|
| 241 |
value="standard"
|
| 242 |
)
|
| 243 |
num_vamps = gr.Number(
|
| 244 |
+
label="number of vamps. more vamps = longer generated audio",
|
| 245 |
value=1,
|
| 246 |
precision=0
|
| 247 |
)
|
|
|
|
| 255 |
input_audio = gr.Audio(
|
| 256 |
label="input audio",
|
| 257 |
interactive=False,
|
| 258 |
+
type="filepath",
|
| 259 |
)
|
| 260 |
|
| 261 |
audio_mask = gr.Audio(
|
| 262 |
label="audio mask (listen to this to hear the mask hints)",
|
| 263 |
interactive=False,
|
| 264 |
+
type="filepath",
|
| 265 |
)
|
| 266 |
|
| 267 |
# connect widgets
|
|
|
|
| 282 |
with gr.Column():
|
| 283 |
|
| 284 |
mask_periodic_amt = gr.Slider(
|
| 285 |
+
label="periodic hint (0.0 means no hint, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
|
| 286 |
minimum=0,
|
| 287 |
maximum=64,
|
| 288 |
step=1,
|
|
|
|
| 330 |
value=True
|
| 331 |
)
|
| 332 |
|
| 333 |
+
snap_to_beats = gr.Checkbox(
|
| 334 |
+
label="trim to beat markers (uncheck if the output audio is too short.)",
|
| 335 |
+
value=True
|
| 336 |
+
)
|
| 337 |
+
|
| 338 |
num_steps = gr.Slider(
|
| 339 |
label="number of steps (should normally be between 12 and 36)",
|
| 340 |
minimum=4,
|
|
|
|
| 348 |
output_audio = gr.Audio(
|
| 349 |
label="output audio",
|
| 350 |
interactive=False,
|
| 351 |
+
type="filepath"
|
| 352 |
)
|
| 353 |
|
| 354 |
|
|
|
|
| 421 |
mask_periodic_amt, beat_unmask_dur,
|
| 422 |
mask_dwn_chk, dwn_factor,
|
| 423 |
mask_up_chk, up_factor,
|
| 424 |
+
num_vamps, mode, use_beats, num_steps, snap_to_beats
|
| 425 |
],
|
| 426 |
outputs=[output_audio, audio_mask]
|
| 427 |
)
|
|
|
|
| 436 |
mask_up_chk, up_factor,
|
| 437 |
num_vamps, mode,
|
| 438 |
output_audio,
|
| 439 |
+
notes_text, use_beats, num_steps, snap_to_beats
|
| 440 |
],
|
| 441 |
outputs=[thank_you, download_file]
|
| 442 |
)
|
vampnet/interface.py
CHANGED
|
@@ -111,6 +111,21 @@ class Interface:
|
|
| 111 |
z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
|
| 112 |
return z
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
def make_beat_mask(self,
|
| 115 |
signal: AudioSignal,
|
| 116 |
before_beat_s: float = 0.1,
|
|
|
|
| 111 |
z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
|
| 112 |
return z
|
| 113 |
|
| 114 |
+
def snap_to_beats(
|
| 115 |
+
self,
|
| 116 |
+
signal: AudioSignal
|
| 117 |
+
):
|
| 118 |
+
assert hasattr(self, "beat_tracker"), "No beat tracker loaded"
|
| 119 |
+
beats, downbeats = self.beat_tracker.extract_beats(signal)
|
| 120 |
+
|
| 121 |
+
# trim the signal around the first beat time
|
| 122 |
+
samples_begin = int(beats[0] * signal.sample_rate )
|
| 123 |
+
samples_end = int(beats[-1] * signal.sample_rate)
|
| 124 |
+
print(beats[0])
|
| 125 |
+
signal = signal.clone().trim(samples_begin, signal.length - samples_end)
|
| 126 |
+
|
| 127 |
+
return signal
|
| 128 |
+
|
| 129 |
def make_beat_mask(self,
|
| 130 |
signal: AudioSignal,
|
| 131 |
before_beat_s: float = 0.1,
|