Spaces:
Build error
Build error
Sylvain Filoni
committed on
Commit
·
7fb6157
1
Parent(s):
3d381f7
added duration controls
Browse files
- README.md +1 -1
- app.py +87 -41
- constants.py +9 -0
- requirements.txt +5 -0
- utils.py +50 -0
README.md
CHANGED
|
@@ -4,7 +4,7 @@ emoji: 🌅🎶
|
|
| 4 |
colorFrom: green
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 3.
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
|
|
|
| 4 |
colorFrom: green
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 3.15.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
app.py
CHANGED
|
@@ -1,4 +1,11 @@
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import os
|
| 3 |
import requests
|
| 4 |
import urllib
|
|
@@ -7,7 +14,6 @@ from os import path
|
|
| 7 |
from pydub import AudioSegment
|
| 8 |
|
| 9 |
img_to_text = gr.Blocks.load(name="spaces/pharma/CLIP-Interrogator")
|
| 10 |
-
text_to_music = gr.Interface.load("spaces/fffiloni/text-2-music")
|
| 11 |
|
| 12 |
from share_btn import community_icon_html, loading_icon_html, share_js
|
| 13 |
|
|
@@ -15,22 +21,59 @@ def get_prompts(uploaded_image):
|
|
| 15 |
|
| 16 |
prompt = img_to_text(uploaded_image, "ViT-L (best for Stable Diffusion 1.*)", "fast", fn_index=1)[0]
|
| 17 |
|
| 18 |
-
music_result =
|
| 19 |
|
| 20 |
-
return music_result
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
save_as = "file.mp3"
|
| 35 |
|
| 36 |
data = urllib.request.urlopen(url)
|
|
@@ -44,7 +87,7 @@ def get_music(prompt):
|
|
| 44 |
sound = AudioSegment.from_mp3(save_as)
|
| 45 |
sound.export(wave_file, format="wav")
|
| 46 |
|
| 47 |
-
return wave_file
|
| 48 |
|
| 49 |
css = """
|
| 50 |
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
|
|
@@ -79,39 +122,42 @@ a {text-decoration-line: underline; font-weight: 600;}
|
|
| 79 |
"""
|
| 80 |
|
| 81 |
with gr.Blocks(css=css) as demo:
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
|
| 103 |
|
| 104 |
input_img = gr.Image(type="filepath", elem_id="input-img")
|
|
|
|
|
|
|
|
|
|
| 105 |
generate = gr.Button("Generate Music from Image")
|
| 106 |
|
| 107 |
music_output = gr.Audio(label="Result", type="filepath", elem_id="music-output")
|
| 108 |
|
| 109 |
with gr.Group(elem_id="share-btn-container"):
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
|
| 117 |
demo.queue(max_size=32, concurrency_count=20).launch()
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import base64
|
| 3 |
import gradio as gr
|
| 4 |
+
from sentence_transformers import SentenceTransformer
|
| 5 |
+
|
| 6 |
+
import httpx
|
| 7 |
+
import json
|
| 8 |
+
|
| 9 |
import os
|
| 10 |
import requests
|
| 11 |
import urllib
|
|
|
|
| 14 |
from pydub import AudioSegment
|
| 15 |
|
| 16 |
img_to_text = gr.Blocks.load(name="spaces/pharma/CLIP-Interrogator")
|
|
|
|
| 17 |
|
| 18 |
from share_btn import community_icon_html, loading_icon_html, share_js
|
| 19 |
|
|
|
|
| 21 |
|
| 22 |
prompt = img_to_text(uploaded_image, "ViT-L (best for Stable Diffusion 1.*)", "fast", fn_index=1)[0]
|
| 23 |
|
| 24 |
+
music_result = generate_track_by_prompt(prompt, duration, gen_intensity, audio_format)
|
| 25 |
|
| 26 |
+
return music_result[0], gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
|
| 27 |
|
| 28 |
+
from utils import get_tags_for_prompts, get_mubert_tags_embeddings, get_pat
|
| 29 |
+
|
| 30 |
+
minilm = SentenceTransformer('all-MiniLM-L6-v2')
|
| 31 |
+
mubert_tags_embeddings = get_mubert_tags_embeddings(minilm)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def get_track_by_tags(tags, pat, duration, gen_intensity, maxit=20, loop=False):
|
| 35 |
+
if loop:
|
| 36 |
+
mode = "loop"
|
| 37 |
+
else:
|
| 38 |
+
mode = "track"
|
| 39 |
+
r = httpx.post('https://api-b2b.mubert.com/v2/RecordTrackTTM',
|
| 40 |
+
json={
|
| 41 |
+
"method": "RecordTrackTTM",
|
| 42 |
+
"params": {
|
| 43 |
+
"pat": pat,
|
| 44 |
+
"duration": duration,
|
| 45 |
+
"format": "wav",
|
| 46 |
+
"intensity":gen_intensity,
|
| 47 |
+
"tags": tags,
|
| 48 |
+
"mode": mode
|
| 49 |
+
}
|
| 50 |
+
})
|
| 51 |
+
|
| 52 |
+
rdata = json.loads(r.text)
|
| 53 |
+
assert rdata['status'] == 1, rdata['error']['text']
|
| 54 |
+
trackurl = rdata['data']['tasks'][0]['download_link']
|
| 55 |
+
|
| 56 |
+
print('Generating track ', end='')
|
| 57 |
+
for i in range(maxit):
|
| 58 |
+
r = httpx.get(trackurl)
|
| 59 |
+
if r.status_code == 200:
|
| 60 |
+
return trackurl
|
| 61 |
+
time.sleep(1)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def generate_track_by_prompt(prompt, duration, gen_intensity):
|
| 65 |
+
try:
|
| 66 |
+
pat = get_pat("[email protected]")
|
| 67 |
+
_, tags = get_tags_for_prompts(minilm, mubert_tags_embeddings, [prompt, ])[0]
|
| 68 |
+
result = get_track_by_tags(tags, pat, int(duration), gen_intensity, loop=False)
|
| 69 |
+
print(result)
|
| 70 |
+
return result, ",".join(tags), "Success"
|
| 71 |
+
except Exception as e:
|
| 72 |
+
return None, "", str(e)
|
| 73 |
+
|
| 74 |
+
def convert_mp3_to_wav(mp3_filepath):
|
| 75 |
+
|
| 76 |
+
url = mp3_filepath
|
| 77 |
save_as = "file.mp3"
|
| 78 |
|
| 79 |
data = urllib.request.urlopen(url)
|
|
|
|
| 87 |
sound = AudioSegment.from_mp3(save_as)
|
| 88 |
sound.export(wave_file, format="wav")
|
| 89 |
|
| 90 |
+
return wave_file
|
| 91 |
|
| 92 |
css = """
|
| 93 |
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
|
|
|
|
| 122 |
"""
|
| 123 |
|
| 124 |
with gr.Blocks(css=css) as demo:
|
| 125 |
+
with gr.Column(elem_id="col-container"):
|
| 126 |
+
gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
|
| 127 |
+
<div
|
| 128 |
+
style="
|
| 129 |
+
display: inline-flex;
|
| 130 |
+
align-items: center;
|
| 131 |
+
gap: 0.8rem;
|
| 132 |
+
font-size: 1.75rem;
|
| 133 |
+
"
|
| 134 |
+
>
|
| 135 |
+
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
|
| 136 |
+
Image to Music
|
| 137 |
+
</h1>
|
| 138 |
+
</div>
|
| 139 |
+
<p style="margin-bottom: 10px; font-size: 94%">
|
| 140 |
+
Sends an image in to <a href="https://huggingface.co/spaces/pharma/CLIP-Interrogator" target="_blank">CLIP Interrogator</a>
|
| 141 |
+
to generate a text prompt which is then run through
|
| 142 |
+
<a href="https://huggingface.co/Mubert" target="_blank">Mubert</a> text-to-music to generate music from the input image!
|
| 143 |
+
</p>
|
| 144 |
+
</div>""")
|
| 145 |
|
| 146 |
|
| 147 |
input_img = gr.Image(type="filepath", elem_id="input-img")
|
| 148 |
+
with gr.Row():
|
| 149 |
+
track_duration = gr.Slider(minimum=20, maximum=120, value=30, step=5, label="Track duration", elem_id="duration-inp")
|
| 150 |
+
gen_intensity = gr.Dropdown(choices=["low", "medium", "high"], value="high", label="Complexity")
|
| 151 |
generate = gr.Button("Generate Music from Image")
|
| 152 |
|
| 153 |
music_output = gr.Audio(label="Result", type="filepath", elem_id="music-output")
|
| 154 |
|
| 155 |
with gr.Group(elem_id="share-btn-container"):
|
| 156 |
+
community_icon = gr.HTML(community_icon_html, visible=False)
|
| 157 |
+
loading_icon = gr.HTML(loading_icon_html, visible=False)
|
| 158 |
+
share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
|
| 159 |
|
| 160 |
+
generate.click(get_prompts, inputs=[input_img,track_duration,gen_intensity], outputs=[music_output, share_button, community_icon, loading_icon], api_name="i2m")
|
| 161 |
+
share_button.click(None, [], [], _js=share_js)
|
| 162 |
|
| 163 |
demo.queue(max_size=32, concurrency_count=20).launch()
|
constants.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
MUBERT_LICENCE = os.environ.get('MUBERT_LICENCE')
|
| 5 |
+
MUBERT_TOKEN = os.environ.get('MUBERT_TOKEN')
|
| 6 |
+
|
| 7 |
+
MUBERT_MODE = "loop"
|
| 8 |
+
MUBERT_TAGS_STRING = 'tribal,action,kids,neo-classic,run 130,pumped,jazz / funk,ethnic,dubtechno,reggae,acid jazz,liquidfunk,funk,witch house,tech house,underground,artists,mystical,disco,sensorium,r&b,agender,psychedelic trance / psytrance,peaceful,run 140,piano,run 160,setting,meditation,christmas,ambient,horror,cinematic,electro house,idm,bass,minimal,underscore,drums,glitchy,beautiful,technology,tribal house,country pop,jazz & funk,documentary,space,classical,valentines,chillstep,experimental,trap,new jack swing,drama,post-rock,tense,corporate,neutral,happy,analog,funky,spiritual,sberzvuk special,chill hop,dramatic,catchy,holidays,fitness 90,optimistic,orchestra,acid techno,energizing,romantic,minimal house,breaks,hyper pop,warm up,dreamy,dark,urban,microfunk,dub,nu disco,vogue,keys,hardcore,aggressive,indie,electro funk,beauty,relaxing,trance,pop,hiphop,soft,acoustic,chillrave / ethno-house,deep techno,angry,dance,fun,dubstep,tropical,latin pop,heroic,world music,inspirational,uplifting,atmosphere,art,epic,advertising,chillout,scary,spooky,slow ballad,saxophone,summer,erotic,jazzy,energy 100,kara mar,xmas,atmospheric,indie pop,hip-hop,yoga,reggaeton,lounge,travel,running,folk,chillrave & ethno-house,detective,darkambient,chill,fantasy,minimal techno,special,night,tropical house,downtempo,lullaby,meditative,upbeat,glitch hop,fitness,neurofunk,sexual,indie rock,future pop,jazz,cyberpunk,melancholic,happy hardcore,family / kids,synths,electric guitar,comedy,psychedelic trance & psytrance,edm,psychedelic rock,calm,zen,bells,podcast,melodic house,ethnic percussion,nature,heavy,bassline,indie dance,techno,drumnbass,synth pop,vaporwave,sad,8-bit,chillgressive,deep,orchestral,futuristic,hardtechno,nostalgic,big room,sci-fi,tutorial,joyful,pads,minimal 170,drill,ethnic 108,amusing,sleepy ambient,psychill,italo disco,lofi,house,acoustic guitar,bassline house,rock,k-pop,synthwave,deep house,electronica,gabber,nightlife,sport & fitness,road trip,celebration,electro,disco 
house,electronic'
|
| 9 |
+
MUBERT_TAGS = np.array(MUBERT_TAGS_STRING.split(','))
|
requirements.txt
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
pydub
|
| 2 |
ffmpeg
|
| 3 |
requests
|
|
|
|
| 1 |
+
httpx
|
| 2 |
+
sentence-transformers
|
| 3 |
+
ffmpeg
|
| 4 |
+
audio2numpy
|
| 5 |
+
|
| 6 |
pydub
|
| 7 |
ffmpeg
|
| 8 |
requests
|
utils.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import numpy as np
|
| 3 |
+
import httpx
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
from constants import MUBERT_TAGS, MUBERT_MODE, MUBERT_LICENCE, MUBERT_TOKEN
|
| 7 |
+
|
| 8 |
+
def get_mubert_tags_embeddings(w2v_model):
|
| 9 |
+
return w2v_model.encode(MUBERT_TAGS)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_pat(email: str):
|
| 13 |
+
r = httpx.post('https://api-b2b.mubert.com/v2/GetServiceAccess',
|
| 14 |
+
json={
|
| 15 |
+
"method": "GetServiceAccess",
|
| 16 |
+
"params": {
|
| 17 |
+
"email": email,
|
| 18 |
+
"license": MUBERT_LICENCE,
|
| 19 |
+
"token": MUBERT_TOKEN,
|
| 20 |
+
"mode": MUBERT_MODE,
|
| 21 |
+
}
|
| 22 |
+
})
|
| 23 |
+
|
| 24 |
+
rdata = json.loads(r.text)
|
| 25 |
+
assert rdata['status'] == 1, "probably incorrect e-mail"
|
| 26 |
+
pat = rdata['data']['pat']
|
| 27 |
+
return pat
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def find_similar(em, embeddings, method='cosine'):
|
| 31 |
+
scores = []
|
| 32 |
+
for ref in embeddings:
|
| 33 |
+
if method == 'cosine':
|
| 34 |
+
scores.append(1 - np.dot(ref, em) / (np.linalg.norm(ref) * np.linalg.norm(em)))
|
| 35 |
+
if method == 'norm':
|
| 36 |
+
scores.append(np.linalg.norm(ref - em))
|
| 37 |
+
return np.array(scores), np.argsort(scores)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def get_tags_for_prompts(w2v_model, mubert_tags_embeddings, prompts, top_n=3, debug=False):
|
| 41 |
+
prompts_embeddings = w2v_model.encode(prompts)
|
| 42 |
+
ret = []
|
| 43 |
+
for i, pe in enumerate(prompts_embeddings):
|
| 44 |
+
scores, idxs = find_similar(pe, mubert_tags_embeddings)
|
| 45 |
+
top_tags = MUBERT_TAGS[idxs[:top_n]]
|
| 46 |
+
top_prob = 1 - scores[idxs[:top_n]]
|
| 47 |
+
if debug:
|
| 48 |
+
print(f"Prompt: {prompts[i]}\nTags: {', '.join(top_tags)}\nScores: {top_prob}\n\n\n")
|
| 49 |
+
ret.append((prompts[i], list(top_tags)))
|
| 50 |
+
return ret
|