Spaces:
Runtime error
Runtime error
from qdrant_client import QdrantClient | |
from qdrant_client import models | |
from nicegui import ui | |
import pandas as pd | |
import os | |
( | |
ui.label('๐ง Music Search with Qdrant') | |
.style('color: #ab003c; font-size: 350%; font-weight: 450') | |
.classes('self-center') | |
) | |
ui.markdown("## ๐ป A New Way to Find Music ๐ถ").classes('self-center') | |
ui.markdown( | |
""" | |
๐ฏ **The purpose** of this app is to showcase one of the many (cool ๐ and fun ๐๐ป๐บ๐ฝ) ways in which you can use [Qdrant](https://qdrant.tech/) | |
to conduct [semantic search](https://en.wikipedia.org/wiki/Semantic_search) using music data. | |
๐ฅ **The dataset** used for demo app is the [Ludwig Music Dataset (Moods and Subgenres)](https://www.kaggle.com/datasets/jorgeruizdev/ludwig-music-dataset-moods-and-subgenres) | |
which is freely available on Kaggle. I contains songs for the 9 genres shown below as well as some metadata which was | |
used as the [payload](https://qdrant.tech/documentation/concepts/payload/) for this app. | |
๐ค **The model** used to extract the embeddings is the [`panns_inference`](https://github.com/qiuqiangkong/panns_inference) | |
freely distributed as a Python library. Please note that, while these embeddings show a remarkable quality on the Ludwig | |
data, they have been taken as is out-of-the-box and have not been fine-tuned one on the Ludwig dataset. | |
You can evaluate the semantic search capabilities of our app by searching for songs you know and retrieving the | |
most similar ones. In addition, if you want to see similar songs in a different genre than the one of the song you | |
selected, pick one of the genres available below to see the results. | |
๐ฝ **The songs** returned by Qdrant are being downloaded on-the-fly from an S3 bucket. | |
Each result will come back with a card containing an **image** of the artist, the **name** of the artist and the song, | |
the **similarity** score, and the **genre** of song (inconsistencies in the genre, e.g. Adele songs classified as "electronic" | |
are present in the dataset). | |
๐ผ๏ธ **The images** shown in the cards represent the names of each artist in the dataset (~4400 unique ones) and these were | |
collected using the first result from a query sent to Bing's Image Search API. Hence, some images might not be the correct ones. | |
""" | |
).style("max-width: 1000px; font-size: 120%").classes('self-center') | |
metadata = pd.read_csv("payload.csv") | |
artist_song = sorted(metadata['artist_song'].tolist()) | |
collection = "music_vectors" | |
client = QdrantClient( | |
"https://394294d5-30bb-4958-ad1a-15a3561edce5.us-east-1-0.aws.cloud.qdrant.io:6333", | |
api_key=os.environ['QDRANT_API_KEY'], | |
) | |
def create_music_card(qdrant_results): | |
for song in qdrant_results: | |
with ui.column(): | |
with ui.card().tight().style("height: 350px; width: 300px"): | |
ui.image(song.payload['photos']).classes('w-[300px] h-[210px]') | |
with ui.card_section(): | |
ui.label(f"Artist: {song.payload['artist']}") | |
ui.label(f"Song Name: {song.payload['name']}") | |
ui.label(f"Genre: {song.payload['genre']}") | |
try: | |
ui.label(f"Score: {song.score}") | |
except: | |
pass | |
first_song = ui.audio(song.payload['urls'])#.classes('w-64'): | |
first_song.on('ended', lambda _: ui.notify('Audio playback completed!')) | |
def get_vectors(): | |
"""Callback function for our search box""" | |
song = song_selection.value.split(' - ')[-1] # get the name of the song selected | |
get_index = metadata.loc[metadata['name'] == song, 'index'].iloc[0] # get the index of such a song | |
# retrieve the vector and metadata associated with it | |
song_vector = client.retrieve( | |
collection_name=collection, ids=[int(get_index)], with_payload=True, with_vectors=True | |
) | |
# Clear the result from the previous artist selected | |
main_artist.clear() | |
with main_artist: | |
create_music_card(song_vector) | |
# Clear the result from the previous search request | |
results.clear() | |
with results: | |
if filters.value: | |
genre_filter = models.Filter( | |
must=[models.FieldCondition(key="genre", match=models.MatchValue(value=filters.value))] | |
) | |
music = client.search( | |
collection_name=collection, query_vector=song_vector[0].vector, query_filter=genre_filter, limit=num_songs.value | |
)[1:] | |
else: | |
music = client.search(collection_name=collection, query_vector=song_vector[0].vector, limit=num_songs.value)[1:] | |
create_music_card(music) | |
with ui.label("How many songs would you like to get back? ๐ค").classes('w-200 self-center mt-10'): | |
num_songs = ui.slider(min=1, max=30, step=1, value=11) | |
ui.linear_progress().bind_value_from(num_songs, 'value') | |
with ui.label("Filters you can apply to your search ๐").classes('w-200 self-center mt-5'): | |
filters = ui.radio([None] + metadata.genre.unique().tolist(), value=None).props('inline color=#ab003c') | |
song_selection = ui.select( | |
artist_song, value='Dave Van Ronk - Buckets of Rain', on_change=get_vectors | |
).style("width: 700px").classes('w-200 self-center mt-10 transition-all') | |
main_artist = ui.row().classes('w-full justify-center') | |
results = ui.row().classes('w-full justify-center') | |
ui.colors( | |
primary='#ab003c', | |
secondary='#2c387e', | |
accent='#f50057', | |
dark='#f73378', | |
positive='#f73378', | |
negative='#ba000d' | |
) | |
ui.run( | |
title='Qdrant for Music', | |
favicon='https://avatars.githubusercontent.com/u/73504361?s=280&v=4', | |
# dark=True | |
) |