Spaces:
Build error
Build error
| import gradio as gr | |
| from smart_open import open | |
| import gensim | |
| from gensim.similarities.annoy import AnnoyIndexer | |
| import plotly.express as px | |
| import pandas as pd | |
| import numpy as np | |
| import pacmap | |
def load_gensim(fname, num_trees=100):
    """Load word vectors into gensim and build an Annoy index over them.

    Args:
        fname: Path to a text-format (``binary=False``) word2vec file.
        num_trees: Tree count handed to ``AnnoyIndexer``. Defaults to 100,
            the value this function previously hard-coded.

    Returns:
        A ``(model, annoy_index)`` tuple: the loaded ``KeyedVectors`` and
        the ``AnnoyIndexer`` built from them.
    """
    model = gensim.models.KeyedVectors.load_word2vec_format(fname, binary=False)
    # Search using the Annoy indexer; faster than the exact method.
    annoy_index = AnnoyIndexer(model, num_trees)
    return model, annoy_index
def searchNexplore(word, final_dfs, model, annoy_index, topn):
    """Look up the approximate neighbours of ``word`` and fetch their rows.

    Args:
        word: Key whose vector is read from ``model``.
        final_dfs: DataFrame indexed by the keys the search returns.
        model: Vector model supporting ``model[word]`` and ``most_similar``.
        annoy_index: Indexer forwarded to ``model.most_similar``.
        topn: Number of neighbours to request; coerced to ``int``.

    Returns:
        A ``(searched_df, approximate_neighbors)`` tuple: the rows of
        ``final_dfs`` selected by neighbour key (in result order) and the
        unmodified result of ``model.most_similar``.

    Lookup failures from ``model[word]`` or ``final_dfs.loc`` (for example a
    ``KeyError`` for an unknown key) propagate unchanged.
    """
    # UI widgets such as sliders may deliver a float; the neighbour count
    # handed to the indexer has to be a real integer.
    topn = int(topn)

    vector = model[word]
    approximate_neighbors = model.most_similar(
        [vector], topn=topn, indexer=annoy_index
    )
    # The first element of every result entry is the neighbour's key.
    rows = [neighbor[0] for neighbor in approximate_neighbors]
    searched_df = final_dfs.loc[rows]
    return searched_df, approximate_neighbors
def embedding_dim_reduction(
    embeddings,
    n_dim=2,
    n_neighbors=10,
    MN_ratio=0.5,
    FP_ratio=2.0,
    lr=0.05,
    num_iters=1000,
    init="pca",
):
    """Perform PaCMAP dimension reduction.

    Selection of values:
        1. Default transforms: ``MN_ratio=0.5``, ``FP_ratio=2.0``
        2. For heavy transformations: ``MN_ratio=30``, ``FP_ratio=100.0``

    Args:
        embeddings: Data passed straight to ``PaCMAP.fit_transform``.
        n_dim: Number of output components.
        n_neighbors: ``n_neighbors`` setting for PaCMAP.
        MN_ratio: ``MN_ratio`` setting for PaCMAP.
        FP_ratio: ``FP_ratio`` setting for PaCMAP.
        lr: Learning rate for PaCMAP; defaults to the previously
            hard-coded 0.05.
        num_iters: Iteration count for PaCMAP; defaults to the previously
            hard-coded 1000.
        init: Initialisation passed to ``fit_transform``; defaults to the
            previously hard-coded ``"pca"``.

    Returns:
        Whatever ``PaCMAP.fit_transform`` returns; callers index it as a
        2-D array with ``n_dim`` columns.
    """
    reducer = pacmap.PaCMAP(
        n_components=n_dim,
        n_neighbors=n_neighbors,
        MN_ratio=MN_ratio,
        FP_ratio=FP_ratio,
        lr=lr,
        num_iters=num_iters,
        verbose=False,
    )
    return reducer.fit_transform(embeddings, init=init)
# Load the vectors and their Annoy index once at import time so every
# request served by the UI reuses them.
model, annoy_index = load_gensim("embedding_dump.txt")

# Raw embeddings table, re-indexed by its "Unnamed: 0" column so rows can be
# selected by neighbour key with ``.loc``.
final_dfs = pd.read_csv("raw_embeddings_allinone.csv").set_index("Unnamed: 0")
def get_semantic(input_text, topn):
    """Search neighbours of ``input_text`` and plot them in 2-D and 3-D.

    Args:
        input_text: Key to search for in the module-level ``model``.
        topn: Number of neighbours to retrieve.

    Returns:
        ``(fig1, fig2, approximate_neighbors)``: a 2-D scatter figure, a 3-D
        scatter figure, and the raw neighbour list from ``searchNexplore``.
    """
    searched_df, approximate_neighbors = searchNexplore(
        input_text, final_dfs, model, annoy_index, topn
    )

    # Neighbour keys label and colour every point in both figures.
    labels = searched_df.index.tolist()
    # PaCMAP settings shared by the 2-D and 3-D projections.
    pacmap_settings = {"n_neighbors": 10, "MN_ratio": 0.5, "FP_ratio": 2.0}

    coords_2d = embedding_dim_reduction(searched_df, n_dim=2, **pacmap_settings)
    fig1 = px.scatter(
        x=coords_2d[:, 0],
        y=coords_2d[:, 1],
        hover_name=labels,
        color=labels,
    )

    coords_3d = embedding_dim_reduction(searched_df, n_dim=3, **pacmap_settings)
    fig2 = px.scatter_3d(
        x=coords_3d[:, 0],
        y=coords_3d[:, 1],
        z=coords_3d[:, 2],
        hover_name=labels,
        color=labels,
    )

    return fig1, fig2, approximate_neighbors
# Build the Gradio UI around ``get_semantic`` (a text box plus a slider for
# the neighbour count) and start serving it immediately.
# NOTE: ``iface`` ends up bound to the return value of ``launch()``, not to
# the ``gr.Interface`` object itself.
iface = gr.Interface(
    title="Sentiment Explorer",
    description="Get Sentiment search results",
    theme="peach",
    fn=get_semantic,
    inputs=["text", gr.Slider(minimum=0, maximum=1000, value=100)],
    outputs=["plot", "plot", "list"],
    examples=[["SOPA_CANJA_C/ALETRIA_MAGGI_82GR", 100]],
).launch(inline=False)