# abhicake's picture
# Update app.py
# 7f0e344 verified
# raw
# history blame
# 1.51 kB
import json
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import gradio as gr
# Load the SentenceTransformer model once at import time so that every
# query handled by the app reuses the same instance.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Load the embeddings from the JSON file.
# The encoding is given explicitly: JSON text is UTF-8, whereas open()'s
# default depends on the platform locale and can mis-decode non-ASCII text.
with open('data_with_embeddings.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
# Function to perform the search
def search_courses(user_query, top_k=4):
    """Return the names of the courses most similar to ``user_query``.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the ``'embedding'`` entry of every record in
    the module-level ``data``.

    Args:
        user_query: Free-text search string.
        top_k: Maximum number of course names to return (default 4).

    Returns:
        A list of at most ``top_k`` ``'Course Name'`` values, ordered from
        most to least similar. Records with equal scores keep the order they
        have in ``data``. An empty list is returned for a blank query, a
        non-positive ``top_k``, or when ``data`` is empty.
    """
    # Guard clauses: a blank query carries nothing to search for, and an
    # empty catalogue cannot be stacked into the 2-D matrix used below.
    if not user_query or not user_query.strip() or top_k <= 0 or not data:
        return []

    query_embedding = model.encode(user_query)  # Get the embedding for user query

    # Score the query against all stored embeddings with a single call
    # instead of invoking cosine_similarity once per record.
    stored_embeddings = np.array([dets['embedding'] for dets in data])
    scores = cosine_similarity([query_embedding], stored_embeddings)[0]

    # A stable sort on the negated scores yields descending similarity while
    # leaving tied records in their original order.
    best_indices = np.argsort(-scores, kind="stable")[:top_k]

    # Return the top course names as a list
    return [data[i]['Course Name'] for i in best_indices]
def _search_courses_as_text(user_query):
    """Run ``search_courses`` and render the course names one per line.

    ``search_courses`` returns a list, but the interface's output component
    is a plain textbox; without this adapter the list would be displayed as
    its Python repr (brackets and quotes included).
    """
    return "\n".join(str(name) for name in search_courses(user_query))


# Create the Gradio interface
iface = gr.Interface(
    fn=_search_courses_as_text,
    inputs="text",
    outputs="text",
    title="Course Search with Sentence Transformers",
    description="Enter a query to find the top 4 most similar courses.",
)

# Launch the Gradio app
iface.launch()