"""Gradio app that returns the courses most similar to a free-text query.

At start-up the script loads a SentenceTransformer model and a JSON file of
course records. Each record is read through its 'embedding' and
'Course Name' keys. A query is embedded with the same model and ranked
against the stored embeddings by cosine similarity.
"""
import json

import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Number of courses returned for each query.
TOP_K = 4

# Load the SentenceTransformer model
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Load the embeddings from the JSON file
with open('data_with_embeddings.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Stack all stored embeddings once, so a query is scored with a single
# cosine_similarity call instead of one array conversion + call per course.
# Row i corresponds to data[i].
course_embeddings = np.array([course['embedding'] for course in data])


# Function to perform the search
def search_courses(user_query):
    """Return the names of the courses most similar to ``user_query``.

    Args:
        user_query: Free-text query typed by the user.

    Returns:
        A list of up to ``TOP_K`` 'Course Name' values, ordered from most to
        least similar. The list is empty when the query is blank or when no
        course records were loaded.
    """
    # A blank query carries no meaning to rank by, and an empty dataset has
    # nothing to rank; skip the model call in both cases.
    if not data or not user_query or not user_query.strip():
        return []

    # Get the embedding for the user query
    query_embedding = model.encode(user_query)

    # Compare the query embedding with every stored embedding at once;
    # the result has shape (1, number of courses).
    scores = cosine_similarity([query_embedding], course_embeddings)[0]

    # Indices of the best scores in descending order. The stable sort keeps
    # equally scored courses in their original file order.
    best = np.argsort(-scores, kind='stable')[:TOP_K]

    # Return the top course names as a list
    return [data[i]['Course Name'] for i in best]


# Create the Gradio interface
iface = gr.Interface(
    fn=search_courses,
    inputs="text",
    outputs="text",
    title="Course Search with Sentence Transformers",
    description="Enter a query to find the top 4 most similar courses.",
)

# Launch the Gradio app
iface.launch()