import json
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import gradio as gr

# Load the SentenceTransformer model used to embed incoming search queries
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Load the stored records from the JSON file. search_courses() reads the
# 'embedding' and 'Course Name' keys of each record.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding (which is not UTF-8 everywhere).
with open('data_with_embeddings.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

def search_courses(user_query):
    """Return the names of the four courses most similar to *user_query*.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the ``'embedding'`` stored on every record of
    the module-level ``data``.

    Args:
        user_query: Query text passed to ``model.encode``.

    Returns:
        A list of at most four ``'Course Name'`` values, ordered from most to
        least similar. Empty when ``data`` holds no records.
    """
    records = list(data)

    # Nothing to rank; also avoids handing an empty matrix to scikit-learn.
    if not records:
        return []

    query_embedding = model.encode(user_query)  # Embedding for the user query

    # Score every record in a single call instead of one scikit-learn call
    # per record: row i of the matrix is the embedding of records[i].
    course_embeddings = np.array([dets['embedding'] for dets in records])
    scores = cosine_similarity([query_embedding], course_embeddings)[0]

    # Rank record indices by descending score. sorted() is stable, so records
    # with equal scores keep their original relative order.
    ranked = sorted(range(len(records)), key=scores.__getitem__, reverse=True)

    # Return the top 4 course names as a list
    return [records[i]['Course Name'] for i in ranked[:4]]

# Build the Gradio UI: one text input routed through search_courses, with the
# result shown in a text output.
iface = gr.Interface(
    fn=search_courses,
    inputs="text",
    outputs="text",
    title="Course Search with Sentence Transformers",
    description="Enter a query to find the top 4 most similar courses.",
)

# Start the Gradio app
iface.launch()