|
|
import json |
|
|
from sentence_transformers import SentenceTransformer |
|
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
import numpy as np |
|
|
import gradio as gr |
|
|
|
|
|
|
|
|
# Sentence-embedding model used to encode incoming search queries.
# NOTE(review): presumably the same model that produced the stored course
# embeddings -- confirm, otherwise the similarity scores are not comparable.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Course records; each entry is later read for its 'embedding' and
# 'Course Name' keys. JSON text is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's default encoding.
with open('data_with_embeddings.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
|
|
|
|
|
|
|
|
def search_courses(user_query):
    """Return the names of the four courses most similar to a query.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the ``'embedding'`` stored on every entry of
    the module-level ``data``.

    Args:
        user_query: Search text to embed (whatever ``model.encode`` accepts).

    Returns:
        A list of up to four ``'Course Name'`` values ordered from most to
        least similar; empty when ``data`` holds no entries.
    """
    if not data:
        # Nothing to rank, and cosine_similarity rejects an empty matrix.
        return []

    query_embedding = model.encode(user_query)

    # Score every course in a single call instead of paying sklearn's
    # per-call validation overhead once per course inside a Python loop.
    course_embeddings = np.array([entry['embedding'] for entry in data])
    scores = cosine_similarity([query_embedding], course_embeddings)[0]

    # Stable sort on the negated scores gives descending order while
    # keeping equally-scored courses in their original file order.
    top_indices = np.argsort(-scores, kind='stable')[:4]

    return [data[index]['Course Name'] for index in top_indices]
|
|
|
|
|
|
|
|
# Web UI: a single text box in, plain text out, backed by search_courses.
iface = gr.Interface(
    fn=search_courses,
    inputs="text",
    outputs="text",
    title="Course Search with Sentence Transformers",
    description="Enter a query to find the top 4 most similar courses.",
)

# Start serving the interface as soon as the script is executed.
iface.launch()
|
|
|