|
import os |
|
import pandas as pd |
|
import numpy as np |
|
import gradio as gr |
|
import faiss |
|
from datasets import load_dataset |
|
from sentence_transformers import SentenceTransformer |
|
from gtts import gTTS |
|
import tempfile |
|
|
|
|
|
# Hugging Face access token, read from the environment (None when unset).
auth_token = os.environ.get("HF_TOKEN")

# Load the question/answer dataset from the Hugging Face Hub.
# `token` replaces the `use_auth_token` keyword, which was deprecated in
# `datasets` 2.14 and is no longer accepted by newer releases.
ds = load_dataset(
    "RomainPct/steve-jobs-question-and-answers",
    split="train",
    token=auth_token,
)

# Tabular view of the dataset: one row per question/answer pair.
data = pd.DataFrame({
    'question': ds['instruction'],
    'answer': ds['output'],
})

# Sentence-embedding model used for both the stored questions and user queries.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every stored question once at start-up and index the vectors in an
# L2 (Euclidean) flat index; row i of the index corresponds to row i of `data`.
faq_embeddings = model.encode(data['question'].tolist())
faq_index = faiss.IndexFlatL2(faq_embeddings.shape[1])
faq_index.add(faq_embeddings)
|
|
|
|
|
def get_answer_with_audio(query):
    """Return the closest FAQ answer and the path of an MP3 reading it aloud.

    The query is embedded with the module-level ``model`` and matched against
    ``faq_index``; the answer belonging to the single nearest stored question
    is read from ``data`` and synthesized to speech with gTTS.

    Args:
        query: Question text entered by the user.

    Returns:
        A ``(answer, audio_path)`` tuple. ``audio_path`` is the name of a
        temporary ``.mp3`` file, or ``None`` when there is nothing to speak
        (blank query, no match, or an empty answer).
    """
    # A blank query would otherwise be embedded and matched to an arbitrary row.
    if not query or not query.strip():
        return "Please enter a question.", None

    # FAISS expects float32 vectors; asarray is a no-op when that already holds.
    query_embedding = np.asarray(model.encode([query]), dtype="float32")
    _, indices = faq_index.search(query_embedding, k=1)

    best_row = int(indices[0][0])
    if best_row < 0:
        # FAISS reports -1 when it has no result; iloc[-1] would silently
        # return the last row instead.
        return "Sorry, I could not find an answer to that question.", None

    answer = data.iloc[best_row]['answer']
    if not str(answer).strip():
        # gTTS refuses empty text, so skip the audio rather than fail.
        return answer, None

    tts = gTTS(text=answer)

    # Reserve a unique path and close the handle right away so no file
    # descriptor leaks per request and gTTS writes to a file nobody holds open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        audio_path = temp_file.name
    tts.save(audio_path)

    return answer, audio_path
|
|
|
|
|
# Input widget: a single free-text question box.
question_input = gr.Textbox(
    placeholder="Ask a question about Steve Jobs...",
    label="Your Question",
)

# Output widgets, in the order returned by get_answer_with_audio:
# the answer text first, then the audio file path.
answer_outputs = [
    gr.Text(label="Answer"),
    gr.Audio(label="Spoken Answer", type="filepath"),
]

# Wire the lookup function to the widgets and start the web app.
iface = gr.Interface(
    fn=get_answer_with_audio,
    inputs=question_input,
    outputs=answer_outputs,
    title="🧠 Steve Jobs FAQ Chatbot",
    description="Ask anything about Steve Jobs. This chatbot answers in text and speech.",
)

iface.launch()
|
|