# qna-bot / app.py
# jefalod's picture
# Update app.py
# eeab6ba verified
import os
import pandas as pd
import numpy as np
import gradio as gr
import faiss
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from gtts import gTTS # Google Text-to-Speech
import tempfile
# Optional Hugging Face access token, read from the HF_TOKEN environment
# variable. When the variable is unset this is None and the dataset is
# requested without credentials.
auth_token = os.environ.get("HF_TOKEN")

# Load the question/answer dataset (train split).
# NOTE: `token=` is the current spelling of this argument; the older
# `use_auth_token=` keyword was deprecated by `datasets` and is rejected by
# newer releases. Very old `datasets` versions may still need the old keyword.
ds = load_dataset(
    "RomainPct/steve-jobs-question-and-answers",
    split="train",
    token=auth_token,
)

# Convert to a DataFrame with one row per FAQ entry: the dataset's
# 'instruction' column becomes 'question' and 'output' becomes 'answer'.
data = pd.DataFrame({
    'question': ds['instruction'],
    'answer': ds['output'],
})

# Load the sentence-embedding model used for both the FAQ questions and
# incoming user queries.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode all questions once at start-up. FAISS expects a C-contiguous
# float32 matrix, so make that explicit instead of relying on the encoder's
# default output type.
faq_embeddings = np.ascontiguousarray(
    model.encode(data['question'].tolist()),
    dtype=np.float32,
)

# Exact (brute-force) L2 index; row i of the index corresponds to row i of
# `data`, which is what lets search results be used with `data.iloc`.
faq_index = faiss.IndexFlatL2(faq_embeddings.shape[1])
faq_index.add(faq_embeddings)
# Answer + Audio generation
def get_answer_with_audio(query):
    """Return the closest FAQ answer to *query* and a spoken version of it.

    The query is embedded with the module-level ``model`` and matched against
    ``faq_index`` (single nearest neighbour). The ``answer`` of the matching
    row in ``data`` is then synthesised to an MP3 file with gTTS.

    Args:
        query: The question text entered by the user.

    Returns:
        A ``(answer, audio_path)`` tuple. ``audio_path`` is the path of a
        temporary ``.mp3`` file, or ``None`` when there is nothing to speak
        (blank query, no search result, or an empty answer).

    Raises:
        Exception: Any error raised by gTTS while synthesising or saving the
            audio is propagated after the temporary file has been removed.
    """
    # A blank query would otherwise be embedded and matched to an arbitrary
    # FAQ entry; answer with a prompt instead.
    if not query or not query.strip():
        return "Please enter a question.", None

    # FAISS expects float32 input; make the dtype explicit.
    query_embedding = np.asarray(model.encode([query]), dtype=np.float32)
    _, neighbours = faq_index.search(query_embedding, k=1)
    best = int(neighbours[0][0])

    # FAISS reports -1 when it has no result for a slot (e.g. empty index).
    # Without this guard `data.iloc[-1]` would silently return the last row.
    if best < 0:
        return "Sorry, I could not find an answer to that question.", None

    answer = data.iloc[best]['answer']

    # gTTS refuses empty text, so return the text result without audio.
    if not answer or not str(answer).strip():
        return answer, None

    # Reserve a unique file name, then close our handle before gTTS writes to
    # the path: the original kept the handle open forever (descriptor leak,
    # and the file cannot be reopened by name on Windows while it is open).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        audio_path = temp_file.name

    try:
        # Generate speech with gTTS
        gTTS(text=answer).save(audio_path)
    except Exception:
        # Do not leave an orphaned temp file behind when synthesis fails.
        os.remove(audio_path)
        raise

    return answer, audio_path
# Gradio interface: one text input, and two outputs that receive the
# (answer text, audio file path) pair returned by get_answer_with_audio.
question_box = gr.Textbox(
    placeholder="Ask a question about Steve Jobs...",
    label="Your Question",
)
answer_outputs = [
    gr.Text(label="Answer"),
    gr.Audio(label="Spoken Answer", type="filepath"),
]

iface = gr.Interface(
    fn=get_answer_with_audio,
    inputs=question_box,
    outputs=answer_outputs,
    title="🧠 Steve Jobs FAQ Chatbot",
    description="Ask anything about Steve Jobs. This chatbot answers in text and speech.",
)

# Start the web app.
iface.launch()