File size: 1,622 Bytes
9468930 24927f2 996afe0 9468930 eeab6ba 996afe0 eeab6ba ea07432 24927f2 a0121b4 9468930 fe245a1 eeab6ba ea07432 bb47df5 ea07432 eeab6ba 24927f2 a0121b4 24927f2 eeab6ba a0121b4 24927f2 a0121b4 24927f2 eeab6ba 24927f2 a0121b4 9468930 a0121b4 eeab6ba a0121b4 eeab6ba 24927f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import os
import pandas as pd
import numpy as np
import gradio as gr
import faiss
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from gtts import gTTS # Google Text-to-Speech
import tempfile
# Optional: Hugging Face access token, only needed for private datasets.
# When HF_TOKEN is not set this is None.
auth_token = os.environ.get("HF_TOKEN")

# Load the question/answer dataset.
# `token` is the current name of this argument; the older `use_auth_token`
# spelling was deprecated by `datasets` and is rejected by recent releases.
ds = load_dataset(
    "RomainPct/steve-jobs-question-and-answers",
    split="train",
    token=auth_token,
)

# Convert to a DataFrame with one row per question/answer pair.
data = pd.DataFrame({
    'question': ds['instruction'],
    'answer': ds['output'],
})

# Load embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode all questions once at start-up. FAISS only accepts C-contiguous
# float32 matrices, so coerce explicitly instead of relying on the encoder's
# default output type.
faq_embeddings = np.ascontiguousarray(
    model.encode(data['question'].tolist()),
    dtype=np.float32,
)

# Flat L2 index over the question embeddings; row i of the index corresponds
# to row i of `data`, which is what the lookup by position relies on.
faq_index = faiss.IndexFlatL2(faq_embeddings.shape[1])
faq_index.add(faq_embeddings)
# Answer + Audio generation
def get_answer_with_audio(query):
    """Return the best-matching FAQ answer for *query* and a spoken MP3 of it.

    The query is embedded with the module-level ``model`` and matched against
    ``faq_index``; the answer stored in ``data`` for the single nearest
    question is returned together with a gTTS rendering of that answer.

    Args:
        query: The question text entered by the user.

    Returns:
        A ``(answer, audio_path)`` tuple: the answer text and the path of a
        temporary ``.mp3`` file containing the spoken answer.
    """
    # FAISS expects a 2-D float32 array of shape (n_queries, dim).
    query_embedding = np.asarray(model.encode([query]), dtype=np.float32)

    # Only the position of the nearest neighbour is needed; the distance
    # is discarded.
    _, indices = faq_index.search(query_embedding, k=1)
    answer = data.iloc[indices[0][0]]['answer']

    # Reserve a unique .mp3 path and close our handle *before* gTTS writes to
    # it: leaving it open leaks a file descriptor on every request and makes
    # the second open fail on Windows. delete=False keeps the file on disk
    # after closing so the returned path stays valid for the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        audio_path = temp_file.name

    # Generate speech with gTTS
    gTTS(text=answer).save(audio_path)
    return answer, audio_path
# Gradio interface: one free-text question in, answer text plus audio out.
question_input = gr.Textbox(
    placeholder="Ask a question about Steve Jobs...",
    label="Your Question",
)
answer_output = gr.Text(label="Answer")
# type="filepath" matches the file path returned by get_answer_with_audio.
speech_output = gr.Audio(label="Spoken Answer", type="filepath")

iface = gr.Interface(
    fn=get_answer_with_audio,
    inputs=question_input,
    outputs=[answer_output, speech_output],
    title="🧠 Steve Jobs FAQ Chatbot",
    description="Ask anything about Steve Jobs. This chatbot answers in text and speech.",
)

# Start serving the app.
iface.launch()
|