"""Streamlit app: transcribe an uploaded WAV file and answer it via Groq.

Flow: read the Groq API key from the environment, build a cached
speech-to-text pipeline, let the user upload a WAV file, transcribe it, and
send the transcript to a Groq chat model. The reply is rendered as text.
"""

import io
import os

import numpy as np
import soundfile as sf
import streamlit as st
from groq import Groq
from transformers import pipeline

# Chat model used for the reply. Overridable through the environment so the
# deployment can be repointed without a code change; the default is unchanged.
# NOTE(review): confirm this model ID is still served by the Groq API.
CHAT_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")

# Speech-to-text checkpoint (smaller Whisper model).
STT_MODEL = "distil-whisper/distil-small.en"

# Load Groq API key from environment variables
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("Groq API key not found. Please add it to the Hugging Face Space Secrets.")
    st.stop()

# Initialize Groq client
groq_client = Groq(api_key=GROQ_API_KEY)


@st.cache_resource
def load_models():
    """Return the speech-to-text pipeline.

    Decorated with ``st.cache_resource`` so the pipeline is constructed once
    and the same object is handed back on later calls.
    """
    return pipeline("automatic-speech-recognition", model=STT_MODEL)


def _read_mono_audio(wav_bytes):
    """Decode WAV bytes into a 1-D float32 signal plus its sampling rate.

    Args:
        wav_bytes: Raw contents of the uploaded audio file.

    Returns:
        A ``(samples, sampling_rate)`` tuple; ``samples`` is a 1-D float32
        NumPy array and ``sampling_rate`` is the rate reported by the file.

    Raises:
        Whatever ``soundfile.read`` raises for undecodable input.
    """
    # Decode from memory: no scratch file on disk, so concurrent sessions
    # cannot overwrite each other's upload.
    samples, sampling_rate = sf.read(io.BytesIO(wav_bytes), dtype="float32")
    if samples.ndim > 1:
        # Multi-channel files decode as (frames, channels); the ASR pipeline
        # only accepts single-channel audio, so average the channels.
        samples = np.mean(samples, axis=1)
    return samples, sampling_rate


stt_pipe = load_models()

# Streamlit app
st.title("Voice-Enabled Chatbot (CPU Optimized)")

# Audio upload
uploaded_file = st.file_uploader("Upload a WAV file", type=["wav"])

if uploaded_file is not None:
    st.success("File uploaded successfully!")

    # Process the uploaded audio
    try:
        speech, sampling_rate = _read_mono_audio(uploaded_file.getvalue())
        # Hand over the real sampling rate. A bare array is assumed to already
        # be at the model's rate, which silently garbles other recordings.
        # NOTE(review): when the rates differ the pipeline resamples, which
        # may need torchaudio installed; a failure is reported below.
        output = stt_pipe({"raw": speech, "sampling_rate": sampling_rate})
    except Exception as e:
        # UI boundary: show the problem instead of a raw traceback.
        st.error(f"Error transcribing audio: {e}")
        st.stop()

    transcript = output["text"]
    st.write("Transcribed Text:", transcript)

    if not transcript.strip():
        # Nothing to ask the chat model; avoid sending an empty message.
        st.warning("No speech was detected in the uploaded audio.")
    else:
        try:
            st.write("Input Text:", transcript)
            chat_completion = groq_client.chat.completions.create(
                messages=[{"role": "user", "content": transcript}],
                model=CHAT_MODEL,
                temperature=0.5,
                max_tokens=1024,
            )
            st.write("API Response:", chat_completion)
            response = chat_completion.choices[0].message.content
            st.write("Generated Response:", response)
            # NOTE(review): no audio is synthesized here; the response is only
            # rendered as text despite the label below.
            st.write("Response played via browser audio:")
            st.write(response)
        except Exception as e:
            st.error(f"Error generating response: {e}")
else:
    st.write("This application currently only supports file uploads.")