File size: 14,442 Bytes
c7077c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
# voice.py (Updated with Azure OpenAI integration)
import os
import speech_recognition as sr
import google.generativeai as genai
import tempfile
import logging
from io import BytesIO
import re
import pygame
from translate import Translator
import base64
import streamlit as st
from google.cloud import texttospeech
import json
from openai import AzureOpenAI

# Set up logging
# Configures the root logger at import time: INFO level, with each record
# rendered as "timestamp - level - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)

# Get API keys from environment variables
# SECURITY NOTE(review): the fallback value below is a literal API key
# committed to source. Anyone with read access to this file can use it; it
# should be revoked and the key supplied only through GEMINI_API_KEY.
gemini_api_key = os.getenv('GEMINI_API_KEY', "AIzaSyCZL29aqWTmP_NTzkGILK4Kujx_MuyRAs4")
# Path to the JSON credentials file used for Google Text-to-Speech.
# NOTE(review): the fallback is an absolute Windows path, presumably valid
# only on the original author's machine; set GOOGLE_TTS_CREDENTIALS elsewhere.
google_tts_credentials = os.getenv('GOOGLE_TTS_CREDENTIALS', "D:/AI and Data Science/Projects/AI DoctorV2/tamiltextspeech-458116-147b3efcaf84.json")

# Azure OpenAI configuration
# Each value is read from the environment. os.getenv returns None for a
# variable that is not set; only API_VERSION has a default.
AZURE_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
# Passed as the `model` argument of the chat completion request below in
# process_with_azure_openai.
MODEL_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION", "2024-02-15-preview")

# Initialize Azure OpenAI client
# Runs at import time. Any failure while constructing the client is logged
# and leaves azure_client as None instead of aborting the import;
# process_with_azure_openai checks for that and returns an
# "unavailable" message.
try:
    azure_client = AzureOpenAI(
        api_key=AZURE_API_KEY,
        azure_endpoint=AZURE_ENDPOINT,
        api_version=API_VERSION
    )
    logger.info("Azure OpenAI client initialized successfully")
except Exception as e:
    logger.error(f"Failed to initialize Azure OpenAI client: {str(e)}")
    azure_client = None

# Initialize Google TTS client
# Runs at import time. tts_client ends up as None when the credentials file
# is missing or when client construction raises; text_to_speech checks for
# that and returns None instead of audio.
try:
    # Set credentials from JSON file
    if os.path.exists(google_tts_credentials):
        # The environment variable is set before the client is constructed;
        # note that this overwrites any GOOGLE_APPLICATION_CREDENTIALS value
        # already present in the process environment.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = google_tts_credentials
        tts_client = texttospeech.TextToSpeechClient()
        logger.info("Google Text-to-Speech client initialized successfully")
    else:
        logger.warning(f"Google TTS credentials file not found: {google_tts_credentials}")
        tts_client = None
except Exception as e:
    logger.error(f"Failed to initialize Google TTS: {str(e)}")
    tts_client = None

# Configure Gemini for translations only
# Runs at import time and, unlike the Azure and TTS clients above, is not
# wrapped in try/except, so an exception raised here propagates to the importer.
genai.configure(api_key=gemini_api_key)
# Shared model handle; the two translate_* functions call
# model.generate_content(prompt) on it.
model = genai.GenerativeModel('gemini-1.5-pro')

def listen_tamil():
    """Record one utterance from the microphone and transcribe it as Tamil.

    Opens ``sr.Microphone()``, calibrates against ambient noise for 1.5
    seconds, then waits up to 15 seconds for speech to start and records a
    phrase of at most 30 seconds. The recording is sent to Google's speech
    recognition with language ``ta-IN``.

    Progress and error messages are shown through Streamlit (``st.info``,
    ``st.success``, ``st.error``) and written to the module logger.

    Returns:
        The recognized Tamil text, or ``None`` when no speech starts within
        the timeout, the audio cannot be understood, or the recognition
        service request fails.
    """
    recognizer = sr.Recognizer()

    # Set the detection parameters BEFORE calibrating. The original assigned
    # energy_threshold after adjust_for_ambient_noise(), which overwrote the
    # threshold the calibration had just measured and made the calibration
    # step pointless. 300 now only serves as the starting value.
    recognizer.pause_threshold = 1.0  # Longer pause before a phrase is considered finished
    recognizer.energy_threshold = 300  # Initial sensitivity, refined by calibration below

    with sr.Microphone() as source:
        logger.info("Listening for Tamil speech...")
        # Adjust for ambient noise (updates recognizer.energy_threshold)
        recognizer.adjust_for_ambient_noise(source, duration=1.5)

        try:
            st.info("🎤 Listening... Please speak in Tamil")
            audio = recognizer.listen(source, timeout=15, phrase_time_limit=30)
            logger.info("Speech detected, processing...")
            st.success("✅ Speech recorded! Processing...")
        except sr.WaitTimeoutError:
            logger.error("No speech detected")
            st.error("❌ No speech detected. Please try again.")
            return None

    # Recognition happens after the microphone has been released.
    try:
        # Using Google's speech recognition with Tamil language
        tamil_text = recognizer.recognize_google(audio, language='ta-IN')
        logger.info(f"Recognized Tamil text: {tamil_text}")
        return tamil_text
    except sr.UnknownValueError:
        logger.error("Could not understand audio")
        st.error("❌ Could not understand the speech. Please try again more clearly.")
        return None
    except sr.RequestError as e:
        logger.error(f"Speech recognition service error: {e}")
        st.error("❌ Speech recognition service error. Please try again later.")
        return None

def translate_tamil_to_english(tamil_text):
    """Translate Tamil text to English while preserving numbers.

    Every number in the input is swapped for a ``NUM<i>PLACEHOLDER`` token
    before translation and swapped back afterwards, so the translator cannot
    alter numeric values. Gemini (``model.generate_content``) is tried first;
    the ``translate`` package's ``Translator`` is used when Gemini returns an
    empty or very short (< 5 characters) result, or when Gemini raises.

    Args:
        tamil_text: Tamil text to translate. ``None`` or an empty string is
            accepted.

    Returns:
        The English translation with whitespace runs collapsed on the normal
        path; ``""`` for empty input; the untouched original ``tamil_text``
        when every translation attempt fails.
    """
    if not tamil_text:
        return ""

    numbers = []

    def _mask(match):
        # Record the number and return its indexed placeholder.
        numbers.append(match.group(0))
        return f'NUM{len(numbers) - 1}PLACEHOLDER'

    # Mask all numbers in ONE pass. Chained str.replace calls (the previous
    # approach) also rewrote digits inside placeholders that were already
    # inserted - e.g. the "0" in "NUM0PLACEHOLDER" - corrupting the text.
    masked_text = re.sub(r'\d+\.?\d*', _mask, tamil_text)

    def _restore(text):
        # Put the original numbers back in place of their placeholders.
        for i, num in enumerate(numbers):
            text = text.replace(f'NUM{i}PLACEHOLDER', num)
        return text

    try:
        # Use Gemini for more accurate translation
        prompt = f"""Translate this Tamil text to English accurately, preserving the exact meaning:
        
        {masked_text}
        
        Return only the translation, nothing else."""

        response = model.generate_content(prompt)
        translation = response.text

        # Fallback to basic translator if Gemini returns nothing usable
        if not translation or len(translation) < 5:
            translator = Translator(to_lang="en", from_lang="ta")
            translation = translator.translate(masked_text)

        translation = _restore(translation)

        # Clean up any artifacts
        translation = re.sub(r'\s+', ' ', translation).strip()
        logger.info(f"Translation result: {translation}")

        return translation

    except Exception as e:
        logger.error(f"Translation error: {e}")
        # Try fallback translator
        try:
            translator = Translator(to_lang="en", from_lang="ta")
            # Restore numbers here too; previously placeholders leaked out.
            return _restore(translator.translate(masked_text))
        except Exception as fallback_error:
            logger.error(f"Fallback translation error: {fallback_error}")
            # Return the caller's original text, not the masked copy
            return tamil_text

def translate_english_to_tamil(english_text):
    """Translate English text to Tamil while preserving numbers.

    Every number in the input is swapped for a ``NUM<i>PLACEHOLDER`` token
    before translation and swapped back afterwards, so the translator cannot
    alter numeric values. Gemini (``model.generate_content``) is tried first;
    the ``translate`` package's ``Translator`` is used when Gemini returns an
    empty or very short (< 5 characters) result, or when Gemini raises.

    Args:
        english_text: English text to translate. ``None`` or an empty string
            is accepted.

    Returns:
        The Tamil translation with whitespace runs collapsed on the normal
        path; ``""`` for empty input; the untouched original
        ``english_text`` when every translation attempt fails.
    """
    if not english_text:
        return ""

    numbers = []

    def _mask(match):
        # Record the number and return its indexed placeholder.
        numbers.append(match.group(0))
        return f'NUM{len(numbers) - 1}PLACEHOLDER'

    # Mask all numbers in ONE pass. Chained str.replace calls (the previous
    # approach) also rewrote digits inside placeholders that were already
    # inserted - e.g. the "0" in "NUM0PLACEHOLDER" - corrupting the text.
    masked_text = re.sub(r'\d+\.?\d*', _mask, english_text)

    def _restore(text):
        # Put the original numbers back in place of their placeholders.
        for i, num in enumerate(numbers):
            text = text.replace(f'NUM{i}PLACEHOLDER', num)
        return text

    try:
        # Use Gemini for more accurate translation
        prompt = f"""Translate this English text to Tamil accurately, preserving the exact meaning:
        
        {masked_text}
        
        Return only the translation, nothing else."""

        response = model.generate_content(prompt)
        translation = response.text

        # Fallback to basic translator if Gemini returns nothing usable
        if not translation or len(translation) < 5:
            translator = Translator(to_lang="ta", from_lang="en")
            translation = translator.translate(masked_text)

        translation = _restore(translation)

        # Clean up any artifacts
        translation = re.sub(r'\s+', ' ', translation).strip()
        logger.info(f"Translation to Tamil: {translation}")

        return translation

    except Exception as e:
        logger.error(f"Translation error: {e}")
        # Try fallback translator
        try:
            translator = Translator(to_lang="ta", from_lang="en")
            # Restore numbers here too; previously placeholders leaked out.
            return _restore(translator.translate(masked_text))
        except Exception as fallback_error:
            logger.error(f"Fallback translation error: {fallback_error}")
            # Return the caller's original text, not the masked copy
            return english_text

def process_with_azure_openai(english_text, medical_summary):
    """Answer a question about a medical report via the Azure OpenAI chat API.

    The question and the report are embedded in a single user message that
    instructs the model to reply briefly, in plain language and with an
    empathetic tone.

    Args:
        english_text: The user's question, in English.
        medical_summary: Text of the medical report the question refers to.

    Returns:
        The model's reply, or a fixed English message when either input is
        empty, the Azure client is not initialized, or the request raises.
    """
    # Both the question and the report are required
    if not (english_text and medical_summary):
        return "No data available to process."

    # The client is None when module-level initialization failed
    if not azure_client:
        logger.error("Azure OpenAI client not initialized")
        return "Sorry, the AI service is currently unavailable."

    try:
        prompt = f"""You are a compassionate medical assistant. Analyze the medical report and respond to the user's question.

        User's question: {english_text}
        
        Requirements:
        1. Respond only if the question relates to the medical report
        2. Keep the response under 100 words
        3. Use simple, non-medical language when possible
        4. Focus on answering the specific question
        5. Be empathetic and reassuring (avoid causing panic)
        6. Include positive, actionable health improvement suggestions
        7. Use phrases like "Don't worry", "You can improve this by", "This is manageable"
        
        Medical Report:
        {medical_summary}
        """

        chat_messages = [{"role": "user", "content": prompt}]
        completion = azure_client.chat.completions.create(
            model=MODEL_NAME,
            messages=chat_messages,
            temperature=0.3,
            max_tokens=400,
        )

        first_choice = completion.choices[0]
        answer = first_choice.message.content
        logger.info("Successfully processed query with Azure OpenAI")
        return answer

    except Exception as exc:
        logger.error(f"Error processing with Azure OpenAI: {str(exc)}")
        return "I apologize, but I couldn't process your question about the medical report."

def text_to_speech(text, output_file="output.mp3"):
    """Synthesize Tamil speech for ``text`` with Google Text-to-Speech.

    The MP3 audio is written to ``output_file`` and also returned in memory.

    Args:
        text: Text to speak; requested with language code ``ta-IN``.
        output_file: Path the MP3 bytes are written to.

    Returns:
        A ``BytesIO`` holding the MP3 data, or ``None`` when ``text`` is
        empty, the TTS client is unavailable, or any step raises.
    """
    if not text:
        logger.warning("No text provided for speech synthesis")
        return None

    try:
        # Nothing can be synthesized without a client
        if not tts_client:
            logger.warning("Google TTS client not available")
            return None

        # What to say
        request_input = texttospeech.SynthesisInput(text=text)

        # How to say it: Tamil language, female voice
        tamil_voice = texttospeech.VoiceSelectionParams(
            language_code="ta-IN",
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
        )

        # Output format: MP3, slightly slower and slightly louder than default
        mp3_settings = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
            speaking_rate=0.9,
            pitch=0.0,
            volume_gain_db=1.0,
        )

        tts_result = tts_client.synthesize_speech(
            input=request_input,
            voice=tamil_voice,
            audio_config=mp3_settings,
        )

        # Persist the audio on disk
        with open(output_file, "wb") as mp3_out:
            mp3_out.write(tts_result.audio_content)
            logger.info(f"Audio content written to file {output_file}")

        # Hand the same bytes back for streaming
        return BytesIO(tts_result.audio_content)

    except Exception as exc:
        logger.error(f"Error in text-to-speech: {exc}")
        return None

def play_audio(audio_file):
    """Play an audio file through pygame and block until playback ends.

    Errors are logged and swallowed; the function never raises for a
    playback failure and always returns ``None``.

    Args:
        audio_file: Path (or file object) accepted by
            ``pygame.mixer.music.load``.
    """
    try:
        pygame.mixer.init()
        pygame.mixer.music.load(audio_file)
        try:
            pygame.mixer.music.play()
            # One Clock for the whole wait; the original built a new Clock on
            # every iteration. tick(10) polls about ten times per second.
            clock = pygame.time.Clock()
            while pygame.mixer.music.get_busy():
                clock.tick(10)
        finally:
            # Release the loaded file so the same path can be overwritten by a
            # later synthesis. unload() exists from pygame 2.0.0, hence the
            # getattr guard for older versions.
            unload = getattr(pygame.mixer.music, "unload", None)
            if unload is not None:
                unload()
    except Exception as e:
        logger.error(f"Error playing audio: {e}")

def get_base64_audio(audio_file):
    """Return the contents of ``audio_file`` as a base64-encoded string.

    Args:
        audio_file: Path of the file to read in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to ``str``.
    """
    with open(audio_file, "rb") as audio_stream:
        encoded = base64.b64encode(audio_stream.read())
    return encoded.decode()

def play_audio_response(audio_file):
    """Embed ``audio_file`` in the Streamlit page as an autoplaying clip.

    The file is inlined as a base64 data URI inside an ``<audio>`` element,
    followed by a small script that calls ``play()`` on it. Nothing happens
    when ``audio_file`` is empty or does not exist; rendering errors are
    logged and swallowed.

    Args:
        audio_file: Path of the MP3 file to play.
    """
    # Nothing to play
    if not audio_file or not os.path.exists(audio_file):
        return

    try:
        encoded_audio = get_base64_audio(audio_file)
        # HTML snippet: autoplay element plus an explicit play() call
        audio_html = f"""
            <audio id="response_audio" autoplay="true">
                <source src="data:audio/mp3;base64,{encoded_audio}" type="audio/mp3">
            </audio>
            <script>
                // Ensure audio plays automatically
                var audio = document.getElementById("response_audio");
                audio.play().catch(function(error) {{
                    console.error("Audio playback failed:", error);
                }});
            </script>
            """
        st.components.v1.html(audio_html, height=0)
        logger.info("Audio playback triggered")
    except Exception as exc:
        logger.error(f"Error in auto-play: {exc}")

def get_medical_report_answer(medical_summary, tamil_text=None):
    """Answer a Tamil question about a medical report, in text and audio.

    Pipeline: obtain the Tamil query (from ``tamil_text`` or the microphone),
    translate it to English, ask Azure OpenAI, translate the answer back to
    Tamil, make sure it contains a reassuring phrase, and synthesize speech
    into ``response_audio.mp3``.

    Args:
        medical_summary: Text of the medical report to answer from.
        tamil_text: The question in Tamil. When empty or ``None`` the
            question is captured with ``listen_tamil()``.

    Returns:
        A dict with keys ``original_query``, ``translated_query``,
        ``english_response``, ``tamil_response`` and ``audio_file``.
        ``audio_file`` is ``None`` when no audio was produced; the two query
        fields are ``None`` when no query could be obtained.
    """
    # Fall back to the microphone when the caller supplied no query
    spoken_query = tamil_text if tamil_text else listen_tamil()

    if not spoken_query:
        return {
            "original_query": None,
            "translated_query": None,
            "english_response": "No speech detected. Please try again.",
            "tamil_response": "பேச்சு இல்லை. மீண்டும் முயற்சிக்கவும்.",
            "audio_file": None
        }

    # Tamil question -> English question -> English answer -> Tamil answer
    question_en = translate_tamil_to_english(spoken_query)
    answer_en = process_with_azure_openai(question_en, medical_summary)
    answer_ta = translate_english_to_tamil(answer_en)

    # Reassuring phrases the Tamil answer is expected to contain
    reassurances = (
        "கவலைப்பட வேண்டாம்",  # Don't worry
        "இது கையாளக்கூடியது",   # This is manageable
        "இதை மேம்படுத்த முடியும்",  # You can improve this
    )

    # Lead with the first phrase when the answer contains none of them
    if not any(phrase in answer_ta for phrase in reassurances):
        answer_ta = f"{reassurances[0]}. {answer_ta}"

    # Speech synthesis; the path is only reported when audio was produced
    audio_path = "response_audio.mp3"
    audio_stream = text_to_speech(answer_ta, audio_path)

    if not audio_stream:
        logger.warning("Failed to generate audio response")
        audio_path = None
    else:
        logger.info("Audio response generated successfully")

    return {
        "original_query": spoken_query,
        "translated_query": question_en,
        "english_response": answer_en,
        "tamil_response": answer_ta,
        "audio_file": audio_path
    }