Spaces:

NazishHasan
/

Alifbaataa

Sleeping

App Files Files Community

NazishHasan committed on Jun 20

Commit

483a7e2

verified ·

1 Parent(s): 5d04209

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -15

app.py CHANGED Viewed

@@ -228,8 +228,8 @@ arabic_letters_data = [
         "image": os.path.join(IMAGE_DIR, "mim.jpg"),
         "audio": os.path.join(AUDIO_DIR, "mim.wav"),
         "word_example": "موزة (Mawzah - Banana)",
-        "word_image": os.path.join(IMAGE_DIR, "mauzah.jpg"), # Corrected
-        "word_audio": os.path.join(AUDIO_DIR, "mauzah.wav"), # Corrected
     },
     {
         "letter": "ن",
@@ -317,16 +317,15 @@ def get_current_letter_content():
     """
     if not arabic_letters_data:
         # Handle case where no letter data is available
-        return None, None, "لا توجد كلمات متاحة", None, None
     data = arabic_letters_data[current_letter_idx]
-    # Return (letter_image, letter_audio, word_example_display, word_image, word_audio)
-    # Note: word_example is still returned for internal use if needed, but not displayed directly
     return (
         data["image"],
         data["audio"], # This audio is for the letter sound itself
-        data["word_example"], # Keep this for `play_word_btn` to access the word's pronunciation
         data["word_image"],
         data["word_audio"] # This audio is for the word example sound
     )
@@ -377,12 +376,16 @@ def generate_tts_audio(text_to_speak, filename="temp_feedback_audio.wav"):
     try:
         # Generate speech
-        speech = tts_pipeline(text_to_speak, return_timestamps="word")
         # Save the audio to a temporary file
         output_path = os.path.join(AUDIO_DIR, filename)
-        # Assuming the pipeline returns a dictionary with 'audio' (numpy array) and 'sampling_rate'
-        torchaudio.save(output_path, speech['audio'].unsqueeze(0), speech['sampling_rate'])
         return output_path
     except Exception as e:
         print(f"Error generating TTS audio for '{text_to_speak}': {e}")
@@ -641,7 +644,6 @@ with gr.Blocks(
             ):
                 gr.Markdown("### <span style='color:#28a745;'>تعلم حرفاً جديداً كل يوم! (Learn a new letter every day!)</span>", rtl=True)
-                # Removed the letter_display gr.Markdown component.
                 # The letter image will now be the primary display for the letter.
                 letter_image = gr.Image(
                     label="صورة الحرف (Letter Image)",
@@ -667,7 +669,6 @@ with gr.Blocks(
                 # --- Word Example Section for the current letter ---
                 gr.Markdown("### <span style='color:#ffc107;'>كلمات تبدأ بهذا الحرف (Words starting with this letter)</span>", rtl=True)
-                # Removed word_example_display gr.Markdown
                 # Display image for the example word
                 word_image = gr.Image(
                     label="صورة الكلمة (Word Image)",
@@ -776,7 +777,6 @@ with gr.Blocks(
         outputs=[
             letter_image,
             letter_audio_output,
-            word_example_display, # word_example_display is still in outputs for the function to return
             word_image,
             word_audio_output
         ],
@@ -790,7 +790,6 @@ with gr.Blocks(
         outputs=[
             letter_image,
             letter_audio_output,
-            word_example_display, # Still in outputs
             word_image,
             word_audio_output
         ]
@@ -801,7 +800,6 @@ with gr.Blocks(
         outputs=[
             letter_image,
             letter_audio_output,
-            word_example_display, # Still in outputs
             word_image,
             word_audio_output
         ]

         "image": os.path.join(IMAGE_DIR, "mim.jpg"),
         "audio": os.path.join(AUDIO_DIR, "mim.wav"),
         "word_example": "موزة (Mawzah - Banana)",
+        "word_image": os.path.join(IMAGE_DIR, "mawzah.jpg"), # Corrected from 'mauzah.jpg'
+        "word_audio": os.path.join(AUDIO_DIR, "mawzah.wav"), # Corrected from 'mauzah.wav'
     },
     {
         "letter": "ن",
     """
     if not arabic_letters_data:
         # Handle case where no letter data is available
+        return None, None, None, None # Adjusted return for consistency
     data = arabic_letters_data[current_letter_idx]
+    # Return (letter_image, letter_audio, word_image, word_audio)
+    # word_example is no longer returned as an output to Gradio components
     return (
         data["image"],
         data["audio"], # This audio is for the letter sound itself
         data["word_image"],
         data["word_audio"] # This audio is for the word example sound
     )
     try:
         # Generate speech
+        # For 'facebook/mms-tts-ara', the audio output might need specific handling.
+        # This is a common pattern for Hugging Face TTS pipelines.
+        output = tts_pipeline(text_to_speak)
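+        # Assumes output is a dict like {'audio': numpy_array, 'sampling_rate': int}.
+        # NOTE(review): `.unsqueeze(0)` used below is a torch.Tensor method; if 'audio' really is a
+        # NumPy array it must be converted (e.g. torch.from_numpy) before saving — confirm.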
+        audio_array = output['audio']
+        sampling_rate = output['sampling_rate']
         # Save the audio to a temporary file
         output_path = os.path.join(AUDIO_DIR, filename)
+        torchaudio.save(output_path, audio_array.unsqueeze(0), sampling_rate)
         return output_path
     except Exception as e:
         print(f"Error generating TTS audio for '{text_to_speak}': {e}")
             ):
                 gr.Markdown("### <span style='color:#28a745;'>تعلم حرفاً جديداً كل يوم! (Learn a new letter every day!)</span>", rtl=True)
                 # The letter image will now be the primary display for the letter.
                 letter_image = gr.Image(
                     label="صورة الحرف (Letter Image)",
                 # --- Word Example Section for the current letter ---
                 gr.Markdown("### <span style='color:#ffc107;'>كلمات تبدأ بهذا الحرف (Words starting with this letter)</span>", rtl=True)
                 # Display image for the example word
                 word_image = gr.Image(
                     label="صورة الكلمة (Word Image)",
         outputs=[
             letter_image,
             letter_audio_output,
             word_image,
             word_audio_output
         ],
         outputs=[
             letter_image,
             letter_audio_output,
             word_image,
             word_audio_output
         ]
         outputs=[
             letter_image,
             letter_audio_output,
             word_image,
             word_audio_output
         ]