NazishHasan committed on
Commit
483a7e2
·
verified ·
1 Parent(s): 5d04209

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -15
app.py CHANGED
@@ -228,8 +228,8 @@ arabic_letters_data = [
228
  "image": os.path.join(IMAGE_DIR, "mim.jpg"),
229
  "audio": os.path.join(AUDIO_DIR, "mim.wav"),
230
  "word_example": "موزة (Mawzah - Banana)",
231
- "word_image": os.path.join(IMAGE_DIR, "mauzah.jpg"), # Corrected
232
- "word_audio": os.path.join(AUDIO_DIR, "mauzah.wav"), # Corrected
233
  },
234
  {
235
  "letter": "ن",
@@ -317,16 +317,15 @@ def get_current_letter_content():
317
  """
318
  if not arabic_letters_data:
319
  # Handle case where no letter data is available
320
- return None, None, "لا توجد كلمات متاحة", None, None
321
 
322
  data = arabic_letters_data[current_letter_idx]
323
 
324
- # Return (letter_image, letter_audio, word_example_display, word_image, word_audio)
325
- # Note: word_example is still returned for internal use if needed, but not displayed directly
326
  return (
327
  data["image"],
328
  data["audio"], # This audio is for the letter sound itself
329
- data["word_example"], # Keep this for `play_word_btn` to access the word's pronunciation
330
  data["word_image"],
331
  data["word_audio"] # This audio is for the word example sound
332
  )
@@ -377,12 +376,16 @@ def generate_tts_audio(text_to_speak, filename="temp_feedback_audio.wav"):
377
 
378
  try:
379
  # Generate speech
380
- speech = tts_pipeline(text_to_speak, return_timestamps="word")
381
-
 
 
 
 
 
382
  # Save the audio to a temporary file
383
  output_path = os.path.join(AUDIO_DIR, filename)
384
- # Assuming the pipeline returns a dictionary with 'audio' (numpy array) and 'sampling_rate'
385
- torchaudio.save(output_path, speech['audio'].unsqueeze(0), speech['sampling_rate'])
386
  return output_path
387
  except Exception as e:
388
  print(f"Error generating TTS audio for '{text_to_speak}': {e}")
@@ -641,7 +644,6 @@ with gr.Blocks(
641
  ):
642
  gr.Markdown("### <span style='color:#28a745;'>تعلم حرفاً جديداً كل يوم! (Learn a new letter every day!)</span>", rtl=True)
643
 
644
- # Removed the letter_display gr.Markdown component.
645
  # The letter image will now be the primary display for the letter.
646
  letter_image = gr.Image(
647
  label="صورة الحرف (Letter Image)",
@@ -667,7 +669,6 @@ with gr.Blocks(
667
 
668
  # --- Word Example Section for the current letter ---
669
  gr.Markdown("### <span style='color:#ffc107;'>كلمات تبدأ بهذا الحرف (Words starting with this letter)</span>", rtl=True)
670
- # Removed word_example_display gr.Markdown
671
  # Display image for the example word
672
  word_image = gr.Image(
673
  label="صورة الكلمة (Word Image)",
@@ -776,7 +777,6 @@ with gr.Blocks(
776
  outputs=[
777
  letter_image,
778
  letter_audio_output,
779
- word_example_display, # word_example_display is still in outputs for the function to return
780
  word_image,
781
  word_audio_output
782
  ],
@@ -790,7 +790,6 @@ with gr.Blocks(
790
  outputs=[
791
  letter_image,
792
  letter_audio_output,
793
- word_example_display, # Still in outputs
794
  word_image,
795
  word_audio_output
796
  ]
@@ -801,7 +800,6 @@ with gr.Blocks(
801
  outputs=[
802
  letter_image,
803
  letter_audio_output,
804
- word_example_display, # Still in outputs
805
  word_image,
806
  word_audio_output
807
  ]
 
228
  "image": os.path.join(IMAGE_DIR, "mim.jpg"),
229
  "audio": os.path.join(AUDIO_DIR, "mim.wav"),
230
  "word_example": "موزة (Mawzah - Banana)",
231
+ "word_image": os.path.join(IMAGE_DIR, "mawzah.jpg"), # Corrected from 'mauzah.jpg'
232
+ "word_audio": os.path.join(AUDIO_DIR, "mawzah.wav"), # Corrected from 'mauzah.wav'
233
  },
234
  {
235
  "letter": "ن",
 
317
  """
318
  if not arabic_letters_data:
319
  # Handle case where no letter data is available
320
+ return None, None, None, None # Adjusted return for consistency
321
 
322
  data = arabic_letters_data[current_letter_idx]
323
 
324
+ # Return (letter_image, letter_audio, word_image, word_audio)
325
+ # word_example is no longer returned as an output to Gradio components
326
  return (
327
  data["image"],
328
  data["audio"], # This audio is for the letter sound itself
 
329
  data["word_image"],
330
  data["word_audio"] # This audio is for the word example sound
331
  )
 
376
 
377
  try:
378
  # Generate speech
379
+ # For 'facebook/mms-tts-ara', the audio output might need specific handling.
380
+ # This is a common pattern for Hugging Face TTS pipelines.
381
+ output = tts_pipeline(text_to_speak)
382
+ # Assuming output is a dictionary like {'audio': numpy_array, 'sampling_rate': int}
383
+ audio_array = output['audio']
384
+ sampling_rate = output['sampling_rate']
385
+
386
  # Save the audio to a temporary file
387
  output_path = os.path.join(AUDIO_DIR, filename)
388
+ torchaudio.save(output_path, audio_array.unsqueeze(0), sampling_rate)
 
389
  return output_path
390
  except Exception as e:
391
  print(f"Error generating TTS audio for '{text_to_speak}': {e}")
 
644
  ):
645
  gr.Markdown("### <span style='color:#28a745;'>تعلم حرفاً جديداً كل يوم! (Learn a new letter every day!)</span>", rtl=True)
646
 
 
647
  # The letter image will now be the primary display for the letter.
648
  letter_image = gr.Image(
649
  label="صورة الحرف (Letter Image)",
 
669
 
670
  # --- Word Example Section for the current letter ---
671
  gr.Markdown("### <span style='color:#ffc107;'>كلمات تبدأ بهذا الحرف (Words starting with this letter)</span>", rtl=True)
 
672
  # Display image for the example word
673
  word_image = gr.Image(
674
  label="صورة الكلمة (Word Image)",
 
777
  outputs=[
778
  letter_image,
779
  letter_audio_output,
 
780
  word_image,
781
  word_audio_output
782
  ],
 
790
  outputs=[
791
  letter_image,
792
  letter_audio_output,
 
793
  word_image,
794
  word_audio_output
795
  ]
 
800
  outputs=[
801
  letter_image,
802
  letter_audio_output,
 
803
  word_image,
804
  word_audio_output
805
  ]