Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -228,8 +228,8 @@ arabic_letters_data = [
|
|
| 228 |
"image": os.path.join(IMAGE_DIR, "mim.jpg"),
|
| 229 |
"audio": os.path.join(AUDIO_DIR, "mim.wav"),
|
| 230 |
"word_example": "موزة (Mawzah - Banana)",
|
| 231 |
-
"word_image": os.path.join(IMAGE_DIR, "
|
| 232 |
-
"word_audio": os.path.join(AUDIO_DIR, "
|
| 233 |
},
|
| 234 |
{
|
| 235 |
"letter": "ن",
|
|
@@ -317,16 +317,15 @@ def get_current_letter_content():
|
|
| 317 |
"""
|
| 318 |
if not arabic_letters_data:
|
| 319 |
# Handle case where no letter data is available
|
| 320 |
-
return None, None,
|
| 321 |
|
| 322 |
data = arabic_letters_data[current_letter_idx]
|
| 323 |
|
| 324 |
-
# Return (letter_image, letter_audio, word_example, word_image, word_audio)
|
| 325 |
-
#
|
| 326 |
return (
|
| 327 |
data["image"],
|
| 328 |
data["audio"], # This audio is for the letter sound itself
|
| 329 |
-
data["word_example"], # Keep this for `play_word_btn` to access the word's pronunciation
|
| 330 |
data["word_image"],
|
| 331 |
data["word_audio"] # This audio is for the word example sound
|
| 332 |
)
|
|
@@ -377,12 +376,16 @@ def generate_tts_audio(text_to_speak, filename="temp_feedback_audio.wav"):
|
|
| 377 |
|
| 378 |
try:
|
| 379 |
# Generate speech
|
| 380 |
-
|
| 381 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
# Save the audio to a temporary file
|
| 383 |
output_path = os.path.join(AUDIO_DIR, filename)
|
| 384 |
-
|
| 385 |
-
torchaudio.save(output_path, speech['audio'].unsqueeze(0), speech['sampling_rate'])
|
| 386 |
return output_path
|
| 387 |
except Exception as e:
|
| 388 |
print(f"Error generating TTS audio for '{text_to_speak}': {e}")
|
|
@@ -641,7 +644,6 @@ with gr.Blocks(
|
|
| 641 |
):
|
| 642 |
gr.Markdown("### <span style='color:#28a745;'>تعلم حرفاً جديداً كل يوم! (Learn a new letter every day!)</span>", rtl=True)
|
| 643 |
|
| 644 |
-
# Removed the letter_display gr.Markdown component.
|
| 645 |
# The letter image will now be the primary display for the letter.
|
| 646 |
letter_image = gr.Image(
|
| 647 |
label="صورة الحرف (Letter Image)",
|
|
@@ -667,7 +669,6 @@ with gr.Blocks(
|
|
| 667 |
|
| 668 |
# --- Word Example Section for the current letter ---
|
| 669 |
gr.Markdown("### <span style='color:#ffc107;'>كلمات تبدأ بهذا الحرف (Words starting with this letter)</span>", rtl=True)
|
| 670 |
-
# Removed word_example_display gr.Markdown
|
| 671 |
# Display image for the example word
|
| 672 |
word_image = gr.Image(
|
| 673 |
label="صورة الكلمة (Word Image)",
|
|
@@ -776,7 +777,6 @@ with gr.Blocks(
|
|
| 776 |
outputs=[
|
| 777 |
letter_image,
|
| 778 |
letter_audio_output,
|
| 779 |
-
word_example_display, # word_example_display is still in outputs for the function to return
|
| 780 |
word_image,
|
| 781 |
word_audio_output
|
| 782 |
],
|
|
@@ -790,7 +790,6 @@ with gr.Blocks(
|
|
| 790 |
outputs=[
|
| 791 |
letter_image,
|
| 792 |
letter_audio_output,
|
| 793 |
-
word_example_display, # Still in outputs
|
| 794 |
word_image,
|
| 795 |
word_audio_output
|
| 796 |
]
|
|
@@ -801,7 +800,6 @@ with gr.Blocks(
|
|
| 801 |
outputs=[
|
| 802 |
letter_image,
|
| 803 |
letter_audio_output,
|
| 804 |
-
word_example_display, # Still in outputs
|
| 805 |
word_image,
|
| 806 |
word_audio_output
|
| 807 |
]
|
|
|
|
| 228 |
"image": os.path.join(IMAGE_DIR, "mim.jpg"),
|
| 229 |
"audio": os.path.join(AUDIO_DIR, "mim.wav"),
|
| 230 |
"word_example": "موزة (Mawzah - Banana)",
|
| 231 |
+
"word_image": os.path.join(IMAGE_DIR, "mawzah.jpg"), # Corrected from 'mauzah.jpg'
|
| 232 |
+
"word_audio": os.path.join(AUDIO_DIR, "mawzah.wav"), # Corrected from 'mauzah.wav'
|
| 233 |
},
|
| 234 |
{
|
| 235 |
"letter": "ن",
|
|
|
|
| 317 |
"""
|
| 318 |
if not arabic_letters_data:
|
| 319 |
# Handle case where no letter data is available
|
| 320 |
+
return None, None, None, None # One None per element of the 4-tuple returned below
|
| 321 |
|
| 322 |
data = arabic_letters_data[current_letter_idx]
|
| 323 |
|
| 324 |
+
# Return (letter_image, letter_audio, word_image, word_audio)
|
| 325 |
+
# word_example is no longer returned as an output to Gradio components
|
| 326 |
return (
|
| 327 |
data["image"],
|
| 328 |
data["audio"], # This audio is for the letter sound itself
|
|
|
|
| 329 |
data["word_image"],
|
| 330 |
data["word_audio"] # This audio is for the word example sound
|
| 331 |
)
|
|
|
|
| 376 |
|
| 377 |
try:
|
| 378 |
# Generate speech
|
| 379 |
+
# For 'facebook/mms-tts-ara', the audio output might need specific handling.
|
| 380 |
+
# This is a common pattern for Hugging Face TTS pipelines.
|
| 381 |
+
output = tts_pipeline(text_to_speak)
|
| 382 |
+
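# Assuming output is a dictionary like {'audio': numpy_array, 'sampling_rate': int}. NOTE(review): a numpy array has no .unsqueeze(); the torchaudio.save call below needs a torch.Tensor (e.g. via torch.from_numpy) — TODO confirm.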
|
| 383 |
+
audio_array = output['audio']
|
| 384 |
+
sampling_rate = output['sampling_rate']
|
| 385 |
+
|
| 386 |
# Save the audio to a temporary file
|
| 387 |
output_path = os.path.join(AUDIO_DIR, filename)
|
| 388 |
+
torchaudio.save(output_path, audio_array.unsqueeze(0), sampling_rate)
|
|
|
|
| 389 |
return output_path
|
| 390 |
except Exception as e:
|
| 391 |
print(f"Error generating TTS audio for '{text_to_speak}': {e}")
|
|
|
|
| 644 |
):
|
| 645 |
gr.Markdown("### <span style='color:#28a745;'>تعلم حرفاً جديداً كل يوم! (Learn a new letter every day!)</span>", rtl=True)
|
| 646 |
|
|
|
|
| 647 |
# The letter image will now be the primary display for the letter.
|
| 648 |
letter_image = gr.Image(
|
| 649 |
label="صورة الحرف (Letter Image)",
|
|
|
|
| 669 |
|
| 670 |
# --- Word Example Section for the current letter ---
|
| 671 |
gr.Markdown("### <span style='color:#ffc107;'>كلمات تبدأ بهذا الحرف (Words starting with this letter)</span>", rtl=True)
|
|
|
|
| 672 |
# Display image for the example word
|
| 673 |
word_image = gr.Image(
|
| 674 |
label="صورة الكلمة (Word Image)",
|
|
|
|
| 777 |
outputs=[
|
| 778 |
letter_image,
|
| 779 |
letter_audio_output,
|
|
|
|
| 780 |
word_image,
|
| 781 |
word_audio_output
|
| 782 |
],
|
|
|
|
| 790 |
outputs=[
|
| 791 |
letter_image,
|
| 792 |
letter_audio_output,
|
|
|
|
| 793 |
word_image,
|
| 794 |
word_audio_output
|
| 795 |
]
|
|
|
|
| 800 |
outputs=[
|
| 801 |
letter_image,
|
| 802 |
letter_audio_output,
|
|
|
|
| 803 |
word_image,
|
| 804 |
word_audio_output
|
| 805 |
]
|