ggoknar
committed on
Commit
·
3f2e1a8
1
Parent(s):
bd470e7
limit speech to 250 characters for now
Browse files
app.py
CHANGED
|
@@ -399,7 +399,13 @@ def generate_speech(history):
|
|
| 399 |
for sentence, history in get_sentence(history):
|
| 400 |
print(sentence)
|
| 401 |
# Sometimes prompt </s> coming on output remove it
|
|
|
|
| 402 |
sentence = sentence.replace("</s>", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
# A fast fix for the last character, may produce weird sounds if it is with text
|
| 404 |
if sentence[-1] in ["!", "?", ".", ","]:
|
| 405 |
# just add a space
|
|
@@ -410,49 +416,56 @@ def generate_speech(history):
|
|
| 410 |
# generate speech using precomputed latents
|
| 411 |
# This is not streaming but it will be fast
|
| 412 |
# wav = get_voice(sentence,language, latent_map["Female_Voice"], suffix=len(wav_list))
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
|
| 440 |
if not DIRECT_STREAM:
|
| 441 |
yield (
|
| 442 |
gr.Audio.update(value=None, autoplay=True),
|
| 443 |
history,
|
| 444 |
) # hack to switch autoplay
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
|
|
|
| 449 |
|
| 450 |
-
|
| 451 |
-
|
| 452 |
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
|
| 457 |
except RuntimeError as e:
|
| 458 |
if "device-side assert" in str(e):
|
|
@@ -480,7 +493,7 @@ def generate_speech(history):
|
|
| 480 |
# yield (combined_file_name, history
|
| 481 |
|
| 482 |
wav_bytestream = wave_header_chunk() + wav_bytestream
|
| 483 |
-
time.sleep(0.
|
| 484 |
yield (gr.Audio.update(value=None, autoplay=False), history)
|
| 485 |
yield (gr.Audio.update(value=wav_bytestream, autoplay=False), history)
|
| 486 |
|
|
|
|
| 399 |
for sentence, history in get_sentence(history):
|
| 400 |
print(sentence)
|
| 401 |
# Sometimes prompt </s> coming on output remove it
|
| 402 |
+
# Some post process for speech only
|
| 403 |
sentence = sentence.replace("</s>", "")
|
| 404 |
+
sentence = sentence.replace("```", "")
|
| 405 |
+
sentence = sentence.replace("```", "")
|
| 406 |
+
sentence = sentence.replace("(", " ")
|
| 407 |
+
sentence = sentence.replace(")", " ")
|
| 408 |
+
|
| 409 |
# A fast fix for the last character, may produce weird sounds if it is with text
|
| 410 |
if sentence[-1] in ["!", "?", ".", ","]:
|
| 411 |
# just add a space
|
|
|
|
| 416 |
# generate speech using precomputed latents
|
| 417 |
# This is not streaming but it will be fast
|
| 418 |
# wav = get_voice(sentence,language, latent_map["Female_Voice"], suffix=len(wav_list))
|
| 419 |
+
if len(sentence) > 250:
|
| 420 |
+
# should not generate voice it will hit token limit
|
| 421 |
+
# It should not generate audio for it
|
| 422 |
+
audio_stream = None
|
| 423 |
+
else:
|
| 424 |
+
audio_stream = get_voice_streaming(
|
| 425 |
+
sentence, language, latent_map["Female_Voice"]
|
| 426 |
+
)
|
| 427 |
+
if audio_stream is not None:
|
| 428 |
+
wav_chunks = wave_header_chunk()
|
| 429 |
+
frame_length = 0
|
| 430 |
+
for chunk in audio_stream:
|
| 431 |
+
try:
|
| 432 |
+
wav_bytestream += chunk
|
| 433 |
+
if DIRECT_STREAM:
|
| 434 |
+
yield (
|
| 435 |
+
gr.Audio.update(
|
| 436 |
+
value=wave_header_chunk() + chunk, autoplay=True
|
| 437 |
+
),
|
| 438 |
+
history,
|
| 439 |
+
)
|
| 440 |
+
wait_time = len(chunk) / 2 / 24000
|
| 441 |
+
wait_time = AUDIO_WAIT_MODIFIER * wait_time
|
| 442 |
+
print("Sleeping till chunk end")
|
| 443 |
+
time.sleep(wait_time)
|
| 444 |
+
|
| 445 |
+
else:
|
| 446 |
+
wav_chunks += chunk
|
| 447 |
+
frame_length += len(chunk)
|
| 448 |
+
except:
|
| 449 |
+
# hack to continue playing: sometimes the last chunk is empty, will be fixed on next TTS
|
| 450 |
+
continue
|
| 451 |
|
| 452 |
if not DIRECT_STREAM:
|
| 453 |
yield (
|
| 454 |
gr.Audio.update(value=None, autoplay=True),
|
| 455 |
history,
|
| 456 |
) # hack to switch autoplay
|
| 457 |
+
if audio_stream is not None:
|
| 458 |
+
yield (gr.Audio.update(value=wav_chunks, autoplay=True), history)
|
| 459 |
+
# Streaming wait time calculation
|
| 460 |
+
# audio_length = frame_length / sample_width/ frame_rate
|
| 461 |
+
wait_time = frame_length / 2 / 24000
|
| 462 |
|
| 463 |
+
# for non streaming
|
| 464 |
+
# wait_time= librosa.get_duration(path=wav)
|
| 465 |
|
| 466 |
+
wait_time = AUDIO_WAIT_MODIFIER * wait_time
|
| 467 |
+
print("Sleeping till audio end")
|
| 468 |
+
time.sleep(wait_time)
|
| 469 |
|
| 470 |
except RuntimeError as e:
|
| 471 |
if "device-side assert" in str(e):
|
|
|
|
| 493 |
# yield (combined_file_name, history
|
| 494 |
|
| 495 |
wav_bytestream = wave_header_chunk() + wav_bytestream
|
| 496 |
+
time.sleep(0.7)
|
| 497 |
yield (gr.Audio.update(value=None, autoplay=False), history)
|
| 498 |
yield (gr.Audio.update(value=wav_bytestream, autoplay=False), history)
|
| 499 |
|