| import gradio as gr |
| import os |
|
|
# Install paddlespeech and paddlepaddle with pip when the script starts.
# The exit status of each command is not checked.
for _pip_command in (
    'pip install paddlespeech',
    'pip install paddlepaddle',
):
    os.system(_pip_command)
|
|
| from transformers import AutoModel, AutoTokenizer |
| from TTS.api import TTS |
|
|
# Options shared by both Coqui TTS models: no progress bar, run on the GPU.
_tts_options = {"progress_bar": False, "gpu": True}

# FreeVC24 voice-conversion model; `chinese` calls its
# `voice_conversion_to_file` method.
tts = TTS(
    model_name="voice_conversion_models/multilingual/vctk/freevc24",
    **_tts_options,
)

# YourTTS multilingual model; `english` calls its `tts_to_file` method with a
# reference speaker recording.
tts1 = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    **_tts_options,
)
|
|
| import torch |
| import torchaudio |
| from speechbrain.pretrained import SpectralMaskEnhancement |
|
|
# Load the MetricGAN+ (VoiceBank) spectral-mask enhancement model on CUDA.
# It is applied to every synthesized clip before the clip is returned.
_enhancer_options = {
    "source": "speechbrain/metricgan-plus-voicebank",
    "savedir": "pretrained_models/metricgan-plus-voicebank",
    "run_opts": {"device": "cuda"},
}
enhance_model = SpectralMaskEnhancement.from_hparams(**_enhancer_options)
|
|
# ChatGLM-6B tokenizer and model, both loaded with remote code enabled.
_CHATGLM_REPO = "THUDM/chatglm-6b"

tokenizer = AutoTokenizer.from_pretrained(_CHATGLM_REPO, trust_remote_code=True)

# Cast to half precision, move to the GPU, then switch to evaluation mode.
_chatglm = AutoModel.from_pretrained(_CHATGLM_REPO, trust_remote_code=True)
model = _chatglm.half().cuda().eval()
|
|
def inference(text):
    """Synthesize *text* to speech with the ``paddlespeech`` command-line tool.

    The text is passed to ``paddlespeech tts`` as a single argument without
    going through a shell, so quotes or shell metacharacters in the text can
    neither break the command nor run other commands.

    Args:
        text: The text to speak.

    Returns:
        The path of the generated audio file, ``"output.wav"``.

    Raises:
        subprocess.CalledProcessError: If ``paddlespeech`` exits with a
            non-zero status.
        FileNotFoundError: If the ``paddlespeech`` executable cannot be found.
    """
    # Imported locally so the function does not rely on a module-level import.
    import subprocess

    output_path = "output.wav"
    # check=True surfaces a failed synthesis instead of silently returning a
    # path that may be missing or left over from an earlier call.
    subprocess.run(
        ["paddlespeech", "tts", "--input", text, "--output", output_path],
        check=True,
    )
    return output_path
|
|
def predict(input, history=None):
    """Send one user message to the chat model and return the updated state.

    Args:
        input: The user's message.
        history: The conversation so far; ``None`` starts a new conversation
            with an empty history.

    Returns:
        A ``(history, history, response)`` tuple: the updated history twice
        (the UI binds it to both the chatbot display and the session state)
        followed by the model's latest reply.
    """
    past_turns = [] if history is None else history
    reply, updated_history = model.chat(tokenizer, input, past_turns)
    return updated_history, updated_history, reply
|
|
def chinese(text_cn, upload1, VoiceMicrophone1):
    """Speak Chinese text in a reference voice and enhance the result.

    The text is first synthesized with ``inference``. That audio is then
    converted towards the reference speaker with the voice-conversion model
    and finally passed through the speech-enhancement model.

    Args:
        text_cn: The Chinese text to speak.
        upload1: Path of an uploaded reference recording, or ``None``.
        VoiceMicrophone1: Path of a microphone reference recording, used only
            when ``upload1`` is ``None``.

    Returns:
        The path of the enhanced audio file, ``"enhanced.wav"``.

    Raises:
        ValueError: If neither reference recording is provided.
    """
    # An uploaded file takes precedence over a microphone recording.
    reference_wav = upload1 if upload1 is not None else VoiceMicrophone1
    if reference_wav is None:
        # Fail before any synthesis work, with a message the UI can display.
        raise ValueError(
            "请先上传或录制一段参考声音 (please upload or record a reference voice first)."
        )

    tts.voice_conversion_to_file(
        source_wav=inference(text_cn),
        target_wav=reference_wav,
        file_path="output0.wav",
    )

    # unsqueeze(0) adds a leading batch dimension for enhance_batch.
    noisy = enhance_model.load_audio("output0.wav").unsqueeze(0)
    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.0]))
    # NOTE(review): 16000 is presumably the enhancement model's sample rate.
    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
    return "enhanced.wav"
|
|
def english(text_en, upload, VoiceMicrophone):
    """Speak English text in a reference voice and enhance the result.

    The stripped text is synthesized with the multilingual TTS model using the
    reference recording as ``speaker_wav``. The output is then passed through
    the speech-enhancement model.

    Args:
        text_en: The English text to speak; surrounding whitespace is removed.
        upload: Path of an uploaded reference recording, or ``None``.
        VoiceMicrophone: Path of a microphone reference recording, used only
            when ``upload`` is ``None``.

    Returns:
        The path of the enhanced audio file, ``"enhanced.wav"``.

    Raises:
        ValueError: If neither reference recording is provided.
    """
    # An uploaded file takes precedence over a microphone recording.
    reference_wav = upload if upload is not None else VoiceMicrophone
    if reference_wav is None:
        # Fail before any synthesis work, with a message the UI can display.
        raise ValueError(
            "请先上传或录制一段参考声音 (please upload or record a reference voice first)."
        )

    tts1.tts_to_file(
        text_en.strip(),
        speaker_wav=reference_wav,
        language="en",
        file_path="output.wav",
    )

    # unsqueeze(0) adds a leading batch dimension for enhance_batch.
    noisy = enhance_model.load_audio("output.wav").unsqueeze(0)
    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.0]))
    # NOTE(review): 16000 is presumably the enhancement model's sample rate.
    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
    return "enhanced.wav"
|
|
# Build the web UI: a chat panel backed by `predict`, plus controls that read
# the latest reply aloud in a user-supplied voice via `chinese` / `english`.
# NOTE(review): the source had lost its indentation, so the nesting of
# components inside rows below is reconstructed -- confirm against the
# intended layout.
with gr.Blocks() as demo:
    gr.Markdown(
        """ # <center>🥳💬💕 - TalktoAI,随时随地,谈天说地!</center>

        ### <center>🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!</center>

        """
    )

    # Conversation state, chat transcript, and a box holding the latest reply.
    state = gr.State([])
    chatbot = gr.Chatbot([], elem_id="chatbot").style(height=300)
    res = gr.Textbox(
        lines=1,
        placeholder="最新的回答在这里",
        show_label=False,
    ).style(container=False)

    with gr.Row():
        txt = gr.Textbox(label="说点什么吧(中英皆可)", lines=1)
        button = gr.Button("开始对话吧")

    # Pressing Enter in the textbox and clicking the button both run `predict`.
    for _chat_trigger in (txt.submit, button.click):
        _chat_trigger(predict, [txt, state], [chatbot, state, res])

    with gr.Row().style(mobile_collapse=False, equal_height=True):
        # The text to be spoken is the latest chat reply.
        inp3 = res
        inp4 = gr.Audio(
            source="upload",
            label="请上传您喜欢的声音(wav/mp3文件);长语音(90s左右)效果更好",
            type="filepath",
        )
        inp5 = gr.Audio(
            source="microphone",
            type="filepath",
            label="请用麦克风上传您喜欢的声音,与文件上传二选一即可",
        )
        # NOTE(review): both buttons are assumed to sit inside this row.
        btn1 = gr.Button("用喜欢的声音听一听吧(中文)")
        btn2 = gr.Button("用喜欢的声音听一听吧(英文)")

    with gr.Row():
        out1 = gr.Audio(label="为您合成的专属声音(中文)")
        out2 = gr.Audio(label="为您合成的专属声音(英文)")

    # Each button feeds the same three inputs to its synthesis function and
    # shows the result in its own audio player.
    for _voice_button, _synthesize, _player in (
        (btn1, chinese, out1),
        (btn2, english, out2),
    ):
        _voice_button.click(_synthesize, [inp3, inp4, inp5], [_player])

    gr.Markdown(
        """ ### <center>注意❗:请不要输入或生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及娱乐使用。用户输入或生成的内容与程序开发者无关,请自觉合法合规使用,违反者一切后果自负。</center>

        ### <center>Model by [ChatGLM-6B](https://huggingface.co/THUDM/chatglm-6b). Thanks to [THUDM](https://github.com/THUDM). Please follow me on [Bilibili](https://space.bilibili.com/501495851?spm_id_from=333.1007.0.0).</center>

        """
    )

    gr.HTML('''
        <div class="footer">
        <p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
        </p>
        <p>注:中文声音克隆实际上是通过声音转换(Voice Conversion)实现,所以输出结果可能更像是一种新的声音,效果不一定很理想,希望大家多多包涵,之后我们也会不断迭代该程序的!为了实现更好的效果,使用中文声音克隆时请尽量上传女声。
        </p>
        </div>
    ''')


# Enable request queuing and start the server; errors are shown in the UI.
demo.queue().launch(show_error=True)
|
|