# Unit-based HiFi-GAN vocoder checkpoint and config (from LLaMA-Omni).
VOCODER_CKPT=/data1/speech/anhnmt2/Speech2Speech/LLaMA-Omni/vocoder/g_00500000
VOCODER_CFG=/data1/speech/anhnmt2/Speech2Speech/LLaMA-Omni/vocoder/config.json

# Input questions, output answers, and the fine-tuned speech-to-speech checkpoint.
question_file=../examples/question.json
answer_file=../examples/answer.json
model_path=/data1/speech/anhnmt2/Speech2Speech/half-streaming-speech-nlp/checkpoints/omni_whisper-medium_Qwen2.5-3B_pretrained-sft-fc-mixed-vfva_speech-decoder
prompt_version=qwen
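
# Optional sanity check (not part of the original script): fail fast if any
# required input is missing before launching inference.
for f in "$VOCODER_CKPT" "$VOCODER_CFG" "$question_file"; do
    [ -f "$f" ] || { echo "Missing required file: $f" >&2; exit 1; }
done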

# Step 1: run multi-turn inference; --s2s makes the model emit discrete speech
# units alongside the text responses.
python3 ../infer_llm_multiturn.py \
    --model-path "$model_path" \
    --question-file "$question_file" \
    --answer-file "$answer_file" \
    --num-chunks 1 \
    --chunk-idx 0 \
    --temperature 0 \
    --conv-mode "$prompt_version" \
    --input_type mel \
    --mel_size 80 \
    --s2s
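
# Flag notes (assumptions based on the LLaMA-Omni-style CLI):
#   --num-chunks 1 --chunk-idx 0    process the whole question file as one shard;
#   --temperature 0                 greedy, deterministic decoding;
#   --input_type mel --mel_size 80  feed 80-bin log-mel features, matching the
#                                   Whisper-medium encoder's expected input.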

# Step 2: flatten the JSONL answers into a plain-text unit file for the vocoder.
python3 ../convert_jsonl_to_txt.py "$answer_file" answer.unit
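# Each line of answer.unit should hold one utterance's unit sequence as
# space-separated integer IDs, the format fairseq's code-to-waveform script
# consumes; inspect the first entry to confirm:
head -n 1 answer.unit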

# Step 3: synthesize waveforms from the unit sequences with fairseq's unit
# HiFi-GAN generator; --dur-prediction predicts per-unit durations, needed
# when the unit sequences are deduplicated.
python3 /data1/speech/anhnmt2/Speech2Speech/LLaMA-Omni/fairseq/examples/speech_to_speech/generate_waveform_from_code.py \
    --in-code-file answer.unit \
    --vocoder "$VOCODER_CKPT" --vocoder-cfg "$VOCODER_CFG" \
    --results-path answer_wav/ --dur-prediction
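
# One synthesized waveform per input line should land under answer_wav/
# (exact file naming depends on the fairseq version in use):
ls answer_wav/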