"""Minimal Kyutai TTS (English + French) demo for a Kaggle/Colab-style notebook.

Running this file (or pasting it into a notebook cell) clones the Kyutai
``delayed-streams-modeling`` repository, installs the Python dependencies,
synthesizes one demo sentence to ``demo.wav`` and displays an audio player.
Importing it has no side effects: the model is only loaded on first use.
"""

import os
import subprocess
import sys

# ======================
# Setup
# ======================
REPO_URL = "https://github.com/kyutai-labs/delayed-streams-modeling.git"
REPO_DIR = "delayed-streams-modeling"
PIP_PACKAGES = ("torch", "torchaudio", "gradio", "moshi")


def setup_environment():
    """Clone the Kyutai repo, change into it and install the dependencies.

    Plain-Python equivalent of the notebook commands ``!git clone ...``,
    ``%cd delayed-streams-modeling`` and ``!pip install -q ...``, so the file
    is valid both as a notebook cell and as a script.  Safe to re-run: the
    clone and the directory change are skipped when already done.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` or ``pip install``
            exits with a non-zero status.
    """
    # Already inside the checkout (e.g. the cell was re-run): do not nest.
    if os.path.basename(os.getcwd()) != REPO_DIR:
        if not os.path.isdir(REPO_DIR):
            subprocess.run(["git", "clone", REPO_URL], check=True)
        os.chdir(REPO_DIR)

    # Use the running interpreter's pip so packages land in this environment.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", *PIP_PACKAGES],
        check=True,
    )


# ======================
# Import + Load Model
# ======================
# Kyutai TTS model (English + French)
model_id = "kyutai/tts-1.6b-en_fr"

# Voice used when the caller does not pick one; resolved through the model's
# voice repository by ``get_voice_path``.
# NOTE(review): taken from Kyutai's reference script - confirm it still exists.
DEFAULT_VOICE = "expresso/ex03-ex01_happy_001_channel1_334s.wav"

# Cached TTS model; stays ``None`` until ``load_model()`` is first called.
model = None


def load_model():
    """Return the shared TTS model, loading it on the first call.

    The model runs on CUDA when available and on CPU otherwise.  Third-party
    imports are done here rather than at module top because the packages may
    only exist after ``setup_environment()`` has run.

    Returns:
        The loaded ``moshi.models.tts.TTSModel`` (also stored in ``model``).
    """
    global model
    if model is None:
        import torch
        from moshi.models.loaders import CheckpointInfo
        from moshi.models.tts import TTSModel

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        checkpoint_info = CheckpointInfo.from_hf_repo(model_id)
        # n_q / temp follow Kyutai's reference script.
        # NOTE(review): confirm these defaults against the installed moshi.
        model = TTSModel.from_checkpoint_info(
            checkpoint_info, n_q=32, temp=0.6, device=device
        )
    return model


def _assemble_waveform(chunks):
    """Join decoded PCM chunks into one ``(channels, samples)`` tensor.

    Args:
        chunks: non-empty list of tensors whose first axis is the batch axis
            and whose last axis is time; only batch item 0 is kept.

    Returns:
        A float32 CPU tensor clipped to ``[-1, 1]``, ready for
        ``torchaudio.save``.

    Raises:
        ValueError: if ``chunks`` is empty (nothing was generated).
    """
    import torch

    if not chunks:
        raise ValueError("no audio frames were generated")
    waveform = torch.cat([chunk[0] for chunk in chunks], dim=-1)
    return waveform.detach().float().clamp(-1.0, 1.0).cpu()


# ======================
# Simple test function
# ======================
def synthesize(text, lang="en", filename="out.wav", *, voice=DEFAULT_VOICE):
    """Synthesize ``text`` to a WAV file and return the file name.

    Args:
        text: the sentence(s) to speak; must contain non-whitespace characters.
        lang: kept for backward compatibility with existing callers; it is not
            forwarded to the model.
            NOTE(review): the moshi TTS calls used here take no language
            argument - confirm the model picks the language from the text.
        filename: path of the WAV file to write.
        voice: voice name resolved by the model's voice repository, or a path
            to a ``.safetensors`` voice embedding.

    Returns:
        ``filename``, unchanged.

    Raises:
        ValueError: if ``text`` is empty/blank or no audio was generated.
    """
    # Validate before the heavy imports/model load so bad input fails fast.
    if not text or not text.strip():
        raise ValueError("text must be a non-empty string")

    import torch
    import torchaudio

    tts = load_model()

    entries = tts.prepare_script([text], padding_between=1)
    if voice.endswith(".safetensors"):
        voice_path = voice
    else:
        voice_path = tts.get_voice_path(voice)
    attributes = tts.make_condition_attributes([voice_path], cfg_coef=2.0)

    result = tts.generate([entries], [attributes])

    with tts.mimi.streaming(1), torch.no_grad():
        # Skip the initial delay steps; index 0 of each frame is dropped and
        # only the remaining codebooks are passed to the audio decoder.
        chunks = [
            tts.mimi.decode(frame[:, 1:, :])
            for frame in result.frames[tts.delay_steps:]
        ]

    # Write at the codec's own sample rate instead of a hard-coded 16 kHz,
    # which would mislabel the audio and play it at the wrong speed/pitch.
    torchaudio.save(filename, _assemble_waveform(chunks), tts.mimi.sample_rate)
    return filename


def main():
    """Run the notebook flow: set up, synthesize the demo and display it."""
    setup_environment()

    # Example: run once to check
    synthesize("Hello, this is Kyutai TTS running on Kaggle!", "en", "demo.wav")

    import IPython.display as ipd

    # Explicit display(): a bare expression only renders as a cell's last line.
    ipd.display(ipd.Audio("demo.wav"))


if __name__ == "__main__":
    main()