# ======================
# Setup
# ======================
# Fetch the Kyutai delayed-streams-modeling repository and make it the working
# directory for the rest of the notebook.
# NOTE(review): `%cd` persists across cells, so re-running this cell from inside
# the clone would clone again into a nested directory — confirm whether a guard
# is needed.
!git clone https://github.com/kyutai-labs/delayed-streams-modeling.git
%cd delayed-streams-modeling

# Install the Python packages quietly (-q).
# NOTE(review): gradio is not referenced in the code visible here; presumably a
# later cell uses it — verify.
!pip install -q torch torchaudio gradio moshi

# ======================
# Import + Load Model
# ======================
import torch
import torchaudio
# TTSModel lives in moshi.models.tts and is built from a CheckpointInfo; the
# previous `from moshi.models import TTSModel` / `TTSModel.from_pretrained`
# pair does not match the published moshi API.
from moshi.models.loaders import CheckpointInfo
from moshi.models.tts import TTSModel

# Load Kyutai TTS model (English + French)
model_id = "kyutai/tts-1.6b-en_fr"

# Default speaker: a voice file name resolved by `model.get_voice_path`.
# NOTE(review): taken from the upstream example notebook; see the
# kyutai/tts-voices repository on Hugging Face for the available voices.
DEFAULT_VOICE = "expresso/ex03-ex01_happy_001_channel1_334s.wav"

# n_q / temp follow the values used in the upstream example.
model = TTSModel.from_checkpoint_info(
    CheckpointInfo.from_hf_repo(model_id),
    n_q=32,
    temp=0.6,
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

# ======================
# Simple test function
# ======================
def synthesize(text, lang="en", filename="out.wav", voice=DEFAULT_VOICE):
    """Synthesize *text* with the module-level ``model`` and save it as audio.

    Args:
        text: The sentence(s) to speak. Must contain non-whitespace characters.
        lang: Kept for backward compatibility with existing callers. It is not
            forwarded to the model: the moshi TTS calls used here take no
            language argument (NOTE(review): confirm against the moshi docs).
        filename: Path of the audio file to write.
        voice: Voice name passed to ``model.get_voice_path``.

    Returns:
        ``filename``, unchanged, so the result can be handed to a player.

    Raises:
        ValueError: If *text* is empty or whitespace only.
        RuntimeError: If generation yields no decodable audio frame.
    """
    if not text or not text.strip():
        raise ValueError("text must be a non-empty string")

    # One script entry list and one set of conditioning attributes per
    # generated utterance; a single-turn script here.
    entries = model.prepare_script([text], padding_between=1)
    attributes = model.make_condition_attributes(
        [model.get_voice_path(voice)], cfg_coef=2.0
    )

    chunks = []

    def _on_frame(frame):
        # Frames that still contain -1 are skipped, as in the upstream example
        # (presumably positions not generated yet because of the stream delays).
        if (frame != -1).all():
            # Drop the first (text) stream, decode the audio codebooks, and keep
            # batch item 0 so each chunk is (channels, samples).
            chunks.append(model.mimi.decode(frame[:, 1:, :])[0].float().cpu())

    with torch.no_grad(), model.mimi.streaming(1):
        model.generate([entries], [attributes], on_frame=_on_frame)

    if not chunks:
        raise RuntimeError("the model produced no audio frames")

    # torchaudio.save expects a 2-D (channels, time) CPU tensor.
    audio = torch.cat(chunks, dim=-1).clamp(-1.0, 1.0)
    # Use the codec's own sample rate instead of a hard-coded 16000 Hz, which
    # would play the audio back at the wrong speed/pitch.
    torchaudio.save(filename, audio, int(model.mimi.sample_rate))
    return filename

# Smoke test: synthesize one English sentence and remember where it was written.
demo_path = synthesize(
    text="Hello, this is Kyutai TTS running on Kaggle!",
    lang="en",
    filename="demo.wav",
)

# Build an inline audio player for the generated file; it is rendered when this
# is the last expression of a notebook cell.
import IPython.display as ipd
ipd.Audio(demo_path)