Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
1ae8754
1
Parent(s):
a8fbf4a
fix
Browse files
- tts/frontend_function.py +15 -7
tts/frontend_function.py
CHANGED
@@ -40,11 +40,12 @@ def g2p(self, text_inp):
|
|
40 |
''' Get phoneme2mel align of prompt speech '''
|
41 |
def align(self, wav):
|
42 |
with torch.inference_mode():
|
43 |
-
# Validate input audio
|
44 |
-
|
|
|
45 |
raise ValueError("Input audio contains NaN or infinite values")
|
46 |
|
47 |
-
whisper_wav = librosa.resample(
|
48 |
|
49 |
# Validate resampled audio
|
50 |
if np.any(np.isnan(whisper_wav)) or np.any(np.isinf(whisper_wav)):
|
@@ -52,14 +53,21 @@ def align(self, wav):
|
|
52 |
|
53 |
# Get mel spectrogram with validation
|
54 |
mel_spec = whisper.log_mel_spectrogram(whisper_wav)
|
55 |
-
|
|
|
56 |
raise ValueError("Mel spectrogram contains NaN or infinite values")
|
57 |
|
58 |
mel = torch.FloatTensor(mel_spec.T).to(self.device)[None].transpose(1,2)
|
59 |
|
60 |
-
# Validate tensor before further processing
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
prompt_max_frame = mel.size(2) // self.fm * self.fm
|
64 |
mel = mel[:, :, :prompt_max_frame]
|
65 |
token = torch.LongTensor([[798]]).to(self.device)
|
|
|
40 |
''' Get phoneme2mel align of prompt speech '''
|
41 |
def align(self, wav):
|
42 |
with torch.inference_mode():
|
43 |
+
# Validate input audio - ensure it's numpy array
|
44 |
+
wav_np = np.asarray(wav)
|
45 |
+
if np.any(np.isnan(wav_np)) or np.any(np.isinf(wav_np)):
|
46 |
raise ValueError("Input audio contains NaN or infinite values")
|
47 |
|
48 |
+
whisper_wav = librosa.resample(wav_np, orig_sr=self.sr, target_sr=16000)
|
49 |
|
50 |
# Validate resampled audio
|
51 |
if np.any(np.isnan(whisper_wav)) or np.any(np.isinf(whisper_wav)):
|
|
|
53 |
|
54 |
# Get mel spectrogram with validation
|
55 |
mel_spec = whisper.log_mel_spectrogram(whisper_wav)
|
56 |
+
mel_spec_np = np.asarray(mel_spec)
|
57 |
+
if np.any(np.isnan(mel_spec_np)) or np.any(np.isinf(mel_spec_np)):
|
58 |
raise ValueError("Mel spectrogram contains NaN or infinite values")
|
59 |
|
60 |
mel = torch.FloatTensor(mel_spec.T).to(self.device)[None].transpose(1,2)
|
61 |
|
62 |
+
# Validate tensor before further processing - use safe tensor validation
|
63 |
+
try:
|
64 |
+
if torch.isnan(mel).any().item() or torch.isinf(mel).any().item():
|
65 |
+
raise ValueError("Mel tensor contains NaN or infinite values")
|
66 |
+
except Exception as e:
|
67 |
+
# Fallback to numpy validation if tensor validation fails
|
68 |
+
mel_np = mel.detach().cpu().numpy()
|
69 |
+
if np.any(np.isnan(mel_np)) or np.any(np.isinf(mel_np)):
|
70 |
+
raise ValueError("Mel tensor contains NaN or infinite values")
|
71 |
prompt_max_frame = mel.size(2) // self.fm * self.fm
|
72 |
mel = mel[:, :, :prompt_max_frame]
|
73 |
token = torch.LongTensor([[798]]).to(self.device)
|