mrfakename committed on
Commit
1ae8754
·
1 Parent(s): a8fbf4a
Files changed (1) hide show
  1. tts/frontend_function.py +15 -7
tts/frontend_function.py CHANGED
@@ -40,11 +40,12 @@ def g2p(self, text_inp):
40
  ''' Get phoneme2mel align of prompt speech '''
41
  def align(self, wav):
42
  with torch.inference_mode():
43
- # Validate input audio
44
- if np.any(np.isnan(wav)) or np.any(np.isinf(wav)):
 
45
  raise ValueError("Input audio contains NaN or infinite values")
46
 
47
- whisper_wav = librosa.resample(wav, orig_sr=self.sr, target_sr=16000)
48
 
49
  # Validate resampled audio
50
  if np.any(np.isnan(whisper_wav)) or np.any(np.isinf(whisper_wav)):
@@ -52,14 +53,21 @@ def align(self, wav):
52
 
53
  # Get mel spectrogram with validation
54
  mel_spec = whisper.log_mel_spectrogram(whisper_wav)
55
- if np.any(np.isnan(mel_spec)) or np.any(np.isinf(mel_spec)):
 
56
  raise ValueError("Mel spectrogram contains NaN or infinite values")
57
 
58
  mel = torch.FloatTensor(mel_spec.T).to(self.device)[None].transpose(1,2)
59
 
60
- # Validate tensor before further processing
61
- if torch.isnan(mel).any() or torch.isinf(mel).any():
62
- raise ValueError("Mel tensor contains NaN or infinite values")
 
 
 
 
 
 
63
  prompt_max_frame = mel.size(2) // self.fm * self.fm
64
  mel = mel[:, :, :prompt_max_frame]
65
  token = torch.LongTensor([[798]]).to(self.device)
 
40
  ''' Get phoneme2mel align of prompt speech '''
41
  def align(self, wav):
42
  with torch.inference_mode():
43
+ # Validate input audio - ensure it's numpy array
44
+ wav_np = np.asarray(wav)
45
+ if np.any(np.isnan(wav_np)) or np.any(np.isinf(wav_np)):
46
  raise ValueError("Input audio contains NaN or infinite values")
47
 
48
+ whisper_wav = librosa.resample(wav_np, orig_sr=self.sr, target_sr=16000)
49
 
50
  # Validate resampled audio
51
  if np.any(np.isnan(whisper_wav)) or np.any(np.isinf(whisper_wav)):
 
53
 
54
  # Get mel spectrogram with validation
55
  mel_spec = whisper.log_mel_spectrogram(whisper_wav)
56
+ mel_spec_np = np.asarray(mel_spec)
57
+ if np.any(np.isnan(mel_spec_np)) or np.any(np.isinf(mel_spec_np)):
58
  raise ValueError("Mel spectrogram contains NaN or infinite values")
59
 
60
  mel = torch.FloatTensor(mel_spec.T).to(self.device)[None].transpose(1,2)
61
 
62
+ # Validate tensor before further processing - use safe tensor validation
63
+ try:
64
+ if torch.isnan(mel).any().item() or torch.isinf(mel).any().item():
65
+ raise ValueError("Mel tensor contains NaN or infinite values")
66
+ except Exception as e:
67
+ # Fallback to numpy validation if tensor validation fails
68
+ mel_np = mel.detach().cpu().numpy()
69
+ if np.any(np.isnan(mel_np)) or np.any(np.isinf(mel_np)):
70
+ raise ValueError("Mel tensor contains NaN or infinite values")
71
  prompt_max_frame = mel.size(2) // self.fm * self.fm
72
  mel = mel[:, :, :prompt_max_frame]
73
  token = torch.LongTensor([[798]]).to(self.device)