Plachta committed on
Commit
5e11f7a
·
verified ·
1 Parent(s): 742c575

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -77,8 +77,8 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
77
  ref_audio = librosa.load(target, sr=sr)[0]
78
 
79
  # Process audio
80
- source_audio = torch.tensor(source_audio[:sr * 30]).unsqueeze(0).float()
81
- ref_audio = torch.tensor(ref_audio[:sr * 30]).unsqueeze(0).float()
82
 
83
  # Resample
84
  source_waves_16k = torchaudio.functional.resample(source_audio, sr, 16000)
@@ -88,8 +88,8 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
88
  S_alt = cosyvoice_frontend.extract_speech_token(source_waves_16k)[0]
89
  S_ori = cosyvoice_frontend.extract_speech_token(ref_waves_16k)[0]
90
 
91
- mel = to_mel(source_audio.float())
92
- mel2 = to_mel(ref_audio.float())
93
 
94
  target_lengths = torch.LongTensor([int(mel.size(2) * length_adjust)]).to(mel.device)
95
  target2_lengths = torch.LongTensor([mel2.size(2)]).to(mel2.device)
 
77
  ref_audio = librosa.load(target, sr=sr)[0]
78
 
79
  # Process audio
80
+ source_audio = torch.tensor(source_audio[:sr * 30]).unsqueeze(0).float().to(device)
81
+ ref_audio = torch.tensor(ref_audio[:sr * 30]).unsqueeze(0).float().to(device)
82
 
83
  # Resample
84
  source_waves_16k = torchaudio.functional.resample(source_audio, sr, 16000)
 
88
  S_alt = cosyvoice_frontend.extract_speech_token(source_waves_16k)[0]
89
  S_ori = cosyvoice_frontend.extract_speech_token(ref_waves_16k)[0]
90
 
91
+ mel = to_mel(source_audio.to(device).float())
92
+ mel2 = to_mel(ref_audio.to(device).float())
93
 
94
  target_lengths = torch.LongTensor([int(mel.size(2) * length_adjust)]).to(mel.device)
95
  target2_lengths = torch.LongTensor([mel2.size(2)]).to(mel2.device)