Arnab Das committed on
Commit
dcb4762
·
1 Parent(s): 8be3e30
Files changed (1) hide show
  1. manipulate_model/utils.py +6 -1
manipulate_model/utils.py CHANGED
@@ -38,6 +38,9 @@ def load_audio(file_path, config):
38
 
39
  if file_path.endswith(".wav") or file_path.endswith(".flac"):
40
  audio, sample_rate = torchaudio.load(file_path)
 
 
 
41
  elif file_path.endswith(".mp3"):
42
  pass
43
  elif file_path.endswith(".mp4"):
@@ -62,7 +65,9 @@ def preprocess_audio(audio, config, step_size=1):
62
  window_size = config.data.window_size
63
  sr = config.data.sr
64
  fps = config.data.fps
65
- print("###########", audio.shape)
 
 
66
  audio_len = audio.shape[1]
67
  step_size = step_size * (sr // fps)
68
  window_size = window_size * (sr // fps)
 
38
 
39
  if file_path.endswith(".wav") or file_path.endswith(".flac"):
40
  audio, sample_rate = torchaudio.load(file_path)
41
+ if sample_rate != config.data.sr:
42
+ print("requires resampling")
43
+ audio = torchaudio.functional.resample(audio, sample_rate, config.data.sr)
44
  elif file_path.endswith(".mp3"):
45
  pass
46
  elif file_path.endswith(".mp4"):
 
65
  window_size = config.data.window_size
66
  sr = config.data.sr
67
  fps = config.data.fps
68
+ if audio.shape[0] > 1:
69
+ print("Warning: multi channel audio")
70
+ audio = audio[0].unsqueeze(0)
71
  audio_len = audio.shape[1]
72
  step_size = step_size * (sr // fps)
73
  window_size = window_size * (sr // fps)