# watermelon2 / preprocess.py
# Uploaded by Xalphinions
# Commit message: "Upload folder using huggingface_hub"
# Revision: ecbd6f6 (verified)
import torch
import torchaudio
import torchvision
# Target sampling rate in Hz. process_audio_data resamples every waveform to
# this rate and derives its fixed clip length (3 * resample_rate samples) from it.
resample_rate = 16_000
def process_audio_data(waveform, sample_rate):
    """Convert a raw waveform into a fixed-length MFCC feature tensor.

    The first (left) channel is resampled to ``resample_rate`` Hz, then
    zero-padded or truncated to exactly ``3 * resample_rate`` samples, and
    finally converted to 13 MFCC coefficients computed over 40 mel bands
    (256-sample FFT/window, 128-sample hop).

    Args:
        waveform: Audio tensor. Either multi-channel with the channel axis
            first (the first channel is used), or an already-mono 1-D
            tensor of samples.
        sample_rate: Sampling rate of ``waveform`` in Hz.

    Returns:
        The MFCC tensor, or ``None`` if any step raises. The error is
        printed rather than propagated, so callers must check for ``None``.
    """
    try:
        # Use the left channel. A 1-D tensor is already mono: indexing it
        # would produce a 0-d scalar and make every later step fail.
        if waveform.dim() > 1:
            waveform = waveform[0]

        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate,
            new_freq=resample_rate,
        )
        waveform = resampler(waveform)

        # Force every clip to the same length along the time axis so that
        # all resulting MFCC tensors have the same number of frames.
        target_length = 3 * resample_rate
        current_length = waveform.shape[-1]
        if current_length < target_length:
            waveform = torch.nn.functional.pad(
                waveform, (0, target_length - current_length)
            )
        else:
            waveform = waveform[:target_length]

        mfcc_transform = torchaudio.transforms.MFCC(
            sample_rate=resample_rate,
            n_mfcc=13,
            melkwargs={
                "n_fft": 256,
                "win_length": 256,
                "hop_length": 128,
                "n_mels": 40,
            },
        )
        return mfcc_transform(waveform)
    except Exception as e:
        # Deliberate best-effort: report the failure and signal it with None.
        print(f"ERR!: Error in audio processing: {e}")
        return None
def process_image_data(image):
    """Resize, rescale and normalize an image for model input.

    The image is resized to 1080x1080, divided by 255.0, and then normalized
    per channel with mean ``[0.485, 0.456, 0.406]`` and std
    ``[0.229, 0.224, 0.225]``.

    Args:
        image: The image to preprocess. Presumably a 3-channel tensor with
            values in the 0-255 range (three normalization statistics are
            supplied and the data is divided by 255) -- confirm with callers.

    Returns:
        The normalized image, or ``None`` if any step raises. The error is
        printed rather than propagated, so callers must check for ``None``.
    """
    try:
        to_square = torchvision.transforms.Resize((1080, 1080))
        standardize = torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )

        resized = to_square(image)
        rescaled = resized / 255.0
        return standardize(rescaled)
    except Exception as e:
        print(f"ERR!: Error in image processing: {e}")
        return None