Spaces:
Sleeping
Sleeping
import torch | |
import torchaudio | |
import torchvision | |
# Target sample rate in Hz: process_audio_data resamples every waveform to
# this rate and also uses it as the sample rate of the MFCC transform.
resample_rate = 16000
def process_audio_data(waveform, sample_rate, clip_seconds=3):
    """Turn a raw waveform into a fixed-length MFCC feature tensor.

    Steps, in order: select the first channel, resample to the module-level
    ``resample_rate``, zero-pad or truncate to exactly ``clip_seconds``
    seconds, then compute 13 MFCC coefficients per frame.

    Args:
        waveform: Audio tensor. Either 1-D ``(samples,)`` or with a leading
            channel axis; with a leading axis only index 0 is used.
            (Assumes the ``(channels, samples)`` layout -- confirm against
            the caller.)
        sample_rate: Sample rate of ``waveform`` in Hz.
        clip_seconds: Length every clip is padded or cut to, in seconds.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        The MFCC tensor for the clip, or ``None`` if any step raised. The
        failure is reported on stdout rather than propagated, so callers
        must check for ``None``.
    """
    try:
        # A 1-D tensor is already a single channel. Indexing it with [0]
        # would collapse it to a scalar and make every later step fail.
        if waveform.dim() > 1:
            waveform = waveform[0]  # use the left (first) channel

        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate,
            new_freq=resample_rate,
        )
        waveform = resampler(waveform)

        # Force a fixed number of samples so every clip yields the same
        # number of MFCC frames.
        target_len = int(clip_seconds * resample_rate)
        n_samples = waveform.size(0)
        if n_samples < target_len:
            # Right-pad with zeros (silence) up to the target length.
            waveform = torch.nn.functional.pad(
                waveform, (0, target_len - n_samples)
            )
        else:
            waveform = waveform[:target_len]

        mfcc = torchaudio.transforms.MFCC(
            sample_rate=resample_rate,
            n_mfcc=13,
            melkwargs={
                "n_fft": 256,
                "win_length": 256,
                "hop_length": 128,
                "n_mels": 40,
            },
        )(waveform)
        return mfcc
    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure
        # with None instead of aborting the caller.
        print(f"ERR!: Error in audio processing: {e}")
        return None
def process_image_data(image):
    """Resize, rescale and normalize an image.

    Steps, in order: resize to 1080x1080, divide by 255.0, then apply
    per-channel normalization with the mean/std constants below.

    Args:
        image: Input accepted by ``torchvision.transforms.Resize`` that also
            supports division by a float. (Presumably a 3-channel
            ``(C, H, W)`` tensor with values in 0-255 -- confirm against the
            caller.)

    Returns:
        The normalized image, or ``None`` if any step raised. The failure is
        printed to stdout rather than propagated.
    """
    # Per-channel statistics; these are the widely used ImageNet values.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    try:
        resize = torchvision.transforms.Resize((1080, 1080))
        normalize = torchvision.transforms.Normalize(
            mean=channel_mean,
            std=channel_std,
        )
        # Resize first, then bring values down by a factor of 255 before
        # normalizing.
        scaled = resize(image) / 255.0
        return normalize(scaled)
    except Exception as e:
        print(f"ERR!: Error in image processing: {e}")
        return None