Xalphinions committed on
Commit
ecbd6f6
·
verified ·
1 Parent(s): 6541c53

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. preprocess.py +45 -0
preprocess.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ import torchvision
4
+
5
+ resample_rate = 16000  # Target sample rate in Hz; process_audio_data resamples audio to this rate.
6
+
7
def process_audio_data(waveform, sample_rate, *, clip_seconds=3):
    """Convert a raw waveform into a fixed-length MFCC feature tensor.

    Steps: keep a single channel, resample to the module-level
    ``resample_rate``, pad with zeros or truncate to ``clip_seconds`` of
    audio, then compute 13 MFCC coefficients per frame.

    Args:
        waveform: Audio tensor. When it has more than one dimension the
            first entry along dimension 0 is used (the left channel,
            assuming a (channels, samples) layout -- TODO confirm with
            callers). A 1-D tensor is treated as already mono.
        sample_rate: Sample rate of ``waveform`` in Hz.
        clip_seconds: Length of audio to keep, in seconds. Defaults to 3,
            which matches the previously hard-coded behavior.

    Returns:
        The MFCC tensor, or ``None`` if any step raised an exception
        (the error is printed, not re-raised).
    """
    try:
        # Use the left channel. A 1-D input has no channel dimension, and
        # indexing it would collapse the signal to a single scalar sample.
        if waveform.dim() > 1:
            waveform = waveform[0]

        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate,
            new_freq=resample_rate,
        )
        waveform = resampler(waveform)

        # Force every clip to exactly the same number of samples so the
        # resulting feature tensors all share one shape.
        target_length = int(clip_seconds * resample_rate)
        current_length = waveform.size(0)
        if current_length < target_length:
            waveform = torch.nn.functional.pad(
                waveform, (0, target_length - current_length)
            )
        else:
            waveform = waveform[:target_length]

        mfcc_transform = torchaudio.transforms.MFCC(
            sample_rate=resample_rate,
            n_mfcc=13,
            melkwargs={
                "n_fft": 256,
                "win_length": 256,
                "hop_length": 128,
                "n_mels": 40,
            },
        )
        return mfcc_transform(waveform)
    except Exception as e:
        # Best-effort preprocessing: report the failure and let the caller
        # decide how to handle a missing sample.
        print(f"ERR!: Error in audio processing: {e}")
        return None
32
+
33
def process_image_data(image):
    """Resize, rescale and normalize an image for model input.

    The image is resized to 1080x1080, divided by 255.0, and then
    normalized per channel with fixed mean/std values (three channels are
    expected by the normalization constants).

    Args:
        image: Image to preprocess. Presumably a tensor holding pixel
            values in the 0-255 range -- TODO confirm against callers.

    Returns:
        The normalized image, or ``None`` if any step raised an exception
        (the error is printed, not re-raised).
    """
    try:
        resize = torchvision.transforms.Resize((1080, 1080))
        resized = resize(image)

        # Bring pixel values down by a factor of 255 before normalizing.
        scaled = resized / 255.0

        channel_mean = [0.485, 0.456, 0.406]
        channel_std = [0.229, 0.224, 0.225]
        normalize = torchvision.transforms.Normalize(
            mean=channel_mean,
            std=channel_std,
        )
        return normalize(scaled)
    except Exception as err:
        print(f"ERR!: Error in image processing: {err}")
        return None