Spaces:
Running
Running
acumplid
committed on
Commit
·
4bd685e
1
Parent(s):
40501c8
Add stereo-to-mono integration and comment out the import of the spaces module
Browse files
- whisper.py +11 -11
whisper.py
CHANGED
@@ -6,7 +6,7 @@ import torchaudio
|
|
6 |
import torch
|
7 |
import re
|
8 |
from transformers import pipeline
|
9 |
-
import spaces
|
10 |
|
11 |
|
12 |
device = 0 if torch.cuda.is_available() else "cpu"
|
@@ -204,8 +204,8 @@ def processing_vad_threshold(audio, output_vad, threshold, max_duration, concate
|
|
204 |
def format_audio(audio_path):
|
205 |
input_audio, sample_rate = torchaudio.load(audio_path)
|
206 |
|
207 |
-
|
208 |
-
|
209 |
|
210 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
211 |
input_audio = resampler(input_audio)
|
@@ -220,12 +220,12 @@ def transcribe_pipeline(audio, task):
|
|
220 |
def generate(audio_path, use_v5):
|
221 |
audio = AudioSegment.from_wav(audio_path)
|
222 |
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
|
230 |
output_vad = pipeline_vad(audio_path)
|
231 |
concatenated_segment = AudioSegment.empty()
|
@@ -239,7 +239,7 @@ def generate(audio_path, use_v5):
|
|
239 |
|
240 |
clean_output = post_process_transcription(output)
|
241 |
|
242 |
-
|
243 |
-
|
244 |
|
245 |
return clean_output
|
|
|
6 |
import torch
|
7 |
import re
|
8 |
from transformers import pipeline
|
9 |
+
# import spaces
|
10 |
|
11 |
|
12 |
device = 0 if torch.cuda.is_available() else "cpu"
|
|
|
204 |
def format_audio(audio_path):
|
205 |
input_audio, sample_rate = torchaudio.load(audio_path)
|
206 |
|
207 |
+
if input_audio.shape[0] == 2: #stereo2mono
|
208 |
+
input_audio = torch.mean(input_audio, dim=0, keepdim=True)
|
209 |
|
210 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
211 |
input_audio = resampler(input_audio)
|
|
|
220 |
def generate(audio_path, use_v5):
|
221 |
audio = AudioSegment.from_wav(audio_path)
|
222 |
|
223 |
+
temp_mono_path = None
|
224 |
+
if audio.channels != 1: #stereo2mono
|
225 |
+
audio = audio.set_channels(1)
|
226 |
+
temp_mono_path = "temp_mono.wav"
|
227 |
+
audio.export(temp_mono_path, format="wav")
|
228 |
+
audio_path = temp_mono_path
|
229 |
|
230 |
output_vad = pipeline_vad(audio_path)
|
231 |
concatenated_segment = AudioSegment.empty()
|
|
|
239 |
|
240 |
clean_output = post_process_transcription(output)
|
241 |
|
242 |
+
if temp_mono_path and os.path.exists(temp_mono_path):
|
243 |
+
os.remove(temp_mono_path)
|
244 |
|
245 |
return clean_output
|