acumplid committed
Commit 4bd685e · 1 Parent(s): 40501c8

add integration and comment import space module

Files changed (1)
  1. whisper.py +11 -11
whisper.py CHANGED
@@ -6,7 +6,7 @@ import torchaudio
 import torch
 import re
 from transformers import pipeline
-import spaces
+# import spaces
 
 
 device = 0 if torch.cuda.is_available() else "cpu"
@@ -204,8 +204,8 @@ def processing_vad_threshold(audio, output_vad, threshold, max_duration, concate
 def format_audio(audio_path):
     input_audio, sample_rate = torchaudio.load(audio_path)
 
-    #if input_audio.shape[0] == 2: #stereo2mono
-    #    input_audio = torch.mean(input_audio, dim=0, keepdim=True)
+    if input_audio.shape[0] == 2: #stereo2mono
+        input_audio = torch.mean(input_audio, dim=0, keepdim=True)
 
     resampler = torchaudio.transforms.Resample(sample_rate, 16000)
     input_audio = resampler(input_audio)
@@ -220,12 +220,12 @@ def transcribe_pipeline(audio, task):
 def generate(audio_path, use_v5):
     audio = AudioSegment.from_wav(audio_path)
 
-    #temp_mono_path = None
-    #if audio.channels != 1: #stereo2mono
-    #    audio = audio.set_channels(1)
-    #    temp_mono_path = "temp_mono.wav"
-    #    audio.export(temp_mono_path, format="wav")
-    #    audio_path = temp_mono_path
+    temp_mono_path = None
+    if audio.channels != 1: #stereo2mono
+        audio = audio.set_channels(1)
+        temp_mono_path = "temp_mono.wav"
+        audio.export(temp_mono_path, format="wav")
+        audio_path = temp_mono_path
 
     output_vad = pipeline_vad(audio_path)
     concatenated_segment = AudioSegment.empty()
@@ -239,7 +239,7 @@ def generate(audio_path, use_v5):
 
     clean_output = post_process_transcription(output)
 
-    #if temp_mono_path and os.path.exists(temp_mono_path):
-    #    os.remove(temp_mono_path)
+    if temp_mono_path and os.path.exists(temp_mono_path):
+        os.remove(temp_mono_path)
 
     return clean_output
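
For context, the stereo-to-mono handling that this commit re-enables in format_audio() averages the two channels of the loaded waveform before resampling to the 16 kHz rate the transcription pipeline expects. A minimal standalone sketch of that path, assuming a hypothetical input file "example.wav" (not part of the repository):

import torch
import torchaudio

def load_mono_16k(audio_path):
    # torchaudio.load returns a (channels, frames) tensor and the sample rate
    waveform, sample_rate = torchaudio.load(audio_path)

    # stereo2mono: average the two channels, keeping the channel dimension
    # so downstream code still sees a (1, frames) tensor
    if waveform.shape[0] == 2:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    # resample to 16 kHz for the transcription pipeline
    resampler = torchaudio.transforms.Resample(sample_rate, 16000)
    return resampler(waveform)

mono_16k = load_mono_16k("example.wav")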
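
The generate() hunks perform the same conversion at the pydub level so the VAD pipeline receives a mono file path: the input is downmixed with set_channels(1), exported to a temporary WAV, and that temporary file is deleted after transcription. A self-contained sketch of that flow (hypothetical input path "input.wav"; the cleanup relies on the os module, which whisper.py is assumed to import elsewhere):

import os
from pydub import AudioSegment

audio_path = "input.wav"
audio = AudioSegment.from_wav(audio_path)

temp_mono_path = None
if audio.channels != 1:                         # stereo (or multi-channel) input
    audio = audio.set_channels(1)               # downmix to a single channel
    temp_mono_path = "temp_mono.wav"
    audio.export(temp_mono_path, format="wav")  # write the mono copy to disk
    audio_path = temp_mono_path                 # hand the mono copy to the VAD step

# ... run VAD / transcription on audio_path here ...

# remove the temporary mono file once transcription is done
if temp_mono_path and os.path.exists(temp_mono_path):
    os.remove(temp_mono_path)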