alibababeig committed on
Commit
6eedc89
·
verified ·
1 Parent(s): 648ae92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -0
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import re
 
2
 
3
  import ffmpeg
4
  import gradio as gr
@@ -36,6 +37,7 @@ def initialize_model():
36
 
37
 
38
  def handle_user_input(audio_path, video_path):
 
39
  audio_asr_result = None
40
  video_asr_result = None
41
 
@@ -71,6 +73,9 @@ def handle_user_input(audio_path, video_path):
71
  # Perform ASR on the audio waveform
72
  video_asr_result = perform_asr(waveform)
73
 
 
 
 
74
  return audio_asr_result, video_asr_result
75
 
76
 
@@ -84,6 +89,7 @@ def perform_asr(waveform):
84
  else:
85
  raise ValueError(f'Bad audio array shape: "{waveform.shape}"')
86
 
 
87
  # Split the audio array into smaller frames
88
  audio_frames = []
89
  start_idx = 0
@@ -107,11 +113,17 @@ def perform_asr(waveform):
107
  audio_frames.append(waveform[start_idx:break_point])
108
  start_idx = break_point
109
 
 
 
 
 
110
  # Apply noise reduction on each audio frame
111
  audio_frames = [
112
  nr.reduce_noise(y=frame, sr=AUDIO_SAMPLING_RATE)
113
  for frame in audio_frames
114
  ]
 
 
115
 
116
  ######################### Method 1 - For Loop #########################
117
 
@@ -135,6 +147,7 @@ def perform_asr(waveform):
135
 
136
  ######################### Method 2 - Batch ############################
137
 
 
138
  # Process the entire batch of audio frames
139
  inputs = processor(
140
  audio=audio_frames,
@@ -154,9 +167,14 @@ def perform_asr(waveform):
154
  predicted_ids,
155
  skip_special_tokens=True
156
  )
 
 
157
 
 
158
  # Clean the model-generated transcriptions
159
  transcriptions = [clean_model_answer(t) for t in transcriptions]
 
 
160
 
161
  return '\n\n'.join(transcriptions)
162
 
 
1
  import re
2
+ import time
3
 
4
  import ffmpeg
5
  import gradio as gr
 
37
 
38
 
39
  def handle_user_input(audio_path, video_path):
40
+ t_start = time.time()
41
  audio_asr_result = None
42
  video_asr_result = None
43
 
 
73
  # Perform ASR on the audio waveform
74
  video_asr_result = perform_asr(waveform)
75
 
76
+ delta_t = time.time() - t_start
77
+ print(f'Total Time = {delta_t:5.1f} s\n')
78
+
79
  return audio_asr_result, video_asr_result
80
 
81
 
 
89
  else:
90
  raise ValueError(f'Bad audio array shape: "{waveform.shape}"')
91
 
92
+ t_start = time.time()
93
  # Split the audio array into smaller frames
94
  audio_frames = []
95
  start_idx = 0
 
113
  audio_frames.append(waveform[start_idx:break_point])
114
  start_idx = break_point
115
 
116
+ delta_t = time.time() - t_start
117
+ print(f'Audio Framing = {delta_t:5.1f} s')
118
+
119
+ t_start = time.time()
120
  # Apply noise reduction on each audio frame
121
  audio_frames = [
122
  nr.reduce_noise(y=frame, sr=AUDIO_SAMPLING_RATE)
123
  for frame in audio_frames
124
  ]
125
+ delta_t = time.time() - t_start
126
+ print(f'Noise Reduction = {delta_t:5.1f} s')
127
 
128
  ######################### Method 1 - For Loop #########################
129
 
 
147
 
148
  ######################### Method 2 - Batch ############################
149
 
150
+ t_start = time.time()
151
  # Process the entire batch of audio frames
152
  inputs = processor(
153
  audio=audio_frames,
 
167
  predicted_ids,
168
  skip_special_tokens=True
169
  )
170
+ delta_t = time.time() - t_start
171
+ print(f'Text Generation = {delta_t:5.1f} s')
172
 
173
+ t_start = time.time()
174
  # Clean the model-generated transcriptions
175
  transcriptions = [clean_model_answer(t) for t in transcriptions]
176
+ delta_t = time.time() - t_start
177
+ print(f'Text Cleaning = {delta_t:5.1f} s')
178
 
179
  return '\n\n'.join(transcriptions)
180