allandclive committed on
Commit
bad795e
·
1 Parent(s): 97b1e27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -14
app.py CHANGED
@@ -6,25 +6,16 @@ from stitched_model import CombinedModel
6
 
7
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
8
 
9
- # Use facebook/mms-1b-all model for ASR and Sunbird/sunbird-mul-en-mbart-merged for translation
10
  model = CombinedModel("facebook/mms-1b-all", "Sunbird/sunbird-mul-en-mbart-merged", device=device)
11
 
12
- def transcribe(audio_file_mic=None, audio_file_upload=None):
13
- if audio_file_mic:
14
- audio_file = audio_file_mic
15
- elif audio_file_upload:
16
- audio_file = audio_file_upload
17
- else:
18
- return "Please upload an audio file or record one"
19
-
20
- # Load the audio file
21
  speech, sample_rate = librosa.load(audio_file, sr=16000, mono=True)
22
-
23
- # Split the audio into 10-second chunks
24
  chunk_size = 10 * 16000
25
  chunks = [speech[i:i + chunk_size] for i in range(0, len(speech), chunk_size)]
 
26
 
27
- # Process each chunk and concatenate the results
28
  transcriptions = []
29
  translations = []
30
  for chunk in chunks:
@@ -39,9 +30,21 @@ def transcribe(audio_file_mic=None, audio_file_upload=None):
39
 
40
  return transcription, translation
41
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  description = '''Luganda to English Speech Translation'''
43
 
44
- iface = gr.Interface(fn=transcribe,
45
  inputs=[
46
  gr.Audio(source="microphone", type="filepath", label="Record Audio"),
47
  gr.Audio(source="upload", type="filepath", label="Upload Audio")],
 
6
 
7
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
8
 
9
+ # Load the model
10
  model = CombinedModel("facebook/mms-1b-all", "Sunbird/sunbird-mul-en-mbart-merged", device=device)
11
 
12
def preprocess_audio(audio_file):
    """Load an audio file and split it into fixed-length chunks for ASR.

    Parameters
    ----------
    audio_file : str
        Path to an audio file in any format librosa can decode.

    Returns
    -------
    list
        1-D waveform arrays of at most 10 seconds each, resampled to
        16 kHz mono (the rate the downstream MMS ASR model expects).
    """
    # librosa.load with sr=16000 always returns a 16 kHz signal, so the
    # returned sample rate is a known constant — discard it instead of
    # binding an unused local.
    speech, _ = librosa.load(audio_file, sr=16000, mono=True)

    chunk_size = 10 * 16000  # 10 seconds of samples at 16 kHz
    return [speech[i:i + chunk_size] for i in range(0, len(speech), chunk_size)]
17
 
18
+ def transcribe(chunks):
19
  transcriptions = []
20
  translations = []
21
  for chunk in chunks:
 
30
 
31
  return transcription, translation
32
 
33
def process_audio(audio_file_mic=None, audio_file_upload=None):
    """Gradio entry point: resolve the input source, chunk the audio,
    then transcribe and translate it.

    Microphone input takes priority over an uploaded file. If neither
    is provided, a human-readable error message is returned instead.
    """
    # Prefer the recorded clip; fall back to the upload.
    source = audio_file_mic or audio_file_upload
    if not source:
        return "Please upload an audio file or record one"

    audio_chunks = preprocess_audio(source)
    text, translated = transcribe(audio_chunks)
    return text, translated
44
+
45
  description = '''Luganda to English Speech Translation'''
46
 
47
+ iface = gr.Interface(fn=process_audio,
48
  inputs=[
49
  gr.Audio(source="microphone", type="filepath", label="Record Audio"),
50
  gr.Audio(source="upload", type="filepath", label="Upload Audio")],