Bookie-Whisper-capitalised-Macedonian-ASR

Sleeping

App Files Community

Porjaz committed on Dec 25, 2024

Commit

eeddc3d

verified ·

1 Parent(s): e2e45fb

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -44

app.py CHANGED Viewed

@@ -31,30 +31,6 @@ def recap_sentence(string):
     return recap_result
-@spaces.GPU(duration=30)
-def return_prediction_w2v2(mic=None, file=None, device=device):
-    if mic is not None:
-        waveform, sr = librosa.load(mic, sr=16000)
-        waveform = waveform[:60*sr]
-        w2v2_result = w2v2_classifier.classify_file_w2v2(waveform, device)
-    elif file is not None:
-        waveform, sr = librosa.load(file, sr=16000)
-        waveform = waveform[:60*sr]
-        w2v2_result = w2v2_classifier.classify_file_w2v2(waveform, device)
-    else:
-        return "You must either provide a mic recording or a file"
-    recap_result = recap_sentence(w2v2_result[0])
-    # If the letter after punct is small, recap it
-    for i, letter in enumerate(recap_result):
-        if i > 1 and recap_result[i-2] in [".", "!", "?"] and letter.islower():
-            recap_result = recap_result[:i] + letter.upper() + recap_result[i+1:]
-    clean_up_memory()
-    return recap_result
 @spaces.GPU(duration=30)
 def return_prediction_whisper_mic(mic=None, progress=gr.Progress(), device=device):
     progress(0, desc="Транскриптот се генерира")
@@ -64,7 +40,7 @@ def return_prediction_whisper_mic(mic=None, progress=gr.Progress(), device=devic
         # waveform = waveform[:30*sr]
         whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
     else:
-        return "You must provide a mic recording", "error.txt"
     recap_result = ""
     prev_segment = ""
@@ -108,7 +84,7 @@ def return_prediction_whisper_file(file=None, progress=gr.Progress(), device=dev
         # waveform = waveform[:3600*sr]
         whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
     else:
-       return "You must provide a mic recording", "error.txt"
     recap_result = ""
     prev_segment = ""
@@ -161,7 +137,7 @@ recap_model.eval()
 with gr.Blocks() as mic_transcribe_whisper:
     def clear_outputs():
-        return {audio_input: None, output_text: "", download_file: None}
     with gr.Row():
         audio_input = gr.Audio(sources="microphone", type="filepath", label="Record Audio")
@@ -223,16 +199,6 @@ project_description = '''
 4. **Никола Стиков**
 Оваа колаборација е дел од активностите на **Центарот за напредни интердисциплинарни истражувања ([ЦеНИИс](https://ukim.edu.mk/en/centri/centar-za-napredni-interdisciplinarni-istrazhuvanja-ceniis))** при УКИМ.
-## Во тренирањето на овој модел се употребени податоци од:
-1. Дигитален архив за етнолошки и антрополошки ресурси ([ДАЕАР](https://iea.pmf.ukim.edu.mk/tabs/view/61f236ed7d95176b747c20566ddbda1a)) при Институтот за етнологија и антропологија, Природно-математички факултет при УКИМ.
-2. Аудио верзија на меѓународното списание [„ЕтноАнтропоЗум“](https://etno.pmf.ukim.mk/index.php/eaz/issue/archive) на Институтот за етнологија и антропологија, Природно-математички факултет при УКИМ.
-3. Аудио подкастот [„Обични луѓе“](https://obicniluge.mk/episodes/) на Илина Јакимовска
-4. Научните видеа од серијалот [„Наука за деца“](http://naukazadeca.mk), фондација [КАНТАРОТ](https://qantarot.substack.com/)
-5. Македонска верзија на [Mozilla Common Voice](https://commonvoice.mozilla.org/en/datasets) (верзија 18.0)
-## Како да придонесете за подобрување на македонските модели за препознавање на говор?
-На  следниот [линк](https://drive.google.com/file/d/1YdZJz9o1X8AMc6J4MNPnVZjASyIXnvoZ/view?usp=sharing) ќе најдете инструкции за тоа како да донирате македонски говор преку платформата Mozilla Common Voice.
 '''
 # Custom CSS
@@ -261,12 +227,6 @@ with transcriber_app:
     state = gr.State()
     gr.Markdown(project_description, elem_classes="custom-markdown")
-    # gr.TabbedInterface(
-    #     [mic_transcribe_whisper, mic_transcribe_compare],
-    #     ["Буки-Whisper транскрипција", "Споредба на модели"],
-    # )
-    # state = gr.State(value=[], delete_callback=lambda v: print("STATE DELETED"))
     gr.TabbedInterface(
         [mic_transcribe_whisper, file_transcribe_whisper],
         [" Буки-Whisper транскрипција од микрофон", "Буки-Whisper транскрипција од фајл"],
@@ -275,7 +235,6 @@ with transcriber_app:
     transcriber_app.unload(return_prediction_whisper_mic)
     transcriber_app.unload(return_prediction_whisper_file)
-    transcriber_app.unload(return_prediction_w2v2)
 # transcriber_app.launch(debug=True, share=True, ssl_verify=False)

     return recap_result
 @spaces.GPU(duration=30)
 def return_prediction_whisper_mic(mic=None, progress=gr.Progress(), device=device):
     progress(0, desc="Транскриптот се генерира")
         # waveform = waveform[:30*sr]
         whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
     else:
+        return "You must provide a mic recording"
     recap_result = ""
     prev_segment = ""
         # waveform = waveform[:3600*sr]
         whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
     else:
+       return "You must provide a mic recording"
     recap_result = ""
     prev_segment = ""
 with gr.Blocks() as mic_transcribe_whisper:
     def clear_outputs():
+        return None, "", None
     with gr.Row():
         audio_input = gr.Audio(sources="microphone", type="filepath", label="Record Audio")
 4. **Никола Стиков**
 Оваа колаборација е дел од активностите на **Центарот за напредни интердисциплинарни истражувања ([ЦеНИИс](https://ukim.edu.mk/en/centri/centar-za-napredni-interdisciplinarni-istrazhuvanja-ceniis))** при УКИМ.
 '''
 # Custom CSS
     state = gr.State()
     gr.Markdown(project_description, elem_classes="custom-markdown")
     gr.TabbedInterface(
         [mic_transcribe_whisper, file_transcribe_whisper],
         [" Буки-Whisper транскрипција од микрофон", "Буки-Whisper транскрипција од фајл"],
     transcriber_app.unload(return_prediction_whisper_mic)
     transcriber_app.unload(return_prediction_whisper_file)
 # transcriber_app.launch(debug=True, share=True, ssl_verify=False)