TrialAccountHF committed
Commit 58442c5
1 Parent(s): 8cb7f84

Update app.py

Files changed (1)
  1. app.py +1 -60
app.py CHANGED
@@ -27,7 +27,7 @@ import contextlib
 from transformers import pipeline
 import psutil
 
-whisper_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2"]
+whisper_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2", "large-v3"]
 source_languages = {
     "en": "English",
     "zh": "Chinese",
@@ -132,9 +132,6 @@ source_languages = {
 
 source_language_list = [key[0] for key in source_languages.items()]
 
-MODEL_NAME = "vumichien/whisper-medium-jp"
-lang = "ja"
-
 device = 0 if torch.cuda.is_available() else "cpu"
 pipe = pipeline(
     task="automatic-speech-recognition",
@@ -149,23 +146,6 @@ embedding_model = PretrainedSpeakerEmbedding(
     "speechbrain/spkrec-ecapa-voxceleb",
     device=torch.device("cuda" if torch.cuda.is_available() else "cpu"))
 
-def transcribe(microphone, file_upload):
-    warn_output = ""
-    if (microphone is not None) and (file_upload is not None):
-        warn_output = (
-            "WARNING: You've uploaded an audio file and used the microphone. "
-            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
-        )
-
-    elif (microphone is None) and (file_upload is None):
-        return "ERROR: You have to either use the microphone or upload an audio file"
-
-    file = microphone if microphone is not None else file_upload
-
-    text = pipe(file)["text"]
-
-    return warn_output + text
-
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     HTML_str = (
@@ -431,43 +411,4 @@ with demo:
         system_info.render()
         gr.Markdown('''<center><img src='https://visitor-badge.glitch.me/badge?page_id=WhisperDiarizationSpeakers' alt='visitor badge'><a href="https://opensource.org/licenses/Apache-2.0"><img src='https://img.shields.io/badge/License-Apache_2.0-blue.svg' alt='License: Apache 2.0'></center>''')
 
-
-
-    with gr.Tab("Whisper Transcribe Japanese Audio"):
-        gr.Markdown(f'''
-            <div>
-            <h1 style='text-align: center'>Whisper Transcribe Japanese Audio</h1>
-            </div>
-            Transcribe long-form microphone or audio inputs with the click of a button! The fine-tuned
-            checkpoint <a href='https://huggingface.co/{MODEL_NAME}' target='_blank'><b>{MODEL_NAME}</b></a> to transcribe audio files of arbitrary length.
-        ''')
-        microphone = gr.inputs.Audio(source="microphone", type="filepath", optional=True)
-        upload = gr.inputs.Audio(source="upload", type="filepath", optional=True)
-        transcribe_btn = gr.Button("Transcribe Audio")
-        text_output = gr.Textbox()
-        with gr.Row():
-            gr.Markdown('''
-            ### You can test by following examples:
-            ''')
-        examples = gr.Examples(examples=
-            [ "sample1.wav",
-              "sample2.wav",
-            ],
-            label="Examples", inputs=[upload])
-        transcribe_btn.click(transcribe, [microphone, upload], outputs=text_output)
-
-    with gr.Tab("Whisper Transcribe Japanese YouTube"):
-        gr.Markdown(f'''
-            <div>
-            <h1 style='text-align: center'>Whisper Transcribe Japanese YouTube</h1>
-            </div>
-            Transcribe long-form YouTube videos with the click of a button! The fine-tuned checkpoint:
-            <a href='https://huggingface.co/{MODEL_NAME}' target='_blank'><b>{MODEL_NAME}</b></a> to transcribe audio files of arbitrary length.
-        ''')
-        youtube_link = gr.Textbox(label="Youtube url", lines=1, interactive=True)
-        yt_transcribe_btn = gr.Button("Transcribe YouTube")
-        text_output2 = gr.Textbox()
-        html_output = gr.Markdown()
-        yt_transcribe_btn.click(yt_transcribe, [youtube_link], outputs=[html_output, text_output2])
-
 demo.launch(debug=True)
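The only addition in this commit is "large-v3" in `whisper_models`; how that list is consumed elsewhere in `app.py` is outside the diff. As a rough orientation only, the usual pattern is to feed the list into a Gradio dropdown for model selection. The sketch below assumes the plain `openai-whisper` package and hypothetical component names; it is not taken from this Space, which may use a different backend.

```python
# Minimal sketch (not from this repo): wiring the extended model list into a
# Gradio dropdown. Assumes the openai-whisper package; "large-v3" needs a
# recent release of it.
import gradio as gr
import whisper

whisper_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2", "large-v3"]

def transcribe_with_model(audio_path: str, model_name: str) -> str:
    # Load the selected checkpoint on demand and return the transcript text.
    model = whisper.load_model(model_name)
    return model.transcribe(audio_path)["text"]

with gr.Blocks() as sketch:
    audio_in = gr.Audio(type="filepath", label="Audio file")
    model_dd = gr.Dropdown(choices=whisper_models, value="base", label="Whisper model")
    transcript = gr.Textbox(label="Transcript")
    gr.Button("Transcribe").click(transcribe_with_model, [audio_in, model_dd], transcript)

if __name__ == "__main__":
    sketch.launch()
```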