TrialAccountHF committed
Commit 58442c5
1 Parent(s): 8cb7f84

Update app.py

Files changed (1)
  1. app.py +1 -60
app.py CHANGED
@@ -27,7 +27,7 @@ import contextlib
 from transformers import pipeline
 import psutil
 
-whisper_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2"]
+whisper_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2", "large-v3"]
 source_languages = {
     "en": "English",
     "zh": "Chinese",
@@ -132,9 +132,6 @@ source_languages = {
 
 source_language_list = [key[0] for key in source_languages.items()]
 
-MODEL_NAME = "vumichien/whisper-medium-jp"
-lang = "ja"
-
 device = 0 if torch.cuda.is_available() else "cpu"
 pipe = pipeline(
     task="automatic-speech-recognition",
@@ -149,23 +146,6 @@ embedding_model = PretrainedSpeakerEmbedding(
     "speechbrain/spkrec-ecapa-voxceleb",
     device=torch.device("cuda" if torch.cuda.is_available() else "cpu"))
 
-def transcribe(microphone, file_upload):
-    warn_output = ""
-    if (microphone is not None) and (file_upload is not None):
-        warn_output = (
-            "WARNING: You've uploaded an audio file and used the microphone. "
-            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
-        )
-
-    elif (microphone is None) and (file_upload is None):
-        return "ERROR: You have to either use the microphone or upload an audio file"
-
-    file = microphone if microphone is not None else file_upload
-
-    text = pipe(file)["text"]
-
-    return warn_output + text
-
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     HTML_str = (
@@ -431,43 +411,4 @@ with demo:
         system_info.render()
         gr.Markdown('''<center><img src='https://visitor-badge.glitch.me/badge?page_id=WhisperDiarizationSpeakers' alt='visitor badge'><a href="https://opensource.org/licenses/Apache-2.0"><img src='https://img.shields.io/badge/License-Apache_2.0-blue.svg' alt='License: Apache 2.0'></center>''')
 
-
-
-    with gr.Tab("Whisper Transcribe Japanese Audio"):
-        gr.Markdown(f'''
-            <div>
-            <h1 style='text-align: center'>Whisper Transcribe Japanese Audio</h1>
-            </div>
-            Transcribe long-form microphone or audio inputs with the click of a button! The fine-tuned
-            checkpoint <a href='https://huggingface.co/{MODEL_NAME}' target='_blank'><b>{MODEL_NAME}</b></a> to transcribe audio files of arbitrary length.
-        ''')
-        microphone = gr.inputs.Audio(source="microphone", type="filepath", optional=True)
-        upload = gr.inputs.Audio(source="upload", type="filepath", optional=True)
-        transcribe_btn = gr.Button("Transcribe Audio")
-        text_output = gr.Textbox()
-        with gr.Row():
-            gr.Markdown('''
-            ### You can test by following examples:
-            ''')
-        examples = gr.Examples(examples=
-            [ "sample1.wav",
-              "sample2.wav",
-            ],
-            label="Examples", inputs=[upload])
-        transcribe_btn.click(transcribe, [microphone, upload], outputs=text_output)
-
-    with gr.Tab("Whisper Transcribe Japanese YouTube"):
-        gr.Markdown(f'''
-            <div>
-            <h1 style='text-align: center'>Whisper Transcribe Japanese YouTube</h1>
-            </div>
-            Transcribe long-form YouTube videos with the click of a button! The fine-tuned checkpoint:
-            <a href='https://huggingface.co/{MODEL_NAME}' target='_blank'><b>{MODEL_NAME}</b></a> to transcribe audio files of arbitrary length.
-        ''')
-        youtube_link = gr.Textbox(label="Youtube url", lines=1, interactive=True)
-        yt_transcribe_btn = gr.Button("Transcribe YouTube")
-        text_output2 = gr.Textbox()
-        html_output = gr.Markdown()
-        yt_transcribe_btn.click(yt_transcribe, [youtube_link], outputs=[html_output, text_output2])
-
 demo.launch(debug=True)
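The only addition in this commit is "large-v3" in `whisper_models`; how that list is consumed elsewhere in `app.py` is outside the diff. As a rough orientation only, the usual pattern is to feed the list into a Gradio dropdown for model selection. The sketch below assumes the plain `openai-whisper` package and hypothetical component names; it is not taken from this Space, which may use a different backend.

```python
# Minimal sketch (not from this repo): wiring the extended model list into a
# Gradio dropdown. Assumes the openai-whisper package; "large-v3" needs a
# recent release of it.
import gradio as gr
import whisper

whisper_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2", "large-v3"]

def transcribe_with_model(audio_path: str, model_name: str) -> str:
    # Load the selected checkpoint on demand and return the transcript text.
    model = whisper.load_model(model_name)
    return model.transcribe(audio_path)["text"]

with gr.Blocks() as sketch:
    audio_in = gr.Audio(type="filepath", label="Audio file")
    model_dd = gr.Dropdown(choices=whisper_models, value="base", label="Whisper model")
    transcript = gr.Textbox(label="Transcript")
    gr.Button("Transcribe").click(transcribe_with_model, [audio_in, model_dd], transcript)

if __name__ == "__main__":
    sketch.launch()
```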