Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
-
import
|
2 |
|
|
|
3 |
import gradio as gr
|
4 |
import yt_dlp as youtube_dl
|
5 |
from transformers import pipeline
|
@@ -42,11 +43,8 @@ def transcribe(inputs, prompt):
|
|
42 |
prompt = "。" if not prompt else prompt
|
43 |
generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors='pt').to(device)
|
44 |
text = pipe(inputs, generate_kwargs=generate_kwargs)['text']
|
45 |
-
|
46 |
-
|
47 |
-
elif text.startswith(prompt"):
|
48 |
-
text = text[len(prompt):]
|
49 |
-
return text
|
50 |
|
51 |
def _return_yt_html_embed(yt_url):
|
52 |
video_id = yt_url.split("?v=")[-1]
|
@@ -91,11 +89,8 @@ def yt_transcribe(yt_url, prompt, max_filesize=75.0):
|
|
91 |
prompt = "。" if not prompt else prompt
|
92 |
generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors='pt').to(device)
|
93 |
text = pipe(inputs, generate_kwargs=generate_kwargs)['text']
|
94 |
-
|
95 |
-
|
96 |
-
elif text.startswith(prompt"):
|
97 |
-
text = text[len(prompt):]
|
98 |
-
return html_embed_str, text
|
99 |
|
100 |
|
101 |
demo = gr.Blocks()
|
|
|
1 |
+
import re
|
2 |
|
3 |
+
import torch
|
4 |
import gradio as gr
|
5 |
import yt_dlp as youtube_dl
|
6 |
from transformers import pipeline
|
|
|
43 |
prompt = "。" if not prompt else prompt
|
44 |
generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors='pt').to(device)
|
45 |
text = pipe(inputs, generate_kwargs=generate_kwargs)['text']
|
46 |
+
# currently the ASR pipeline prepends the prompt to the transcription, so strip it from the start of the text
|
47 |
+
return re.sub(rf"\A\s*{prompt}\s*", "", text)
|
|
|
|
|
|
|
48 |
|
49 |
def _return_yt_html_embed(yt_url):
|
50 |
video_id = yt_url.split("?v=")[-1]
|
|
|
89 |
prompt = "。" if not prompt else prompt
|
90 |
generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors='pt').to(device)
|
91 |
text = pipe(inputs, generate_kwargs=generate_kwargs)['text']
|
92 |
+
# currently the ASR pipeline prepends the prompt to the transcription, so strip it from the start of the text
|
93 |
+
return html_embed_str, re.sub(rf"\A\s*{prompt}\s*", "", text)
|
|
|
|
|
|
|
94 |
|
95 |
|
96 |
demo = gr.Blocks()
|