Spaces:
Sleeping
Sleeping
changes
Browse files- __pycache__/utils.cpython-312.pyc +0 -0
- app.py +55 -44
- clean.py +12 -0
- transcribe.py +1 -0
- utils.py +13 -2
__pycache__/utils.cpython-312.pyc
CHANGED
Binary files a/__pycache__/utils.cpython-312.pyc and b/__pycache__/utils.cpython-312.pyc differ
|
|
app.py
CHANGED
@@ -4,51 +4,62 @@ import transcribe
|
|
4 |
|
5 |
with gr.Blocks(theme="base") as demo:
|
6 |
gr.Markdown("<center><h1> π Transcription Delight </h1></center>")
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
)
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
gr.Checkbox(label="Diarize Speakers (coming soon)", interactive=False)
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
demo.launch()
|
|
|
4 |
|
5 |
with gr.Blocks(theme="base") as demo:
|
6 |
gr.Markdown("<center><h1> π Transcription Delight </h1></center>")
|
7 |
+
gr.Markdown("### Step 1: Generate Raw Transcript")
|
8 |
+
with gr.Row():
|
9 |
+
with gr.Column():
|
10 |
+
source = gr.Radio(label="Source type", choices=[("Audio", "audio"), ("Video", "video"), ("YouTube URL", "youtube")], value="audio")
|
11 |
+
@gr.render(inputs=source)
|
12 |
+
def show_source(s):
|
13 |
+
if s == "audio":
|
14 |
+
source_component = gr.Audio(type="filepath")
|
15 |
+
elif s == "video":
|
16 |
+
source_component = gr.Video()
|
17 |
+
else:
|
18 |
+
source_component = gr.Textbox(placeholder="https://www.youtube.com/watch?v=44vi31hehw4")
|
19 |
+
preview = gr.HTML(label="Video preview")
|
20 |
+
source_component.change(utils.convert_to_embed_url, source_component, preview)
|
21 |
+
# transcribe_btn.click(
|
22 |
+
# lambda : gr.Tabs(selected="result"),
|
23 |
+
# None,
|
24 |
+
# tabs
|
25 |
+
# ).then(
|
26 |
+
# utils.generate_audio,
|
27 |
+
# [source, source_component],
|
28 |
+
# [download_audio],
|
29 |
+
# show_progress="minimal"
|
30 |
+
# ).then(
|
31 |
+
# transcribe.transcribe,
|
32 |
+
# [download_audio],
|
33 |
+
# [preliminary_transcript],
|
34 |
+
# show_progress="hidden"
|
35 |
+
# )
|
|
|
36 |
|
37 |
+
with gr.Column():
|
38 |
+
transcribe_btn = gr.Button("Transcribe audio π", variant="primary")
|
39 |
+
preliminary_transcript = gr.Textbox(info="Raw transcript", lines=10, show_copy_button=True, show_label=False, interactive=False)
|
|
|
40 |
|
41 |
+
|
42 |
+
source.change(utils.transcribe_button, source, transcribe_btn)
|
43 |
+
|
44 |
+
gr.Markdown("### Step 2: Clean with an LLM")
|
45 |
+
with gr.Row():
|
46 |
+
with gr.Column():
|
47 |
+
cleanup_options = gr.CheckboxGroup(label="Cleanup Transcript with LLM", choices=["Remove typos", "Separate into paragraphs"])
|
48 |
+
llm_prompt = gr.Textbox(label="LLM Prompt", visible=False, lines=3)
|
49 |
+
cleanup_options.change(
|
50 |
+
utils.generate_prompt,
|
51 |
+
cleanup_options,
|
52 |
+
llm_prompt
|
53 |
+
)
|
54 |
+
|
55 |
+
with gr.Column():
|
56 |
+
clean_btn = gr.Button("Clean transcript β¨", variant="primary", interactive=False)
|
57 |
+
gr.Markdown("*Final transcript will appear here*")
|
58 |
+
# with gr.Tab("Result", id="result"):
|
59 |
+
# with gr.Row():
|
60 |
+
# with gr.Column():
|
61 |
+
# download_audio = gr.DownloadButton("Downloading Audio File (please wait...)", variant="primary", interactive=False, size="sm")
|
62 |
+
# preliminary_transcript = gr.Textbox(info="Raw transcript", lines=10, show_copy_button=True, show_label=False, interactive=False)
|
63 |
+
# with gr.Column():
|
64 |
|
65 |
demo.launch()
|
clean.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import InferenceClient
|
2 |
+
|
3 |
+
MODEL_NAME = "meta-llama/Meta-Llama-3-70b-Instruct"
|
4 |
+
|
5 |
+
def clean_transcript(prompt, transcript):
    """Stream an LLM-cleaned version of a transcript to stdout.

    The instruction and the raw transcript are sent together as one user
    message to the chat model named by ``MODEL_NAME``; every streamed token
    is printed as soon as it arrives.

    Args:
        prompt: Instruction telling the model how to clean the transcript.
        transcript: Raw transcript text the instruction applies to.

    Returns:
        None. The model output is only written to stdout.
    """
    # Send the instruction followed by the text it applies to, so that both
    # arguments actually reach the model.
    messages = [
        {"role": "user", "content": f"{prompt}\n\n{transcript}"},
    ]
    client = InferenceClient(model=MODEL_NAME)
    # NOTE(review): max_tokens=200 caps the reply length, so the cleaned
    # version of a long transcript will presumably be cut off -- confirm.
    for chunk in client.chat_completion(messages, max_tokens=200, stream=True):
        token = chunk.choices[0].delta.content
        # Presumably a streamed delta can arrive without text content; skip
        # it instead of printing the literal "None".
        if token:
            print(token, end="")
|
transcribe.py
CHANGED
@@ -25,3 +25,4 @@ def transcribe_audio_in_chunks(audio_path, chunk_length_ms):
|
|
25 |
transcription = transcribe_segment(chunk, i)
|
26 |
yield transcription
|
27 |
|
|
|
|
25 |
transcription = transcribe_segment(chunk, i)
|
26 |
yield transcription
|
27 |
|
28 |
+
|
utils.py
CHANGED
@@ -43,9 +43,9 @@ def convert_video_to_audio(input_file):
|
|
43 |
|
44 |
def transcribe_button(source):
|
45 |
if source == "audio":
|
46 |
-
return gr.Button("Transcribe audio
|
47 |
else:
|
48 |
-
return gr.Button("Transcribe video
|
49 |
|
50 |
def generate_audio(source, source_file):
|
51 |
if source == "audio":
|
@@ -57,3 +57,14 @@ def generate_audio(source, source_file):
|
|
57 |
gr.Info("Downloading audio from YouTube...")
|
58 |
audio_file = download_audio_from_youtube(source_file)
|
59 |
return gr.DownloadButton("Downloading Audio File", value=audio_file, interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
def transcribe_button(source):
    """Build the transcribe button labelled for the chosen source type.

    Args:
        source: Selected source type; ``"audio"`` gets the audio label and
            every other value gets the video label.

    Returns:
        A ``gr.Button`` carrying the matching label.
    """
    label = "Transcribe audio π" if source == "audio" else "Transcribe video π"
    return gr.Button(label)
|
49 |
|
50 |
def generate_audio(source, source_file):
|
51 |
if source == "audio":
|
|
|
57 |
gr.Info("Downloading audio from YouTube...")
|
58 |
audio_file = download_audio_from_youtube(source_file)
|
59 |
return gr.DownloadButton("Downloading Audio File", value=audio_file, interactive=True)
|
60 |
+
|
61 |
+
def generate_prompt(cleanup):
    """Return a Textbox update whose placeholder matches the cleanup selection.

    Args:
        cleanup: Selected cleanup option labels (a list; may be empty or
            None when nothing is selected).

    Returns:
        A hidden ``gr.Textbox`` when nothing, or no recognised combination,
        is selected; otherwise a visible ``gr.Textbox`` whose placeholder is
        the LLM prompt for the selected options.
    """
    intro = "The following is a raw transcript from an automatic transcription system. "
    tasks = {
        frozenset({"Remove typos"}): "Remove the typos from the transcript.",
        frozenset({"Separate into paragraphs"}): (
            "Separate the transcript into paragraphs based on logical breaks."
        ),
        frozenset({"Remove typos", "Separate into paragraphs"}): (
            "Remove the typos and separate the transcript into paragraphs "
            "based on logical breaks."
        ),
    }
    # Match on the set of selected options rather than on list equality, so
    # the result does not depend on the order in which boxes were ticked.
    task = tasks.get(frozenset(cleanup or ()))
    if task is None:
        # Explicit fallback: hide the prompt box instead of returning None.
        return gr.Textbox(visible=False)
    return gr.Textbox(visible=True, placeholder=intro + task)
|
70 |
+
|