whisper-large-v3-1-1

Runtime error

App Files Community

danielwm994 committed on Oct 16, 2024

Commit

beeb55d

verified ·

1 Parent(s): b6f76f3

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -8

app.py CHANGED Viewed

@@ -29,8 +29,13 @@ def transcribe(inputs, task):
     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
-    return  text
 def _return_yt_html_embed(yt_url):
@@ -85,9 +90,13 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
     inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
     inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
-    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
-    return html_embed_str, text
 demo = gr.Blocks()
@@ -98,7 +107,7 @@ mf_transcribe = gr.Interface(
         gr.Audio(sources="microphone", type="filepath"),
         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
     ],
-    outputs="text",
     title="Whisper Large V3: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -114,7 +123,7 @@ file_transcribe = gr.Interface(
         gr.Audio(sources="upload", type="filepath", label="Audio file"),
         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
     ],
-    outputs="text",
     title="Whisper Large V3: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -130,7 +139,7 @@ yt_transcribe = gr.Interface(
         gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
     ],
-    outputs=["html", "text"],
     title="Whisper Large V3: Transcribe YouTube",
     description=(
         "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
@@ -144,4 +153,3 @@ with demo:
     gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
 demo.queue().launch()

     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+    result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
+    text = result["text"]
+    timestamps = result["chunks"]
+    timestamp_str = "\n".join([f"[{chunk['timestamp']}] {chunk['text']}" for chunk in timestamps])
+    return text, timestamp_str
 def _return_yt_html_embed(yt_url):
     inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
     inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
+    result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
+    text = result["text"]
+    timestamps = result["chunks"]
+    timestamp_str = "\n".join([f"[{chunk['timestamp']}] {chunk['text']}" for chunk in timestamps])
+    return html_embed_str, text, timestamp_str
 demo = gr.Blocks()
         gr.Audio(sources="microphone", type="filepath"),
         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
     ],
+    outputs=["text", "text"],  # Output both text and timestamps
     title="Whisper Large V3: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
         gr.Audio(sources="upload", type="filepath", label="Audio file"),
         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
     ],
+    outputs=["text", "text"],  # Output both text and timestamps
     title="Whisper Large V3: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
         gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
     ],
+    outputs=["html", "text", "text"],  # Output both text and timestamps
     title="Whisper Large V3: Transcribe YouTube",
     description=(
         "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
     gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
 demo.queue().launch()