danielwm994 committed on
Commit
beeb55d
·
verified ·
1 Parent(s): b6f76f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -8
app.py CHANGED
@@ -29,8 +29,13 @@ def transcribe(inputs, task):
29
  if inputs is None:
30
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
31
 
32
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
33
- return text
 
 
 
 
 
34
 
35
 
36
  def _return_yt_html_embed(yt_url):
@@ -85,9 +90,13 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
85
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
86
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
87
 
88
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
 
 
89
 
90
- return html_embed_str, text
 
 
91
 
92
 
93
  demo = gr.Blocks()
@@ -98,7 +107,7 @@ mf_transcribe = gr.Interface(
98
  gr.Audio(sources="microphone", type="filepath"),
99
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
100
  ],
101
- outputs="text",
102
  title="Whisper Large V3: Transcribe Audio",
103
  description=(
104
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -114,7 +123,7 @@ file_transcribe = gr.Interface(
114
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
115
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
116
  ],
117
- outputs="text",
118
  title="Whisper Large V3: Transcribe Audio",
119
  description=(
120
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -130,7 +139,7 @@ yt_transcribe = gr.Interface(
130
  gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
131
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
132
  ],
133
- outputs=["html", "text"],
134
  title="Whisper Large V3: Transcribe YouTube",
135
  description=(
136
  "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
@@ -144,4 +153,3 @@ with demo:
144
  gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
145
 
146
  demo.queue().launch()
147
-
 
29
  if inputs is None:
30
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
31
 
32
+ result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
33
+ text = result["text"]
34
+ timestamps = result["chunks"]
35
+
36
+ timestamp_str = "\n".join([f"[{chunk['timestamp']}] {chunk['text']}" for chunk in timestamps])
37
+
38
+ return text, timestamp_str
39
 
40
 
41
  def _return_yt_html_embed(yt_url):
 
90
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
91
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
92
 
93
+ result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
94
+ text = result["text"]
95
+ timestamps = result["chunks"]
96
 
97
+ timestamp_str = "\n".join([f"[{chunk['timestamp']}] {chunk['text']}" for chunk in timestamps])
98
+
99
+ return html_embed_str, text, timestamp_str
100
 
101
 
102
  demo = gr.Blocks()
 
107
  gr.Audio(sources="microphone", type="filepath"),
108
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
109
  ],
110
+ outputs=["text", "text"], # Output both text and timestamps
111
  title="Whisper Large V3: Transcribe Audio",
112
  description=(
113
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
 
123
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
124
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
125
  ],
126
+ outputs=["text", "text"], # Output both text and timestamps
127
  title="Whisper Large V3: Transcribe Audio",
128
  description=(
129
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
 
139
  gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
140
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
141
  ],
142
+ outputs=["html", "text", "text"], # Output both text and timestamps
143
  title="Whisper Large V3: Transcribe YouTube",
144
  description=(
145
  "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
 
153
  gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
154
 
155
  demo.queue().launch()