danielwm994 committed on
Commit
af45ac9
·
verified ·
1 Parent(s): 244af64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -9
app.py CHANGED
@@ -1,11 +1,9 @@
1
  import spaces
2
  import torch
3
-
4
  import gradio as gr
5
  import yt_dlp as youtube_dl
6
  from transformers import pipeline
7
  from transformers.pipelines.audio_utils import ffmpeg_read
8
-
9
  import tempfile
10
  import os
11
 
@@ -23,7 +21,6 @@ pipe = pipeline(
23
  device=device,
24
  )
25
 
26
-
27
  @spaces.GPU
28
  def transcribe(inputs, task):
29
  if inputs is None:
@@ -37,10 +34,48 @@ def transcribe(inputs, task):
37
  word_timestamps = []
38
  for chunk in timestamps:
39
  for word_info in chunk["words"]:
40
- word_timestamps.append(f"{word_info['word']} [{word_info['start']}-{word_info['end']}]")
41
 
42
  return "\n".join(word_timestamps)
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  @spaces.GPU
46
  def yt_transcribe(yt_url, task, max_filesize=75.0):
@@ -63,12 +98,11 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
63
  word_timestamps = []
64
  for chunk in timestamps:
65
  for word_info in chunk["words"]:
66
- word_timestamps.append(f"{word_info['word']} [{word_info['start']}-{word_info['end']}]")
67
 
68
  return html_embed_str, "\n".join(word_timestamps)
69
 
70
 
71
-
72
  demo = gr.Blocks()
73
 
74
  mf_transcribe = gr.Interface(
@@ -77,7 +111,7 @@ mf_transcribe = gr.Interface(
77
  gr.Audio(sources="microphone", type="filepath"),
78
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
79
  ],
80
- outputs=["text", "text"], # Output both text and timestamps
81
  title="Whisper Large V3: Transcribe Audio",
82
  description=(
83
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -93,7 +127,7 @@ file_transcribe = gr.Interface(
93
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
94
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
95
  ],
96
- outputs=["text", "text"], # Output both text and timestamps
97
  title="Whisper Large V3: Transcribe Audio",
98
  description=(
99
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -109,7 +143,7 @@ yt_transcribe = gr.Interface(
109
  gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
110
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
111
  ],
112
- outputs=["html", "text", "text"], # Output both text and timestamps
113
  title="Whisper Large V3: Transcribe YouTube",
114
  description=(
115
  "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
 
1
  import spaces
2
  import torch
 
3
  import gradio as gr
4
  import yt_dlp as youtube_dl
5
  from transformers import pipeline
6
  from transformers.pipelines.audio_utils import ffmpeg_read
 
7
  import tempfile
8
  import os
9
 
 
21
  device=device,
22
  )
23
 
 
24
  @spaces.GPU
25
  def transcribe(inputs, task):
26
  if inputs is None:
 
34
  word_timestamps = []
35
  for chunk in timestamps:
36
  for word_info in chunk["words"]:
37
+ word_timestamps.append(f"{word_info['word']} [{word_info['start']:.2f}-{word_info['end']:.2f}]")
38
 
39
  return "\n".join(word_timestamps)
40
 
41
+ def _return_yt_html_embed(yt_url):
42
+ video_id = yt_url.split("?v=")[-1]
43
+ HTML_str = (
44
+ f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
45
+ " </center>"
46
+ )
47
+ return HTML_str
48
+
49
def download_yt_audio(yt_url, filename):
    """Download the media for ``yt_url`` to ``filename`` after a length check.

    The video's metadata is fetched first (without downloading) and its
    ``duration_string`` is converted to seconds. Videos longer than
    ``YT_LENGTH_LIMIT_S`` are rejected before any download starts.

    Args:
        yt_url: URL of the YouTube video.
        filename: Output path template handed to yt-dlp as ``outtmpl``.

    Raises:
        gr.Error: If metadata extraction fails, the video exceeds
            ``YT_LENGTH_LIMIT_S`` seconds, or the download itself fails.
    """
    # Local import: ``time`` is not among the imports visible at the top of
    # the file, and a function-scope import is harmless if it is there.
    import time

    try:
        # Context manager so the metadata-only loader is closed as well.
        with youtube_dl.YoutubeDL() as info_loader:
            info = info_loader.extract_info(yt_url, download=False)
    except youtube_dl.utils.DownloadError as err:
        raise gr.Error(str(err)) from err

    # ``duration_string`` is colon-separated ("SS", "MM:SS" or "HH:MM:SS");
    # folding in base 60 handles every form without padding the list first.
    file_length_s = 0
    for part in info["duration_string"].split(":"):
        file_length_s = file_length_s * 60 + int(part)

    if file_length_s > YT_LENGTH_LIMIT_S:
        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
        raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")

    ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([yt_url])
        except (youtube_dl.utils.DownloadError, youtube_dl.utils.ExtractorError) as err:
            # The metadata step above already expects DownloadError from
            # yt-dlp, so catch it here too alongside ExtractorError.
            raise gr.Error(str(err)) from err
79
 
80
  @spaces.GPU
81
  def yt_transcribe(yt_url, task, max_filesize=75.0):
 
98
  word_timestamps = []
99
  for chunk in timestamps:
100
  for word_info in chunk["words"]:
101
+ word_timestamps.append(f"{word_info['word']} [{word_info['start']:.2f}-{word_info['end']:.2f}]")
102
 
103
  return html_embed_str, "\n".join(word_timestamps)
104
 
105
 
 
106
  demo = gr.Blocks()
107
 
108
  mf_transcribe = gr.Interface(
 
111
  gr.Audio(sources="microphone", type="filepath"),
112
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
113
  ],
114
+ outputs="text",
115
  title="Whisper Large V3: Transcribe Audio",
116
  description=(
117
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
 
127
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
128
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
129
  ],
130
+ outputs="text",
131
  title="Whisper Large V3: Transcribe Audio",
132
  description=(
133
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
 
143
  gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
144
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
145
  ],
146
+ outputs=["html", "text"],
147
  title="Whisper Large V3: Transcribe YouTube",
148
  description=(
149
  "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"