razhan committed on
Commit
4295c91
1 Parent(s): edddc85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -12
app.py CHANGED
@@ -1,11 +1,14 @@
1
  import torch
2
 
3
  import gradio as gr
4
- import pytube as pt
 
5
  from transformers import pipeline
6
  from huggingface_hub import model_info
7
 
8
  MODEL_NAME = "razhan/whisper-small-ckb"
 
 
9
 
10
  device = 0 if torch.cuda.is_available() else "cpu"
11
  pipe = pipeline(
@@ -36,23 +39,79 @@ def transcribe(microphone, file_upload):
36
 
37
 
38
  def _return_yt_html_embed(yt_url):
39
- video_id = yt_url.split("?v=")[-1]
 
 
 
 
40
  HTML_str = (
41
- f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
42
- " </center>"
 
43
  )
44
  return HTML_str
45
 
46
 
47
- def yt_transcribe(yt_url):
48
- yt = pt.YouTube(yt_url)
49
- html_embed_str = _return_yt_html_embed(yt_url)
50
- stream = yt.streams.filter(only_audio=True)[0]
51
- stream.download(filename="audio.mp3")
52
 
53
- text = pipe("audio.mp3")["text"]
54
 
55
- return html_embed_str, text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
 
58
  demo = gr.Blocks()
@@ -77,7 +136,14 @@ mf_transcribe = gr.Interface(
77
  yt_transcribe = gr.Interface(
78
  fn=yt_transcribe,
79
  inputs=[gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
80
- outputs=["html", "text"],
 
 
 
 
 
 
 
81
  theme="huggingface",
82
  title="Whisper Central Kurdish‌ (Sorani) Demo: Transcribe YouTube",
83
  description=(
 
1
  import torch
2
 
3
  import gradio as gr
4
+ import yt_dlp as youtube_dl
5
+
6
  from transformers import pipeline
7
  from huggingface_hub import model_info
8
 
9
  MODEL_NAME = "razhan/whisper-small-ckb"
10
+ BATCH_SIZE = 1
11
+ FILE_LIMIT_MB = 10
12
 
13
  device = 0 if torch.cuda.is_available() else "cpu"
14
  pipe = pipeline(
 
39
 
40
 
41
def _return_yt_html_embed(yt_url):
    """Build the HTML snippet that embeds a YouTube video in an iframe.

    The video id is taken from the last path segment for ``youtu.be`` short
    links, and from the ``v`` query parameter otherwise.

    Args:
        yt_url: URL of the YouTube video, as pasted by the user.

    Returns:
        A string holding a centred ``<iframe>`` whose ``src`` is
        ``https://www.youtube.com/embed/<video id>``.
    """
    # Imported locally so this function does not rely on the file's import list.
    import html
    from urllib.parse import parse_qs, urlparse

    if 'youtu.be' in yt_url:
        video_id = yt_url.split('/')[-1].split('?')[0]
    else:
        # Look the parameter up by name so it is found wherever it sits in the
        # query string (e.g. "watch?feature=share&v=ID").
        try:
            query_ids = parse_qs(urlparse(yt_url).query).get("v")
        except ValueError:
            # urlparse rejects some malformed URLs; use the plain split below.
            query_ids = None
        if query_ids:
            video_id = query_ids[0]
        else:
            video_id = yt_url.split("?v=")[-1].split('&')[0]

    # The URL is user input and ends up inside an HTML attribute, so escape it
    # to keep quotes and angle brackets from breaking out of the attribute.
    video_id = html.escape(video_id, quote=True)

    HTML_str = (
        f'<center><iframe width="560" height="315" src="https://www.youtube.com/embed/{video_id}" '
        'frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" '
        'allowfullscreen></iframe></center>'
    )
    return HTML_str
53
 
54
 
 
 
 
 
 
55
 
 
56
 
57
+
58
def yt_transcribe(yt_url, task="transcribe", max_filesize=75.0, progress=gr.Progress()):
    """Download the audio of a YouTube video and transcribe it with ``pipe``.

    Args:
        yt_url: URL of the YouTube video.
        task: Value forwarded to the pipeline as ``generate_kwargs["task"]``.
        max_filesize: Accepted for interface compatibility; not used here.
        progress: Gradio progress tracker; accepted but not used here.

    Returns:
        A tuple ``(html_embed_str, text, exec_time)``: the iframe HTML for the
        video, the transcribed text, and the pipeline run time in seconds.

    Raises:
        gr.Error: Propagated from ``download_yt_audio`` when the video cannot
            be fetched or is too long.
    """
    # Function-scope imports: none of these appear in the file's import list.
    import logging
    import os
    import tempfile
    import time

    from transformers.pipelines.audio_utils import ffmpeg_read

    html_embed_str = _return_yt_html_embed(yt_url)

    # The download only has to live long enough to be read into memory.
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "video.mp4")
        download_yt_audio(yt_url, filepath)
        with open(filepath, "rb") as f:
            inputs = f.read()

    # Decode the raw bytes to a waveform at the rate the feature extractor expects.
    sampling_rate = pipe.feature_extractor.sampling_rate
    inputs = ffmpeg_read(inputs, sampling_rate)
    inputs = {"array": inputs, "sampling_rate": sampling_rate}

    start_time = time.time()
    # NOTE(review): the language is pinned to "persian" although the demo is for
    # Central Kurdish; presumably this matches how the model was fine-tuned — confirm.
    outputs = pipe(
        inputs,
        chunk_length_s=30,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task, "language": "persian"},
        return_timestamps=False,
    )
    exec_time = time.time() - start_time

    # Keep the console line the original printed, and log the real message
    # (the original passed print()'s None return value to logging.info).
    message = f"transcribe: {exec_time} sec."
    print(message)
    logging.info(message)

    # The original returned the undefined name `txt`; the text is in the pipeline output.
    return html_embed_str, outputs["text"], exec_time
78
+
79
+
80
def download_yt_audio(yt_url, filename, progress=gr.Progress()):
    """Download the best available audio stream of a YouTube video.

    The video's metadata is fetched first so that over-long videos are
    rejected before anything is downloaded.

    Args:
        yt_url: URL of the YouTube video; a trailing ``&list...`` playlist
            part is stripped so only the single video is fetched.
        filename: Output path handed to yt-dlp as ``outtmpl``.
        progress: Gradio progress tracker, called once the download finishes.

    Raises:
        gr.Error: If the metadata cannot be fetched, the video length is
            unknown or above the limit, or the download fails.
    """
    # Function-scope import: `time` does not appear in the file's import list.
    import time

    # Use the module-level limit when the file defines one; otherwise fall
    # back to one hour so the length check cannot fail with a NameError.
    length_limit_s = globals().get("YT_LENGTH_LIMIT_S", 3600)

    if '&list' in yt_url:
        yt_url = yt_url.split('&list')[0]

    info_loader = youtube_dl.YoutubeDL()

    try:
        info = info_loader.extract_info(yt_url, download=False)
    except youtube_dl.utils.DownloadError as err:
        raise gr.Error(str(err)) from err

    file_length = info.get("duration_string")
    if not file_length:
        # Without a duration the length limit cannot be enforced.
        raise gr.Error("Could not determine the length of the YouTube video.")

    file_h_m_s = [int(sub_length) for sub_length in file_length.split(":")]
    # Left-pad with zeros so "SS" and "MM:SS" are read as "0:0:SS" / "0:MM:SS".
    file_h_m_s = [0] * (3 - len(file_h_m_s)) + file_h_m_s
    file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]

    if file_length_s > length_limit_s:
        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(length_limit_s))
        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
        raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")

    ydl_opts = {"outtmpl": filename, "format": "bestaudio/best"}

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([yt_url])
        except (youtube_dl.utils.DownloadError, youtube_dl.utils.ExtractorError) as err:
            # download() reports failures as DownloadError; ExtractorError is
            # kept so anything the original handler caught is still caught.
            raise gr.Error(str(err)) from err

    progress(1, desc="Video downloaded from YouTube!")
115
 
116
 
117
  demo = gr.Blocks()
 
136
  yt_transcribe = gr.Interface(
137
  fn=yt_transcribe,
138
  inputs=[gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
139
+ outputs=["html",
140
+ gr.Textbox(
141
+ label="Output",
142
+ rtl=True,
143
+ show_copy_button=True,
144
+ ),
145
+ gr.Text(label="Transcription Time")
146
+ ],
147
  theme="huggingface",
148
  title="Whisper Central Kurdish‌ (Sorani) Demo: Transcribe YouTube",
149
  description=(