vumichien committed on
Commit
d5aa365
·
1 Parent(s): cb8bff7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -8
app.py CHANGED
@@ -13,6 +13,7 @@ from sklearn.cluster import AgglomerativeClustering
13
  from sklearn.metrics import silhouette_score
14
 
15
  from pytube import YouTube
 
16
  import torch
17
  import pyannote.audio
18
  from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding
@@ -174,21 +175,43 @@ def _return_yt_html_embed(yt_url):
174
  return HTML_str
175
 
176
  def yt_transcribe(yt_url):
177
- yt = YouTube(yt_url)
178
- html_embed_str = _return_yt_html_embed(yt_url)
179
- stream = yt.streams.filter(only_audio=True)[0]
180
- stream.download(filename="audio.mp3")
181
-
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  text = pipe("audio.mp3")["text"]
183
-
184
  return html_embed_str, text
185
 
186
  def convert_time(secs):
187
  return datetime.timedelta(seconds=round(secs))
188
 
189
  def get_youtube(video_url):
190
- yt = YouTube(video_url)
191
- abs_video_path = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
 
 
 
 
 
 
 
 
 
 
192
  print("Success download video")
193
  print(abs_video_path)
194
  return abs_video_path
 
13
  from sklearn.metrics import silhouette_score
14
 
15
  from pytube import YouTube
16
+ import yt_dlp
17
  import torch
18
  import pyannote.audio
19
  from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding
 
175
  return HTML_str
176
 
177
  def yt_transcribe(yt_url):
178
+ # yt = YouTube(yt_url)
179
+ # html_embed_str = _return_yt_html_embed(yt_url)
180
+ # stream = yt.streams.filter(only_audio=True)[0]
181
+ # stream.download(filename="audio.mp3")
182
+
183
+ ydl_opts = {
184
+ 'format': 'bestvideo*+bestaudio/best',
185
+ 'postprocessors': [{
186
+ 'key': 'FFmpegExtractAudio',
187
+ 'preferredcodec': 'mp3',
188
+ 'preferredquality': '192',
189
+ }],
190
+ 'outtmpl':'audio.%(ext)s',
191
+ }
192
+
193
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
194
+ ydl.download([yt_url])
195
+
196
  text = pipe("audio.mp3")["text"]
 
197
  return html_embed_str, text
198
 
199
  def convert_time(secs):
200
  return datetime.timedelta(seconds=round(secs))
201
 
202
  def get_youtube(video_url):
203
+ # yt = YouTube(video_url)
204
+ # abs_video_path = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
205
+
206
+ ydl_opts = {
207
+ 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
208
+ }
209
+
210
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
211
+ info = ydl.extract_info(URL, download=False)
212
+ abs_video_path = ydl.prepare_filename(info)
213
+ ydl.process_info(info)
214
+
215
  print("Success download video")
216
  print(abs_video_path)
217
  return abs_video_path