import os import re import yt_dlp import gradio as gr from pathlib import Path from functools import lru_cache from func_timeout import FunctionTimedOut, func_timeout from yt_dlp.extractor.youtube import YoutubeBaseInfoExtractor from dev_utils import DEV_LOGGER class Logger: def __init__(self, filename): self.filename = filename self.logs = open(filename, "w+") self.logs.write("See the demo video transcription below!\n") self.code_pattern = r"(?<=enter code )[\w-]+" self.access_got = False self.logs_list = ["See the demo video transcription below!"] def info(self, message): DEV_LOGGER.info(f'YT INFO MESSAGE: {message}') def error(self, message): DEV_LOGGER.info(f'YT ERROR MESSAGE: {message}') def warning(self, message): DEV_LOGGER.info(f'YT WARNING MESSAGE: {message}') def debug(self, message): DEV_LOGGER.info(f'YT MESSAGE: {message}') if "The download has not started yet!" in self.logs_list or "See demo video transcription below!" in self.logs_list: self.logs_list = [] self.logs.seek(0) self.logs.truncate(0) self.logs.flush() if "Refreshing access token" in message: message = '🌐 Refreshing access token...' if message not in self.logs_list and not self.access_got: self.logs.write(message) self.logs_list.append(message) self.logs.write('\n') self.logs.flush() if "Extracting URL" in message: message = '⏳ Downloading...' if message not in self.logs_list: if "❗ To give yt-dlp access to your account, go to [https://www.google.com/device]." in self.logs_list: self.logs.seek(0) self.logs.truncate(0) self.logs.flush() self.logs.write(message) self.logs_list.append(message) self.logs.write('\n') self.logs.flush() if "https://www.google.com/device" in message: code = re.search(self.code_pattern, message).group() message = "❗ To give yt-dlp access to your account, go to [https://www.google.com/device]." if message not in self.logs_list: self.access_got = True self.logs.write(message) self.logs.write('\n') self.logs.write(f"🚨 Enter code `{code}`") self.logs_list.append(message) self.logs.write('\n') self.logs.flush() def reset_logs(self): self.access_got = False self.logs.seek(0) self.logs.truncate(0) self.logs.write('The download has not started yet!\n') self.logs.flush() self.logs_list = ["The download has not started yet!"] def reset_logs_for_demo(self): self.access_got = False self.logs.seek(0) self.logs.truncate(0) self.logs.write('See the demo video transcription below!\n') self.logs.flush() self.logs_list = ["See the demo video transcription below!"] def remind_about_demo(self): self.access_got = False self.logs.seek(0) self.logs.truncate(0) self.logs.write('Click on the button to get transcription for the demo video!\n') self.logs.flush() self.logs_list = ["Click on the button to get transcription for the demo video!"] def complete_logs(self): message = '✅ Download Completed!\n' if message not in self.logs_list: self.logs.write(message) self.logs_list.append(message) self.logs.flush() def get_logs(self): self.logs.seek(0) content = self.logs.read() self.logs.seek(0, os.SEEK_END) self.logs.flush() return content def get_filename(d): if d['status'] == 'finished': global downloaded_filename downloaded_filename = d['filename'] downloaded_filename = Path(downloaded_filename).with_suffix('.flac') def get_video_info(url, yt_cache_dir, logger): ydl_opts = { 'quiet': False, 'skip-download': True, 'username': 'oauth', 'password': '', 'cachedir': yt_cache_dir, 'logger': logger } with yt_dlp.YoutubeDL(ydl_opts) as ydl: try: info = ydl.extract_info(url, download=False) except: raise gr.Error("Failed to extract video info from Youtube") finally: YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.clear() return info def download_as_audio(url, yt_cache_dir, logger): ydl_opts = { 'format': 'bestaudio/best,channels:1', 'quiet': False, 'username': 'oauth', 'password': '', 'outtmpl': os.path.join(yt_cache_dir, '%(id)s_%(title)s.%(ext)s'), 'postprocessors': [{ 'key': 'FFmpegExtractAudio', 'preferredcodec': 'flac', 'preferredquality': '192', }], 'progress_hooks': [get_filename], 'cachedir': yt_cache_dir, 'logger': logger } with yt_dlp.YoutubeDL(ydl_opts) as ydl: try: ydl.download([url]) except yt_dlp.utils.DownloadError as err: raise gr.Error(str(err)) finally: YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.clear() logger.complete_logs() return Path(yt_cache_dir, downloaded_filename).as_posix() def get_audio_from_youtube(url, yt_cache_dir, logger, max_length=None): DEV_LOGGER.info(f'YT CACHE DIR: {yt_cache_dir}') DEV_LOGGER.info(f'LOGGER FILE: {logger.filename}') try: info = func_timeout(120, get_video_info, args=(url, yt_cache_dir, logger)) except FunctionTimedOut: raise gr.Error('Terminating session due to inactivity...', duration=5) duration = info.get('duration', 0) # Duration in seconds video_id = info.get('id', None) if max_length and duration > max_length: raise gr.Error(f'The audio duration exceeds maximum duration of {max_length // 60} minutes. Please change the input audio.') html = f'