# Spaces:
# Running
# Running
import yt_dlp | |
from faster_whisper import WhisperModel | |
from ollama import Client | |
import os | |
import browser_cookie3 | |
import tempfile | |
def get_youtube_cookies():
    """
    Attempt to get YouTube cookies from multiple browsers.

    Browsers are tried in order (Chrome, Firefox, Edge, Safari). The first
    browser that yields at least one cookie for ``.youtube.com`` is used and
    its cookies are written to a temporary file in Netscape cookie-file
    format.

    Returns a cookie file path or None if no cookies found. The caller is
    responsible for deleting the returned file.
    """
    browser_loaders = (
        browser_cookie3.chrome,
        browser_cookie3.firefox,
        browser_cookie3.edge,
        browser_cookie3.safari,
    )

    for load_cookies in browser_loaders:
        try:
            cookies = list(load_cookies(domain_name='.youtube.com'))
        except Exception:
            # Best effort: the browser may be missing, locked, or unreadable.
            # Unlike a bare ``except``, this lets KeyboardInterrupt through.
            continue

        if not cookies:
            # An empty jar must not shadow a later browser that has a session.
            continue

        # The temp file is created only once there is something to write, so
        # nothing is left behind when no browser yields cookies.
        fd, cookie_path = tempfile.mkstemp(suffix='.txt')
        try:
            with os.fdopen(fd, 'w', encoding='utf-8') as f:
                # Cookie-file loaders expect this magic first line.
                f.write('# Netscape HTTP Cookie File\n')
                for cookie in cookies:
                    # The flag must agree with the leading dot of the domain.
                    subdomains = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                    secure = 'TRUE' if cookie.secure else 'FALSE'
                    # Session cookies have no expiry; the format uses an
                    # empty field for that, not the text "None".
                    expires = '' if cookie.expires is None else str(cookie.expires)
                    f.write(
                        f'{cookie.domain}\t{subdomains}\t{cookie.path}\t'
                        f'{secure}\t{expires}\t'
                        f'{cookie.name}\t{cookie.value}\n'
                    )
        except Exception:
            # Do not leave a partially written cookie file on disk.
            if os.path.exists(cookie_path):
                os.unlink(cookie_path)
            continue

        return cookie_path

    return None
def download_audio(url):
    """
    Download the audio of ``url`` with yt-dlp and convert it to WAV.

    Browser cookies from ``get_youtube_cookies()`` are passed to yt-dlp when
    available. The temporary cookie file is always removed afterwards,
    whether the download succeeds, fails, or is interrupted.

    Returns the output file name, ``'audio.wav'`` (relative to the current
    working directory, per the ``outtmpl`` option).

    Raises:
        Exception: if the download or conversion fails; the original error
            is attached as ``__cause__``.
    """
    cookie_file = get_youtube_cookies()

    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
        'outtmpl': 'audio.%(ext)s',
        'quiet': True,
        'no_warnings': True
    }

    # Add cookies if available
    if cookie_file:
        ydl_opts['cookiefile'] = cookie_file

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as e:
        raise Exception(f"Error downloading audio: {str(e)}") from e
    finally:
        # The cookie file holds browser session data; remove it on every
        # exit path, including KeyboardInterrupt.
        if cookie_file and os.path.exists(cookie_file):
            os.unlink(cookie_file)

    return 'audio.wav'
def transcribe_audio(audio_file):
    """
    Transcribe ``audio_file`` and return the text as a single string.

    A "base" WhisperModel is created on every call, pinned to the CPU with
    int8 compute. The text of each returned segment is joined with a single
    space.
    """
    # Device and compute type are set explicitly rather than left to defaults.
    whisper = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _info = whisper.transcribe(audio_file)
    segment_texts = (piece.text for piece in segments)
    return " ".join(segment_texts)
# Module-level Ollama client, constructed with no arguments when the module
# is imported; generate_notes_and_summary_stream() sends its prompts through it.
ollama_client = Client()
def generate_notes_and_summary_stream(transcript):
    """
    Stream notes and a summary for ``transcript`` from the Ollama model.

    Builds a prompt around the transcript, sends it to the module-level
    ``ollama_client`` with streaming enabled (model ``llama3.1:latest``), and
    yields the ``'response'`` field of each chunk as it arrives.

    This is a generator: no request is made until iteration starts.
    """
    prompt = f"""
Based on the following transcript of a video, create:
1. A set of concise, informative notes
2. A brief summary of the main points
Transcript:
{transcript}
Notes and Summary:
"""
    response_chunks = ollama_client.generate(
        model='llama3.1:latest',
        prompt=prompt,
        stream=True,
    )
    yield from (piece['response'] for piece in response_chunks)
def process_video(url):
    """
    Run the full pipeline for ``url``: download audio, transcribe, summarize.

    Returns the generator produced by ``generate_notes_and_summary_stream``.
    That generator is lazy, so errors raised while the notes are being
    streamed surface when the caller iterates it and are NOT wrapped here;
    only download and transcription failures are.

    Raises:
        Exception: if downloading or transcription fails; the original error
            is attached as ``__cause__``.
    """
    try:
        audio_file = download_audio(url)
        transcript = transcribe_audio(audio_file)
        return generate_notes_and_summary_stream(transcript)
    except Exception as e:
        # Chain explicitly so the root cause stays attached to the wrapper.
        raise Exception(f"An error occurred: {str(e)}") from e