youtube-summary-ai / summarizer.py
Chrunos's picture
Update summarizer.py
ce97e85 verified
raw
history blame
2.51 kB
import os
import yt_dlp
from faster_whisper import WhisperModel
from ollama import Client
import logging
# Configure the root logger at INFO level as soon as this module is imported.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by every function in this file to report failures.
logger = logging.getLogger(__name__)
def download_audio(url):
    """
    Download the audio track of ``url`` with yt-dlp and convert it to WAV.

    The result is written to the current working directory under the fixed
    name ``audio.wav``; any existing file with that name is removed first.

    Args:
        url: Address of the video to fetch; passed to ``YoutubeDL.download``.

    Returns:
        The relative path ``'audio.wav'``.

    Raises:
        RuntimeError: If yt-dlp finished without producing ``audio.wav``.
        Exception: Any error raised while downloading is logged and
            re-raised unchanged.
    """
    output_path = 'audio.wav'
    try:
        # A leftover file from an earlier run would make the existence check
        # below succeed even when this download produced nothing.
        try:
            os.remove(output_path)
        except FileNotFoundError:
            pass

        ydl_opts = {
            'format': 'm4a/bestaudio/best',
            'paths': {'home': './'},
            'outtmpl': {
                'default': 'audio.%(ext)s',
            },
            # Convert whatever was downloaded to WAV, giving ``audio.wav``.
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'wav',
            }],
            'cookiefile': 'private.txt',  # Cookies are taken from this file
            # Add proxy if needed
            # 'proxy': 'socks5://proxy-server:1080',
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        if os.path.exists(output_path):
            return output_path
        raise RuntimeError("Download failed to produce audio file")
    except Exception as e:
        logger.error("Download failed: %s", e)
        raise
def transcribe_audio(audio_file):
    """
    Transcribe an audio file with faster-whisper, then delete the file.

    Args:
        audio_file: Path of the audio file to transcribe. The file is removed
            before this function returns, whether or not transcription
            succeeded.

    Returns:
        The text of all non-empty segments, each stripped of surrounding
        whitespace and joined with single spaces.

    Raises:
        Exception: Any error from loading the model or transcribing is
            logged and re-raised unchanged.
    """
    try:
        model = WhisperModel("base", device="cpu", compute_type="int8")
        segments, _ = model.transcribe(audio_file)
        # The segments are consumed here, inside the ``try``, so the whole
        # transcript is built before the ``finally`` below deletes the file.
        # Segment texts normally carry their own leading space (see the
        # faster-whisper docs), so strip them to avoid doubled spaces.
        pieces = (segment.text.strip() for segment in segments)
        return " ".join(piece for piece in pieces if piece)
    except Exception as e:
        logger.error("Transcription failed: %s", e)
        raise
    finally:
        # Remove the audio file; an already-missing file is not an error.
        try:
            os.remove(audio_file)
        except FileNotFoundError:
            pass
def generate_notes_and_summary_stream(transcript, *, model='llama2:latest'):
    """
    Stream notes and a summary of a transcript from an Ollama model.

    This is a generator function: calling it only creates the generator.
    The request to Ollama is made, and any failure is logged and raised,
    only once the generator is iterated.

    Args:
        transcript: Transcript text embedded in the prompt.
        model: Name of the Ollama model to query. Keyword-only; defaults to
            ``'llama2:latest'``.

    Yields:
        The ``'response'`` field of each streamed chunk.

    Raises:
        Exception: Any error raised while building the request or streaming
            is logged and re-raised unchanged during iteration.
    """
    try:
        prompt = (
            "Based on this transcript, create:\n"
            "1. A set of concise, informative notes\n"
            "2. A brief summary of the main points\n"
            f"Transcript: {transcript}\n"
            "Notes and Summary:"
        )
        client = Client()
        stream = client.generate(model=model, prompt=prompt, stream=True)
        for chunk in stream:
            yield chunk['response']
    except Exception as e:
        logger.error("Summary generation failed: %s", e)
        raise
def process_video(url):
    """
    Download, transcribe, and start summarizing the video at ``url``.

    Args:
        url: Address of the video; passed to ``download_audio``.

    Returns:
        The generator returned by ``generate_notes_and_summary_stream``.
        It is lazy, so errors raised while it is being iterated reach the
        consumer directly and are not wrapped by this function.

    Raises:
        Exception: With the message ``"Processing failed: <original>"`` when
            downloading, transcribing, or creating the generator fails. The
            original exception is attached as ``__cause__``.
    """
    try:
        audio_file = download_audio(url)
        transcript = transcribe_audio(audio_file)
        return generate_notes_and_summary_stream(transcript)
    except Exception as e:
        # Chain explicitly so the traceback shows the root cause.
        raise Exception(f"Processing failed: {str(e)}") from e