youtube-summary-ai

Sleeping

App Files Files Community

youtube-summary-ai / summarizer.py

Chrunos

Update summarizer.py

ce97e85 verified about 1 month ago

raw

history blame

2.51 kB

	import os
	import yt_dlp
	from faster_whisper import WhisperModel
	from ollama import Client
	import logging

	# Configure the root logger at import time so INFO-and-above records are emitted.
	logging.basicConfig(level=logging.INFO)
	# Module-level logger, named after this module, used by the functions in this file.
	logger = logging.getLogger(__name__)

	def download_audio(url):
	    """
	    Download the audio track of ``url`` with yt-dlp and convert it to WAV.

	    The result is always written to ``audio.wav`` in the current working
	    directory, so concurrent calls would overwrite each other's output.
	    Cookies are read from ``private.txt`` (yt-dlp ``cookiefile`` option).

	    Args:
	        url: Address of the video to download.

	    Returns:
	        The path of the produced file, ``'audio.wav'``.

	    Raises:
	        RuntimeError: If yt-dlp finished without producing ``audio.wav``.
	        Exception: Any error raised by yt-dlp is logged and re-raised.
	    """
	    output_file = 'audio.wav'
	    try:
	        # A leftover file from an earlier run would otherwise make a
	        # download that produced nothing look like a success.
	        if os.path.exists(output_file):
	            os.remove(output_file)

	        ydl_opts = {
	            'format': 'm4a/bestaudio/best',
	            'paths': {'home': './'},
	            'outtmpl': {
	                'default': 'audio.%(ext)s'
	            },
	            'postprocessors': [{
	                'key': 'FFmpegExtractAudio',
	                'preferredcodec': 'wav',
	            }],
	            'cookiefile': 'private.txt',  # Added cookiefile option
	            # The fixed output name can only hold one video, so do not
	            # expand a watch URL that also carries a playlist id.
	            'noplaylist': True,
	            # Add proxy if needed
	            # 'proxy': 'socks5://proxy-server:1080',
	        }

	        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	            error_code = ydl.download([url])

	        if error_code:
	            # Surface the return code for diagnostics; the file check
	            # below decides whether the call actually failed.
	            logger.warning("yt-dlp returned non-zero exit code %s", error_code)

	        if not os.path.exists(output_file):
	            raise RuntimeError("Download failed to produce audio file")
	        return output_file

	    except Exception as e:
	        logger.error("Download failed: %s", e)
	        raise

	def transcribe_audio(audio_file):
	    """
	    Transcribe ``audio_file`` with the "base" Whisper model on CPU (int8).

	    The audio file is deleted afterwards, whether transcription succeeded
	    or not.

	    Args:
	        audio_file: Path of the audio file to transcribe.

	    Returns:
	        The transcript as a single string, segments separated by one space.

	    Raises:
	        Exception: Any model loading or transcription error is logged and
	            re-raised.
	    """
	    try:
	        model = WhisperModel("base", device="cpu", compute_type="int8")
	        segments, _ = model.transcribe(audio_file)
	        # Segments are consumed here, inside the try block, so decoding
	        # errors are logged below. Each segment's text carries its own
	        # surrounding whitespace; strip it so the join does not produce a
	        # leading space or doubled spaces.
	        pieces = (segment.text.strip() for segment in segments)
	        return " ".join(piece for piece in pieces if piece)
	    except Exception as e:
	        logger.error("Transcription failed: %s", e)
	        raise
	    finally:
	        # Cleanup is best-effort: a failure to delete must not mask the
	        # transcription error or discard a successful transcript.
	        try:
	            os.remove(audio_file)
	        except FileNotFoundError:
	            pass
	        except OSError as cleanup_error:
	            logger.warning("Could not remove %s: %s", audio_file, cleanup_error)

	def generate_notes_and_summary_stream(transcript, model='llama2:latest'):
	    """
	    Stream notes and a summary of ``transcript`` generated by Ollama.

	    This is a generator function: nothing is sent to Ollama until the
	    caller starts iterating, and errors surface during iteration.

	    Args:
	        transcript: Text of the transcript to summarise.
	        model: Name of the Ollama model to use. Defaults to
	            ``'llama2:latest'``.

	    Yields:
	        The ``'response'`` text of each streamed chunk, in order.

	    Raises:
	        Exception: Any client or streaming error is logged and re-raised.
	    """
	    try:
	        prompt = f"""Based on this transcript, create:
	1. A set of concise, informative notes
	2. A brief summary of the main points

	Transcript: {transcript}

	Notes and Summary:"""

	        client = Client()
	        stream = client.generate(model=model, prompt=prompt, stream=True)
	        for chunk in stream:
	            yield chunk['response']
	    except Exception as e:
	        logger.error("Summary generation failed: %s", e)
	        raise

	def process_video(url):
	    """
	    Download, transcribe and summarise the video at ``url``.

	    Download and transcription run eagerly inside this call. The summary is
	    returned as a generator from ``generate_notes_and_summary_stream``, so
	    generation only starts when the caller iterates, and errors raised
	    during that iteration are not wrapped by this function.

	    Args:
	        url: Address of the video to process.

	    Returns:
	        A generator yielding the notes-and-summary text in chunks.

	    Raises:
	        Exception: With message ``"Processing failed: <reason>"`` if the
	            download or transcription fails; the original error is
	            attached as ``__cause__``.
	    """
	    try:
	        audio_file = download_audio(url)
	        transcript = transcribe_audio(audio_file)
	        return generate_notes_and_summary_stream(transcript)
	    except Exception as e:
	        # Chain explicitly so the traceback shows the original error as
	        # the direct cause of this wrapper exception.
	        raise Exception(f"Processing failed: {str(e)}") from e