Spaces:
Sleeping
Sleeping
import os | |
import yt_dlp | |
from faster_whisper import WhisperModel | |
from ollama import Client | |
import logging | |
logging.basicConfig(level=logging.INFO) | |
logger = logging.getLogger(__name__) | |
def download_audio(url):
    """
    Download the audio track of ``url`` with yt-dlp and convert it to WAV.

    The file is written to the current working directory as ``audio.wav``
    (yt-dlp downloads ``audio.<ext>`` and the FFmpegExtractAudio
    post-processor converts it to WAV).

    Args:
        url: URL of the video to download.

    Returns:
        The relative path ``'audio.wav'``.

    Raises:
        Exception: If yt-dlp fails, reports a non-zero return code, or no
            ``audio.wav`` exists afterwards. The error is logged first.
    """
    output_path = 'audio.wav'
    try:
        # Drop any leftover file from a previous run so that the existence
        # check below can only succeed for *this* download.
        try:
            os.remove(output_path)
        except FileNotFoundError:
            pass

        ydl_opts = {
            'format': 'm4a/bestaudio/best',
            'paths': {'home': './'},
            'outtmpl': {
                'default': 'audio.%(ext)s'
            },
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'wav',
            }],
            # Cookies are read from this file in the working directory.
            'cookiefile': 'private.txt',
            # A proxy can be configured here if needed, e.g.
            # 'proxy': 'socks5://proxy-server:1080',
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            error_code = ydl.download([url])

        # Previously the return code was stored but never inspected.
        if error_code:
            raise RuntimeError(
                f"Download failed with yt-dlp return code {error_code}"
            )
        if os.path.exists(output_path):
            return output_path
        raise RuntimeError("Download failed to produce audio file")
    except Exception as e:
        logger.error(f"Download failed: {str(e)}")
        raise
def transcribe_audio(audio_file):
    """
    Transcribe an audio file with faster-whisper and delete the file.

    Uses the ``base`` Whisper model on CPU with int8 computation. The audio
    file is removed afterwards whether or not transcription succeeded.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The transcript as a single string, with segment texts separated by
        one space.

    Raises:
        Exception: Any error raised while loading the model or transcribing
            is logged and re-raised.
    """
    try:
        model = WhisperModel("base", device="cpu", compute_type="int8")
        segments, _ = model.transcribe(audio_file)
        # Segment texts usually carry their own leading space; strip them so
        # that joining does not produce a leading space or double spaces.
        stripped = (segment.text.strip() for segment in segments)
        return " ".join(text for text in stripped if text)
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        raise
    finally:
        # Best-effort cleanup: a failure to delete must not mask the
        # transcription result or the original error.
        try:
            if os.path.exists(audio_file):
                os.remove(audio_file)
        except OSError as cleanup_error:
            logger.warning(f"Could not remove {audio_file}: {cleanup_error}")
def generate_notes_and_summary_stream(transcript, *, model='llama2:latest'):
    """
    Stream notes and a summary for ``transcript`` from an Ollama model.

    This is a generator: nothing is sent to Ollama until the caller starts
    iterating, and any failure is raised during iteration rather than when
    the function is called.

    Args:
        transcript: Transcript text to embed in the prompt.
        model: Name of the Ollama model to query. Defaults to
            ``'llama2:latest'``.

    Yields:
        The ``'response'`` field of each streamed chunk, in order.

    Raises:
        Exception: Any error raised while creating the client or streaming
            is logged and re-raised.
    """
    try:
        prompt = f"""Based on this transcript, create:
1. A set of concise, informative notes
2. A brief summary of the main points
Transcript: {transcript}
Notes and Summary:"""
        client = Client()
        stream = client.generate(model=model, prompt=prompt, stream=True)
        for chunk in stream:
            yield chunk['response']
    except Exception as e:
        logger.error(f"Summary generation failed: {str(e)}")
        raise
def process_video(url):
    """
    Run the full pipeline for a video URL: download, transcribe, summarise.

    Args:
        url: URL of the video to process.

    Returns:
        The generator produced by ``generate_notes_and_summary_stream``.
        It is lazy, so errors during summary generation surface when the
        caller iterates it and are NOT wrapped by this function.

    Raises:
        Exception: With the message ``"Processing failed: <reason>"`` if the
            download or transcription step fails. The original error is
            attached as ``__cause__``.
    """
    try:
        audio_file = download_audio(url)
        transcript = transcribe_audio(audio_file)
        return generate_notes_and_summary_stream(transcript)
    except Exception as e:
        # Chain explicitly so the underlying traceback is kept as the cause.
        raise Exception(f"Processing failed: {str(e)}") from e