"""Gradio app that fetches a YouTube video's transcript and summarises it.

The transcript is retrieved with ``youtube_transcript_api``, the title and
author with ``pytube``, and the summary is produced by a hosted chat model
through ``huggingface_hub.InferenceClient``.
"""

from urllib.parse import parse_qs, urlparse

import gradio as gr
from huggingface_hub import InferenceClient
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi

model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
client = InferenceClient(model=model_name)

# Hosts and path prefixes that carry the video ID in the URL path instead of
# the ``v`` query parameter (e.g. https://youtu.be/<id>, /shorts/<id>).
_SHORT_LINK_HOSTS = ("youtu.be", "www.youtu.be")
_ID_PATH_PREFIXES = ("embed", "shorts", "live")


def transcribe_video(url):
    """Fetch metadata, transcript and summary for the video behind *url*.

    Args:
        url: Link to a YouTube video, as typed into the UI textbox.

    Returns:
        A 3-tuple ``(video_info, transcript, summary)`` matching the three
        output components wired to the button.

    Raises:
        gr.Error: If no video ID can be extracted from *url*. Returning a
            bare ``None`` here would leave Gradio with one value for three
            outputs, so the problem is reported to the user explicitly.
    """
    video_id = parse_youtube_url(url)
    if not video_id:
        raise gr.Error(
            "Could not find a YouTube video ID in the provided link. "
            "Expected something like https://www.youtube.com/watch?v=<id>."
        )

    video_metadata = get_video_metadata(video_id)
    transcript_content = get_transcript_content(video_id)
    transcript_summary = summarise_transcript(transcript_content)
    return (
        f"Title: {video_metadata['title']}\nAuthor: {video_metadata['author']}",
        transcript_content,
        transcript_summary,
    )


def parse_youtube_url(url):
    """Extract the video ID from a YouTube URL.

    The ``v`` query parameter is checked first, so any URL that worked before
    yields the same ID. If it is absent, ``youtu.be/<id>`` short links and
    ``/embed/<id>``, ``/shorts/<id>`` and ``/live/<id>`` paths on
    ``youtube.com`` hosts are recognised as well.

    Args:
        url: The URL string to inspect. May be empty or ``None``.

    Returns:
        The video ID as a string, or ``None`` when none can be found.
    """
    if not url:
        return None

    parsed_url = urlparse(url.strip())

    query_ids = parse_qs(parsed_url.query).get("v")
    if query_ids:
        return query_ids[0]

    host = (parsed_url.hostname or "").lower()
    segments = [segment for segment in parsed_url.path.split("/") if segment]

    if host in _SHORT_LINK_HOSTS and segments:
        return segments[0]

    is_youtube_host = host == "youtube.com" or host.endswith(".youtube.com")
    if is_youtube_host and len(segments) >= 2 and segments[0] in _ID_PATH_PREFIXES:
        return segments[1]

    return None


def get_video_metadata(video_id):
    """Look up the title and author of a video via pytube.

    Args:
        video_id: The YouTube video ID.

    Returns:
        A dict with the keys ``"title"`` and ``"author"``; each falls back to
        ``"Unknown"`` when pytube reports a falsy value.
    """
    yt = YouTube(f"https://www.youtube.com/watch?v={video_id}")
    return {
        "title": yt.title or "Unknown",
        "author": yt.author or "Unknown",
    }


def get_transcript_content(video_id):
    """Download the transcript for *video_id* and flatten it to plain text.

    Any exception raised by ``YouTubeTranscriptApi.get_transcript`` is left
    to propagate to the caller unchanged.

    Args:
        video_id: The YouTube video ID.

    Returns:
        The transcript text as a single string.
    """
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    return parse_transcript(transcript)


def parse_transcript(transcript):
    """Join the ``"text"`` field of every transcript piece with single spaces.

    Args:
        transcript: Iterable of mappings that each have a ``"text"`` entry.

    Returns:
        One string containing every piece, each stripped of leading and
        trailing spaces, separated by a single space.
    """
    return " ".join(piece["text"].strip(" ") for piece in transcript)


def summarise_transcript(transcript_content):
    """Ask the chat model for a 150-350 word summary of the transcript.

    Args:
        transcript_content: The full transcript text.

    Returns:
        The content of the first completion choice, stripped of surrounding
        whitespace.
    """
    prompt = f"""Provide a summary of the following video transcription in 150-350 words, focusing on the key points and core ideas discussed: {transcript_content}"""
    message = [{"role": "user", "content": prompt}]

    result = client.chat_completion(
        messages=message,
        max_tokens=2048,
        temperature=0.1,
    )
    return result.choices[0].message["content"].strip()


with gr.Blocks(theme=gr.themes.Base()) as demo:
    # NOTE(review): these Markdown strings look like they once contained
    # heading/centering markup that has been lost; the text is kept as found,
    # with the line breaks written as explicit escapes so the literals parse.
    gr.Markdown("\n\nYouTube Transcriber & Summariser\n\n")
    gr.Markdown(
        "\n\nProvide a link to a YouTube video and get a transcription and summary\n\n"
    )
    gr.Markdown(
        "\nUpdate: Recently YouTube has been cracking down on requests and blacklisting known cloud providers. Unfortunately, it seems that requests to YouTube APIs from HuggingFace are currently blocked. If you would still like to try out the summariser, clone the repo and run it locally with gradio app.py\n"
    )

    with gr.Row():
        # Narrow left column: the input link and the action button.
        with gr.Column(scale=1):
            video_link = gr.Textbox(
                label="Link to video",
                value="https://www.youtube.com/watch?v=ZIyB9e_7a4c",
            )
            transcribe_btn = gr.Button(
                value="Transcribe & Summarise ⚡️", variant="primary"
            )

        # Wide right column: the three outputs filled by transcribe_video.
        with gr.Column(scale=5):
            video_info = gr.Textbox(label="Video Info")
            transcription = gr.TextArea(
                label="Transcription", scale=1, lines=12, max_lines=12
            )
            transcription_summary = gr.TextArea(
                label="Summary", scale=1, lines=12, max_lines=12
            )

    transcribe_btn.click(
        fn=transcribe_video,
        inputs=video_link,
        outputs=[video_info, transcription, transcription_summary],
    )

demo.launch()