Spaces:

Kathir0011
/

YouTube_Video_Assistant

Running

Kathir0011 committed on Jan 29

Commit

68bbcdf

verified ·

1 Parent(s): b2ef7b4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import gradio as gr
 import os
-import yt_dlp
 from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
 from youtube_transcript_api import YouTubeTranscriptApi
@@ -14,24 +13,22 @@ from langchain.prompts.chat import (
 )
-def get_transcript_yt_dlp(video_url):
-    """Fetches transcript using yt_dlp."""
-    ydl_opts = {
-        "writesubtitles": True,
-        "writeautomaticsub": True,
-        "skip_download": True,
-        "subtitleslangs": ["en"],  # Fetch English subtitles
-    }
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        info_dict = ydl.extract_info(video_url, download=False)
-        subtitles = info_dict.get("subtitles") or info_dict.get("automatic_captions")
-        if subtitles and "en" in subtitles:
-            sub_url = subtitles["en"][0]["url"]
-            return f"Transcript URL: {sub_url}"
-        else:
-            return "No subtitles available!"
 def create_db_from_video_url(video_url, api_key):
@@ -40,7 +37,7 @@ def create_db_from_video_url(video_url, api_key):
     """
     embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004", google_api_key=api_key)
-    transcripts = get_transcript_yt_dlp(video_url)
     print(transcripts)
     # cannot provide this directly to the model so we are splitting the transcripts into small chunks

 import gradio as gr
 import os
 from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
 from youtube_transcript_api import YouTubeTranscriptApi
 )
+def get_transcript(video_url):
+    try:
+        # Extract video ID from the URL
+        video_id = video_url.split("v=")[-1].split("&")[0]
+        # Fetch transcript
+        transcript = YouTubeTranscriptApi.get_transcript(video_id)
+        # Optional: Format transcript to SRT format
+        formatter = SRTFormatter()
+        formatted_transcript = formatter.format_transcript(transcript)
+        return formatted_transcript  # Or return as plain text
+    except Exception as e:
+        return f"Error fetching transcript: {str(e)}"
 def create_db_from_video_url(video_url, api_key):
     """
     embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004", google_api_key=api_key)
+    transcripts = get_transcript(video_url)
     print(transcripts)
     # cannot provide this directly to the model so we are splitting the transcripts into small chunks