Update app.py
app.py
CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-import os
+import os, re
 
 from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
 from youtube_transcript_api import YouTubeTranscriptApi
@@ -16,16 +16,25 @@ from langchain.prompts.chat import (
 
 def get_transcript(video_url):
     try:
-
+        # Use a regular expression to extract video ID from the YouTube URL
+        video_id_match = re.search(r"(?:https?://)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})", video_url)
 
+        if not video_id_match:
+            return "Invalid YouTube URL"
+
+        video_id = video_id_match.group(1)
+
+        # Fetch the transcript
         transcript = YouTubeTranscriptApi.get_transcript(video_id)
+
+        # Join the transcript text into a single string
         text = "\n".join([t["text"] for t in transcript])
-
-        return text  # Return transcript as string
+        return text  # Return the transcript as a string
 
     except Exception as e:
         return f"Error fetching transcript: {str(e)}"
 
+
 
 def create_db_from_video_url(video_url, api_key):
     """
@@ -36,12 +45,12 @@ def create_db_from_video_url(video_url, api_key):
     transcripts = get_transcript(video_url)
 
     # Convert transcript string into a Document
-
+    doc_convert = Document(page_content=transcripts)
 
-    print(
+    print(doc_convert)
     # cannot provide this directly to the model so we are splitting the transcripts into small chunks
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-    docs = text_splitter.split_documents(
+    docs = text_splitter.split_documents([doc_convert])
     print(docs)
 
     db = FAISS.from_documents(docs, embedding=embeddings)
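For a quick sanity check of the new URL handling, here is a minimal standalone sketch that exercises the same regular expression added in this commit (the example URLs and the YOUTUBE_ID_PATTERN name are illustrative, not part of app.py):

import re

# Same pattern as in get_transcript(); the constant name is ours, app.py inlines it.
YOUTUBE_ID_PATTERN = r"(?:https?://)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"

for url in ("https://www.youtube.com/watch?v=dQw4w9WgXcQ",   # standard watch URL (example)
            "https://youtu.be/dQw4w9WgXcQ",                   # short link (example)
            "not a youtube link"):
    match = re.search(YOUTUBE_ID_PATTERN, url)
    print(match.group(1) if match else "Invalid YouTube URL")

Both standard watch URLs and youtu.be short links resolve to the same 11-character ID, while anything that does not match falls through to the "Invalid YouTube URL" path.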
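The second hunk wraps the raw transcript string in a Document before chunking, because split_documents() expects Document objects rather than plain strings. A minimal sketch of that step, assuming the classic LangChain import paths (app.py's own imports fall outside the hunks shown here) and a placeholder transcript string:

from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

transcripts = "example transcript line\n" * 200  # stand-in for get_transcript() output

# Wrap the string in a Document, then split it into ~1000-character chunks with 100-character overlap
doc_convert = Document(page_content=transcripts)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents([doc_convert])
print(len(docs), "chunks")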