|
import gradio as gr |
|
import os, re |
|
|
|
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI |
|
from youtube_transcript_api import YouTubeTranscriptApi |
|
from langchain.schema import Document |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain_community.vectorstores import FAISS |
|
from langchain.chains import LLMChain |
|
from langchain.prompts.chat import ( |
|
ChatPromptTemplate, |
|
SystemMessagePromptTemplate, |
|
HumanMessagePromptTemplate, |
|
) |
|
|
|
|
|
def get_transcript(video_url):
    """
    Fetch the transcript of a YouTube video as newline-separated text.

    The 11-character video id is extracted from ``video_url`` (watch?v=,
    /v/, /e/, /embed/, /shorts/, /live/ and youtu.be/ forms are recognised)
    and the transcript segments are joined with newlines.

    Args:
        video_url: URL (or URL fragment) of the YouTube video.

    Returns:
        The transcript text on success. Failures are reported as a message
        string rather than an exception: "Invalid YouTube URL" when no video
        id can be found, or "Error fetching transcript: Unable to fetch
        subtitles." when the transcript lookup fails for any reason.
    """
    import logging  # local import keeps this block self-contained

    # None / non-string input can never contain a video id.
    if not isinstance(video_url, str):
        return "Invalid YouTube URL"

    video_id_pattern = (
        r"(?:https?://)?(?:www\.)?"
        r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)"
        r"|youtu\.be\/)"
        r"([a-zA-Z0-9_-]{11})"
    )
    video_id_match = re.search(video_id_pattern, video_url)
    if not video_id_match:
        return "Invalid YouTube URL"
    video_id = video_id_match.group(1)

    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older youtube-transcript-api releases: static helper that
            # returns a list of dicts with a "text" key.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
            lines = [segment["text"] for segment in segments]
        else:
            # Newer releases replace the static helper with an instance
            # method whose result iterates over snippet objects.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            lines = [snippet.text for snippet in fetched]
    except Exception:
        # The UI only needs a short message; keep the real cause in the
        # logs instead of discarding it.
        logging.getLogger(__name__).exception(
            "Could not fetch transcript for video id %s", video_id
        )
        return "Error fetching transcript: Unable to fetch subtitles."

    return "\n".join(lines)
|
|
|
|
|
|
|
def create_db_from_video_url(video_url, api_key):
    """
    Build a FAISS vector store from a YouTube video's transcript.

    The transcript is fetched with ``get_transcript``, split into
    overlapping chunks, embedded with Google's ``text-embedding-004`` model
    and indexed so it can be queried by similarity search.

    Args:
        video_url: URL of the YouTube video to index.
        api_key: Google Generative AI API key used for the embeddings.

    Returns:
        A FAISS vector store containing the transcript chunks.

    Raises:
        gr.Error: if the transcript could not be fetched or is empty. Gradio
            shows the message to the user instead of a generic failure.
    """
    transcript = get_transcript(video_url)

    # get_transcript reports failures as message strings, not exceptions.
    # Indexing such a message would make the assistant answer questions
    # about the error text, so stop here instead.
    fetch_failed = transcript == "Invalid YouTube URL" or transcript.startswith(
        "Error fetching transcript:"
    )
    if fetch_failed:
        raise gr.Error(transcript)
    if not transcript.strip():
        raise gr.Error("The transcript of this video is empty.")

    # Only build the embedding client once there is something to embed.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004", google_api_key=api_key
    )

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = text_splitter.split_documents([Document(page_content=transcript)])

    return FAISS.from_documents(docs, embedding=embeddings)
|
|
|
|
|
def get_response(video, request):
    """
    Answer a question about a YouTube video using Gemini 1.5 Flash.

    The video's transcript is indexed with ``create_db_from_video_url``, the
    five chunks most similar to the question are retrieved, and the model is
    asked to answer using only that transcript text.

    Args:
        video: URL of the YouTube video.
        request: The user's question about the video.

    Returns:
        The model's answer as a string, or a short prompt asking for the
        missing input when ``video`` or ``request`` is blank.
    """
    # Bail out before any transcript download, embedding or LLM call.
    if not video or not video.strip():
        return "Please enter a YouTube video URL."
    if not request or not request.strip():
        return "Please enter a question."

    api_key = os.environ.get("API_Key")

    db = create_db_from_video_url(video, api_key)
    docs = db.similarity_search(query=request, k=5)
    docs_content = " ".join(doc.page_content for doc in docs)

    chat = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        google_api_key=api_key,
        convert_system_message_to_human=True,
    )

    # System prompt: restricts the model to the retrieved transcript text.
    template = """
    You are an assistant that can answer questions about youtube videos based on
    video transcripts: {docs}

    Only use factual information from the transcript to answer the question.

    If you don't have enough information to answer the question, say "I don't know".

    Your Answers should be detailed.
    """
    system_msg_prompt = SystemMessagePromptTemplate.from_template(template)

    human_template = "Answer the following questions: {question}"
    human_msg_prompt = HumanMessagePromptTemplate.from_template(human_template)

    chat_prompt = ChatPromptTemplate.from_messages(
        [system_msg_prompt, human_msg_prompt]
    )

    chain = LLMChain(llm=chat, prompt=chat_prompt)
    return chain.run(question=request, docs=docs_content)
|
|
|
|
|
# Static text shown on the Gradio page.
# Emoji are written as \U escapes so they survive any re-encoding of this
# file; the previous literals had been corrupted into mojibake.
title = "YouTube Video Assistant \U0001F9D1\u200D\U0001F4BB"  # technologist

description = (
    "Answers to the Questions asked by the user on the specified YouTube video. (English Only).\n\n"
    "Click here to view [demo](https://huggingface.co/spaces/Kathir0011/YouTube_Video_Assistant/blob/main/README.md)."
)

# NOTE(review): the first two icons were both garbled to the same glyphs;
# money bag and newspaper are the best guess from context - please confirm.
article = (
    "Other Projects:<br/>"
    "\U0001F4B0 [Health Insurance Predictor](http://health-insurance-cost-predictor-k19.streamlit.app/)<br/>"
    "\U0001F4F0 [Fake News Detector](https://fake-news-detector-k19.streamlit.app/)<br/>"
    "\U0001FAB6 [Birds Classifier](https://huggingface.co/spaces/Kathir0011/Birds_Classification)"
)
|
|
|
|
|
# Input widgets: the video link and the question to ask about it.
video_url_box = gr.Text(
    label="Enter the Youtube Video URL:",
    placeholder="Example: https://www.youtube.com/watch?v=MnDudvCyWpc",
)
question_box = gr.Text(
    label="Enter your Question",
    placeholder="Example: What's the video is about?",
)

# Output widget: multi-line area holding the model's answer.
answer_box = gr.TextArea(label="Answers using Gemini-1.5-flash:")

# Wire the widgets to get_response and attach the page texts.
youtube_video_assistant = gr.Interface(
    fn=get_response,
    inputs=[video_url_box, question_box],
    outputs=answer_box,
    title=title,
    description=description,
    article=article,
)

# Start the web app as soon as the module is executed.
youtube_video_assistant.launch()
|
|