Samarth991 committed
Commit 1f8a90f · 2 parents: 5c372f3 a23d270

Merge branch 'main' of https://huggingface.co/spaces/Samarth991/Youtube-Video-ChatBot into main

Files changed (3)
  1. README.md +2 -2
  2. app.py +41 -164
  3. requirements.txt +4 -5
README.md CHANGED
@@ -4,10 +4,10 @@ emoji: 🏃
 colorFrom: green
 colorTo: green
 sdk: gradio
-sdk_version: 3.43.2
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: mit
 ---

-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
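
The SDK bump is what forces the app.py rewrite below: Gradio 4.x dropped the component-level .style() helper and the gr.Box container that the old Blocks UI relied on. A minimal sketch of the Gradio 4 equivalents (illustrative only; labels are taken from the old code):

    import gradio as gr

    with gr.Blocks() as demo:
        # Gradio 3.x: gr.Textbox(...).style(full_width=True) -- .style() no longer exists in 4.x;
        # layout options such as scale are now constructor arguments.
        question = gr.Textbox(label="Type your question!", lines=1, scale=1)
        # Gradio 3.x: with gr.Box(): ... -- gr.Box was removed; gr.Group is the closest container.
        with gr.Group():
            status = gr.Textbox(label="Status", interactive=False)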
app.py CHANGED
@@ -1,190 +1,67 @@
 import time
 import gradio as gr
 import logging
-from langchain.document_loaders import PDFMinerLoader,CSVLoader ,UnstructuredWordDocumentLoader,TextLoader,OnlinePDFLoader
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.embeddings import SentenceTransformerEmbeddings
-from langchain.vectorstores import FAISS
-from langchain.chains import RetrievalQA
-from langchain.prompts import PromptTemplate
-from langchain.docstore.document import Document
 from youtube_transcript_api import YouTubeTranscriptApi
 import chatops

 logger = logging.getLogger(__name__)

 DEVICE = 'cpu'
-MAX_NEW_TOKENS = 4096
-DEFAULT_TEMPERATURE = 0.1
-DEFAULT_MAX_NEW_TOKENS = 2048
-MAX_INPUT_TOKEN_LENGTH = 4000
-DEFAULT_CHAR_LENGTH = 1000
-
-EXAMPLES = ["https://www.youtube.com/watch?v=aircAruvnKk&ab_channel=3Blue1Brown",
-            "https://www.youtube.com/watch?v=Ilg3gGewQ5U",
-            "https://www.youtube.com/watch?v=WUvTyaaNkzM"
-            ]
-


-def clear_chat():
-    return []

-def get_text_from_youtube_link(video_link,max_video_length=800):
     video_text = ""
     video_id = video_link.split("watch?v=")[1].split("&")[0]
     srt = YouTubeTranscriptApi.get_transcript(video_id)
     for text_data in srt:
         video_text = video_text + " " + text_data.get("text")
     if len(video_text) > max_video_length:
-        print(video_text)
-        return video_text[0:max_video_length]
-    else:
-        print(video_text)
-        return video_text
-
-def process_documents(documents,data_chunk=1500,chunk_overlap=100):
-    text_splitter = CharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap,separator='\n')
-    texts = text_splitter.split_documents(documents)
-    return texts
-
-def process_youtube_link(link, document_name="youtube-content",char_length=1000):
-    try:
-        metadata = {"source": f"{document_name}.txt"}
-        return [Document(page_content=get_text_from_youtube_link(video_link=link,max_video_length=char_length), metadata=metadata)]
-    except Exception as err:
-        logger.error(f'Error in reading document. {err}')
-

-def create_prompt():
-    prompt_template = """As a chatbot asnwer the questions regarding the content in the video.
-    Use the following context to answer.
-    If you don't know the answer, just say I don't know.
-
-    {context}
-
-    Question: {question}
-    Answer :"""
-    prompt = PromptTemplate(
-        template=prompt_template, input_variables=["context", "question"]
-    )
-    return prompt
-
-def youtube_chat(youtube_link,API_key,llm='HuggingFace',temperature=0.1,max_tokens=1096,char_length=1500):

-    document = process_youtube_link(link=youtube_link,char_length=char_length)
     print("docuemt:",document)
-    embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-base',model_kwargs={"device": DEVICE})
-    texts = process_documents(documents=document)
-    global vector_db
-    vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
-    global qa
-
-    if llm == 'HuggingFace':
-        chat = chatops.get_hugging_face_model(
-            model_id="tiiuae/falcon-7b-instruct",
             API_key=API_key,
             temperature=temperature,
             max_tokens=max_tokens
         )
-    else:
-        chat = chatops.get_openai_chat_model(API_key=API_key)
-    chain_type_kwargs = {"prompt": create_prompt()}
-
-    qa = RetrievalQA.from_chain_type(llm=chat,
-                                     chain_type='stuff',
-                                     retriever=vector_db.as_retriever(),
-                                     chain_type_kwargs=chain_type_kwargs,
-                                     return_source_documents=True
-                                     )
-    return "Youtube link Processing completed ..."
-
-def infer(question, history):
-    # res = []
-    # # for human, ai in history[:-1]:
-    # #     pair = (human, ai)
-    # #     res.append(pair)
-
-    # chat_history = res
-    result = qa({"query": question})
-    matching_docs_score = vector_db.similarity_search_with_score(question)
-
-    return result["result"]
-
-def bot(history):
-    response = infer(history[-1][0], history)
-    history[-1][1] = ""
-
-    for character in response:
-        history[-1][1] += character
-        time.sleep(0.05)
-        yield history
-
-def add_text(history, text):
-    history = history + [(text, None)]
-    return history, ""
-
-
-css="""
-#col-container {max-width: 2048px; margin-left: auto; margin-right: auto;}
-"""
-
-title = """
-<div style="text-align: center;max-width: 2048px;">
-    <h1>Chat with Youtube Videos </h1>
-    <p style="text-align: center;">Upload a youtube link of any video-lecture/song/Research/Conference & ask Questions to chatbot with the tool.
-    <i> Tools uses State of the Art Models from HuggingFace/OpenAI so, make sure to add your key.</i>
-    </p>
-</div>
-"""
-
-with gr.Blocks(css=css) as demo:
-    with gr.Row():
-        with gr.Column(elem_id="col-container"):
-            gr.HTML(title)
-
-    with gr.Column():
-        with gr.Row():
-            LLM_option = gr.Dropdown(['HuggingFace','OpenAI'],label='Select HuggingFace/OpenAI')
-            API_key = gr.Textbox(label="Add API key", type="password",autofocus=True)
-
-        with gr.Group():
-            chatbot = gr.Chatbot(height=270)
-
-        with gr.Row():
-            question = gr.Textbox(label="Type your question !",lines=1).style(full_width=True)
-        with gr.Row():
-            submit_btn = gr.Button(value="Send message", variant="primary", scale = 1)
-            clean_chat_btn = gr.Button("Delete Chat")
-
-    with gr.Column():
-        with gr.Box():
-            youtube_link = gr.Textbox(label="Add your you tube Link",text_align='left',autofocus=True)
-            with gr.Row():
-                load_youtube_bt = gr.Button("Process Youtube Link",).style(full_width = False)
-            langchain_status = gr.Textbox(label="Status", placeholder="", interactive = False)
-
-    with gr.Column():
-        with gr.Accordion(label='Advanced options', open=False):
-            max_new_tokens = gr.Slider(
-                label='Max new tokens',
-                minimum=2048,
-                maximum=MAX_NEW_TOKENS,
-                step=1,
-                value=DEFAULT_MAX_NEW_TOKENS,
-            )
-            temperature = gr.Slider(label='Temperature',minimum=0.1,maximum=4.0,step=0.1,value=DEFAULT_TEMPERATURE,)
-            char_length = gr.Slider(label='Max Character',
-                                    minimum= DEFAULT_CHAR_LENGTH,
-                                    maximum = 5*DEFAULT_CHAR_LENGTH,
-                                    step = 500,value= 1500
-                                    )
-
-    load_youtube_bt.click(youtube_chat,inputs= [youtube_link,API_key,LLM_option,temperature,max_new_tokens,char_length],outputs=[langchain_status], queue=False)
-
-    clean_chat_btn.click(clear_chat, [], chatbot)
-
-    question.submit(add_text, inputs=[chatbot, question], outputs=[chatbot, question]).then(bot, chatbot, chatbot)
-    submit_btn.click(add_text, inputs=[chatbot, question], outputs=[chatbot, question]).then(bot, chatbot, chatbot)
-
-demo.launch()
 
+import os
 import time
 import gradio as gr
 import logging
 from youtube_transcript_api import YouTubeTranscriptApi
+from langchain.docstore.document import Document
+from langchain.chains.summarize import load_summarize_chain
+from langchain_groq import ChatGroq
 import chatops

 logger = logging.getLogger(__name__)

 DEVICE = 'cpu'

+DEFAULT_CHAR_LENGTH = 1000


+def youtube_link_dataloader(video_link, max_video_length=1000):
+    # Pull the video transcript and wrap it in a single LangChain Document
     video_text = ""
+    meta_data = {"source": f"{video_link}"}
     video_id = video_link.split("watch?v=")[1].split("&")[0]
     srt = YouTubeTranscriptApi.get_transcript(video_id)
     for text_data in srt:
         video_text = video_text + " " + text_data.get("text")
     if len(video_text) > max_video_length:
+        video_text = video_text[0:max_video_length]
+    document = [Document(page_content=video_text, metadata=meta_data)]
+    return document


+def youtube_chat(API_key=None, llm_service='mistralai/Mistral-7B-v0.1', youtube_link=None, char_length=2000):
+    video_document = youtube_link_dataloader(video_link=youtube_link, max_video_length=char_length)
     print("document:", video_document)
+
+    if llm_service == 'mistralai/Mistral-7B-v0.1':
+        llm = chatops.get_hugging_face_model(
+            model_id="mistralai/Mistral-7B-v0.1",
             API_key=API_key,
             temperature=0.1,   # sampling defaults; the simplified UI no longer exposes these
             max_tokens=2048
         )
+    elif llm_service == 'OpenAI':
+        llm = chatops.get_openai_chat_model(API_key=API_key)
+    elif llm_service == 'llama3-8b-8192':
+        os.environ["GROQ_API_KEY"] = API_key
+        llm = ChatGroq(model="llama3-8b-8192")
+
+    # 'stuff' places the whole (truncated) transcript into a single summarization prompt
+    summarize_chain = load_summarize_chain(llm=llm, chain_type='stuff', verbose=True)
+    results = summarize_chain.invoke({'input_documents': video_document})
+    return results['output_text']
+
+iface = gr.Interface(
+    fn=youtube_chat,
+    inputs=[
+        gr.Textbox(label="Add API key", type="password"),
+        gr.Dropdown(['mistralai/Mistral-7B-v0.1', 'llama3-8b-8192'], label='Large Language Model', info='LLM service'),
+        gr.Textbox(label='YouTube link'),
+        gr.Slider(DEFAULT_CHAR_LENGTH, 5000, label="Transcript length (characters)", info="Maximum number of transcript characters passed to the model")
+    ],
+    outputs="text",
+    description="""Summarize a YouTube link using large language models.
+    The objective of this Space is to use large language models to generate a short summary of the provided YouTube link.
+    It helps you generate notes when using YouTube for educational purposes.
+    """,
+)
+
+iface.launch()
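
app.py keeps importing the local chatops module, which is not part of this diff; only its call sites are visible above. A minimal sketch of what the two helpers could look like on top of the pinned langchain-community dependency — the function names and arguments come from the calls in app.py, while the implementations here are assumptions, not the actual module:

    # chatops.py (hypothetical sketch; the real file is not shown in this commit)
    from langchain_community.llms import HuggingFaceEndpoint
    from langchain_community.chat_models import ChatOpenAI

    def get_hugging_face_model(model_id, API_key, temperature=0.1, max_tokens=2048):
        # Wraps the hosted Hugging Face Inference API for the given repo id
        return HuggingFaceEndpoint(
            repo_id=model_id,
            huggingfacehub_api_token=API_key,
            temperature=temperature,
            max_new_tokens=max_tokens,
        )

    def get_openai_chat_model(API_key):
        # Chat-completions model authenticated with the user-supplied key
        return ChatOpenAI(openai_api_key=API_key, temperature=0.1)

Either return value is a LangChain LLM or chat model, so it can be passed straight to load_summarize_chain as in youtube_chat above.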
requirements.txt CHANGED
@@ -1,11 +1,10 @@
 openai
 tiktoken
-chromadb
 langchain
-unstructured
-unstructured[local-inference]
+langchain-core
+langchain-community
+langchain_groq
 transformers
 torch
-faiss-cpu
 sentence-transformers
-youtube-transcript-api
+youtube-transcript-api
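
The Groq path added in app.py depends on the new langchain_groq entry. A minimal, self-contained check of that branch outside Gradio (the API key value is a placeholder):

    import os
    from langchain_groq import ChatGroq

    os.environ["GROQ_API_KEY"] = "<your-groq-api-key>"  # placeholder, not a real key

    llm = ChatGroq(model="llama3-8b-8192")
    reply = llm.invoke("Summarize in one sentence: transformers process tokens in parallel using attention.")
    print(reply.content)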