Samarth991 committed on
Commit
b33089c
·
verified ·
1 Parent(s): 8d2e26b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -150
app.py CHANGED
@@ -1,15 +1,10 @@
 
1
  import time
2
  import gradio as gr
3
  import logging
4
- from langchain_community.document_loaders import OnlinePDFLoader
5
- # from langchain.document_loaders import PDFMinerLoader,CSVLoader ,UnstructuredWordDocumentLoader,TextLoader,OnlinePDFLoader
6
- from langchain.text_splitter import CharacterTextSplitter
7
- from langchain_community.embeddings import SentenceTransformerEmbeddings
8
- from langchain.vectorstores import FAISS
9
- from langchain.chains import RetrievalQA
10
- from langchain.prompts import PromptTemplate
11
- from langchain.docstore.document import Document
12
  from youtube_transcript_api import YouTubeTranscriptApi
 
 
13
  import chatops
14
 
15
  logger = logging.getLogger(__name__)
@@ -26,166 +21,56 @@ EXAMPLES = ["https://www.youtube.com/watch?v=aircAruvnKk&ab_channel=3Blue1Brown"
26
  "https://www.youtube.com/watch?v=WUvTyaaNkzM"
27
  ]
28
 
29
-
30
 
31
  def clear_chat():
32
  return []
33
 
34
- def get_text_from_youtube_link(video_link,max_video_length=800):
35
  video_text = ""
 
36
  video_id = video_link.split("watch?v=")[1].split("&")[0]
37
  srt = YouTubeTranscriptApi.get_transcript(video_id)
38
  for text_data in srt:
39
  video_text = video_text + " " + text_data.get("text")
40
  if len(video_text) > max_video_length:
41
- print(video_text)
42
- return video_text[0:max_video_length]
43
- else:
44
- print("SRT might be disabled for the video . Uunable to get SRT")
45
- return video_text
46
-
47
- def process_documents(documents,data_chunk=1500,chunk_overlap=100):
48
- text_splitter = CharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap,separator='\n')
49
- texts = text_splitter.split_documents(documents)
50
- return texts
51
-
52
- def process_youtube_link(link, document_name="youtube-content",char_length=1000):
53
- try:
54
- metadata = {"source": f"{document_name}.txt"}
55
- return [Document(page_content=get_text_from_youtube_link(video_link=link,max_video_length=char_length), metadata=metadata)]
56
- except Exception as err:
57
- logger.error(f'Error in reading document. {err}')
58
-
59
-
60
- def create_prompt():
61
- prompt_template = """As a chatbot asnwer the questions regarding the content in the video.
62
- Use the following context to answer.
63
- If you don't know the answer, just say I don't know.
64
-
65
- {context}
66
 
67
- Question: {question}
68
- Answer :"""
69
- prompt = PromptTemplate(
70
- template=prompt_template, input_variables=["context", "question"]
71
- )
72
- return prompt
73
 
74
- def youtube_chat(youtube_link,API_key,llm='HuggingFace',temperature=0.1,max_tokens=1096,char_length=1500):
75
 
76
- document = process_youtube_link(link=youtube_link,char_length=char_length)
77
  print("docuemt:",document)
78
- embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-base',model_kwargs={"device": DEVICE})
79
- texts = process_documents(documents=document)
80
- global vector_db
81
- vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
82
- global qa
83
-
84
- if llm == 'HuggingFace':
85
- chat = chatops.get_hugging_face_model(
86
- model_id="tiiuae/falcon-7b-instruct",
87
  API_key=API_key,
88
  temperature=temperature,
89
  max_tokens=max_tokens
90
  )
91
- else:
92
- chat = chatops.get_openai_chat_model(API_key=API_key)
93
- chain_type_kwargs = {"prompt": create_prompt()}
 
 
94
 
95
- qa = RetrievalQA.from_chain_type(llm=chat,
96
- chain_type='stuff',
97
- retriever=vector_db.as_retriever(),
98
- chain_type_kwargs=chain_type_kwargs,
99
- return_source_documents=True
100
- )
101
  return "Youtube link Processing completed ..."
102
 
103
- def infer(question, history):
104
- # res = []
105
- # # for human, ai in history[:-1]:
106
- # # pair = (human, ai)
107
- # # res.append(pair)
108
-
109
- # chat_history = res
110
- result = qa({"query": question})
111
- matching_docs_score = vector_db.similarity_search_with_score(question)
112
-
113
- return result["result"]
114
-
115
- def bot(history):
116
- response = infer(history[-1][0], history)
117
- history[-1][1] = ""
118
-
119
- for character in response:
120
- history[-1][1] += character
121
- time.sleep(0.05)
122
- yield history
123
-
124
- def add_text(history, text):
125
- history = history + [(text, None)]
126
- return history, ""
127
-
128
-
129
- css="""
130
- #col-container {max-width: 2048px; margin-left: auto; margin-right: auto;}
131
- """
132
-
133
- title = """
134
- <div style="text-align: center;max-width: 2048px;">
135
- <h1>Chat with Youtube Videos </h1>
136
- <p style="text-align: center;">Upload a youtube link of any video-lecture/song/Research/Conference & ask Questions to chatbot with the tool.
137
- <i> Tools uses State of the Art Models from HuggingFace/OpenAI so, make sure to add your key.</i>
138
- </p>
139
- </div>
140
- """
141
-
142
- with gr.Blocks(css=css) as demo:
143
- with gr.Row():
144
- with gr.Column(elem_id="col-container"):
145
- gr.HTML(title)
146
-
147
- with gr.Column():
148
- with gr.Row():
149
- LLM_option = gr.Dropdown(['HuggingFace','OpenAI'],label='Select HuggingFace/OpenAI')
150
- API_key = gr.Textbox(label="Add API key", type="password",autofocus=True)
151
-
152
- with gr.Group():
153
- chatbot = gr.Chatbot(height=270)
154
-
155
- with gr.Row():
156
- question = gr.Textbox(label="Type your question !",lines=1)
157
- with gr.Row():
158
- submit_btn = gr.Button(value="Send message", variant="primary", scale = 1)
159
- clean_chat_btn = gr.Button("Delete Chat")
160
-
161
- with gr.Column():
162
- with gr.Row():
163
- youtube_link = gr.Textbox(label="Add your you tube Link",text_align='left',autofocus=True)
164
- with gr.Row():
165
- load_youtube_bt = gr.Button("Process Youtube Link",)
166
- langchain_status = gr.Textbox(label="Status", placeholder="", interactive = False)
167
-
168
- with gr.Column():
169
- with gr.Accordion(label='Advanced options', open=False):
170
- max_new_tokens = gr.Slider(
171
- label='Max new tokens',
172
- minimum=2048,
173
- maximum=MAX_NEW_TOKENS,
174
- step=1,
175
- value=DEFAULT_MAX_NEW_TOKENS,
176
- )
177
- temperature = gr.Slider(label='Temperature',minimum=0.1,maximum=4.0,step=0.1,value=DEFAULT_TEMPERATURE,)
178
- char_length = gr.Slider(label='Max Character',
179
- minimum= DEFAULT_CHAR_LENGTH,
180
- maximum = 5*DEFAULT_CHAR_LENGTH,
181
- step = 500,value= 1500
182
- )
183
-
184
- load_youtube_bt.click(youtube_chat,inputs= [youtube_link,API_key,LLM_option,temperature,max_new_tokens,char_length],outputs=[langchain_status], queue=False)
185
-
186
- clean_chat_btn.click(clear_chat, [], chatbot)
187
-
188
- question.submit(add_text, inputs=[chatbot, question], outputs=[chatbot, question]).then(bot, chatbot, chatbot)
189
- submit_btn.click(add_text, inputs=[chatbot, question], outputs=[chatbot, question]).then(bot, chatbot, chatbot)
190
-
191
- demo.launch()
 
1
+ import os
2
  import time
3
  import gradio as gr
4
  import logging
 
 
 
 
 
 
 
 
5
  from youtube_transcript_api import YouTubeTranscriptApi
6
+ from langchain.docstore.document import Document
7
+ from langchain_groq import ChatGroq
8
  import chatops
9
 
10
  logger = logging.getLogger(__name__)
 
21
  "https://www.youtube.com/watch?v=WUvTyaaNkzM"
22
  ]
23
 
24
+ llm = None
25
 
26
  def clear_chat():
27
  return []
28
 
29
def youtube_link_dataloader(video_link, max_video_length=1000):
    """Fetch a YouTube transcript and wrap it in a single ``Document``.

    Args:
        video_link: URL of the video. ``.../watch?v=<id>`` links (with the
            ``v`` parameter in any position) and ``youtu.be/<id>`` short
            links are accepted, with or without a scheme.
        max_video_length: Maximum number of transcript characters to keep.

    Returns:
        A one-element list containing a ``Document`` whose ``page_content``
        is the (possibly truncated) transcript text and whose metadata
        stores the link under the ``"source"`` key.

    Raises:
        ValueError: If no video id can be found in ``video_link``.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import parse_qs, urlparse

    # Without a scheme urlparse would put the host into ``path``.
    normalized = video_link if "://" in video_link else f"https://{video_link}"
    parsed = urlparse(normalized)
    if parsed.netloc.endswith("youtu.be"):
        # Short links carry the id as the first path segment.
        video_id = parsed.path.strip("/").split("/")[0]
    else:
        # Reading the query string works wherever ``v`` appears and ignores
        # fragments such as ``#t=30``.
        video_id = parse_qs(parsed.query).get("v", [""])[0]
    if not video_id:
        raise ValueError(f"Could not find a video id in link: {video_link!r}")

    srt = YouTubeTranscriptApi.get_transcript(video_id)

    # Every segment is prefixed with a single space, which keeps the same
    # text layout as the previous concatenation loop; a segment without
    # "text" contributes an empty string instead of raising.
    video_text = "".join(f" {segment.get('text') or ''}" for segment in srt)
    # Slicing is a no-op when the text is already short enough.
    video_text = video_text[0:max_video_length]

    meta_data = {"source": f"{video_link}"}
    return [Document(page_content=video_text, metadata=meta_data)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
 
 
 
 
 
 
41
 
42
def youtube_chat(temperature=0.1,max_tokens=1096,API_key=None,llm_service='mistralai/Mistral-7B-v0.1',youtube_link=None,char_length=2000):
    """Load a YouTube transcript and create the selected language model.

    Args:
        temperature: Sampling temperature forwarded to the model.
        max_tokens: Maximum number of new tokens forwarded to the model.
        API_key: API key for the selected service.
        llm_service: ``'mistralai/Mistral-7B-v0.1'``, ``'OpenAI'``, or
            ``'llama'`` / ``'llama3-8b-8192'`` (both select the Groq model).
        youtube_link: URL of the video whose transcript is loaded.
        char_length: Maximum number of transcript characters to load.

    Returns:
        A status message: the completion message on success, otherwise a
        short description of what is missing or unsupported.
    """
    # The module-level ``llm`` is the one being (re)configured here; without
    # this declaration the assignments below would only bind a local.
    global llm

    hugging_face_service = 'mistralai/Mistral-7B-v0.1'
    # The UI lists the Groq model by its model id, the code originally only
    # matched 'llama'; accept both spellings.
    groq_services = ('llama', 'llama3-8b-8192')

    # Validate everything up front so no network call is made for a request
    # that cannot succeed.
    if not youtube_link:
        return "Please provide a Youtube link ..."
    if llm_service not in (hugging_face_service, 'OpenAI') + groq_services:
        return f"Unsupported LLM service: {llm_service!r}"
    if llm_service in groq_services and not API_key:
        # os.environ only accepts strings, so a missing key would raise.
        return "API key is required for the llama model ..."

    # The loader's parameters are named video_link / max_video_length.
    # int() guards against a UI slider handing over a float, which cannot be
    # used as a slice bound.
    document = youtube_link_dataloader(video_link=youtube_link,max_video_length=int(char_length))
    print("docuemt:",document)

    if llm_service == hugging_face_service:
        llm = chatops.get_hugging_face_model(
            model_id="mistralai/Mistral-7B-v0.1",
            API_key=API_key,
            temperature=temperature,
            max_tokens=max_tokens
        )
    elif llm_service == 'OpenAI':
        llm = chatops.get_openai_chat_model(API_key=API_key)
    else:
        os.environ["GROQ_API_KEY"] = API_key
        # Forward the user's sampling settings, as the Hugging Face branch does.
        llm = ChatGroq(
            model="llama3-8b-8192",
            temperature=temperature,
            max_tokens=int(max_tokens),
        )

    # NOTE(review): ``document`` and ``llm`` are prepared but no summary is
    # produced in this function yet; only the status message is returned.
    return "Youtube link Processing completed ..."
61
 
62
# Gradio UI. Inputs are passed positionally to youtube_chat, so their order
# must match its parameters:
# temperature, max_tokens, API_key, llm_service, youtube_link, char_length.
iface = gr.Interface(
    fn=youtube_chat,
    inputs=[
        gr.Slider(0.01, 0.1, value=0.01, step=0.01, label="temperature", info="Choose between 0.01 to 0.1"),
        gr.Slider(512, MAX_INPUT_TOKEN_LENGTH, value=1024, step=512, label="max new tokens", info='Max new tokens'),
        gr.Textbox(label="Add API key", type="password"),
        gr.Dropdown(
            # A (label, value) tuple shows the model id to the user while
            # passing 'llama', the value youtube_chat checks for.
            ['mistralai/Mistral-7B-v0.1', ('llama3-8b-8192', 'llama')],
            # Preselect a model so the function never receives None.
            value='mistralai/Mistral-7B-v0.1',
            label='Large Language Model',
            info='LLM Service',
        ),
        gr.Textbox(label='You tube link'),
        # This value is forwarded as char_length (a character budget for the
        # transcript), not a duration, so it is labelled accordingly.
        gr.Slider(1000, 5000, label="Max transcript characters", info="Maximum number of transcript characters to use"),
    ],
    outputs="text",
    description="Summarize your You tube link ",
)

iface.launch()