import time
import logging

import gradio as gr
from langchain.document_loaders import (
    PDFMinerLoader,
    CSVLoader,
    UnstructuredWordDocumentLoader,
    TextLoader,
    OnlinePDFLoader,
)
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from youtube_transcript_api import YouTubeTranscriptApi

import chatops

logger = logging.getLogger(__name__)

DEVICE = 'cpu'
MAX_NEW_TOKENS = 4096           # upper bound for the "Max new tokens" slider
DEFAULT_TEMPERATURE = 0.1
DEFAULT_MAX_NEW_TOKENS = 2048
MAX_INPUT_TOKEN_LENGTH = 4000
DEFAULT_CHAR_LENGTH = 1000
def loading_file():
    """Status message shown while a link is being processed."""
    return "Loading..."


def clear_chat():
    """Reset the chatbot history."""
    return []


def get_text_from_youtube_link(video_link, max_video_length=800):
    """Fetch a video's transcript and return at most `max_video_length` characters."""
    video_text = ""
    video_id = video_link.split("watch?v=")[1].split("&")[0]
    srt = YouTubeTranscriptApi.get_transcript(video_id)
    for text_data in srt:
        video_text = video_text + " " + text_data.get("text")
    if len(video_text) > max_video_length:
        return video_text[0:max_video_length]
    else:
        return video_text
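

# Illustrative sketch (the URL below is hypothetical): given
#     get_text_from_youtube_link("https://www.youtube.com/watch?v=abc123&t=1s")
# the id "abc123" is extracted and YouTubeTranscriptApi.get_transcript returns a
# list of dicts such as [{"text": "...", "start": 0.0, "duration": 1.2}, ...],
# whose "text" fields are concatenated and truncated to max_video_length characters.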
def process_documents(documents, data_chunk=1500, chunk_overlap=100):
    """Split documents into overlapping character chunks for embedding."""
    text_splitter = CharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap, separator='\n')
    texts = text_splitter.split_documents(documents)
    return texts
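

# Note: CharacterTextSplitter splits on the given separator ('\n') and then merges
# pieces back up to roughly chunk_size characters, keeping chunk_overlap characters
# of overlap between consecutive chunks so context is not cut mid-sentence.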
def process_youtube_link(link, document_name="youtube-content"):
    """Wrap the video transcript in a LangChain Document; returns None on failure."""
    try:
        metadata = {"source": f"{document_name}.txt"}
        return [Document(page_content=get_text_from_youtube_link(video_link=link), metadata=metadata)]
    except Exception as err:
        logger.error(f'Error in reading document. {err}')
        return None
def youtube_chat(youtube_link, API_key, llm='HuggingFace', temperature=0.1, max_tokens=1096, char_length=1500):
    """Build the vector store and RetrievalQA chain for the given YouTube link."""
    document = process_youtube_link(link=youtube_link)
    print("Document:", document)
    embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-base', model_kwargs={"device": DEVICE})
    texts = process_documents(documents=document, data_chunk=char_length)  # chunk size taken from the "Max Character" slider
    global vector_db
    vector_db = FAISS.from_documents(documents=texts, embedding=embedding_model)
    global qa
    qa = RetrievalQA.from_chain_type(
        llm=chatops.chat_application(llm_service=llm, key=API_key,
                                     temperature=temperature,
                                     max_tokens=max_tokens),
        chain_type='stuff',
        retriever=vector_db.as_retriever(),
        # chain_type_kwargs=chain_type_kwargs,
        return_source_documents=True,
    )
    return "YouTube link processing completed ..."
def infer(question, history):
    # `history` is accepted for interface symmetry with the Gradio chatbot but is
    # not passed to the chain; RetrievalQA answers each query independently.
    print("Question in infer :", question)
    result = qa({"query": question})
    matching_docs_score = vector_db.similarity_search_with_score(question)
    print(" Matching_doc ", matching_docs_score)
    return result["result"]
def bot(history):
    """Stream the answer character by character into the last chat turn."""
    response = infer(history[-1][0], history)
    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        time.sleep(0.05)
        yield history


def add_text(history, text):
    """Append the user's message as a new (question, pending-answer) pair."""
    history = history + [(text, None)]
    return history, ""
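

# Event flow (as wired below): submitting a question calls add_text() to append a
# (question, None) pair to the chat history, then bot() streams the RetrievalQA
# answer into that pair. "Process YouTube Link" calls youtube_chat() first to build
# the index, and "Delete Chat" calls clear_chat().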
##################################################
##################################################
################### GRADIO #######################
##################################################
##################################################
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""
title ="""<div
style="text-align: center;max-width: 700px;">
<h1>Chat with You Tube videos</h1>
</div>
"""
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Accordion("Chat with Falcon-7B-Instruct", open=False):
            with gr.Column():
                gr.Markdown(
                    """**Chat over YouTube videos with [Falcon-7B-Instruct or ChatGPT]!**"""
                )

        with gr.Group():
            chatbot = gr.Chatbot(height=300)
            with gr.Row():
                question = gr.Textbox(label="Type your question!", lines=1).style(full_width=True)
                submit_btn = gr.Button(value="Send message", variant="primary", scale=1)
                clean_chat_btn = gr.Button("Delete Chat")

        with gr.Column():
            with gr.Box():
                with gr.Row():
                    LLM_option = gr.Dropdown(['HuggingFace', 'OpenAI'], label='Large Language Model Selection', info='LLM Service')
                    API_key = gr.Textbox(label="Add API key", type="password", autofocus=True)

            with gr.Accordion(label='Advanced options', open=False):
                max_new_tokens = gr.Slider(
                    label='Max new tokens',
                    minimum=2048,
                    maximum=MAX_NEW_TOKENS,
                    step=1,
                    value=DEFAULT_MAX_NEW_TOKENS,
                )
                temperature = gr.Slider(
                    label='Temperature',
                    minimum=0.1,
                    maximum=4.0,
                    step=0.1,
                    value=DEFAULT_TEMPERATURE,
                )
                char_length = gr.Slider(
                    label='Max Character',
                    minimum=DEFAULT_CHAR_LENGTH,
                    maximum=5 * DEFAULT_CHAR_LENGTH,
                    step=500,
                    value=1500,
                )

        with gr.Column():
            youtube_link = gr.Textbox(label="Add your YouTube link", text_align='left', autofocus=True)
            with gr.Box():
                with gr.Row():
                    load_youtube_bt = gr.Button("Process YouTube Link").style(full_width=False)
                    langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)

        load_youtube_bt.click(
            youtube_chat,
            inputs=[youtube_link, API_key, LLM_option, temperature, max_new_tokens, char_length],
            outputs=[langchain_status],
            queue=False,
        )
        clean_chat_btn.click(clear_chat, [], chatbot)
        question.submit(add_text, inputs=[chatbot, question], outputs=[chatbot, question]).then(bot, chatbot, chatbot)
        submit_btn.click(add_text, inputs=[chatbot, question], outputs=[chatbot, question]).then(bot, chatbot, chatbot)

demo.launch()