Runtime error
Runtime error
committed on
Browse files
@@ -1,102 +1,152 @@
1 |
import streamlit as st
2 |
3 |
from PyPDF2 import PdfReader
4 |
from langchain.text_splitter import CharacterTextSplitter
5 |
6 |
7 |
from langchain.chat_models import ChatOpenAI
8 |
from langchain.memory import ConversationBufferMemory
9 |
from langchain.chains import ConversationalRetrievalChain
10 |
from css_template import css, bot_template, user_template
11 |
from langchain.llms import HuggingFaceHub
12 |
import os
13 |
# os.environ['FAISS_NO_AVX2'] = '1'
14 |
15 |
def method_get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every PDF.

    Args:
        pdf_docs: Iterable of PDF sources; each item is handed to
            ``PdfReader`` unchanged.

    Returns:
        One string holding the text of all pages, in document order and
        then page order. An empty ``pdf_docs`` yields ``""``.
    """
    # ``"".join`` avoids repeated string reallocation from ``+=``.
    # ``or ""`` guards against a falsy extraction result (e.g. ``None``)
    # so that one unreadable page cannot break the whole concatenation.
    return "".join(
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
chunks = text_splitter.split_text(text)
33 |
return chunks
34 |
35 |
36 |
def method_get_vectorstore(text_chunks):
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        text_chunks: The texts to embed, passed to ``FAISS.from_texts``.

    Returns:
        The FAISS vector store built from ``text_chunks``.
    """
    # Alternative kept from the original: embeddings = OpenAIEmbeddings()
    instructor_embeddings = HuggingFaceInstructEmbeddings(
        model_name="hkunlp/instructor-xl"
    )
    return FAISS.from_texts(texts=text_chunks, embedding=instructor_embeddings)
41 |
42 |
43 |
44 |
45 |
46 |
47 |
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
48 |
conversation_chain = ConversationalRetrievalChain.from_llm(
49 |
50 |
51 |
52 |
53 |
return conversation_chain
54 |
55 |
56 |
def method_handle_userinput(user_question):
57 |
response = st.session_state.conversation({'question': user_question})
58 |
st.session_state.chat_history = response['chat_history']
59 |
60 |
61 |
62 |
63 |
"{{MSG}}", message.content), unsafe_allow_html=True)
64 |
65 |
66 |
"{{MSG}}", message.content), unsafe_allow_html=True)
67 |
68 |
69 |
70 |
71 |
72 |
st.write(css, unsafe_allow_html=True)
73 |
74 |
75 |
76 |
77 |
78 |
79 |
st.header("Converse with multiple PDFs :books:")
80 |
user_question = st.text_input("Ask a question about your documents:")
81 |
if user_question:
82 |
83 |
84 |
with st.sidebar:
85 |
86 |
87 |
88 |
89 |
# get pdf text
90 |
raw_text =
91 |
# get the text chunks
92 |
93 |
# create vector store
94 |
vectorstore = method_get_vectorstore(
95 |
96 |
97 |
98 |
99 |
100 |
101 |
if __name__ == '__main__':
102 |
1 |
import streamlit as st
2 |
from langchain_community.document_loaders import WebBaseLoader
3 |
from langchain.text_splitter import CharacterTextSplitter
4 |
from langchain_community.vectorstores import Chroma
5 |
from langchain_nomic.embeddings import NomicEmbeddings
6 |
7 |
from langchain_community.llms import HuggingFaceHub
8 |
9 |
# from langchain_core.runnables import RunnablePassthrough
10 |
# from langchain_core.output_parsers import StrOutputParser
11 |
# from langchain_core.prompts import ChatPromptTemplate
12 |
13 |
14 |
def method_get_website_text(url):
    """Load the documents found at one or more website URLs.

    Args:
        url: String holding a single URL, or several URLs separated by
            newlines.

    Returns:
        A flat list with the documents loaded from every URL, in input
        order. An empty or whitespace-only ``url`` yields ``[]``.
    """
    # The parameter is ``url``; the previous body read an undefined name
    # ``urls``. Blank entries are dropped so that a trailing newline does
    # not trigger a load of the empty string.
    addresses = [line.strip() for line in url.splitlines() if line.strip()]

    # Each loader returns a list of documents; flatten them into one list.
    return [
        document
        for address in addresses
        for document in WebBaseLoader(address).load()
    ]
20 |
21 |
22 |
def method_get_text_chunks(text):
    """Split loaded documents into token-bounded chunks.

    Args:
        text: The documents to split. Despite the name, this is passed to
            ``split_documents``, so it is expected to be a list of loaded
            documents rather than a plain string.

    Returns:
        The list of document chunks produced by the splitter.
    """
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=7500,
        chunk_overlap=100,
    )
    # Split the argument itself; the previous body referenced an undefined
    # name ``docs_list`` and ignored the parameter.
    return text_splitter.split_documents(text)
27 |
28 |
29 |
def method_get_vectorstore(doc_splits):
    """Embed document chunks and store them in a Chroma vector store.

    Args:
        doc_splits: The document chunks to embed and index.

    Returns:
        The Chroma vector store built from ``doc_splits``.
    """
    # Open-source embedding function.
    embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")

    # Build the store from the parameter and return that same object; the
    # previous body read an undefined ``document_chunks`` and returned an
    # undefined ``vectorstore``.
    return Chroma.from_documents(doc_splits, embeddings)
38 |
39 |
def get_context_retriever_chain(vector_store):
    """Build the retriever, language model and prompt template for answering.

    Args:
        vector_store: Vector store exposing ``as_retriever()``.

    Returns:
        A ``(retriever, llm, response_template)`` tuple, where
        ``response_template`` is a ``str.format`` template with the
        placeholders ``{context}`` and ``{question}``.
    """
    # Initialize the retriever.
    retriever = vector_store.as_retriever()

    # Initialize the language model.
    llm = HuggingFaceHub(
        repo_id="mistralai/Mistral-7B-v0.1",
        model_kwargs={"temperature": 0.6, "max_length": 512},
    )

    # The template must expose both placeholders and be a terminated
    # literal: callers fill it with ``context=`` and ``question=``.
    response_template = """Answer the question based only on the following context:
{context}
Question: {question}
"""

    return retriever, llm, response_template
53 |
54 |
55 |
# def get_context_retriever_chain(vector_store):
56 |
# #llm = ChatOpenAI()
57 |
# llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", model_kwargs={"temperature":0.6, "max_length":512})
58 |
59 |
# retriever = vector_store.as_retriever()
60 |
61 |
# prompt = ChatPromptTemplate.from_messages([
62 |
# MessagesPlaceholder(variable_name="chat_history"),
63 |
# ("user", "{input}"),
64 |
# ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
65 |
# ])
66 |
67 |
# retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
68 |
69 |
# return retriever_chain, llm
70 |
71 |
# def method_get_conversation_chain(retriever_chain, question):
72 |
# # Use the retriever chain to generate a response to the user query
73 |
# response = retriever_chain(question)
74 |
# return response
75 |
76 |
# def method_get_conversation_chain(retriever_chain,llm,question):
77 |
# retriever = vectorstore.as_retriever()
78 |
79 |
# #perform the RAG
80 |
81 |
# after_rag_template = """Answer the question based only on the following context:
82 |
# {context}
83 |
# Question: {question}
84 |
# """
85 |
# after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
86 |
# after_rag_chain = (
87 |
# {"context": retriever, "question": RunnablePassthrough()}
88 |
# | after_rag_prompt
89 |
# | model_local
90 |
# | StrOutputParser()
91 |
# )
92 |
# return after_rag_chain.invoke(question)
93 |
94 |
95 |
# #llm = ChatOpenAI()
96 |
# llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
97 |
98 |
# memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
99 |
# conversation_chain = ConversationalRetrievalChain.from_llm(
100 |
# llm=llm,
101 |
# retriever=vectorstore.as_retriever(),
102 |
# memory=memory
103 |
# )
104 |
# return conversation_chain
105 |
106 |
107 |
def main():
    """Run the Streamlit "Chat with websites" page.

    Reads a website URL from the sidebar and a question from the main
    area. When the "Query Documents" button is pressed, the site is
    loaded, chunked and indexed, the chunks relevant to the question are
    retrieved, and the language model's answer is shown in a text area.
    """
    st.set_page_config(page_title="Chat with websites", page_icon="🤖")
    st.title("Chat with websites")

    # Sidebar input.
    with st.sidebar:
        website_url = st.text_input("Website URL")

    # Nothing can be queried without a URL; prompt for one and stop.
    if not website_url:
        st.info("Please enter a website URL")
        return

    # Input fields.
    question = st.text_input("Question")

    # Button to process input.
    if not st.button('Query Documents'):
        return

    # An empty question would produce a meaningless retrieval and prompt.
    if not question:
        st.info("Please enter a question")
        return

    with st.spinner('Processing...'):
        # Load the website documents.
        raw_text = method_get_website_text(website_url)
        # Split them into chunks.
        doc_splits = method_get_text_chunks(raw_text)
        # Create the vector store.
        vectorstore = method_get_vectorstore(doc_splits)

        # Get the retriever, LLM, and response template.
        retriever, llm, response_template = get_context_retriever_chain(vectorstore)

        # A retriever is not a plain callable; query it through ``invoke``
        # and join the retrieved chunks into one context string.
        context_docs = retriever.invoke(question)
        context = "\n\n".join(doc.page_content for doc in context_docs)

        # Send the filled-in template to the model so the answer is
        # grounded in the retrieved context, not the bare question.
        prompt = response_template.format(context=context, question=question)
        answer = llm.invoke(prompt)

    # Display the generated answer.
    st.text_area("Answer", value=answer, height=300, disabled=True)
150 |
151 |
if __name__ == '__main__':
152 |