import streamlit as st
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_community.llms import HuggingFaceHub
from bs4 import BeautifulSoup
# from langchain_core.runnables import RunnablePassthrough
# from langchain_core.output_parsers import StrOutputParser
# from langchain_core.prompts import ChatPromptTemplate
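# Assumed setup (not pinned in this file): streamlit, langchain, langchain-community,
# langchain-nomic, chromadb, beautifulsoup4 and tiktoken installed via requirements.txt.
# NomicEmbeddings and HuggingFaceHub typically read credentials from the
# NOMIC_API_KEY and HUGGINGFACEHUB_API_TOKEN environment variables.
# Run locally with: streamlit run app.py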
def method_get_website_text(urls):
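    """Load each URL (one URL per line) with WebBaseLoader and return a flat list of Documents."""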
# Convert string of URLs to list
urls_list = urls.split("\n")
docs = [WebBaseLoader(url).load() for url in urls_list]
docs_list = [item for sublist in docs for item in sublist]
return docs_list
def method_get_text_chunks(docs_list):
    # Split the loaded documents into chunks
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
    doc_splits = text_splitter.split_documents(docs_list)
    return doc_splits
def method_get_vectorstore(doc_splits):
    # Convert the text chunks into embeddings and store them in a vector database
    # create the open-source embedding function
    embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
    # create a vectorstore from the chunks
    vector_store = Chroma.from_documents(doc_splits, embeddings)
    return vector_store
def get_context_retriever_chain(vector_store):
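    """Return the building blocks for answering a question: a retriever over the
    vector store, a HuggingFaceHub LLM, and a prompt template with {context} and
    {question} placeholders."""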
# Initialize the retriever
retriever = vector_store.as_retriever()
# Initialize the language model
llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", model_kwargs={"temperature": 0.6, "max_length": 512})
# Define the response template
response_template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
return retriever, llm, response_template
# def get_context_retriever_chain(vector_store):
# #llm = ChatOpenAI()
# llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", model_kwargs={"temperature":0.6, "max_length":512})
# retriever = vector_store.as_retriever()
# prompt = ChatPromptTemplate.from_messages([
# MessagesPlaceholder(variable_name="chat_history"),
# ("user", "{input}"),
# ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
# ])
# retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
# return retriever_chain, llm
# def method_get_conversation_chain(retriever_chain, question):
# # Use the retriever chain to generate a response to the user query
# response = retriever_chain(question)
# return response
# def method_get_conversation_chain(retriever_chain,llm,question):
# retriever = vectorstore.as_retriever()
# #perform the RAG
# after_rag_template = """Answer the question based only on the following context:
# {context}
# Question: {question}
# """
# after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
# after_rag_chain = (
# {"context": retriever, "question": RunnablePassthrough()}
# | after_rag_prompt
# | model_local
# | StrOutputParser()
# )
# return after_rag_chain.invoke(question)
# #llm = ChatOpenAI()
# llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
# memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
# conversation_chain = ConversationalRetrievalChain.from_llm(
# llm=llm,
# retriever=vectorstore.as_retriever(),
# memory=memory
# )
# return conversation_chain
def main():
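    """Streamlit entry point: take a website URL and a question, index the site,
    retrieve the relevant chunks and show the LLM's answer."""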
st.set_page_config(page_title="Chat with websites", page_icon="🤖")
st.title("Chat with websites")
# sidebar
with st.sidebar:
st.header("Settings")
website_url = st.text_input("Website URL")
if website_url is None or website_url == "":
st.info("Please enter a website URL")
else:
# Input fields
question = st.text_input("Question")
# Button to process input
if st.button('Query Documents'):
with st.spinner('Processing...'):
                # get the website text
raw_text = method_get_website_text(website_url)
# get the text chunks
doc_splits = method_get_text_chunks(raw_text)
# create vector store
vectorstore = method_get_vectorstore(doc_splits)
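                # Show the document chunks that were indexed (handy for inspection/debugging)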
st.write(doc_splits)
# retriever_chain = get_context_retriever_chain(vector_store)
# # create conversation chain
# answer = method_get_conversation_chain(retriever_chain,question)
# st.text_area("Answer", value=answer, height=300, disabled=True)
# Get the retriever, LLM, and response template
retriever, llm, response_template = get_context_retriever_chain(vectorstore)
                # Retrieve the chunks most relevant to the question
                context_docs = retriever.get_relevant_documents(question)
                context = "\n\n".join(doc.page_content for doc in context_docs)
                # Fill the prompt template with the retrieved context and the question,
                # then ask the LLM to answer the fully formed prompt
                prompt = response_template.format(context=context, question=question)
                answer = llm(prompt)
# Display the generated answer
st.text_area("Answer", value=answer, height=300, disabled=True)
if __name__ == '__main__':
main()