import streamlit as st
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_community.llms import HuggingFaceHub
from bs4 import BeautifulSoup  # HTML parser used by WebBaseLoader
# from langchain_core.runnables import RunnablePassthrough
# from langchain_core.output_parsers import StrOutputParser
# from langchain_core.prompts import ChatPromptTemplate
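
# The two hosted services used below need credentials at runtime. A minimal
# setup sketch, assuming the standard environment variables these libraries
# read (NOMIC_API_KEY for NomicEmbeddings, HUGGINGFACEHUB_API_TOKEN for
# HuggingFaceHub); on a Hugging Face Space these would typically be set as
# repository secrets rather than in code:
#
#   import os
#   os.environ["NOMIC_API_KEY"] = "..."             # Nomic embedding API token
#   os.environ["HUGGINGFACEHUB_API_TOKEN"] = "..."  # Hugging Face Hub token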
def method_get_website_text(urls):
    """Load raw documents from a newline-separated string of URLs."""
    # Convert the string of URLs to a list
    urls_list = urls.split("\n")
    docs = [WebBaseLoader(url).load() for url in urls_list]
    # Flatten the per-URL lists of Documents into a single list
    docs_list = [item for sublist in docs for item in sublist]
    return docs_list
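
# Usage sketch (hypothetical URLs; multiple URLs are separated by newlines):
#   docs = method_get_website_text("https://example.com\nhttps://example.org")
#   # -> a flat list of LangChain Document objects, one or more per URL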
def method_get_text_chunks(text):
    """Split the loaded documents into chunks."""
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
    # Split the documents passed in (the original referenced an undefined docs_list here)
    doc_splits = text_splitter.split_documents(text)
    return doc_splits
def method_get_vectorstore(doc_splits):
    """Convert text chunks into embeddings and store them in a vector database."""
    # Create the embedding function (Nomic's hosted embedding model)
    embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
    # Create a vector store from the chunks
    vector_store = Chroma.from_documents(doc_splits, embeddings)
    return vector_store
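
# Note: Chroma.from_documents builds an in-memory index here, so the store is
# rebuilt on every query. If persistence were wanted, Chroma also accepts a
# persist_directory argument; a hedged sketch, the directory name being an
# arbitrary choice rather than part of the original app:
#   vector_store = Chroma.from_documents(doc_splits, embeddings, persist_directory="./chroma_db")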
def get_context_retriever_chain(vector_store):
    """Return the retriever, LLM, and prompt template used to answer a question."""
    # Initialize the retriever
    retriever = vector_store.as_retriever()
    # Initialize the language model
    llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", model_kwargs={"temperature": 0.6, "max_length": 512})
    # Define the response template
    response_template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
    return retriever, llm, response_template
# Earlier variants of the retrieval and answer chain, kept for reference.
# def get_context_retriever_chain(vector_store):
#     # llm = ChatOpenAI()
#     llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", model_kwargs={"temperature": 0.6, "max_length": 512})
#     retriever = vector_store.as_retriever()
#     prompt = ChatPromptTemplate.from_messages([
#         MessagesPlaceholder(variable_name="chat_history"),
#         ("user", "{input}"),
#         ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
#     ])
#     retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
#     return retriever_chain, llm

# def method_get_conversation_chain(retriever_chain, question):
#     # Use the retriever chain to generate a response to the user query
#     response = retriever_chain(question)
#     return response

# def method_get_conversation_chain(retriever_chain, llm, question):
#     retriever = vectorstore.as_retriever()
#     # Perform the RAG step
#     after_rag_template = """Answer the question based only on the following context:
#     {context}
#     Question: {question}
#     """
#     after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
#     after_rag_chain = (
#         {"context": retriever, "question": RunnablePassthrough()}
#         | after_rag_prompt
#         | model_local
#         | StrOutputParser()
#     )
#     return after_rag_chain.invoke(question)

#     # Leftover from another variant using ConversationalRetrievalChain with memory:
#     # llm = ChatOpenAI()
#     llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 512})
#     memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
#     conversation_chain = ConversationalRetrievalChain.from_llm(
#         llm=llm,
#         retriever=vectorstore.as_retriever(),
#         memory=memory
#     )
#     return conversation_chain
def main():
    st.set_page_config(page_title="Chat with websites", page_icon="🤖")
    st.title("Chat with websites")

    # Sidebar
    with st.sidebar:
        st.header("Settings")
        website_url = st.text_input("Website URL")

    if website_url is None or website_url == "":
        st.info("Please enter a website URL")
    else:
        # Input field for the user's question
        question = st.text_input("Question")
        # Button to process input
        if st.button('Query Documents'):
            with st.spinner('Processing...'):
                # Get the website text
                raw_text = method_get_website_text(website_url)
                # Split it into chunks
                doc_splits = method_get_text_chunks(raw_text)
                # Create the vector store
                vectorstore = method_get_vectorstore(doc_splits)
                # st.write(doc_splits)  # Debug: inspect the chunks
                # Earlier variant:
                # retriever_chain = get_context_retriever_chain(vector_store)
                # answer = method_get_conversation_chain(retriever_chain, question)
                # st.text_area("Answer", value=answer, height=300, disabled=True)
                # Get the retriever, LLM, and response template
                retriever, llm, response_template = get_context_retriever_chain(vectorstore)
                # Retrieve the chunks relevant to the question
                context_docs = retriever.get_relevant_documents(question)
                context = "\n\n".join(doc.page_content for doc in context_docs)
                # Fill in the template first, so the LLM actually sees the retrieved
                # context (the original formatted the template but sent the bare
                # question to the model)
                prompt = response_template.format(context=context, question=question)
                answer = llm(prompt)
                # Display the generated answer
                st.text_area("Answer", value=answer, height=300, disabled=True)

if __name__ == '__main__':
    main()
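
# To run locally (standard Streamlit invocation; the filename is an assumption):
#   streamlit run app.py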