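"""Streamlit app: chat with websites via a minimal RAG pipeline.

Loads one or more web pages, splits them into chunks, embeds the chunks
into a Chroma vector store, and answers questions with a Hugging Face
hosted LLM grounded in the retrieved context.

Run with: streamlit run app.py (assuming this file is saved as app.py).
"""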
import streamlit as st
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_community.llms import HuggingFaceHub
from bs4 import BeautifulSoup
# from langchain_core.runnables import RunnablePassthrough
# from langchain_core.output_parsers import StrOutputParser
# from langchain_core.prompts import ChatPromptTemplate
def method_get_website_text(urls):
    # Convert the newline-separated string of URLs into a list
    urls_list = urls.split("\n")
    # Load each URL and flatten the per-URL document lists into a single list
    docs = [WebBaseLoader(url).load() for url in urls_list]
    docs_list = [item for sublist in docs for item in sublist]
    return docs_list
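# Usage sketch (the URLs below are placeholders, not from the app):
#   docs = method_get_website_text("https://example.com\nhttps://example.org")
#   len(docs)  # one Document per successfully loaded page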
def method_get_text_chunks(text):
    # Split the documents (the list returned by method_get_website_text)
    # into overlapping chunks
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
    doc_splits = text_splitter.split_documents(text)
    return doc_splits
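# Usage sketch: with chunk_size=7500 and chunk_overlap=100 (both in tokens,
# since the splitter uses the tiktoken encoder), a short page typically stays
# in one chunk, while long pages are split with 100 tokens of overlap so
# context is not lost at chunk boundaries.
#   doc_splits = method_get_text_chunks(docs)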
def method_get_vectorstore(doc_splits):
    # Convert the text chunks into embeddings and store them in a vector database,
    # using the open-source Nomic embedding model as the embedding function
    embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
    # Create a Chroma vector store from the chunks
    vector_store = Chroma.from_documents(doc_splits, embeddings)
    return vector_store
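# Usage sketch: NomicEmbeddings calls the Nomic API, so an API key (typically
# read from the NOMIC_API_KEY environment variable) must be configured. The
# Chroma store here is in-memory and is rebuilt on every query.
#   vector_store = method_get_vectorstore(doc_splits)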
def get_context_retriever_chain(vector_store):
    # Initialize the retriever
    retriever = vector_store.as_retriever()
    # Initialize the language model
    llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", model_kwargs={"temperature": 0.6, "max_length": 512})
    # Define the response template
    response_template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
    return retriever, llm, response_template
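# How the three pieces compose (a sketch; main() below does the same thing):
#   retriever, llm, template = get_context_retriever_chain(vector_store)
#   context = retriever.get_relevant_documents("some question")
#   answer = llm(template.format(context=context, question="some question"))
# HuggingFaceHub reads its token from the HUGGINGFACEHUB_API_TOKEN
# environment variable.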
# def get_context_retriever_chain(vector_store):
# #llm = ChatOpenAI()
# llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", model_kwargs={"temperature":0.6, "max_length":512})
# retriever = vector_store.as_retriever()
# prompt = ChatPromptTemplate.from_messages([
# MessagesPlaceholder(variable_name="chat_history"),
# ("user", "{input}"),
# ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
# ])
# retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
# return retriever_chain, llm
# def method_get_conversation_chain(retriever_chain, question):
# # Use the retriever chain to generate a response to the user query
# response = retriever_chain(question)
# return response
# def method_get_conversation_chain(retriever_chain,llm,question):
# retriever = vectorstore.as_retriever()
# #perform the RAG
# after_rag_template = """Answer the question based only on the following context:
# {context}
# Question: {question}
# """
# after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
# after_rag_chain = (
# {"context": retriever, "question": RunnablePassthrough()}
# | after_rag_prompt
# | model_local
# | StrOutputParser()
# )
# return after_rag_chain.invoke(question)
# #llm = ChatOpenAI()
# llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
# memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
# conversation_chain = ConversationalRetrievalChain.from_llm(
# llm=llm,
# retriever=vectorstore.as_retriever(),
# memory=memory
# )
# return conversation_chain
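# A runnable version of the commented LCEL chain above (a sketch, assuming the
# commented imports RunnablePassthrough / StrOutputParser / ChatPromptTemplate
# are re-enabled; `llm` stands in for the commented `model_local`):
#
#   after_rag_prompt = ChatPromptTemplate.from_template(
#       "Answer the question based only on the following context:\n"
#       "{context}\nQuestion: {question}\n"
#   )
#   after_rag_chain = (
#       {"context": retriever, "question": RunnablePassthrough()}
#       | after_rag_prompt
#       | llm
#       | StrOutputParser()
#   )
#   answer = after_rag_chain.invoke(question)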
def main():
    st.set_page_config(page_title="Chat with websites", page_icon="🤖")
    st.title("Chat with websites")

    # Sidebar
    with st.sidebar:
        st.header("Settings")
        website_url = st.text_input("Website URL")

    if website_url is None or website_url == "":
        st.info("Please enter a website URL")
    else:
        # Input field for the user's question
        question = st.text_input("Question")
        # Button to process input
        if st.button('Query Documents'):
            with st.spinner('Processing...'):
                # Get the website text
                raw_text = method_get_website_text(website_url)
                # Get the text chunks
                doc_splits = method_get_text_chunks(raw_text)
                # Create the vector store
                vectorstore = method_get_vectorstore(doc_splits)
                st.write(doc_splits)

                # retriever_chain = get_context_retriever_chain(vector_store)
                # # create conversation chain
                # answer = method_get_conversation_chain(retriever_chain, question)
                # st.text_area("Answer", value=answer, height=300, disabled=True)

                # Get the retriever, LLM, and response template
                retriever, llm, response_template = get_context_retriever_chain(vectorstore)
                # Retrieve the documents relevant to the question
                context = retriever.get_relevant_documents(question)
                # Fill the template with the retrieved context and the question,
                # then let the LLM answer from that grounded prompt
                prompt = response_template.format(context=context, question=question)
                answer = llm(prompt)
                # Display the generated answer
                st.text_area("Answer", value=answer, height=300, disabled=True)


if __name__ == '__main__':
    main()