import streamlit as st from langchain_community.document_loaders import WebBaseLoader from langchain.text_splitter import CharacterTextSplitter from langchain_community.vectorstores import Chroma from langchain_nomic.embeddings import NomicEmbeddings from langchain_community.llms import HuggingFaceHub # from langchain_core.runnables import RunnablePassthrough # from langchain_core.output_parsers import StrOutputParser # from langchain_core.prompts import ChatPromptTemplate def method_get_website_text(url): # Convert string of URLs to list urls_list = urls.split("\n") docs = [WebBaseLoader(url).load() for url in urls_list] docs_list = [item for sublist in docs for item in sublist] return docs_list def method_get_text_chunks(text): #split the text into chunks text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100) doc_splits = text_splitter.split_documents(docs_list) return doc_splits def method_get_vectorstore(doc_splits): #convert text chunks into embeddings and store in vector database # create the open-source embedding function embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5") # create a vectorstore from the chunks vector_store = Chroma.from_documents(document_chunks, embeddings) return vectorstore def get_context_retriever_chain(vector_store): # Initialize the retriever retriever = vector_store.as_retriever() # Initialize the language model llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", model_kwargs={"temperature": 0.6, "max_length": 512}) # Define the response template response_template = """Answer the question based only on the following context: {context} Question: {question} """ return retriever, llm, response_template # def get_context_retriever_chain(vector_store): # #llm = ChatOpenAI() # llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", model_kwargs={"temperature":0.6, "max_length":512}) # retriever = vector_store.as_retriever() # prompt = ChatPromptTemplate.from_messages([ # MessagesPlaceholder(variable_name="chat_history"), # ("user", "{input}"), # ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation") # ]) # retriever_chain = create_history_aware_retriever(llm, retriever, prompt) # return retriever_chain, llm # def method_get_conversation_chain(retriever_chain, question): # # Use the retriever chain to generate a response to the user query # response = retriever_chain(question) # return response # def method_get_conversation_chain(retriever_chain,llm,question): # retriever = vectorstore.as_retriever() # #perform the RAG # after_rag_template = """Answer the question based only on the following context: # {context} # Question: {question} # """ # after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template) # after_rag_chain = ( # {"context": retriever, "question": RunnablePassthrough()} # | after_rag_prompt # | model_local # | StrOutputParser() # ) # return after_rag_chain.invoke(question) # #llm = ChatOpenAI() # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512}) # memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True) # conversation_chain = ConversationalRetrievalChain.from_llm( # llm=llm, # retriever=vectorstore.as_retriever(), # memory=memory # ) # return conversation_chain def main(): load_dotenv() st.set_page_config(page_title="Chat with websites", page_icon="🤖") st.title("Chat with websites") # sidebar with st.sidebar: st.header("Settings") website_url = st.text_input("Website URL") if website_url is None or website_url == "": st.info("Please enter a website URL") else: # Input fields question = st.text_input("Question") # Button to process input if st.button('Query Documents'): with st.spinner('Processing...'): # get pdf text raw_text = method_get_website_text(website_url) # get the text chunks doc_splits = method_get_text_chunks(raw_text) # create vector store vectorstore = method_get_vectorstore(doc_splits) st.write(doc_splits) # retriever_chain = get_context_retriever_chain(vector_store) # # create conversation chain # answer = method_get_conversation_chain(retriever_chain,question) # st.text_area("Answer", value=answer, height=300, disabled=True) # Get the retriever, LLM, and response template retriever, llm, response_template = get_context_retriever_chain(vectorstore) # Retrieve relevant context using the retriever context = retriever(question) # Generate response using the LLM llm_response = llm(question) # Apply the response template to format the final answer answer = response_template.format(context=context, question=question) + llm_response # Display the generated answer st.text_area("Answer", value=answer, height=300, disabled=True) if __name__ == '__main__': main()