Spaces:

Paul-Joshi
/

website-summarizers-RAG

Runtime error

App Files Files Community

Paul-Joshi committed on Apr 11, 2024

Commit

b24c6a0

•

1 Parent(s): 51940eb

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -117

app.py DELETED Viewed

@@ -1,117 +0,0 @@
-import streamlit as st
-from langchain_community.document_loaders import WebBaseLoader
-from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain_nomic.embeddings import NomicEmbeddings
-from langchain_community.llms import HuggingFaceHub
-from bs4 import BeautifulSoup
-from langchain_core.runnables import RunnablePassthrough
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain import hub
-from sentence_transformers import SentenceTransformer
-import os
-# Convert string of URLs to list
-def method_get_website_text(urls):
-    urls_list = urls.split("\n")
-    docs = [WebBaseLoader(url).load() for url in urls_list]
-    docs_list = [item for sublist in docs for item in sublist]
-    return docs_list
-#split the text into chunks
-def method_get_text_chunks(text):
-    #text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
-    doc_splits = text_splitter.split_documents(text)
-    return doc_splits
-#convert text chunks into embeddings and store in vector database
-def method_get_vectorstore(document_chunks):
-    embeddings = HuggingFaceEmbeddings()
-    #embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
-    # create a vectorstore from the chunks
-    vector_store = Chroma.from_documents(document_chunks, embeddings)
-    return vector_store
-def get_context_retriever_chain(vector_store,question):
-    # Initialize the retriever
-    retriever = vector_store.as_retriever()
-    # Define the RAG template
-    after_rag_template = """Answer the question based only on the following context:
-    {context}
-    Question: {question}
-    """
-    # Create the RAG prompt template
-    after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
-    # Initialize the Hugging Face language model (LLM)
-    llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2", model_kwargs={"temperature":0.6, "max_length":1024})
-    # Construct the RAG pipeline
-    after_rag_chain = (
-        {"context": retriever, "question": RunnablePassthrough()}
-        | after_rag_prompt
-        | llm
-        | StrOutputParser()
-    )
-    return after_rag_chain.invoke(question)
-def main():
-    st.set_page_config(page_title="Chat with websites", page_icon="🤖")
-    st.title("Chat with websites")
-    # sidebar
-    with st.sidebar:
-        st.header("Settings")
-        website_url = st.text_input("Website URL")
-    if website_url is None or website_url == "":
-        st.info("Please enter a website URL")
-    else:
-        # Input fields
-        st.subheader('Your are gonna interact with the below Website:')
-        st.button("Start", type="primary")
-        st.subheader('Click on the Start button', divider='rainbow')
-        # Button to pre-process input
-        if st.button("Start"):
-            with st.spinner('Tokenizing and Embedding the Website Data'):
-                # get pdf text
-                raw_text = method_get_website_text(website_url)
-                # get the text chunks
-                doc_splits = method_get_text_chunks(raw_text)
-                # create vector store
-                vector_store = method_get_vectorstore(doc_splits)
-        # Input fields
-        question = st.text_input("Question")
-        # Button to process input and get output
-        if st.button('Query Documents'):
-            with st.spinner('Processing...'):
-                # # get pdf text
-                # raw_text = method_get_website_text(website_url)
-                # # get the text chunks
-                # doc_splits = method_get_text_chunks(raw_text)
-                # # create vector store
-                # vector_store = method_get_vectorstore(doc_splits)
-                # Generate response using the RAG pipeline
-                answer = get_context_retriever_chain(vector_store,question)
-                # Display the generated answer
-                split_string = "Question: " + str(question)
-                result = answer.split(split_string)[-1]
-                st.text_area("Answer", value=result, height=300, disabled=True)
-# Run the app only when this file is executed as a script.
-if __name__ == '__main__':
-    main()