Spaces:

Paul-Joshi
/

website-summarizers-RAG

Runtime error

App Files Files Community

Paul-Joshi committed on Apr 11, 2024

Commit

5ac0ce9

verified ·

1 Parent(s): 8370d00

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -21

app.py CHANGED Viewed

@@ -31,12 +31,9 @@ def method_get_text_chunks(text):
     return doc_splits
 #convert text chunks into embeddings and store in vector database
-def method_get_vectorstore(document_chunks,nomic_apikey=None):
-    # create the open-source embedding function
-    if nomic_apikey is None:
-        embeddings = HuggingFaceEmbeddings()
-    else:
-        embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
     # create a vectorstore from the chunks
     vector_store = Chroma.from_documents(document_chunks, embeddings)
@@ -77,33 +74,40 @@ def main():
     with st.sidebar:
         st.header("Settings")
         website_url = st.text_input("Website URL")
-        nomic_apikey = st.text_input("NOMIC API Key for Embeddings")
     if website_url is None or website_url == "":
         st.info("Please enter a website URL")
     else:
         # Input fields
-        question = st.text_input("Question")
-        # Button to process input
-        if st.button('Query Documents'):
-            with st.spinner('Processing...'):
-                st.write(nomic_apikey)
-                if nomic_apikey is None or nomic_apikey == "":
-                    nomic_apikey = None
-                else:
-                    # Set the environment variable
-                    os.environ['NOMIC_API_KEY'] = nomic_apikey
                 # get pdf text
                 raw_text = method_get_website_text(website_url)
                 # get the text chunks
                 doc_splits = method_get_text_chunks(raw_text)
-                #access the environment variable
-                nomic_apikey = os.environ['NOMIC_API_KEY']
                 # create vector store
-                vector_store = method_get_vectorstore(doc_splits,nomic_apikey)
                 # Generate response using the RAG pipeline
                 answer = get_context_retriever_chain(vector_store,question)
                 # Display the generated answer
                 split_string = "Question: " + str(question)

     return doc_splits
 #convert text chunks into embeddings and store in vector database
+def method_get_vectorstore(document_chunks):
+    embeddings = HuggingFaceEmbeddings()
+    #embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
     # create a vectorstore from the chunks
     vector_store = Chroma.from_documents(document_chunks, embeddings)
     with st.sidebar:
         st.header("Settings")
         website_url = st.text_input("Website URL")
     if website_url is None or website_url == "":
         st.info("Please enter a website URL")
     else:
         # Input fields
+        st.subheader('Your are gonna interact with the below Website:')
+        st.button("Start", type="primary")
+        st.subheader('Click on the Start button', divider='rainbow')
+        # Button to pre-process input
+        if st.button("Reset"):
+            with st.spinner('Tokenizing and Embedding the Website Data'):
                 # get pdf text
                 raw_text = method_get_website_text(website_url)
                 # get the text chunks
                 doc_splits = method_get_text_chunks(raw_text)
                 # create vector store
+                vector_store = method_get_vectorstore(doc_splits)
                 # Generate response using the RAG pipeline
+        # Input fields
+        question = st.text_input("Question")
+        # Button to process input and get output
+        if st.button('Query Documents'):
+            with st.spinner('Processing...'):
+                # # get pdf text
+                # raw_text = method_get_website_text(website_url)
+                # # get the text chunks
+                # doc_splits = method_get_text_chunks(raw_text)
+                # # create vector store
+                # vector_store = method_get_vectorstore(doc_splits)
+                # # Generate response using the RAG pipeline
                 answer = get_context_retriever_chain(vector_store,question)
                 # Display the generated answer
                 split_string = "Question: " + str(question)