Paul-Joshi committed on
Commit
080b037
·
verified ·
1 Parent(s): 0260572

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -43
app.py CHANGED
@@ -26,23 +26,11 @@ def method_get_text_chunks(text):
26
  #split the text into chunks
27
 
28
  #text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
29
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
30
  doc_splits = text_splitter.split_documents(text)
31
  return doc_splits
32
-
33
-
34
- def method_get_vectorstore(document_chunks):
35
- #convert text chunks into embeddings and store in vector database
36
-
37
- # create the open-source embedding function
38
- #embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
39
- embeddings = HuggingFaceEmbeddings()
40
-
41
- # create a vectorstore from the chunks
42
- vector_store = Chroma.from_documents(document_chunks, embeddings)
43
- return vector_store
44
-
45
- def get_context_retriever_chain(vector_store, question):
46
  # Initialize the retriever
47
  retriever = vector_store.as_retriever()
48
 
@@ -60,40 +48,13 @@ def get_context_retriever_chain(vector_store, question):
60
 
61
  # Construct the RAG pipeline
62
  after_rag_chain = (
63
- {"context": retriever, "question": question}
64
  | after_rag_prompt
65
  | llm
66
  | StrOutputParser()
67
  )
68
 
69
- # Invoke the RAG pipeline and return the generated answer
70
  return after_rag_chain.invoke(question)
71
-
72
- # def get_context_retriever_chain(vector_store,question):
73
- # # Initialize the retriever
74
- # retriever = vector_store.as_retriever()
75
-
76
- # # Define the RAG template
77
- # after_rag_template = """Answer the question based only on the following context:
78
- # {context}
79
- # Question: {question}
80
- # """
81
-
82
- # # Create the RAG prompt template
83
- # after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
84
-
85
- # # Initialize the Hugging Face language model (LLM)
86
- # llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2")
87
-
88
- # # Construct the RAG pipeline
89
- # after_rag_chain = (
90
- # {"context": retriever, "question": RunnablePassthrough()}
91
- # | after_rag_prompt
92
- # | llm
93
- # | StrOutputParser()
94
- # )
95
-
96
- # return after_rag_chain.invoke(question)
97
 
98
  def main():
99
  st.set_page_config(page_title="Chat with websites", page_icon="🤖")
 
26
  #split the text into chunks
27
 
28
  #text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
29
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
30
  doc_splits = text_splitter.split_documents(text)
31
  return doc_splits
32
+
33
+ def get_context_retriever_chain(vector_store,question):
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Initialize the retriever
35
  retriever = vector_store.as_retriever()
36
 
 
48
 
49
  # Construct the RAG pipeline
50
  after_rag_chain = (
51
+ {"context": retriever, "question": RunnablePassthrough()}
52
  | after_rag_prompt
53
  | llm
54
  | StrOutputParser()
55
  )
56
 
 
57
  return after_rag_chain.invoke(question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  def main():
60
  st.set_page_config(page_title="Chat with websites", page_icon="🤖")