Paul-Joshi committed on
Commit
b24c6a0
1 Parent(s): 51940eb

Delete app.py

Files changed (1)
  1. app.py +0 -117
app.py DELETED
@@ -1,117 +0,0 @@
import streamlit as st
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

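# NOTE (assumption about the deployment setup, not part of the original file):
# HuggingFaceHub reads the API token from the HUGGINGFACEHUB_API_TOKEN
# environment variable, so the app would typically be launched with:
#   export HUGGINGFACEHUB_API_TOKEN=<your token>
#   streamlit run app.py
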
# Convert a newline-separated string of URLs into a flat list of loaded documents
def method_get_website_text(urls):
    urls_list = urls.split("\n")
    docs = [WebBaseLoader(url).load() for url in urls_list]
    docs_list = [item for sublist in docs for item in sublist]
    return docs_list

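# NOTE: RecursiveCharacterTextSplitter measures chunk_size in characters (not
# tokens) by default, so the settings below yield ~7500-character chunks with a
# 100-character overlap.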
# Split the documents into overlapping chunks
def method_get_text_chunks(text):
    # text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
    doc_splits = text_splitter.split_documents(text)
    return doc_splits

# Convert text chunks into embeddings and store them in a vector database
def method_get_vectorstore(document_chunks):
    embeddings = HuggingFaceEmbeddings()
    # embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")  # alternative embedding model

    # create a Chroma vector store from the chunks (in-memory unless a persist directory is configured)
    vector_store = Chroma.from_documents(document_chunks, embeddings)
    return vector_store

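# The function below wires a retrieval-augmented generation (RAG) chain together
# with LangChain Expression Language (LCEL): the dict fans the incoming question
# out to the retriever (filling {context}) and passes it through unchanged
# (filling {question}), then pipes the completed prompt into the LLM and parses
# the result to a string.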
def get_context_retriever_chain(vector_store, question):
    # Initialize the retriever
    retriever = vector_store.as_retriever()

    # Define the RAG prompt template
    after_rag_template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
    after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)

    # Initialize the Hugging Face language model (LLM)
    llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2",
                         model_kwargs={"temperature": 0.6, "max_length": 1024})

    # Construct the RAG pipeline: retrieve -> prompt -> LLM -> string
    after_rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | after_rag_prompt
        | llm
        | StrOutputParser()
    )

    return after_rag_chain.invoke(question)

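# Streamlit re-runs this script from top to bottom on every widget interaction,
# so a value created inside one button's if-block is gone by the time another
# button is clicked; the vector store is therefore kept in st.session_state.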
def main():
    st.set_page_config(page_title="Chat with websites", page_icon="🤖")
    st.title("Chat with websites")

    # sidebar
    with st.sidebar:
        st.header("Settings")
        website_url = st.text_input("Website URL")

    if website_url is None or website_url == "":
        st.info("Please enter a website URL")

    else:
        st.subheader('You are going to interact with the website below:')
        st.subheader('Click on the Start button', divider='rainbow')

        # Button to pre-process input
        if st.button("Start", type="primary"):
            with st.spinner('Tokenizing and Embedding the Website Data'):
                # get website text
                raw_text = method_get_website_text(website_url)
                # get the text chunks
                doc_splits = method_get_text_chunks(raw_text)
                # create vector store and keep it across reruns
                st.session_state.vector_store = method_get_vectorstore(doc_splits)

        # Input field for the question
        question = st.text_input("Question")

        # Button to process input and get output
        if st.button('Query Documents'):
            if "vector_store" not in st.session_state:
                st.warning("Please click Start to index the website first")
            else:
                with st.spinner('Processing...'):
                    # Generate response using the RAG pipeline
                    answer = get_context_retriever_chain(st.session_state.vector_store, question)
                    # HuggingFaceHub may echo the prompt, so display only the
                    # text that follows the question itself
                    split_string = "Question: " + str(question)
                    result = answer.split(split_string)[-1]
                    st.text_area("Answer", value=result, height=300, disabled=True)

if __name__ == '__main__':
    main()