sankar12345 commited on
Commit
6d05a0d
·
verified ·
1 Parent(s): 0310be1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -103
app.py CHANGED
@@ -1,104 +1,111 @@
1
- import streamlit as st
2
- from dotenv import load_dotenv
3
- from PyPDF2 import PdfReader
4
- from langchain.text_splitter import CharacterTextSplitter
5
- from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
- from langchain.vectorstores import FAISS
7
- from langchain.chat_models import ChatOpenAI
8
- from langchain.memory import ConversationBufferMemory
9
- from langchain.chains import ConversationalRetrievalChain
10
- from htmlTemplates import css, bot_template, user_template
11
- from langchain.llms import HuggingFaceHub
12
-
13
- def get_pdf_text(pdf_docs):
14
- text = ""
15
- for pdf in pdf_docs:
16
- pdf_reader = PdfReader(pdf)
17
- for page in pdf_reader.pages:
18
- text += page.extract_text()
19
- return text
20
-
21
-
22
- def get_text_chunks(text):
23
- text_splitter = CharacterTextSplitter(
24
- separator="\n",
25
- chunk_size=1000,
26
- chunk_overlap=200,
27
- length_function=len
28
- )
29
- chunks = text_splitter.split_text(text)
30
- return chunks
31
-
32
-
33
- def get_vectorstore(text_chunks):
34
- # embeddings = OpenAIEmbeddings()
35
- embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
36
- vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
37
- return vectorstore
38
-
39
-
40
- def get_conversation_chain(vectorstore):
41
- # llm = ChatOpenAI()
42
- llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
43
-
44
- memory = ConversationBufferMemory(
45
- memory_key='chat_history', return_messages=True)
46
- conversation_chain = ConversationalRetrievalChain.from_llm(
47
- llm=llm,
48
- retriever=vectorstore.as_retriever(),
49
- memory=memory
50
- )
51
- return conversation_chain
52
-
53
-
54
- def handle_userinput(user_question):
55
- response = st.session_state.conversation({'question': user_question})
56
- st.session_state.chat_history = response['chat_history']
57
-
58
- for i, message in enumerate(st.session_state.chat_history):
59
- if i % 2 == 0:
60
- st.write(user_template.replace(
61
- "{{MSG}}", message.content), unsafe_allow_html=True)
62
- else:
63
- st.write(bot_template.replace(
64
- "{{MSG}}", message.content), unsafe_allow_html=True)
65
-
66
-
67
- def main():
68
- load_dotenv()
69
- st.set_page_config(page_title="Chat with multiple PDFs",
70
- page_icon=":books:")
71
- st.write(css, unsafe_allow_html=True)
72
-
73
- if "conversation" not in st.session_state:
74
- st.session_state.conversation = None
75
- if "chat_history" not in st.session_state:
76
- st.session_state.chat_history = None
77
-
78
- st.header("Chat with multiple PDFs :books:")
79
- user_question = st.text_input("Ask a question about your documents:")
80
- if user_question:
81
- handle_userinput(user_question)
82
-
83
- with st.sidebar:
84
- st.subheader("Your documents")
85
- pdf_docs = st.file_uploader(
86
- "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
87
- if st.button("Process"):
88
- with st.spinner("Processing"):
89
- # get pdf text
90
- raw_text = get_pdf_text(pdf_docs)
91
-
92
- # get the text chunks
93
- text_chunks = get_text_chunks(raw_text)
94
-
95
- # create vector store
96
- vectorstore = get_vectorstore(text_chunks)
97
-
98
- # create conversation chain
99
- st.session_state.conversation = get_conversation_chain(
100
- vectorstore)
101
-
102
-
103
- if __name__ == '__main__':
 
 
 
 
 
 
 
104
  main()
 
1
+ import os
2
+ import streamlit as st
3
+ from dotenv import load_dotenv
4
+ from PyPDF2 import PdfReader
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.chat_models import ChatOpenAI
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain.chains import ConversationalRetrievalChain
11
+ from htmlTemplates import css, bot_template, user_template
12
+ from langchain.llms import HuggingFaceHub
13
+
14
# Load environment variables (API keys, etc.) from a local .env file
# before any of them are read below.
load_dotenv()

# Hugging Face API token read from the environment.
# NOTE(review): this constant is never passed to HuggingFaceHub further
# down — authentication currently relies on the HUGGINGFACEHUB_API_TOKEN
# env var that langchain reads itself; confirm which variable name the
# deployment actually sets.
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
19
+
20
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of file-like objects (as produced by
            ``st.file_uploader``) that PyPDF2 can read.

    Returns:
        A single string with all page text joined together; empty string
        when no documents are given.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() returns None for pages with no text layer
            # (e.g. scanned images); guard so concatenation never raises.
            text += page.extract_text() or ""
    return text
27
+
28
+
29
def get_text_chunks(text):
    """Split *text* into overlapping chunks suitable for embedding.

    Chunks are newline-separated, at most 1000 characters long, with a
    200-character overlap so context is preserved across boundaries.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
38
+
39
+
40
def get_vectorstore(text_chunks):
    """Embed the text chunks and index them in an in-memory FAISS store."""
    # embeddings = OpenAIEmbeddings()  # paid alternative, kept for reference
    instructor_embeddings = HuggingFaceInstructEmbeddings(
        model_name="hkunlp/instructor-xl")
    return FAISS.from_texts(texts=text_chunks, embedding=instructor_embeddings)
45
+
46
+
47
def get_conversation_chain(vectorstore):
    """Build a retrieval-augmented conversational chain over *vectorstore*.

    Returns:
        A ConversationalRetrievalChain whose buffer memory accumulates the
        dialogue under the 'chat_history' key (as message objects).
    """
    # llm = ChatOpenAI()  # paid alternative, kept for reference
    # Pass the token read at module level explicitly: previously it was
    # fetched into HUGGINGFACE_TOKEN but never used, so auth silently
    # depended on the HUGGINGFACEHUB_API_TOKEN env var instead. When the
    # value is None, langchain still falls back to that env var, so this
    # stays backward compatible.
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-xxl",
        huggingfacehub_api_token=HUGGINGFACE_TOKEN,
        model_kwargs={"temperature": 0.5, "max_length": 512},
    )

    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )
    return conversation_chain
59
+
60
+
61
def handle_userinput(user_question):
    """Run *user_question* through the conversation chain and render the chat.

    Even indices in the stored history are user turns, odd indices are bot
    replies; each is rendered with its matching HTML template.
    """
    # Guard: the chain only exists after documents were processed; without
    # this, asking a question first raises TypeError (None is not callable).
    if st.session_state.conversation is None:
        st.warning("Please upload and process your documents first.")
        return

    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content),
                 unsafe_allow_html=True)
72
+
73
+
74
def main():
    """Streamlit entry point: page setup, Q&A input, and PDF-processing sidebar."""
    load_dotenv()  # idempotent; also called at import time
    st.set_page_config(page_title="Chat with multiple PDFs",
                       page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    # Initialise session state on first run so later accesses never fail.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            # Guard: with no uploads the pipeline would hand FAISS an empty
            # list of texts, which raises instead of indexing.
            if not pdf_docs:
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing"):
                    # get pdf text
                    raw_text = get_pdf_text(pdf_docs)

                    # get the text chunks
                    text_chunks = get_text_chunks(raw_text)

                    # create vector store
                    vectorstore = get_vectorstore(text_chunks)

                    # create conversation chain
                    st.session_state.conversation = get_conversation_chain(
                        vectorstore)
108
+
109
+
110
# Run the app only when executed directly, not when imported as a module.
if __name__ == '__main__':
    main()