Madhumitha19 committed on
Commit 4bd855a · verified · 1 Parent(s): 88daab6

Upload 3 files

Files changed (3)
  1. .env +1 -0
  2. app.py +76 -0
  3. requirements.txt +13 -0
.env ADDED
@@ -0,0 +1 @@
+ OPENAI_API_KEY = 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
app.py ADDED
@@ -0,0 +1,76 @@
+ import streamlit as st
+ import os
+ from dotenv import load_dotenv
+ from PyPDF2 import PdfReader
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain_community.embeddings import OpenAIEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.chat_models import ChatOpenAI
+
+ def read_pdf(pdf):
+     text = ""
+     pdf_reader = PdfReader(pdf)
+     for page in pdf_reader.pages:
+         text += page.extract_text()
+     return text
+
+ def get_chunk_data(text):
+     text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=250, length_function=len)
+     chunks = text_splitter.split_text(text)
+     return chunks
+
+ def get_vector_store(text):
+     api_key = os.getenv("OPENAI_API_KEY")
+     embeddings = OpenAIEmbeddings(openai_api_key=api_key)
+     vectorstore = FAISS.from_texts(texts=text, embedding=embeddings)
+     return vectorstore
+
+ def get_conversation(vectorstore):
+     api_key = os.getenv("OPENAI_API_KEY")
+     llm = ChatOpenAI(openai_api_key=api_key)
+     memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+     conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), memory=memory)
+     return conversation_chain
+
+ def handleInput(user_text, conversation_chain):
+     res = conversation_chain({'question': user_text})
+     chat_history = res['chat_history']
+     ans = res['answer']
+     st.write(ans)
+
+ def main():
+     load_dotenv()
+
+     st.set_page_config(page_title="Chat with PDF")
+
+     if "conversation" not in st.session_state:
+         st.session_state.conversation = None
+     if "chat_history" not in st.session_state:
+         st.session_state.chat_history = None
+
+     st.header("Chat With PDF")
+
+     user_text = st.text_input("Ask question:")
+     if user_text and st.session_state.conversation:
+         handleInput(user_text, st.session_state.conversation)
+
+     with st.sidebar:
+         st.subheader("Your Documents")
+         pdf = st.file_uploader("Upload PDF")
+         if pdf and st.button("Submit"):
+             with st.spinner("Processing..."):
+                 # Read data from pdf
+                 raw_text = read_pdf(pdf)
+                 # Split data into chunks
+                 load_chunks = get_chunk_data(raw_text)
+                 # Create a vector store
+                 vector_store = get_vector_store(load_chunks)
+
+                 # Create conversation chain
+                 conversation_chain = get_conversation(vector_store)
+                 st.session_state.conversation = conversation_chain  # Save the conversation chain to session state
+
+ if __name__ == '__main__':
+     main()
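
As a usage sketch only (not a definitive workflow): the helpers in app.py can also be composed outside Streamlit, assuming app.py is importable, a local sample.pdf exists (a placeholder file name), and OPENAI_API_KEY is available from the environment or the .env file above.

    from dotenv import load_dotenv
    from app import read_pdf, get_chunk_data, get_vector_store, get_conversation

    load_dotenv()                             # picks up OPENAI_API_KEY from .env
    raw_text = read_pdf("sample.pdf")         # extract text from every page of the PDF
    chunks = get_chunk_data(raw_text)         # ~1000-character chunks with 250 overlap
    vector_store = get_vector_store(chunks)   # embed the chunks and index them in FAISS
    chain = get_conversation(vector_store)    # retrieval chain with conversation memory

    result = chain({"question": "What is this document about?"})
    print(result["answer"])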
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ langchain
+ python-dotenv
+ ipykernel
+ langchain_community
+ pypdf
+ bs4
+ langchain-text-splitters
+ langchain-openai
+ streamlit
+ faiss-cpu
+ openai
+ huggingface
+ PyPDF2
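
For a local run, the dependencies above would typically be installed with pip install -r requirements.txt, after which the app can be started with streamlit run app.py (standard pip and Streamlit invocations).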