gowtham28 committed on
Commit
c0c0037
·
verified ·
1 Parent(s): dd4a1ae

Upload 2 files

Browse files
Files changed (2) hide show
  1. main.py +105 -0
  2. requirements.txt +14 -0
main.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ import os
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain.chains.question_answering import load_qa_chain
7
+ from langchain.prompts import PromptTemplate
8
+ from dotenv import load_dotenv
9
+ from langchain_openai import OpenAI, ChatOpenAI
10
+ from langchain_openai import OpenAIEmbeddings
11
+
12
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# os.getenv returns None when the variable is unset, and assigning None into
# os.environ raises TypeError. Only re-export the key when it is present so
# the app can start and surface a missing key when OpenAI is first used.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key:
    os.environ["OPENAI_API_KEY"] = _openai_api_key
14
+
15
def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page in the given PDFs.

    Each item of ``pdf_docs`` is opened with ``PdfReader``. A page whose
    extraction raises is reported on stdout and skipped; a page that yields
    no text contributes nothing. Page texts are joined with no separator.
    """
    collected = []
    for document in pdf_docs:
        for page in PdfReader(document).pages:
            try:
                extracted = page.extract_text()
            except Exception as e:
                print(f"Error reading page: {e}")
                continue
            if extracted:
                collected.append(extracted)
    return "".join(collected)
27
+
28
def get_text_chunks(text):
    """Split ``text`` into overlapping chunks.

    A ``RecursiveCharacterTextSplitter`` configured with a chunk size of 2500
    and an overlap of 750 does the splitting; its ``split_text`` result is
    returned unchanged.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=2500,
        chunk_overlap=750,
    ).split_text(text)
32
+
33
def get_vector_store(text_chunks):
    """Embed ``text_chunks`` and save them locally as a FAISS index.

    The index is built with ``OpenAIEmbeddings`` and saved under the local
    path ``faiss_index``.

    Args:
        text_chunks: The text chunks to embed and index.

    Raises:
        ValueError: If ``text_chunks`` is empty, e.g. because no text could
            be extracted from the uploaded PDFs.
    """
    # Fail early with a clear message: there is nothing to embed, and no
    # usable index can be built from zero texts.
    if not text_chunks:
        raise ValueError(
            "No text chunks to index; the uploaded PDFs contained no extractable text."
        )
    vector_store = FAISS.from_texts(text_chunks, OpenAIEmbeddings())
    vector_store.save_local("faiss_index")
36
+
37
def get_conversational_chain():
    """Build the question-answering chain used to answer user questions.

    Returns:
        The chain produced by ``load_qa_chain`` with ``chain_type="stuff"``,
        a ``gpt-3.5-turbo`` ``ChatOpenAI`` model, and a prompt that takes
        ``context`` and ``question`` variables.
    """
    llm = ChatOpenAI(model="gpt-3.5-turbo")
    qa_prompt = PromptTemplate(
        template="""You are an assistant for teachers. Your objective is to provide
comprehensive and accurate responses based on the context provided. Make sure that
you generate whole output.
context: {context}
question: {question}
""",
        input_variables=["context", "question"],
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
50
+
51
def user_input(user_question):
    """Answer ``user_question`` from the saved index and record the reply.

    Loads the local ``faiss_index``, retrieves the chunks most similar to the
    question, runs the QA chain over them, and appends the answer to
    ``st.session_state.chat_history`` as an assistant message.

    If no index has been saved yet, a hint asking the user to process their
    PDFs is recorded as the assistant reply instead of failing on load.

    Args:
        user_question: The question text entered by the user.
    """
    if not os.path.isdir("faiss_index"):
        # Recorded in the chat history (rather than shown as a transient
        # warning) so the message is still visible after the caller reruns.
        st.session_state.chat_history.append({
            "role": "assistant",
            "content": (
                "Please upload your PDF files and click 'Submit & Process' "
                "before asking a question."
            ),
        })
        return

    embeddings = OpenAIEmbeddings()
    # NOTE(security): allow_dangerous_deserialization=True opts in to
    # pickle-based loading. That is acceptable only because this app writes
    # "faiss_index" itself; never point this at an index from an untrusted
    # source.
    new_db = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()
    with st.spinner("Processing..."):
        response = chain(
            {"input_documents": docs, "question": user_question},
            return_only_outputs=True,
        )
        result = response["output_text"]

    st.session_state.chat_history.append({"role": "assistant", "content": result})
63
+
64
def main():
    """Render the Streamlit app: PDF upload sidebar plus a chat interface.

    The sidebar lets the user upload PDFs and build the local index. The main
    area replays the chat history (with a download button under each
    assistant reply) and accepts new questions.
    """
    st.set_page_config("Chat PDF")
    st.header("Lesson Plan Maker With AI💁")

    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    with st.sidebar:
        st.image("pic123.png")
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
        )
        if st.button("Submit & Process"):
            if not pdf_docs:
                # Nothing uploaded: indexing zero documents cannot succeed.
                st.warning("Please upload at least one PDF file first.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    # Only build the index when there is text to embed
                    # (e.g. scanned PDFs may yield no extractable text).
                    if text_chunks:
                        get_vector_store(text_chunks)
                if text_chunks:
                    st.success("Done")
                else:
                    st.error("No text could be extracted from the uploaded files.")

    # Display chat history
    for idx, chat in enumerate(st.session_state.chat_history):
        with st.chat_message(chat["role"]):
            st.write(chat["content"])
            if chat["role"] == "assistant":
                st.download_button(
                    label="Download",
                    data=chat["content"],
                    file_name=f"response_{idx}.txt",
                    mime="text/plain",
                    key=f"download_{idx}",
                )

    user_question = st.chat_input("Ask a Question from the PDF Files")

    if user_question:
        st.session_state.chat_history.append({"role": "user", "content": user_question})
        st.chat_message("user").write(user_question)
        user_input(user_question)
        # Rerun so the new reply is drawn by the history loop above. Current
        # Streamlit exposes st.rerun; fall back to the older experimental
        # name on installs that predate it.
        rerun = getattr(st, "rerun", None) or st.experimental_rerun
        rerun()


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ python-dotenv
2
+ langchain
3
+ langchain_openai
4
+ pypdf
5
+ faiss-cpu
6
+ Flask
7
+ langchain-core
8
+ langchain-community
9
+ streamlit
10
+ chromadb
11
+ PyPDF2
12
+ PyMuPDF
13
+ langchain-openai
14
+ ipykernel