barghavani committed on
Commit
0b76712
·
verified ·
1 Parent(s): 6c1b94a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -69
app.py CHANGED
@@ -1,7 +1,4 @@
1
  import streamlit as st
2
- from bokeh.models.widgets import Button
3
- from bokeh.models import CustomJS
4
- from streamlit_bokeh_events import streamlit_bokeh_events
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  import os
@@ -12,110 +9,113 @@ from langchain_google_genai import ChatGoogleGenerativeAI
12
  from langchain.chains.question_answering import load_qa_chain
13
  from langchain.prompts import PromptTemplate
14
  from dotenv import load_dotenv
 
15
 
16
- # Load environment variables and configure API
17
  load_dotenv()
18
  os.getenv("GOOGLE_API_KEY")
19
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
20
 
 
 
 
 
 
21
  def get_pdf_text(pdf_docs):
22
- text = ""
23
  for pdf in pdf_docs:
24
- pdf_reader = PdfReader(pdf)
25
  for page in pdf_reader.pages:
26
- text += page.extract_text()
27
- return text
 
 
28
 
29
  def get_text_chunks(text):
30
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
31
  chunks = text_splitter.split_text(text)
32
  return chunks
33
 
 
34
  def get_vector_store(text_chunks):
35
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
36
  vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
37
  vector_store.save_local("faiss_index")
38
 
 
39
  def get_conversational_chain():
 
40
  prompt_template = """
41
  Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
42
- provided context just say, 'answer is not available in the context', don't provide the wrong answer\n\n
43
  Context:\n {context}?\n
44
  Question: \n{question}\n
45
  Answer:
46
  """
47
- model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
48
- prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
 
 
 
49
  chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
 
50
  return chain
51
 
 
 
52
  def user_input(user_question):
53
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
 
54
  new_db = FAISS.load_local("faiss_index", embeddings)
55
  docs = new_db.similarity_search(user_question)
 
56
  chain = get_conversational_chain()
57
- response = chain({"input_documents":docs, "question": user_question}, return_only_outputs=True)
 
 
 
 
 
 
58
  st.write("Reply: ", response["output_text"])
59
 
60
- def main():
61
- st.set_page_config("Chat PDF")
62
- st.header("Chat with PDF using Gemini💁")
63
 
64
- # Organizing layout to separate PDF upload and STT
65
- col1, col2 = st.columns(2)
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- with col1:
68
- st.subheader("Upload PDF")
69
- pdf_docs = st.file_uploader("Upload your PDF Files", accept_multiple_files=True)
70
- if st.button("Process PDF"):
71
- with st.spinner("Processing PDF..."):
 
 
 
 
72
  raw_text = get_pdf_text(pdf_docs)
73
  text_chunks = get_text_chunks(raw_text)
74
  get_vector_store(text_chunks)
75
- st.success("PDF processing complete.")
76
-
77
- with col2:
78
- st.subheader("Voice Question")
79
- # Bokeh button to activate speech recognition
80
- stt_button = Button(label="Speak", width=100)
81
- stt_button.js_on_event("button_click", CustomJS(code="""
82
- var recognition = new webkitSpeechRecognition();
83
- recognition.continuous = true;
84
- recognition.interimResults = true;
85
-
86
- recognition.onresult = function (e) {
87
- var value = "";
88
- for (var i = e.resultIndex; i < e.results.length; ++i) {
89
- if (e.results[i].isFinal) {
90
- value += e.results[i][0].transcript;
91
- }
92
- }
93
- if (value != "") {
94
- document.dispatchEvent(new CustomEvent("GET_TEXT", {detail: value}));
95
- }
96
- }
97
- recognition.onerror = function (event) {
98
- console.error('Speech recognition error', event);
99
- }
100
- recognition.start();
101
- """))
102
-
103
- # Streamlit Bokeh event for receiving transcribed text
104
- result = streamlit_bokeh_events(
105
- stt_button,
106
- events="GET_TEXT",
107
- key="listen",
108
- refresh_on_update=False,
109
- override_height=75,
110
- debounce_time=0
111
- )
112
-
113
- # Process the transcribed text
114
- if result:
115
- if "GET_TEXT" in result:
116
- user_question = result.get("GET_TEXT")
117
- st.write(f"Transcribed Question: {user_question}")
118
- user_input(user_question)
119
 
120
  if __name__ == "__main__":
121
- main()
 
1
  import streamlit as st
 
 
 
2
  from PyPDF2 import PdfReader
3
  from langchain.text_splitter import RecursiveCharacterTextSplitter
4
  import os
 
9
  from langchain.chains.question_answering import load_qa_chain
10
  from langchain.prompts import PromptTemplate
11
  from dotenv import load_dotenv
12
+ import speech_recognition as sr
13
 
 
14
  load_dotenv()
15
  os.getenv("GOOGLE_API_KEY")
16
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
17
 
18
+
19
+
20
+
21
+
22
+
23
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of file-like objects accepted by PyPDF2.PdfReader
            (Streamlit uploaded files in this app).

    Returns:
        One string holding the text of all pages, in upload/page order.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # BUG FIX: extract_text() returns None for pages without a text
            # layer (e.g. scanned images); coerce to "" so += never raises.
            text += page.extract_text() or ""
    return text
30
+
31
+
32
 
33
def get_text_chunks(text):
    """Split *text* into overlapping chunks sized for embedding.

    Uses a recursive character splitter with a 10 000-character window and a
    1 000-character overlap so context is preserved across chunk boundaries.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    return splitter.split_text(text)
37
 
38
+
39
def get_vector_store(text_chunks):
    """Embed *text_chunks* with Gemini embeddings and persist a FAISS index.

    The index is written to the local directory "faiss_index", from which
    user_input() later reloads it.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    index = FAISS.from_texts(text_chunks, embedding=embedder)
    index.save_local("faiss_index")
43
 
44
+
45
def get_conversational_chain():
    """Build and return a 'stuff'-type QA chain backed by gemini-pro.

    The prompt instructs the model to answer only from the supplied context
    and to admit when the answer is not present rather than fabricate one.
    """
    prompt_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n
    Answer:
    """
    # Low temperature keeps answers close to the retrieved context.
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
62
 
63
+
64
+
65
def user_input(user_question):
    """Answer *user_question* from the locally persisted FAISS index.

    Reloads the index written by get_vector_store(), retrieves the chunks
    most similar to the question, feeds them through the conversational QA
    chain, and renders the model's reply on the Streamlit page.

    Args:
        user_question: the question string, typed or transcribed from voice.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # NOTE(review): recent langchain releases require
    # allow_dangerous_deserialization=True for load_local — confirm the
    # installed version before deploying.
    new_db = FAISS.load_local("faiss_index", embeddings)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()
    # Debug-leftover print(response) removed; st.write below is the output.
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    st.write("Reply: ", response["output_text"])
80
 
 
 
 
81
 
82
def record_audio():
    """Record one spoken question from the default microphone and transcribe it.

    Uses the Google Speech Recognition backend via the SpeechRecognition
    package. Returns the transcript string, or None when the audio is not
    understood or the service cannot be reached (errors shown via st.error).
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        st.write("Please speak your question...")
        captured = recognizer.listen(source)
        try:
            transcript = recognizer.recognize_google(captured)
            st.write("You said: " + transcript)
            return transcript
        except sr.UnknownValueError:
            st.error("Could not understand audio")
            return None
        except sr.RequestError as e:
            st.error(f"Could not request results; {e}")
            return None
97
 
98
def main():
    """Streamlit entry point: sidebar PDF ingestion plus text/voice Q&A.

    BUG FIX: the committed line read ``ef main():`` (missing the ``d`` of
    ``def``), a SyntaxError that prevented the whole app from loading.
    """
    st.set_page_config("Chat PDF")
    st.header("Chat with PDF using Gemini💁")

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
        if st.button("Submit & Process"):
            with st.spinner("Processing..."):
                raw_text = get_pdf_text(pdf_docs)
                text_chunks = get_text_chunks(raw_text)
                get_vector_store(text_chunks)
                st.success("Done")

    # User can choose to input question via text or voice
    user_question = st.text_input("Ask a Question from the PDF Files")
    if st.button("Record Question via Microphone"):
        user_question = record_audio()

    if user_question:
        user_input(user_question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  if __name__ == "__main__":
121
+ main()