droushb committed on
Commit
8b52ce3
1 Parent(s): 902508d

Increased number of documents to 50000

Browse files
Files changed (4) hide show
  1. app.py +10 -2
  2. config.py +1 -1
  3. model/main.py +24 -5
  4. retriever.pkl +3 -0
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
  from config import CONFIG
3
- from model.main import process_query
4
 
5
  st.title("RAG Question Answering System")
6
 
@@ -50,10 +50,18 @@ if st.button("Generate Answear"):
50
  st.write(f"- {doc}")
51
 
52
  st.subheader("Generated Answer")
53
- st.text_area("Generated Answer", value=answer, height=CONFIG['TEXTAREA_HEIGHT'], disabled=True)
54
  except Exception as e:
55
  st.error(f"An error occurred: {e}")
56
 
 
 
 
 
 
 
 
 
57
  st.markdown(
58
  """
59
  <style>
 
1
  import streamlit as st
2
  from config import CONFIG
3
+ from model.main import process_query, prepare_retriever
4
 
5
  st.title("RAG Question Answering System")
6
 
 
50
  st.write(f"- {doc}")
51
 
52
  st.subheader("Generated Answer")
53
+ st.text_area("Generated Answer", value=answer, height=CONFIG['TEXTAREA_HEIGHT'])
54
  except Exception as e:
55
  st.error(f"An error occurred: {e}")
56
 
57
+ # if st.button("Prepare Retriever"):
58
+ # with st.spinner("Preparing retriever..."):
59
+ # try:
60
+ # prepare_retriever()
61
+ # st.success("Retriever prepared successfully!")
62
+ # except Exception as e:
63
+ # st.error(f"Failed to prepare retriever: {e}")
64
+
65
  st.markdown(
66
  """
67
  <style>
config.py CHANGED
@@ -1,6 +1,6 @@
1
  CONFIG = {
2
  "DATASET": "aalksii/ml-arxiv-papers",
3
- "MAX_NUM_OF_RECORDS": 1000,
4
  "TEXTAREA_HEIGHT": 200,
5
  "CHUNK_SIZE": 200,
6
  "OPENAI_ENGINE": "gpt-4o-mini",
 
1
  CONFIG = {
2
  "DATASET": "aalksii/ml-arxiv-papers",
3
+ "MAX_NUM_OF_RECORDS": 50000,
4
  "TEXTAREA_HEIGHT": 200,
5
  "CHUNK_SIZE": 200,
6
  "OPENAI_ENGINE": "gpt-4o-mini",
model/main.py CHANGED
@@ -1,14 +1,24 @@
1
  import streamlit as st
 
2
  from model.questionAnsweringBot import QuestionAnsweringBot
3
  from model.retriever import Retriever
4
 
5
  def process_query(llm_key, query, retrieval_method):
 
 
 
 
 
 
6
  if "retriever" not in st.session_state:
7
- st.session_state.retriever = Retriever()
8
- print("Loading and preparing dataset...")
9
- st.session_state.retriever.load_and_prepare_dataset()
10
- st.session_state.retriever.prepare_bm25()
11
- st.session_state.retriever.compute_embeddings()
 
 
 
12
 
13
  retriever = st.session_state.retriever
14
 
@@ -42,3 +52,12 @@ def getPrompt(retrieved_docs, query):
42
  prompt += f"\nQuery: {query}\n"
43
 
44
  return prompt
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pickle
3
  from model.questionAnsweringBot import QuestionAnsweringBot
4
  from model.retriever import Retriever
5
 
6
  def process_query(llm_key, query, retrieval_method):
7
+ # if "retriever" not in st.session_state:
8
+ # st.session_state.retriever = Retriever()
9
+ # print("Loading and preparing dataset...")
10
+ # st.session_state.retriever.load_and_prepare_dataset()
11
+ # st.session_state.retriever.prepare_bm25()
12
+ # st.session_state.retriever.compute_embeddings()
13
  if "retriever" not in st.session_state:
14
+ with st.spinner("Loading precomputed retriever..."):
15
+ try:
16
+ import pickle
17
+ with open("retriever.pkl", "rb") as f:
18
+ st.session_state.retriever = pickle.load(f)
19
+ st.success("Preloaded retriever successfully!")
20
+ except Exception as e:
21
+ st.error(f"Failed to load precomputed retriever: {e}")
22
 
23
  retriever = st.session_state.retriever
24
 
 
52
  prompt += f"\nQuery: {query}\n"
53
 
54
  return prompt
55
+
56
+ def prepare_retriever():
57
+ retriever = Retriever()
58
+ retriever.load_and_prepare_dataset()
59
+ retriever.prepare_bm25()
60
+ retriever.compute_embeddings()
61
+
62
+ with open("retriever.pkl", "wb") as f:
63
+ pickle.dump(retriever, f)
retriever.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82ba6dd3aacd7ce192db5c240791ce7bea2f0f7d4ff4a90eba4ae697d370939c
3
+ size 316691228