Stefano Fiorucci committed on
Commit
8314602
β€’
1 Parent(s): 5bbc60d

refactoring

Browse files
app.py CHANGED
@@ -9,20 +9,9 @@ from typing import List, Dict, Any, Tuple, Optional
9
  from annotated_text import annotation
10
  from urllib.parse import unquote
11
 
12
- from haystack_utils import (set_state_if_absent, load_questions,
13
- query)
14
-
15
- INDEX_DIR = 'data/index'
16
- QUESTIONS_PATH = 'data/questions.txt'
17
- RETRIEVER_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
18
- RETRIEVER_MODEL_FORMAT = "sentence_transformers"
19
- READER_MODEL = "deepset/roberta-base-squad2"
20
- READER_CONFIG_THRESHOLD = 0.15
21
- RETRIEVER_TOP_K = 10
22
- READER_TOP_K = 5
23
-
24
-
25
-
26
 
27
  def main():
28
 
@@ -35,13 +24,19 @@ def main():
35
  set_state_if_absent('raw_json', None)
36
  set_state_if_absent('random_question_requested', False)
37
 
38
- # Small callback to reset the interface in case the text of the question changes
39
- def reset_results(*args):
40
- st.session_state.answer = None
41
- st.session_state.results = None
42
- st.session_state.raw_json = None
43
 
44
- # sidebar style
 
 
 
 
 
 
 
 
 
 
 
45
  st.markdown(
46
  """
47
  <style>
@@ -55,18 +50,6 @@ def main():
55
  """,
56
  unsafe_allow_html=True,
57
  )
58
- # Title
59
- st.write("# Who killed Laura Palmer?")
60
- st.write("### The first Twin Peaks Question Answering system!")
61
-
62
- st.markdown("""
63
- Ask any question about [Twin Peaks] (https://twinpeaks.fandom.com/wiki/Twin_Peaks)
64
- and see if the AI ​​can find an answer...
65
-
66
- *Note: do not use keywords, but full-fledged questions.*
67
- """)
68
-
69
- # Sidebar
70
  st.sidebar.header("Who killed Laura Palmer?")
71
  st.sidebar.image(
72
  "https://upload.wikimedia.org/wikipedia/it/3/39/Twin-peaks-1990.jpg")
 
9
  from annotated_text import annotation
10
  from urllib.parse import unquote
11
 
12
+ from backend_utils import load_questions, query
13
+ from frontend_utils import set_state_if_absent, reset_results
14
+ from config import RETRIEVER_TOP_K, READER_TOP_K
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def main():
17
 
 
24
  set_state_if_absent('raw_json', None)
25
  set_state_if_absent('random_question_requested', False)
26
 
 
 
 
 
 
27
 
28
+ # Header
29
+ st.write("# Who killed Laura Palmer?")
30
+ st.write("### The first Twin Peaks Question Answering system!")
31
+ st.markdown("""
32
+ Ask any question about [Twin Peaks](https://twinpeaks.fandom.com/wiki/Twin_Peaks)
33
+ and see if the AI can find an answer...
34
+
35
+ *Note: do not use keywords, but full-fledged questions.*
36
+ """)
37
+
38
+ # Sidebar
39
+ # sidebar style
40
  st.markdown(
41
  """
42
  <style>
 
50
  """,
51
  unsafe_allow_html=True,
52
  )
 
 
 
 
 
 
 
 
 
 
 
 
53
  st.sidebar.header("Who killed Laura Palmer?")
54
  st.sidebar.image(
55
  "https://upload.wikimedia.org/wikipedia/it/3/39/Twin-peaks-1990.jpg")
haystack_utils.py β†’ backend_utils.py RENAMED
@@ -8,6 +8,7 @@ import streamlit as st
8
  from config import (INDEX_DIR, RETRIEVER_MODEL, RETRIEVER_MODEL_FORMAT,
9
  READER_MODEL, READER_CONFIG_THRESHOLD, QUESTIONS_PATH)
10
 
 
11
  @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
12
  allow_output_mutation=True)
13
  def start_haystack():
@@ -33,32 +34,7 @@ def start_haystack():
33
  pipe = ExtractiveQAPipeline(reader, retriever)
34
  return pipe
35
 
36
- def set_state_if_absent(key, value):
37
- if key not in st.session_state:
38
- st.session_state[key] = value
39
-
40
- @st.cache()
41
- def load_questions():
42
- with open(QUESTIONS_PATH) as fin:
43
- questions = [line.strip() for line in fin.readlines()
44
- if not line.startswith('#')]
45
- return questions
46
-
47
- # # the following function is a wrapper for start_haystack,
48
- # # which loads document store, retriever, reader and creates pipeline.
49
- # # cached to make index and models load only at start
50
- # @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
51
- # allow_output_mutation=True)
52
- # def start_app():
53
- # return start_haystack()
54
-
55
-
56
- # @st.cache()
57
- # def load_questions_wrapper():
58
- # return load_questions()
59
-
60
  pipe = start_haystack()
61
-
62
  # the pipeline is not included as parameter of the following function,
63
  # because it is difficult to cache
64
  @st.cache(persist=True, allow_output_mutation=True)
@@ -67,4 +43,14 @@ def query(question: str, retriever_top_k: int = 10, reader_top_k: int = 5):
67
  params = {"Retriever": {"top_k": retriever_top_k},
68
  "Reader": {"top_k": reader_top_k}}
69
  results = pipe.run(question, params=params)
70
- return results
 
 
 
 
 
 
 
 
 
 
 
8
  from config import (INDEX_DIR, RETRIEVER_MODEL, RETRIEVER_MODEL_FORMAT,
9
  READER_MODEL, READER_CONFIG_THRESHOLD, QUESTIONS_PATH)
10
 
11
+ # cached to make index and models load only at start
12
  @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
13
  allow_output_mutation=True)
14
  def start_haystack():
 
34
  pipe = ExtractiveQAPipeline(reader, retriever)
35
  return pipe
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  pipe = start_haystack()
 
38
  # the pipeline is not included as parameter of the following function,
39
  # because it is difficult to cache
40
  @st.cache(persist=True, allow_output_mutation=True)
 
43
  params = {"Retriever": {"top_k": retriever_top_k},
44
  "Reader": {"top_k": reader_top_k}}
45
  results = pipe.run(question, params=params)
46
+ return results
47
+
48
+ @st.cache()
49
+ def load_questions():
50
+ """Load selected questions from file"""
51
+ with open(QUESTIONS_PATH) as fin:
52
+ questions = [line.strip() for line in fin.readlines()
53
+ if not line.startswith('#')]
54
+ return questions
55
+
56
+
config.py CHANGED
@@ -1,6 +1,6 @@
1
 
2
  INDEX_DIR = 'data/index'
3
- QUESTIONS_PATH = 'data/questions.txt'
4
  RETRIEVER_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
5
  RETRIEVER_MODEL_FORMAT = "sentence_transformers"
6
  READER_MODEL = "deepset/roberta-base-squad2"
 
1
 
2
  INDEX_DIR = 'data/index'
3
+ QUESTIONS_PATH = 'data/questions/selected_questions.txt'
4
  RETRIEVER_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
5
  RETRIEVER_MODEL_FORMAT = "sentence_transformers"
6
  READER_MODEL = "deepset/roberta-base-squad2"
data/questions/generated_questions.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/{questions.txt β†’ questions/selected_questions.txt} RENAMED
File without changes
frontend_utils.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ def set_state_if_absent(key, value):
4
+ if key not in st.session_state:
5
+ st.session_state[key] = value
6
+
7
+ # Small callback to reset the interface in case the text of the question changes
8
+ def reset_results(*args):
9
+ st.session_state.answer = None
10
+ st.session_state.results = None
11
+ st.session_state.raw_json = None
12
+
13
+
14
+