Stefano Fiorucci committed
Commit • 8314602
1 Parent(s): 5bbc60d
refactoring
Browse files:
- app.py +15 -32
- haystack_utils.py → backend_utils.py +12 -26
- config.py +1 -1
- data/questions/generated_questions.txt +0 -0
- data/{questions.txt → questions/selected_questions.txt} +0 -0
- frontend_utils.py +14 -0
app.py
CHANGED
@@ -9,20 +9,9 @@ from typing import List, Dict, Any, Tuple, Optional
 from annotated_text import annotation
 from urllib.parse import unquote
 
-from
-
-
-INDEX_DIR = 'data/index'
-QUESTIONS_PATH = 'data/questions.txt'
-RETRIEVER_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
-RETRIEVER_MODEL_FORMAT = "sentence_transformers"
-READER_MODEL = "deepset/roberta-base-squad2"
-READER_CONFIG_THRESHOLD = 0.15
-RETRIEVER_TOP_K = 10
-READER_TOP_K = 5
-
-
-
+from backend_utils import load_questions, query
+from frontend_utils import set_state_if_absent, reset_results
+from config import RETRIEVER_TOP_K, READER_TOP_K
 
 def main():
 
@@ -35,13 +24,19 @@ def main():
     set_state_if_absent('raw_json', None)
     set_state_if_absent('random_question_requested', False)
 
-    # Small callback to reset the interface in case the text of the question changes
-    def reset_results(*args):
-        st.session_state.answer = None
-        st.session_state.results = None
-        st.session_state.raw_json = None
 
-    #
+    # Header
+    st.write("# Who killed Laura Palmer?")
+    st.write("### The first Twin Peaks Question Answering system!")
+    st.markdown("""
+    Ask any question about [Twin Peaks] (https://twinpeaks.fandom.com/wiki/Twin_Peaks)
+    and see if the AI can find an answer...
+
+    *Note: do not use keywords, but full-fledged questions.*
+    """)
+
+    # Sidebar
+    # sidebar style
     st.markdown(
         """
         <style>
@@ -55,18 +50,6 @@ def main():
         """,
         unsafe_allow_html=True,
     )
-    # Title
-    st.write("# Who killed Laura Palmer?")
-    st.write("### The first Twin Peaks Question Answering system!")
-
-    st.markdown("""
-    Ask any question about [Twin Peaks] (https://twinpeaks.fandom.com/wiki/Twin_Peaks)
-    and see if the AI can find an answer...
-
-    *Note: do not use keywords, but full-fledged questions.*
-    """)
-
-    # Sidebar
     st.sidebar.header("Who killed Laura Palmer?")
     st.sidebar.image(
         "https://upload.wikimedia.org/wikipedia/it/3/39/Twin-peaks-1990.jpg")
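For orientation, a minimal sketch (not part of this commit) of how the refactored modules fit together in app.py. Only the imports, the query() signature and the two top-k constants come from the diff; the widgets, labels and the random-question button are illustrative assumptions.

import random
import streamlit as st

from backend_utils import load_questions, query
from frontend_utils import set_state_if_absent, reset_results
from config import RETRIEVER_TOP_K, READER_TOP_K

# initialise the session keys that reset_results later clears
set_state_if_absent('answer', None)
set_state_if_absent('results', None)
set_state_if_absent('raw_json', None)

# hypothetical search box: editing the text clears stale results via reset_results
question = st.text_input("Ask a question about Twin Peaks", on_change=reset_results)

# hypothetical "random question" button backed by the cached question list
if st.button("Random question"):
    question = random.choice(load_questions())

if question:
    st.session_state.results = query(question,
                                     retriever_top_k=RETRIEVER_TOP_K,
                                     reader_top_k=READER_TOP_K)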
haystack_utils.py → backend_utils.py
RENAMED
@@ -8,6 +8,7 @@ import streamlit as st
 from config import (INDEX_DIR, RETRIEVER_MODEL, RETRIEVER_MODEL_FORMAT,
                     READER_MODEL, READER_CONFIG_THRESHOLD, QUESTIONS_PATH)
 
+# cached to make index and models load only at start
 @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
           allow_output_mutation=True)
 def start_haystack():
@@ -33,32 +34,7 @@ def start_haystack():
     pipe = ExtractiveQAPipeline(reader, retriever)
     return pipe
 
-def set_state_if_absent(key, value):
-    if key not in st.session_state:
-        st.session_state[key] = value
-
-@st.cache()
-def load_questions():
-    with open(QUESTIONS_PATH) as fin:
-        questions = [line.strip() for line in fin.readlines()
-                     if not line.startswith('#')]
-    return questions
-
-# # the following function is a wrapper for start_haystack,
-# # which loads document store, retriever, reader and creates pipeline.
-# # cached to make index and models load only at start
-# @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
-#           allow_output_mutation=True)
-# def start_app():
-#     return start_haystack()
-
-
-# @st.cache()
-# def load_questions_wrapper():
-#     return load_questions()
-
 pipe = start_haystack()
-
 # the pipeline is not included as parameter of the following function,
 # because it is difficult to cache
 @st.cache(persist=True, allow_output_mutation=True)
@@ -67,4 +43,14 @@ def query(question: str, retriever_top_k: int = 10, reader_top_k: int = 5):
     params = {"Retriever": {"top_k": retriever_top_k},
               "Reader": {"top_k": reader_top_k}}
     results = pipe.run(question, params=params)
-    return results
+    return results
+
+@st.cache()
+def load_questions():
+    """Load selected questions from file"""
+    with open(QUESTIONS_PATH) as fin:
+        questions = [line.strip() for line in fin.readlines()
+                     if not line.startswith('#')]
+    return questions
+
+
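As a usage note, not part of the commit: a short sketch of calling the two cached entry points that backend_utils now exposes. It assumes a Haystack 1.x-style result, i.e. pipe.run() returns a dict whose "answers" entry holds Answer objects with .answer, .score and .context; adjust for other versions.

from backend_utils import load_questions, query

questions = load_questions()                  # cached after the first run
results = query(questions[0], retriever_top_k=10, reader_top_k=5)

# assumed Haystack 1.x result layout
for ans in results["answers"]:
    print(f"{ans.score:.2f}  {ans.answer}")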
config.py
CHANGED
@@ -1,6 +1,6 @@
 
 INDEX_DIR = 'data/index'
-QUESTIONS_PATH = 'data/questions.txt'
+QUESTIONS_PATH = 'data/questions/selected_questions.txt'
 RETRIEVER_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
 RETRIEVER_MODEL_FORMAT = "sentence_transformers"
 READER_MODEL = "deepset/roberta-base-squad2"
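Putting the hunks together, config.py after this commit presumably reads as sketched below. The last three constants are outside the rendered hunk and are inferred from the values this commit removes from app.py, so treat them as assumptions.

INDEX_DIR = 'data/index'
QUESTIONS_PATH = 'data/questions/selected_questions.txt'
RETRIEVER_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
RETRIEVER_MODEL_FORMAT = "sentence_transformers"
READER_MODEL = "deepset/roberta-base-squad2"
READER_CONFIG_THRESHOLD = 0.15   # inferred from the constants removed from app.py
RETRIEVER_TOP_K = 10             # inferred from the constants removed from app.py
READER_TOP_K = 5                 # inferred from the constants removed from app.py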
data/questions/generated_questions.txt
ADDED
The diff for this file is too large to render.

data/{questions.txt → questions/selected_questions.txt}
RENAMED
File without changes
frontend_utils.py
ADDED
@@ -0,0 +1,14 @@
+import streamlit as st
+
+def set_state_if_absent(key, value):
+    if key not in st.session_state:
+        st.session_state[key] = value
+
+# Small callback to reset the interface in case the text of the question changes
+def reset_results(*args):
+    st.session_state.answer = None
+    st.session_state.results = None
+    st.session_state.raw_json = None
+
+
+
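For context, a minimal sketch of why the set_state_if_absent guard exists and how reset_results is meant to be hooked up; the key name, widget and label are illustrative assumptions.

import streamlit as st
from frontend_utils import set_state_if_absent, reset_results

# Streamlit reruns the whole script on every interaction, so an unconditional
#   st.session_state['results'] = None
# would wipe stored results on each rerun; the guard only sets the key once.
set_state_if_absent('results', None)

# reset_results is designed as an on_change callback: editing the question
# clears any previously rendered answer
st.text_input("Your question", on_change=reset_results)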