Spaces:

anakin87
/

fact-checking-rocks

Running

App Files Files Community

anakin87 committed on Aug 24, 2022

Commit

35f0167

•

1 Parent(s): 4c2a969

steps in present output

Browse files

Files changed (6) hide show

Rock_fact_checker.py +79 -27
app_utils/backend_utils.py +21 -7
app_utils/frontend_utils.py +3 -2
data/index/faiss_document_store.db +2 -2
data/index/my_faiss_index.faiss +2 -2
data/statements.txt +18 -3

Rock_fact_checker.py CHANGED Viewed

@@ -1,30 +1,30 @@
 import streamlit as st
 import time
-import streamlit as st
 import logging
 from json import JSONDecodeError
 # from markdown import markdown
 # from annotated_text import annotation
 # from urllib.parse import unquote
 import random
-from app_utils.backend_utils import load_questions, query
-from app_utils.frontend_utils import set_state_if_absent, reset_results
 from app_utils.config import RETRIEVER_TOP_K
 def main():
-    questions = load_questions()
     # Persistent state
-    set_state_if_absent('question', "Elvis Presley is alive")
     set_state_if_absent('answer', '')
     set_state_if_absent('results', None)
     set_state_if_absent('raw_json', None)
-    set_state_if_absent('random_question_requested', False)
     ## MAIN CONTAINER
@@ -34,7 +34,7 @@ def main():
     ##### Enter a factual statement about [Rock music](https://en.wikipedia.org/wiki/List_of_mainstream_rock_performers) and let the AI check it out for you...
     """)
     # Search bar
-    question = st.text_input("", value=st.session_state.question,
                              max_chars=100, on_change=reset_results)
     col1, col2 = st.columns(2)
     col1.markdown(
@@ -43,33 +43,33 @@ def main():
         "<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
     # Run button
     run_pressed = col1.button("Run")
-    # Random question button
-    if col2.button("Random question"):
         reset_results()
-        question = random.choice(questions)
-        # Avoid picking the same question twice (the change is not visible on the UI)
-        while question == st.session_state.question:
-            question = random.choice(questions)
-        st.session_state.question = question
-        st.session_state.random_question_requested = True
-        # Re-runs the script setting the random question as the textbox value
-        # Unfortunately necessary as the Random Question button is _below_ the textbox
         # raise st.script_runner.RerunException(
         #     st.script_request_queue.RerunData(None))
     else:
-        st.session_state.random_question_requested = False
-    run_query = (run_pressed or question != st.session_state.question) \
-        and not st.session_state.random_question_requested
     # Get results for query
-    if run_query and question:
         time_start = time.time()
         reset_results()
-        st.session_state.question = question
         with st.spinner("🧠 &nbsp;&nbsp; Performing neural search on documents..."):
             try:
                 st.session_state.results = query(
-                    question, RETRIEVER_TOP_K)
                 time_end = time.time()
                 print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
                 print(f'elapsed time: {time_end - time_start}')
@@ -82,10 +82,53 @@ def main():
                 st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
                 return
-    # # Display results
-    # if st.session_state.results:
-    #     st.write("## Results:")
-    #     alert_irrelevance = True
     #     if len(st.session_state.results['answers']) == 0:
     #         st.info("""🤔 &nbsp;&nbsp; Haystack is unsure whether any of
     # the documents contain an answer to your question. Try to reformulate it!""")
@@ -114,4 +157,13 @@ def main():
     #         st.markdown(
     #             f"**Score:** {result['score']:.2f} -  **Source:** {source}")
 main()

 import streamlit as st
 import time
 import logging
 from json import JSONDecodeError
 # from markdown import markdown
 # from annotated_text import annotation
 # from urllib.parse import unquote
 import random
+import pandas as pd
+from app_utils.backend_utils import load_statements, query
+from app_utils.frontend_utils import set_state_if_absent, reset_results, entailment_html_messages
 from app_utils.config import RETRIEVER_TOP_K
 def main():
+    statements = load_statements()
     # Persistent state
+    set_state_if_absent('statement', "Elvis Presley is alive")
     set_state_if_absent('answer', '')
     set_state_if_absent('results', None)
     set_state_if_absent('raw_json', None)
+    set_state_if_absent('random_statement_requested', False)
     ## MAIN CONTAINER
     ##### Enter a factual statement about [Rock music](https://en.wikipedia.org/wiki/List_of_mainstream_rock_performers) and let the AI check it out for you...
     """)
     # Search bar
+    statement = st.text_input("", value=st.session_state.statement,
                              max_chars=100, on_change=reset_results)
     col1, col2 = st.columns(2)
     col1.markdown(
         "<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
     # Run button
     run_pressed = col1.button("Run")
+    # Random statement button
+    if col2.button("Random statement"):
         reset_results()
+        statement = random.choice(statements)
+        # Avoid picking the same statement twice (the change is not visible on the UI)
+        while statement == st.session_state.statement:
+            statement = random.choice(statements)
+        st.session_state.statement = statement
+        st.session_state.random_statement_requested = True
+        # Re-runs the script setting the random statement as the textbox value
+        # Unfortunately necessary as the Random statement button is _below_ the textbox
         # raise st.script_runner.RerunException(
         #     st.script_request_queue.RerunData(None))
     else:
+        st.session_state.random_statement_requested = False
+    run_query = (run_pressed or statement != st.session_state.statement) \
+        and not st.session_state.random_statement_requested
     # Get results for query
+    if run_query and statement:
         time_start = time.time()
         reset_results()
+        st.session_state.statement = statement
         with st.spinner("🧠 &nbsp;&nbsp; Performing neural search on documents..."):
             try:
                 st.session_state.results = query(
+                    statement, RETRIEVER_TOP_K)
                 time_end = time.time()
                 print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
                 print(f'elapsed time: {time_end - time_start}')
                 st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
                 return
+    # Display results
+    if st.session_state.results:
+        results = st.session_state.results
+        docs, agg_entailment_info = results['documents'], results['agg_entailment_info']
+        print(results)
+        max_key = max(agg_entailment_info, key=agg_entailment_info.get)
+        message = entailment_html_messages[max_key]
+        st.markdown(f'<h4>{message}</h4>', unsafe_allow_html=True)
+        st.markdown(f'###### Aggregate entailment information:')
+        st.write(results['agg_entailment_info'])
+        st.markdown(f'###### Relevant snippets:')
+        # colms = st.columns((2, 5, 1, 1, 1, 1))
+        # fields = ["Page title",'Content', 'Relevance', 'contradiction', 'neutral', 'entailment']
+        # for col, field_name in zip(colms, fields):
+        #     # header
+        #     col.write(field_name)
+        df = []
+        for doc in docs:
+        #     col1, col2, col3, col4, col5, col6 = st.columns((2, 5, 1, 1, 1, 1))
+        #     col1.write(f"[{doc.meta['name']}]({doc.meta['url']})")
+        #     col2.write(f"{doc.content}")
+        #     col3.write(f"{doc.score:.3f}")
+        #     col4.write(f"{doc.meta['entailment_info']['contradiction']:.2f}")
+        #     col5.write(f"{doc.meta['entailment_info']['neutral']:.2f}")
+        #     col6.write(f"{doc.meta['entailment_info']['entailment']:.2f}")
+            #         'con': f"{doc.meta['entailment_info']['contradiction']:.2f}",
+            #         'neu': f"{doc.meta['entailment_info']['neutral']:.2f}",
+            #         'ent': f"{doc.meta['entailment_info']['entailment']:.2f}",
+            #         # 'url': doc.meta['url'],
+            #         'Content': doc.content}
+            #
+            #
+            #
+            row = {'Title': doc.meta['name'],
+                    'Relevance': f"{doc.score:.3f}",
+                    'con': f"{doc.meta['entailment_info']['contradiction']:.2f}",
+                    'neu': f"{doc.meta['entailment_info']['neutral']:.2f}",
+                    'ent': f"{doc.meta['entailment_info']['entailment']:.2f}",
+                    # 'url': doc.meta['url'],
+                    'Content': doc.content}
+            df.append(row)
+        st.dataframe(pd.DataFrame(df))#.style.apply(highlight))
     #     if len(st.session_state.results['answers']) == 0:
     #         st.info("""🤔 &nbsp;&nbsp; Haystack is unsure whether any of
     # the documents contain an answer to your question. Try to reformulate it!""")
     #         st.markdown(
     #             f"**Score:** {result['score']:.2f} -  **Source:** {source}")
+# def make_pretty(styler):
+#     styler.set_caption("Weather Conditions")
+#     # styler.format(rain_condition)
+#     styler.format_con(lambda v: v.float(v))
+#     styler.background_gradient(axis=None, vmin=0, vmax=1, cmap="YlGnBu")
+#     return styler
def highlight(s):
    """Return a red-background CSS declaration for every element of ``s``.

    Written as a callback for pandas ``Styler.apply`` (see the commented-out
    ``.style.apply(highlight)`` call on the results table).  ``Styler.apply``
    requires the returned list to have the same length as the Series it
    passes in, so the list is sized from ``s`` rather than fixed at 5.

    Args:
        s: Sized collection of cell values (a row or column of the table).

    Returns:
        A list with one ``'background-color: red'`` entry per element of ``s``.
    """
    return ['background-color: red'] * len(s)
 main()

app_utils/backend_utils.py CHANGED Viewed

@@ -40,19 +40,33 @@ pipe = start_haystack()
 # the pipeline is not included as parameter of the following function,
 # because it is difficult to cache
 @st.cache(persist=True, allow_output_mutation=True)
-def query(question: str, retriever_top_k: int = 5):
-    """Run query and get answers"""
     params = {"retriever": {"top_k": retriever_top_k}}
-    results = pipe.run(question, params=params)
-    print(results)
     return results
 @st.cache()
-def load_questions():
     """Load statements from file"""
     with open(STATEMENTS_PATH) as fin:
-        questions = [line.strip() for line in fin.readlines()
                      if not line.startswith('#')]
-    return questions

 # the pipeline is not included as parameter of the following function,
 # because it is difficult to cache
 @st.cache(persist=True, allow_output_mutation=True)
def query(statement: str, retriever_top_k: int = 5):
    """Run the pipeline on ``statement`` and aggregate the entailment scores.

    Every retrieved document exposes a ``score`` and, under
    ``meta['entailment_info']``, its ``contradiction`` / ``neutral`` /
    ``entailment`` values.  The aggregate stored in the results is the
    score-weighted average of those values, rounded to two decimals.

    Args:
        statement: Text to verify.
        retriever_top_k: Forwarded to the pipeline as the retriever's
            ``top_k`` parameter.

    Returns:
        The results object returned by ``pipe.run``, with an additional
        ``'agg_entailment_info'`` entry mapping ``'contradiction'``,
        ``'neutral'`` and ``'entailment'`` to their aggregated values.
    """
    params = {"retriever": {"top_k": retriever_top_k}}
    results = pipe.run(statement, params=params)

    labels = ('contradiction', 'neutral', 'entailment')
    total_score = 0
    weighted = dict.fromkeys(labels, 0)
    for doc in results['documents']:
        total_score += doc.score
        ent_info = doc.meta['entailment_info']
        for label in labels:
            weighted[label] += ent_info[label] * doc.score

    if total_score:
        agg_entailment_info = {
            label: round(weighted[label] / total_score, 2) for label in labels
        }
    else:
        # Nothing to weight by (no documents, or the scores sum to 0):
        # dividing would raise ZeroDivisionError.  With no usable evidence
        # the statement is neither confirmed nor contradicted, so report it
        # as fully neutral instead of crashing.
        agg_entailment_info = {
            'contradiction': 0.0,
            'neutral': 1.0,
            'entailment': 0.0,
        }

    results['agg_entailment_info'] = agg_entailment_info
    return results
 @st.cache()
def load_statements(path=None):
    """Load the example statements, one per line, from a text file.

    Lines that start with ``#`` are treated as comments and skipped.  Blank
    lines are skipped as well, so that an empty string can never be offered
    as a statement.  The file is read as UTF-8 regardless of the platform's
    default encoding.

    Args:
        path: File to read.  Defaults to ``STATEMENTS_PATH`` when omitted.

    Returns:
        The list of statements with surrounding whitespace removed.
    """
    if path is None:
        path = STATEMENTS_PATH
    statements = []
    with open(path, encoding="utf-8") as fin:
        for line in fin:
            if line.startswith('#'):
                continue
            text = line.strip()
            if text:
                statements.append(text)
    return statements

app_utils/frontend_utils.py CHANGED Viewed

@@ -11,5 +11,6 @@ def reset_results(*args):
     st.session_state.results = None
     st.session_state.raw_json = None

     st.session_state.results = None
     st.session_state.raw_json = None
# HTML snippets keyed by entailment label ('entailment', 'contradiction',
# 'neutral').  The app indexes this mapping with the label that has the highest
# aggregate score and renders the snippet inside an <h4> heading.
+entailment_html_messages = {'entailment': 'The knowledge base seems to <span style="color:green">confirm</span> your statement',
+                            'contradiction': 'The knowledge base seems to <span style="color:red">contradict</span> your statement',
+                            'neutral': 'The knowledge base is <span style="color:darkgray">neutral</span> about your statement'}

data/index/faiss_document_store.db CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91914481925284e0a0e193ad9fd90c820e226304d5ad35b25371afaa046281f4
-size 75456512

 version https://git-lfs.github.com/spec/v1
+oid sha256:449d3708074efc81a6c59bbe4164b18f5ac45b28db530fee8eec5eff74504e45
+size 73195520

data/index/my_faiss_index.faiss CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:285c67834753ed442f76e2ea907d984416ca0166aa2d48bf85106d0801fe3113
-size 153560109

 version https://git-lfs.github.com/spec/v1
+oid sha256:b971dc989aec74c8e34d808a4f9c713dc93c3ae247ec5cbefb0e00d98a3fcc56
+size 150488109

data/statements.txt CHANGED Viewed

@@ -1,5 +1,20 @@
 Kurt Cobain died in 1994
 Kurt Cobain died in 2008
-Green Day are a heavy metal band
-Green Day are a punk rock band
-The Beatles' first album was released in 1985

 Kurt Cobain died in 1994
 Kurt Cobain died in 2008
+Green Day is a heavy metal band
+Green Day is a punk rock band
+The Beatles' first album was released in 1985
+The Offspring is a French punk rock band
+Kurt Cobain was a biker
+Joe Cocker was American
+The Eagles won a Grammy Award
+Mick Jagger was part of the Beatles
+London calling is a hit by The Clash
+Red Hot Chili Peppers were formed in New York
+The Smiths is a very long-lived band
+U2 have participated in philanthropic initiatives
+Sweet Home Alabama is a popular song by Lynyrd Skynyrd
+Steve Vai collaborated with Frank Zappa
+The White Stripes were a trio
+The White Stripes were composed by Jack White and Meg White
+Scorpions is a German trap band
+Sepultura is a heavy metal band