Spaces:
Running
Running
anakin87
committed on
Commit
•
35f0167
1
Parent(s):
4c2a969
steps in present output
Browse files
- Rock_fact_checker.py +79 -27
- app_utils/backend_utils.py +21 -7
- app_utils/frontend_utils.py +3 -2
- data/index/faiss_document_store.db +2 -2
- data/index/my_faiss_index.faiss +2 -2
- data/statements.txt +18 -3
Rock_fact_checker.py
CHANGED
@@ -1,30 +1,30 @@
|
|
1 |
import streamlit as st
|
2 |
|
3 |
import time
|
4 |
-
import streamlit as st
|
5 |
import logging
|
6 |
from json import JSONDecodeError
|
7 |
# from markdown import markdown
|
8 |
# from annotated_text import annotation
|
9 |
# from urllib.parse import unquote
|
10 |
import random
|
|
|
11 |
|
12 |
-
from app_utils.backend_utils import
|
13 |
-
from app_utils.frontend_utils import set_state_if_absent, reset_results
|
14 |
from app_utils.config import RETRIEVER_TOP_K
|
15 |
|
16 |
|
17 |
def main():
|
18 |
|
19 |
|
20 |
-
|
21 |
|
22 |
# Persistent state
|
23 |
-
set_state_if_absent('
|
24 |
set_state_if_absent('answer', '')
|
25 |
set_state_if_absent('results', None)
|
26 |
set_state_if_absent('raw_json', None)
|
27 |
-
set_state_if_absent('
|
28 |
|
29 |
|
30 |
## MAIN CONTAINER
|
@@ -34,7 +34,7 @@ def main():
|
|
34 |
##### Enter a factual statement about [Rock music](https://en.wikipedia.org/wiki/List_of_mainstream_rock_performers) and let the AI check it out for you...
|
35 |
""")
|
36 |
# Search bar
|
37 |
-
|
38 |
max_chars=100, on_change=reset_results)
|
39 |
col1, col2 = st.columns(2)
|
40 |
col1.markdown(
|
@@ -43,33 +43,33 @@ def main():
|
|
43 |
"<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
|
44 |
# Run button
|
45 |
run_pressed = col1.button("Run")
|
46 |
-
# Random
|
47 |
-
if col2.button("Random
|
48 |
reset_results()
|
49 |
-
|
50 |
-
# Avoid picking the same
|
51 |
-
while
|
52 |
-
|
53 |
-
st.session_state.
|
54 |
-
st.session_state.
|
55 |
-
# Re-runs the script setting the random
|
56 |
-
# Unfortunately necessary as the Random
|
57 |
# raise st.script_runner.RerunException(
|
58 |
# st.script_request_queue.RerunData(None))
|
59 |
else:
|
60 |
-
st.session_state.
|
61 |
-
run_query = (run_pressed or
|
62 |
-
and not st.session_state.
|
63 |
|
64 |
# Get results for query
|
65 |
-
if run_query and
|
66 |
time_start = time.time()
|
67 |
reset_results()
|
68 |
-
st.session_state.
|
69 |
with st.spinner("🧠 Performing neural search on documents..."):
|
70 |
try:
|
71 |
st.session_state.results = query(
|
72 |
-
|
73 |
time_end = time.time()
|
74 |
print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
|
75 |
print(f'elapsed time: {time_end - time_start}')
|
@@ -82,10 +82,53 @@ def main():
|
|
82 |
st.error("🐞 An error occurred during the request.")
|
83 |
return
|
84 |
|
85 |
-
#
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
# if len(st.session_state.results['answers']) == 0:
|
90 |
# st.info("""🤔 Haystack is unsure whether any of
|
91 |
# the documents contain an answer to your question. Try to reformulate it!""")
|
@@ -114,4 +157,13 @@ def main():
|
|
114 |
# st.markdown(
|
115 |
# f"**Score:** {result['score']:.2f} - **Source:** {source}")
|
116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
main()
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
import time
|
|
|
4 |
import logging
|
5 |
from json import JSONDecodeError
|
6 |
# from markdown import markdown
|
7 |
# from annotated_text import annotation
|
8 |
# from urllib.parse import unquote
|
9 |
import random
|
10 |
+
import pandas as pd
|
11 |
|
12 |
+
from app_utils.backend_utils import load_statements, query
|
13 |
+
from app_utils.frontend_utils import set_state_if_absent, reset_results, entailment_html_messages
|
14 |
from app_utils.config import RETRIEVER_TOP_K
|
15 |
|
16 |
|
17 |
def main():
|
18 |
|
19 |
|
20 |
+
statements = load_statements()
|
21 |
|
22 |
# Persistent state
|
23 |
+
set_state_if_absent('statement', "Elvis Presley is alive")
|
24 |
set_state_if_absent('answer', '')
|
25 |
set_state_if_absent('results', None)
|
26 |
set_state_if_absent('raw_json', None)
|
27 |
+
set_state_if_absent('random_statement_requested', False)
|
28 |
|
29 |
|
30 |
## MAIN CONTAINER
|
|
|
34 |
##### Enter a factual statement about [Rock music](https://en.wikipedia.org/wiki/List_of_mainstream_rock_performers) and let the AI check it out for you...
|
35 |
""")
|
36 |
# Search bar
|
37 |
+
statement = st.text_input("", value=st.session_state.statement,
|
38 |
max_chars=100, on_change=reset_results)
|
39 |
col1, col2 = st.columns(2)
|
40 |
col1.markdown(
|
|
|
43 |
"<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
|
44 |
# Run button
|
45 |
run_pressed = col1.button("Run")
|
46 |
+
# Random statement button
|
47 |
+
if col2.button("Random statement"):
|
48 |
reset_results()
|
49 |
+
statement = random.choice(statements)
|
50 |
+
# Avoid picking the same statement twice (the change is not visible on the UI)
|
51 |
+
while statement == st.session_state.statement:
|
52 |
+
statement = random.choice(statements)
|
53 |
+
st.session_state.statement = statement
|
54 |
+
st.session_state.random_statement_requested = True
|
55 |
+
# Re-runs the script setting the random statement as the textbox value
|
56 |
+
# Unfortunately necessary as the Random statement button is _below_ the textbox
|
57 |
# raise st.script_runner.RerunException(
|
58 |
# st.script_request_queue.RerunData(None))
|
59 |
else:
|
60 |
+
st.session_state.random_statement_requested = False
|
61 |
+
run_query = (run_pressed or statement != st.session_state.statement) \
|
62 |
+
and not st.session_state.random_statement_requested
|
63 |
|
64 |
# Get results for query
|
65 |
+
if run_query and statement:
|
66 |
time_start = time.time()
|
67 |
reset_results()
|
68 |
+
st.session_state.statement = statement
|
69 |
with st.spinner("🧠 Performing neural search on documents..."):
|
70 |
try:
|
71 |
st.session_state.results = query(
|
72 |
+
statement, RETRIEVER_TOP_K)
|
73 |
time_end = time.time()
|
74 |
print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
|
75 |
print(f'elapsed time: {time_end - time_start}')
|
|
|
82 |
st.error("🐞 An error occurred during the request.")
|
83 |
return
|
84 |
|
85 |
+
# Display results
|
86 |
+
if st.session_state.results:
|
87 |
+
results = st.session_state.results
|
88 |
+
docs, agg_entailment_info = results['documents'], results['agg_entailment_info']
|
89 |
+
print(results)
|
90 |
+
|
91 |
+
max_key = max(agg_entailment_info, key=agg_entailment_info.get)
|
92 |
+
message = entailment_html_messages[max_key]
|
93 |
+
st.markdown(f'<h4>{message}</h4>', unsafe_allow_html=True)
|
94 |
+
st.markdown(f'###### Aggregate entailment information:')
|
95 |
+
st.write(results['agg_entailment_info'])
|
96 |
+
st.markdown(f'###### Relevant snippets:')
|
97 |
+
|
98 |
+
# colms = st.columns((2, 5, 1, 1, 1, 1))
|
99 |
+
# fields = ["Page title",'Content', 'Relevance', 'contradiction', 'neutral', 'entailment']
|
100 |
+
# for col, field_name in zip(colms, fields):
|
101 |
+
# # header
|
102 |
+
# col.write(field_name)
|
103 |
+
df = []
|
104 |
+
for doc in docs:
|
105 |
+
# col1, col2, col3, col4, col5, col6 = st.columns((2, 5, 1, 1, 1, 1))
|
106 |
+
# col1.write(f"[{doc.meta['name']}]({doc.meta['url']})")
|
107 |
+
# col2.write(f"{doc.content}")
|
108 |
+
# col3.write(f"{doc.score:.3f}")
|
109 |
+
# col4.write(f"{doc.meta['entailment_info']['contradiction']:.2f}")
|
110 |
+
# col5.write(f"{doc.meta['entailment_info']['neutral']:.2f}")
|
111 |
+
# col6.write(f"{doc.meta['entailment_info']['entailment']:.2f}")
|
112 |
+
|
113 |
+
# 'con': f"{doc.meta['entailment_info']['contradiction']:.2f}",
|
114 |
+
# 'neu': f"{doc.meta['entailment_info']['neutral']:.2f}",
|
115 |
+
# 'ent': f"{doc.meta['entailment_info']['entailment']:.2f}",
|
116 |
+
# # 'url': doc.meta['url'],
|
117 |
+
# 'Content': doc.content}
|
118 |
+
#
|
119 |
+
#
|
120 |
+
#
|
121 |
+
row = {'Title': doc.meta['name'],
|
122 |
+
'Relevance': f"{doc.score:.3f}",
|
123 |
+
'con': f"{doc.meta['entailment_info']['contradiction']:.2f}",
|
124 |
+
'neu': f"{doc.meta['entailment_info']['neutral']:.2f}",
|
125 |
+
'ent': f"{doc.meta['entailment_info']['entailment']:.2f}",
|
126 |
+
# 'url': doc.meta['url'],
|
127 |
+
'Content': doc.content}
|
128 |
+
df.append(row)
|
129 |
+
st.dataframe(pd.DataFrame(df))#.style.apply(highlight))
|
130 |
+
|
131 |
+
|
132 |
# if len(st.session_state.results['answers']) == 0:
|
133 |
# st.info("""🤔 Haystack is unsure whether any of
|
134 |
# the documents contain an answer to your question. Try to reformulate it!""")
|
|
|
157 |
# st.markdown(
|
158 |
# f"**Score:** {result['score']:.2f} - **Source:** {source}")
|
159 |
|
160 |
+
# def make_pretty(styler):
|
161 |
+
# styler.set_caption("Weather Conditions")
|
162 |
+
# # styler.format(rain_condition)
|
163 |
+
# styler.format_con(lambda v: v.float(v))
|
164 |
+
# styler.background_gradient(axis=None, vmin=0, vmax=1, cmap="YlGnBu")
|
165 |
+
# return styler
|
166 |
+
|
167 |
+
def highlight(s):
    """Return a red-background CSS declaration for every element of ``s``.

    NOTE(review): this looks intended as a callback for
    ``DataFrame.style.apply`` (the only reference is a commented-out call
    in ``main``) -- confirm before wiring it in.

    Args:
        s: Any sized collection (anything supporting ``len``).

    Returns:
        A list with one ``'background-color: red'`` string per element
        of ``s``.
    """
    # Size the result from the input instead of hard-coding 5, so the
    # output always lines up with the row/column being styled.
    return ['background-color: red'] * len(s)
|
169 |
main()
|
app_utils/backend_utils.py
CHANGED
@@ -40,19 +40,33 @@ pipe = start_haystack()
|
|
40 |
# the pipeline is not included as parameter of the following function,
|
41 |
# because it is difficult to cache
|
42 |
@st.cache(persist=True, allow_output_mutation=True)
|
43 |
-
def query(
|
44 |
-
"""Run query and
|
45 |
params = {"retriever": {"top_k": retriever_top_k}}
|
46 |
-
results = pipe.run(
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
return results
|
49 |
|
50 |
@st.cache()
|
51 |
-
def
|
52 |
"""Load statements from file"""
|
53 |
with open(STATEMENTS_PATH) as fin:
|
54 |
-
|
55 |
if not line.startswith('#')]
|
56 |
-
return
|
57 |
|
58 |
|
|
|
40 |
# the pipeline is not included as parameter of the following function,
|
41 |
# because it is difficult to cache
|
42 |
@st.cache(persist=True, allow_output_mutation=True)
def query(statement: str, retriever_top_k: int = 5):
    """Run the pipeline on ``statement`` and attach aggregate entailment info.

    Each returned document contributes its ``meta['entailment_info']``
    values (``contradiction`` / ``neutral`` / ``entailment``) weighted by
    its ``score``; the weighted sums are divided by the total score and
    rounded to two decimals.

    Args:
        statement: The statement to check.
        retriever_top_k: Passed to the pipeline as the retriever's ``top_k``.

    Returns:
        The object returned by ``pipe.run`` with an extra
        ``'agg_entailment_info'`` key mapping each of the three labels to
        its aggregate value.
    """
    params = {"retriever": {"top_k": retriever_top_k}}
    results = pipe.run(statement, params=params)

    labels = ('contradiction', 'neutral', 'entailment')
    total_score = 0
    weighted = dict.fromkeys(labels, 0)
    for doc in results['documents']:
        total_score += doc.score
        ent_info = doc.meta['entailment_info']
        for label in labels:
            weighted[label] += ent_info[label] * doc.score

    if total_score:
        agg_entailment_info = {
            label: round(weighted[label] / total_score, 2)
            for label in labels}
    else:
        # No documents (or only zero-score documents): there is no evidence
        # either way. Report a neutral verdict instead of dividing by zero.
        agg_entailment_info = {
            'contradiction': 0.0,
            'neutral': 1.0,
            'entailment': 0.0}

    results['agg_entailment_info'] = agg_entailment_info
    return results
|
63 |
|
64 |
@st.cache()
def load_statements():
    """Load the example statements from ``STATEMENTS_PATH``.

    Each line is stripped of surrounding whitespace. Blank lines and lines
    whose first non-blank character is ``#`` are skipped.

    Returns:
        A list of non-empty statement strings, in file order.
    """
    # Explicit encoding so the result does not depend on the platform default.
    with open(STATEMENTS_PATH, encoding="utf-8") as fin:
        stripped_lines = (line.strip() for line in fin)
        # Strip before filtering so blank lines never become empty
        # statements and indented comments are recognised as comments.
        return [text for text in stripped_lines
                if text and not text.startswith('#')]
|
71 |
|
72 |
|
app_utils/frontend_utils.py
CHANGED
@@ -11,5 +11,6 @@ def reset_results(*args):
|
|
11 |
st.session_state.results = None
|
12 |
st.session_state.raw_json = None
|
13 |
|
14 |
-
|
15 |
-
|
|
|
|
11 |
st.session_state.results = None
|
12 |
st.session_state.raw_json = None
|
13 |
|
14 |
+
# HTML snippets keyed by entailment label; each value wraps the verdict
# word in a colored <span>.
entailment_html_messages = {
    'entailment':
        'The knowledge base seems to <span style="color:green">confirm</span> your statement',
    'contradiction':
        'The knowledge base seems to <span style="color:red">contradict</span> your statement',
    'neutral':
        'The knowledge base is <span style="color:darkgray">neutral</span> about your statement',
}
|
data/index/faiss_document_store.db
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:449d3708074efc81a6c59bbe4164b18f5ac45b28db530fee8eec5eff74504e45
|
3 |
+
size 73195520
|
data/index/my_faiss_index.faiss
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b971dc989aec74c8e34d808a4f9c713dc93c3ae247ec5cbefb0e00d98a3fcc56
|
3 |
+
size 150488109
|
data/statements.txt
CHANGED
@@ -1,5 +1,20 @@
|
|
1 |
Kurt Cobain died in 1994
|
2 |
Kurt Cobain died in 2008
|
3 |
-
Green Day
|
4 |
-
Green Day
|
5 |
-
The Beatles' first album was released in 1985
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Kurt Cobain died in 1994
|
2 |
Kurt Cobain died in 2008
|
3 |
+
Green Day is a heavy metal band
|
4 |
+
Green Day is a punk rock band
|
5 |
+
The Beatles' first album was released in 1985
|
6 |
+
The Offspring is a French punk rock band
|
7 |
+
Kurt Cobain was a biker
|
8 |
+
Joe Cocker was American
|
9 |
+
The Eagles won a Grammy Award
|
10 |
+
Mick Jagger was part of the Beatles
|
11 |
+
London calling is a hit by The Clash
|
12 |
+
Red Hot Chili Peppers were formed in New York
|
13 |
+
The Smiths is a very long-lived band
|
14 |
+
U2 have participated in philanthropic initiatives
|
15 |
+
Sweet Home Alabama is a popular song by Lynyrd Skynyrd
|
16 |
+
Steve Vai collaborated with Frank Zappa
|
17 |
+
The White Stripes were a trio
|
18 |
+
The White Stripes were composed by Jack White and Meg White
|
19 |
+
Scorpions is a German trap band
|
20 |
+
Sepultura is a heavy metal band
|