orionweller
committed on
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
import pandas as pd
|
4 |
+
import json
|
5 |
+
from html import escape
|
6 |
+
import difflib
|
7 |
+
|
8 |
+
def generate_diff_html_word_level(text1, text2):
    """
    Render a word-by-word diff of ``text1`` against ``text2`` as an HTML string.

    Both inputs are split on whitespace, compared with
    ``difflib.SequenceMatcher``, and every run of words is HTML-escaped.
    Words only present in ``text1`` are wrapped in a pink ``<del>`` tag,
    words only present in ``text2`` in a green ``<ins>`` tag, and unchanged
    runs are emitted as plain escaped text. The pieces are joined with single
    spaces and wrapped in a ``<pre>`` element using ``white-space: pre-wrap``.
    """
    del_open = '<del style="background-color: #fbb6ce;">'
    ins_open = '<ins style="background-color: #b7e4c7;">'

    old_words = text1.split()
    new_words = text2.split()

    def removed(lo, hi):
        # Escaped slice of the old text, marked as a deletion.
        return del_open + escape(' '.join(old_words[lo:hi])) + '</del>'

    def added(lo, hi):
        # Escaped slice of the new text, marked as an insertion.
        return ins_open + escape(' '.join(new_words[lo:hi])) + '</ins>'

    pieces = []
    opcodes = difflib.SequenceMatcher(None, old_words, new_words).get_opcodes()
    for tag, i1, i2, j1, j2 in opcodes:
        if tag == 'equal':
            pieces.append(escape(' '.join(old_words[i1:i2])))
            continue
        # A 'replace' is emitted as a deletion immediately followed by an insertion.
        if tag in ('replace', 'delete'):
            pieces.append(removed(i1, i2))
        if tag in ('replace', 'insert'):
            pieces.append(added(j1, j2))

    joined = ' '.join(pieces)
    # NOTE(review): as written, the search and replacement strings are the
    # same, so this call leaves the text unchanged — confirm the intent.
    final_html = joined.replace('</del> <ins', '</del> <ins')
    return f'<pre style="white-space: pre-wrap;">{final_html}</pre>'
|
32 |
+
|
33 |
+
# NOTE(review): presumably forces protobuf onto its pure-Python backend;
# confirm against the protobuf version actually deployed.
os.environ.update({"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"})

# Configure the page to use the wide layout.
st.set_page_config(layout="wide")
|
35 |
+
|
36 |
+
@st.cache_data
def convert_df(df):
    """
    Serialize ``df`` to CSV and return the result as UTF-8 encoded bytes.

    The index is not written and ``"`` is used as the quote character.
    The function is wrapped in ``st.cache_data``.
    """
    csv_text = df.to_csv(index=False, quotechar='"')
    csv_bytes = csv_text.encode('utf-8')
    return csv_bytes
|
39 |
+
|
40 |
+
@st.cache_data
def load_narratives_data():
    """
    Load ``narratives.jsonl`` from the working directory into a DataFrame.

    Each non-blank line of the file is parsed as one JSON document and
    becomes one row. The function is wrapped in ``st.cache_data``.

    Returns:
        pandas.DataFrame: one row per parsed line.

    Raises:
        FileNotFoundError: if ``narratives.jsonl`` does not exist.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's default locale encoding.
    with open("narratives.jsonl", "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. a trailing newline at end of file), which
        # json.loads would otherwise reject.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
|
47 |
+
|
48 |
+
# Load all narratives into a DataFrame.
narratives_df = load_narratives_data()

# Two columns with relative widths 1:3 and a large gap between them.
col1, col2 = st.columns([1, 3], gap="large")

# Sidebar currently only carries a title.
st.sidebar.title("Options")
|
54 |
+
|
55 |
+
with col1:
    st.title("Narratives")
    narrative_ids = narratives_df["id"].tolist()
    container_for_nav = st.container()

    def sync_from_drop():
        """Propagate a dropdown selection to the index state and the number input.

        "Overview" maps to index -1; any other selection maps to its position
        in ``narrative_ids``.
        """
        selected = st.session_state.selectbox_narrative
        index = -1 if selected == "Overview" else narrative_ids.index(selected)
        st.session_state.narrative_index = index
        # The number input's value is what selects the displayed narrative,
        # so it has to follow the dropdown; otherwise picking an ID from the
        # dropdown would not change what is shown.
        st.session_state.narrative_number = index

    def sync_from_number():
        """Propagate a number-input change to the index state and the dropdown.

        -1 maps to "Overview"; any other value selects the ID at that position
        in ``narrative_ids``.
        """
        index = st.session_state.narrative_number
        st.session_state.narrative_index = index
        if index == -1:
            st.session_state.selectbox_narrative = "Overview"
        else:
            st.session_state.selectbox_narrative = narrative_ids[index]

    # Index selector: -1 means "Overview", 0..len-1 address a narrative.
    narrative_number = container_for_nav.number_input(
        min_value=-1, step=1, max_value=len(narrative_ids) - 1,
        on_change=sync_from_number,
        label=f"Select narrative by index (up to **{len(narrative_ids) - 1}**)",
        key="narrative_number"
    )
    # ID selector: kept in sync with the index selector via the callbacks above.
    selectbox_narrative = container_for_nav.selectbox(
        "Select narrative by ID",
        ["Overview"] + narrative_ids,
        on_change=sync_from_drop,
        key="selectbox_narrative"
    )
    st.divider()
|
86 |
+
|
87 |
+
with col2:
    # The value of the number input selects what is rendered here.
    narrative_index = narrative_number

    if narrative_index < 0:
        # Negative index: show the overview page.
        st.title("Overview")
        st.write(f"Total number of narratives: {len(narratives_df)}")
        st.write("Select a narrative from the sidebar to view and edit its details.")
    else:
        # Non-negative index: show the editor for the row at that position.
        narrative = narratives_df.iloc[narrative_index]

        st.markdown(
            "<h1 style='text-align: center; color: black;text-decoration: underline;'>Editor</h1>",
            unsafe_allow_html=True,
        )

        editor = st.container()

        editor.subheader(f"Narrative ID: {narrative['id']}")
        editor.divider()

        # Word-level diff between the original and altered English texts.
        editor.subheader("Diff: Original English vs Altered English")
        diff_html = generate_diff_html_word_level(
            narrative['original_english'],
            narrative['altered_english'],
        )
        with editor.container(border=True):
            st.markdown(diff_html, unsafe_allow_html=True)
        editor.divider()

        # Editable copy of the narrative's 'original' field.
        editor.subheader("Original Text")
        original_input = editor.text_area(
            "Edit the original text",
            value=narrative['original'],
            height=300,
        )
|