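# NOTE: the lines below are web file-viewer residue captured with the source;
# they are not part of the program.
#   Linker1907's picture
#   init
#   724b1ea
#   raw
#   history blame
#   1.73 kB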
import streamlit as st
import streamlit.components.v1 as components
import json
# Directory holding the per-dataset `<dataset>_bad_examples.jsonl` files that
# collect the samples flagged as bad.
BAD_EXAMPLES_PATH = "bad_examples"
# Directory holding the per-dataset `<dataset>_examples_with_stats.json` input
# files that are loaded for review.
DATA_PATH = "data"
def load_jsonl(file_path):
    """Load a JSON Lines file into a list of decoded records.

    Args:
        file_path: Path of a text file holding one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the host
    # locale, and skip blank lines (e.g. a trailing empty line) that
    # json.loads would otherwise reject.
    with open(file_path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]
# Seed the sample index the first time it is missing from the session state;
# when it is already present its current value is left untouched.
if "idx" not in st.session_state:
    st.session_state["idx"] = 0
def get_next_item():
    """Advance the sample index stored in the session state by one."""
    current = st.session_state.idx
    st.session_state.idx = current + 1
def save_and_get_next_item(sample):
    """Record ``sample`` as a bad example, then move on to the next item.

    The sample is serialized as a single JSON line and appended to the
    bad-examples file of the module-level ``dataset``.

    Args:
        sample: JSON-serializable record to append.
    """
    target = f'{BAD_EXAMPLES_PATH}/{dataset}_bad_examples.jsonl'
    with open(target, 'a') as out:
        out.write(f"{json.dumps(sample)}\n")
    get_next_item()
# Datasets available for review; each one is read from
# `{DATA_PATH}/{name}_examples_with_stats.json`.
datasets = [
    'gutenberg_raw',
    "stackexchange2",
    "bigcode_python_code",
    "bigcode_python_github_issues",
    "bigcode_python_jupyter_scripts_dedup_filtered",
    "books3",
    "c4",
    "s2orc_raw",
]
dataset = st.sidebar.selectbox("Dataset", datasets)
data = load_jsonl(f'{DATA_PATH}/{dataset}_examples_with_stats.json')

# Single source of truth for the bad-examples file of the selected dataset.
bad_examples_file = f'{BAD_EXAMPLES_PATH}/{dataset}_bad_examples.jsonl'

# Create the bad-examples file if it does not exist yet, so it can be opened
# for reading below (append mode never truncates existing content).
with open(bad_examples_file, 'a'):
    pass

# Deleting the key lets the session-state initialization recreate it at 0 on
# the next run.
st.sidebar.button("Reset Index", on_click=lambda: st.session_state.__delitem__('idx'))

with open(bad_examples_file, "r+") as f:
    st.sidebar.download_button('Download bad example JSON file', f)

# Opening in 'w' mode and closing immediately truncates the file.
st.sidebar.button("Clear bad examples file", on_click=lambda: open(bad_examples_file, 'w').close())

if st.session_state.idx < len(data):
    with st.form(key='checkbox', clear_on_submit=True):
        sample = data[st.session_state.idx]
        text = sample["text"]
        st.text_area(f"text id: {st.session_state.idx}", text, height=500)
        # GOOD only advances; BAD also appends the sample to the bad-examples file.
        st.form_submit_button('GOOD', on_click=get_next_item)
        st.form_submit_button('BAD', on_click=save_and_get_next_item, args=(sample,))
else:
    # The index ran past the last sample (or the selected dataset is shorter
    # or empty): show a message instead of failing with an IndexError.
    st.info(
        f"No more examples to review in '{dataset}' "
        f"({len(data)} loaded). Use 'Reset Index' in the sidebar to start over."
    )