Update app.py
Browse files
Add examples and information
app.py
CHANGED
@@ -2,6 +2,22 @@ import streamlit as st
|
|
2 |
from transformers import AutoTokenizer, T5ForConditionalGeneration
|
3 |
import post_ocr
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# Load model
|
7 |
@st.cache_resource
|
@@ -23,10 +39,13 @@ post_ocr.set_model(model, tokenizer)
|
|
23 |
|
24 |
# Title
|
25 |
st.title(':memo: Swedish OCR correction')
|
|
|
|
|
26 |
# Input and output areas
|
27 |
tab1, tab2 = st.tabs(["Text input", "From file"])
|
28 |
|
29 |
|
|
|
30 |
def clean_inputs():
|
31 |
st.session_state.inputs = {'tab1': None, 'tab2': None}
|
32 |
|
@@ -43,15 +62,29 @@ if 'outputs' not in st.session_state:
|
|
43 |
|
44 |
# Sidebar (settings and stuff)
|
45 |
with st.sidebar:
|
46 |
-
st.header('Settings')
|
47 |
-
n_candidates = st.number_input('Overlap', help='A higher value may lead to better quality, but takes longer time', value=1, min_value=1, max_value=7, step=2, on_change=clean_inputs)
|
48 |
|
49 |
-
st.header('
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
show_changes = st.toggle('Show changes')
|
51 |
|
52 |
|
53 |
def handle_input(input_, id_):
|
54 |
-
|
|
|
55 |
with st.container(border=True):
|
56 |
st.caption('Output')
|
57 |
|
@@ -70,8 +103,22 @@ def handle_input(input_, id_):
|
|
70 |
|
71 |
# Manual entry tab
|
72 |
with tab1:
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
|
77 |
# File upload tab
|
|
|
2 |
from transformers import AutoTokenizer, T5ForConditionalGeneration
|
3 |
import post_ocr
|
4 |
|
5 |
+
# Sidebar information
|
6 |
+
info = '''Welcome to the demo of the [swedish-ocr-correction](https://huggingface.co/viklofg/swedish-ocr-correction) model.
|
7 |
+
|
8 |
+
Enter or upload OCR output and the model will attempt to correct it.
|
9 |
+
|
10 |
+
:clock2: Slow generation? Try a shorter input.
|
11 |
+
'''
|
12 |
+
|
13 |
+
# Example inputs
|
14 |
+
examples = {
|
15 |
+
'Examples': None,
|
16 |
+
'Example 1': 'En Gosse fur plats nu genast ! inetallyrkc, JU 83 Drottninggatan.',
|
17 |
+
'Example 2': '— Storartad gåfva till Göteborgs Museum. Den i HandelstidniDgens g&rdagsnnmmer omtalade hvalfisken, sorn fångats i Frölnndaviken, har i dag af hr brukspatronen James Dickson blifvit inköpt för 1,500 rdr och skänkt till härvarande Museum.',
|
18 |
+
'Example 3': 'Sn underlig race att ſtudera, desfa uppſinnare! utropar en Londontidnings fronifôr. Wet ni hur ſtort antalet är af patenter, ſom ſiſtlidet är utfärdades i British Patent Office? Jo, 14,000 ſty>en !! Det kan man ju fkalla en rif rd! Fjorton tuſen uppfinninnar! Herre Gud, hwilfet märkrwoärdigt tidehrvarf wi lefroa i!'
|
19 |
+
}
|
20 |
+
|
21 |
|
22 |
# Load model
|
23 |
@st.cache_resource
|
|
|
39 |
|
40 |
# Title
|
41 |
st.title(':memo: Swedish OCR correction')
|
42 |
+
|
43 |
+
|
44 |
# Input and output areas
|
45 |
tab1, tab2 = st.tabs(["Text input", "From file"])
|
46 |
|
47 |
|
48 |
+
# Initialize session state
|
49 |
def clean_inputs():
|
50 |
st.session_state.inputs = {'tab1': None, 'tab2': None}
|
51 |
|
|
|
62 |
|
63 |
# Sidebar (settings and stuff)
|
64 |
with st.sidebar:
|
|
|
|
|
65 |
|
66 |
+
st.header('Welcome')
|
67 |
+
st.markdown(info)
|
68 |
+
|
69 |
+
st.header('Settings')
|
70 |
+
overlap2candidates = {'None': 1, 'Little': 3, 'Much': 5}
|
71 |
+
overlap_help = '''Long texts are processed in chunks using a sliding window technique.
|
72 |
+
Here you can choose how much overlap the sliding window should have with the previous
|
73 |
+
processed chunk. No overlap is the fastest, but some overlap may increase accuracy.'''
|
74 |
+
overlap = st.selectbox(
|
75 |
+
'Overlap',
|
76 |
+
options=overlap2candidates,
|
77 |
+
help=overlap_help,
|
78 |
+
on_change=clean_inputs)
|
79 |
+
n_candidates = overlap2candidates[overlap]
|
80 |
+
|
81 |
+
st.subheader('Output')
|
82 |
show_changes = st.toggle('Show changes')
|
83 |
|
84 |
|
85 |
def handle_input(input_, id_):
|
86 |
+
"""Generate and display output"""
|
87 |
+
|
88 |
with st.container(border=True):
|
89 |
st.caption('Output')
|
90 |
|
|
|
103 |
|
104 |
# Manual entry tab
|
105 |
with tab1:
|
106 |
+
col1, col2 = st.columns([4, 1])
|
107 |
+
|
108 |
+
with col2:
|
109 |
+
example_title = st.selectbox('Examples', options=examples,
|
110 |
+
label_visibility='collapsed')
|
111 |
+
|
112 |
+
with col1:
|
113 |
+
text = st.text_area(
|
114 |
+
label='Input text',
|
115 |
+
value=examples[example_title],
|
116 |
+
height=200,
|
117 |
+
label_visibility='collapsed',
|
118 |
+
placeholder='Enter OCR generated text or choose an example')
|
119 |
+
|
120 |
+
if text is not None:
|
121 |
+
handle_input(text, 'tab1')
|
122 |
|
123 |
|
124 |
# File upload tab
|