Update app.py
Browse files
Add examples and information
@@ -2,6 +2,22 @@ import streamlit as st
2 |
from transformers import AutoTokenizer, T5ForConditionalGeneration
3 |
import post_ocr
4 |
5 |
6 |
# Load model
7 |
@@ -23,10 +39,13 @@ post_ocr.set_model(model, tokenizer)
23 |
24 |
# Title
25 |
st.title(':memo: Swedish OCR correction')
26 |
# Input and output areas
27 |
tab1, tab2 = st.tabs(["Text input", "From file"])
28 |
29 |
30 |
def clean_inputs():
31 |
st.session_state.inputs = {'tab1': None, 'tab2': None}
32 |
@@ -43,15 +62,29 @@ if 'outputs' not in st.session_state:
43 |
44 |
# Sidebar (settings and stuff)
45 |
with st.sidebar:
46 |
47 |
n_candidates = st.number_input('Overlap', help='A higher value may lead to better quality, but takes longer time', value=1, min_value=1, max_value=7, step=2, on_change=clean_inputs)
48 |
49 |
50 |
show_changes = st.toggle('Show changes')
51 |
52 |
53 |
def handle_input(input_, id_):
54 |
55 |
with st.container(border=True):
56 |
57 |
@@ -70,8 +103,22 @@ def handle_input(input_, id_):
70 |
71 |
# Manual entry tab
72 |
with tab1:
73 |
74 |
75 |
76 |
77 |
# File upload tab
2 |
from transformers import AutoTokenizer, T5ForConditionalGeneration
3 |
import post_ocr
4 |
5 |
# Sidebar information
6 |
info = '''Welcome to the demo of the [swedish-ocr-correction](https://huggingface.co/viklofg/swedish-ocr-correction) model.
7 |
8 |
Enter or upload OCR output and the model will attempt to correct it.
9 |
10 |
:clock2: Slow generation? Try a shorter input.
11 |
12 |
13 |
# Example inputs
14 |
examples = {
15 |
'Examples': None,
16 |
'Example 1': 'En Gosse fur plats nu genast ! inetallyrkc, JU 83 Drottninggatan.',
17 |
'Example 2': '— Storartad gåfva till Göteborgs Museum. Den i HandelstidniDgens g&rdagsnnmmer omtalade hvalfisken, sorn fångats i Frölnndaviken, har i dag af hr brukspatronen James Dickson blifvit inköpt för 1,500 rdr och skänkt till härvarande Museum.',
18 |
'Example 3': 'Sn underlig race att ſtudera, desfa uppſinnare! utropar en Londontidnings fronifôr. Wet ni hur ſtort antalet är af patenter, ſom ſiſtlidet är utfärdades i British Patent Office? Jo, 14,000 ſty>en !! Det kan man ju fkalla en rif rd! Fjorton tuſen uppfinninnar! Herre Gud, hwilfet märkrwoärdigt tidehrvarf wi lefroa i!'
19 |
20 |
21 |
22 |
# Load model
23 |
39 |
40 |
# Title
41 |
st.title(':memo: Swedish OCR correction')
42 |
43 |
44 |
# Input and output areas
45 |
tab1, tab2 = st.tabs(["Text input", "From file"])
46 |
47 |
48 |
# Initialize session state
49 |
def clean_inputs():
50 |
st.session_state.inputs = {'tab1': None, 'tab2': None}
51 |
62 |
63 |
# Sidebar (settings and stuff)
64 |
with st.sidebar:
65 |
66 |
67 |
68 |
69 |
70 |
overlap2candidates = {'None': 1, 'Little': 3, 'Much': 5}
71 |
overlap_help = '''Long texts are processed in chunks using a sliding window technique.
72 |
Here you can choose how much overlap the sliding window should have with the previous
73 |
processed chunk. No overlap is the fastest, but some overlap may increase accuracy.'''
74 |
overlap = st.selectbox(
75 |
76 |
77 |
78 |
79 |
n_candidates = overlap2candidates[overlap]
80 |
81 |
82 |
show_changes = st.toggle('Show changes')
83 |
84 |
85 |
def handle_input(input_, id_):
86 |
"""Generate and display output"""
87 |
88 |
with st.container(border=True):
89 |
90 |
103 |
104 |
# Manual entry tab
105 |
with tab1:
106 |
col1, col2 = st.columns([4, 1])
107 |
108 |
with col2:
109 |
example_title = st.selectbox('Examples', options=examples,
110 |
111 |
112 |
with col1:
113 |
text = st.text_area(
114 |
label='Input text',
115 |
116 |
117 |
118 |
placeholder='Enter OCR generated text or choose an example')
119 |
120 |
if text is not None:
121 |
handle_input(text, 'tab1')
122 |
123 |
124 |
# File upload tab