viklofg committed on
Commit
2a8aa8e
1 Parent(s): 5dd6b29

Update app.py

Browse files

Add examples and information

Files changed (1) hide show
  1. app.py +53 -6
app.py CHANGED
@@ -2,6 +2,22 @@ import streamlit as st
2
  from transformers import AutoTokenizer, T5ForConditionalGeneration
3
  import post_ocr
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  # Load model
7
  @st.cache_resource
@@ -23,10 +39,13 @@ post_ocr.set_model(model, tokenizer)
23
 
24
  # Title
25
  st.title(':memo: Swedish OCR correction')
 
 
26
  # Input and output areas
27
  tab1, tab2 = st.tabs(["Text input", "From file"])
28
 
29
 
 
30
  def clean_inputs():
31
  st.session_state.inputs = {'tab1': None, 'tab2': None}
32
 
@@ -43,15 +62,29 @@ if 'outputs' not in st.session_state:
43
 
44
  # Sidebar (settings and stuff)
45
  with st.sidebar:
46
- st.header('Settings')
47
- n_candidates = st.number_input('Overlap', help='A higher value may lead to better quality, but takes longer time', value=1, min_value=1, max_value=7, step=2, on_change=clean_inputs)
48
 
49
- st.header('Output')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  show_changes = st.toggle('Show changes')
51
 
52
 
53
  def handle_input(input_, id_):
54
-
 
55
  with st.container(border=True):
56
  st.caption('Output')
57
 
@@ -70,8 +103,22 @@ def handle_input(input_, id_):
70
 
71
  # Manual entry tab
72
  with tab1:
73
- typed_input = st.text_area('Input OCR', placeholder='Enter OCR generated text', label_visibility='collapsed')
74
- handle_input(typed_input, 'tab1')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
 
77
  # File upload tab
 
2
  from transformers import AutoTokenizer, T5ForConditionalGeneration
3
  import post_ocr
4
 
5
+ # Sidebar information
6
+ info = '''Welcome to the demo of the [swedish-ocr-correction](https://huggingface.co/viklofg/swedish-ocr-correction) model.
7
+
8
+ Enter or upload OCR output and the model will attempt to correct it.
9
+
10
+ :clock2: Slow generation? Try a shorter input.
11
+ '''
12
+
13
+ # Example inputs
14
+ examples = {
15
+ 'Examples': None,
16
+ 'Example 1': 'En Gosse fur plats nu genast ! inetallyrkc, JU 83 Drottninggatan.',
17
+ 'Example 2': '— Storartad gåfva till Göteborgs Museum. Den i HandelstidniDgens g&rdagsnnmmer omtalade hvalfisken, sorn fångats i Frölnndaviken, har i dag af hr brukspatronen James Dickson blifvit inköpt för 1,500 rdr och skänkt till härvarande Museum.',
18
+ 'Example 3': 'Sn underlig race att ſtudera, desfa uppſinnare! utropar en Londontidnings fronifôr. Wet ni hur ſtort antalet är af patenter, ſom ſiſtlidet är utfärdades i British Patent Office? Jo, 14,000 ſty>en !! Det kan man ju fkalla en rif rd! Fjorton tuſen uppfinninnar! Herre Gud, hwilfet märkrwoärdigt tidehrvarf wi lefroa i!'
19
+ }
20
+
21
 
22
  # Load model
23
  @st.cache_resource
 
39
 
40
  # Title
41
  st.title(':memo: Swedish OCR correction')
42
+
43
+
44
  # Input and output areas
45
  tab1, tab2 = st.tabs(["Text input", "From file"])
46
 
47
 
48
+ # Initialize session state
49
  def clean_inputs():
50
  st.session_state.inputs = {'tab1': None, 'tab2': None}
51
 
 
62
 
63
  # Sidebar (settings and stuff)
64
  with st.sidebar:
 
 
65
 
66
+ st.header('Welcome')
67
+ st.markdown(info)
68
+
69
+ st.header('Settings')
70
+ overlap2candidates = {'None': 1, 'Little': 3, 'Much': 5}
71
+ overlap_help = '''Long texts are processed in chunks using a sliding window technique.
72
+ Here you can choose how much overlap the sliding window should have with the previous
73
+ processed chunk. No overlap is the fastest, but some overlap may increase accuracy.'''
74
+ overlap = st.selectbox(
75
+ 'Overlap',
76
+ options=overlap2candidates,
77
+ help=overlap_help,
78
+ on_change=clean_inputs)
79
+ n_candidates = overlap2candidates[overlap]
80
+
81
+ st.subheader('Output')
82
  show_changes = st.toggle('Show changes')
83
 
84
 
85
  def handle_input(input_, id_):
86
+ """Generate and display output"""
87
+
88
  with st.container(border=True):
89
  st.caption('Output')
90
 
 
103
 
104
  # Manual entry tab
105
  with tab1:
106
+ col1, col2 = st.columns([4, 1])
107
+
108
+ with col2:
109
+ example_title = st.selectbox('Examples', options=examples,
110
+ label_visibility='collapsed')
111
+
112
+ with col1:
113
+ text = st.text_area(
114
+ label='Input text',
115
+ value=examples[example_title],
116
+ height=200,
117
+ label_visibility='collapsed',
118
+ placeholder='Enter OCR generated text or choose an example')
119
+
120
+ if text is not None:
121
+ handle_input(text, 'tab1')
122
 
123
 
124
  # File upload tab