Upload 35 files
Browse files- test/bot_test.py +11 -0
- test/ocr/__init__.py +0 -0
- test/ocr/__pycache__/__init__.cpython-310.pyc +0 -0
- test/ocr/__pycache__/__init__.cpython-312.pyc +0 -0
- test/ocr/__pycache__/characters.cpython-310.pyc +0 -0
- test/ocr/__pycache__/characters.cpython-312.pyc +0 -0
- test/ocr/__pycache__/datahelpers.cpython-310.pyc +0 -0
- test/ocr/__pycache__/datahelpers.cpython-312.pyc +0 -0
- test/ocr/__pycache__/helpers.cpython-310.pyc +0 -0
- test/ocr/__pycache__/helpers.cpython-312.pyc +0 -0
- test/ocr/__pycache__/normalization.cpython-310.pyc +0 -0
- test/ocr/__pycache__/normalization.cpython-312.pyc +0 -0
- test/ocr/__pycache__/page.cpython-310.pyc +0 -0
- test/ocr/__pycache__/page.cpython-312.pyc +0 -0
- test/ocr/__pycache__/tfhelpers.cpython-310.pyc +0 -0
- test/ocr/__pycache__/tfhelpers.cpython-312.pyc +0 -0
- test/ocr/__pycache__/viz.cpython-310.pyc +0 -0
- test/ocr/__pycache__/viz.cpython-312.pyc +0 -0
- test/ocr/__pycache__/words.cpython-310.pyc +0 -0
- test/ocr/__pycache__/words.cpython-312.pyc +0 -0
- test/ocr/characters.py +102 -0
- test/ocr/datahelpers.py +287 -0
- test/ocr/dataiterator.py +98 -0
- test/ocr/helpers.py +45 -0
- test/ocr/imgtransform.py +29 -0
- test/ocr/mlhelpers.py +102 -0
- test/ocr/normalization.py +207 -0
- test/ocr/page.py +121 -0
- test/ocr/tfhelpers.py +73 -0
- test/ocr/viz.py +22 -0
- test/ocr/words.py +223 -0
- test/ocr_test.py +61 -0
- test/openai_demo.py +27 -0
- test/streamlit_demo.py +19 -0
- test/test.py +14 -0
test/bot_test.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from bots import classify, feedback, vocab, assessment, speaking
|
2 |
+
|
3 |
+
# print(feedback.transcribe_handwriting('https://media.cnn.com/api/v1/images/stellar/prod/160122124623-01-national-handwriting-day.jpg?q=w_3264,h_1836,x_0,y_0,c_fill'))
|
4 |
+
|
5 |
+
# print(vocab.vocab_chat_with_model('hello', 'gpt-4'))
|
6 |
+
|
7 |
+
# print(assessment.chat_assessment_with_model('hello', 'gpt-4'))
|
8 |
+
|
9 |
+
# speaking.convert_to_mp3('sample.mp3', 'output.mp3')
|
10 |
+
# print(speaking.transcribe_audio('output.mp3'))
|
11 |
+
# speaking.text_to_speech('hello, world!', 'text2speech.mp3')
|
test/ocr/__init__.py
ADDED
File without changes
|
test/ocr/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (157 Bytes). View file
|
|
test/ocr/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (162 Bytes). View file
|
|
test/ocr/__pycache__/characters.cpython-310.pyc
ADDED
Binary file (2.57 kB). View file
|
|
test/ocr/__pycache__/characters.cpython-312.pyc
ADDED
Binary file (4.25 kB). View file
|
|
test/ocr/__pycache__/datahelpers.cpython-310.pyc
ADDED
Binary file (9.15 kB). View file
|
|
test/ocr/__pycache__/datahelpers.cpython-312.pyc
ADDED
Binary file (15 kB). View file
|
|
test/ocr/__pycache__/helpers.cpython-310.pyc
ADDED
Binary file (1.37 kB). View file
|
|
test/ocr/__pycache__/helpers.cpython-312.pyc
ADDED
Binary file (1.92 kB). View file
|
|
test/ocr/__pycache__/normalization.cpython-310.pyc
ADDED
Binary file (6.33 kB). View file
|
|
test/ocr/__pycache__/normalization.cpython-312.pyc
ADDED
Binary file (11.6 kB). View file
|
|
test/ocr/__pycache__/page.cpython-310.pyc
ADDED
Binary file (3.11 kB). View file
|
|
test/ocr/__pycache__/page.cpython-312.pyc
ADDED
Binary file (5.69 kB). View file
|
|
test/ocr/__pycache__/tfhelpers.cpython-310.pyc
ADDED
Binary file (2.77 kB). View file
|
|
test/ocr/__pycache__/tfhelpers.cpython-312.pyc
ADDED
Binary file (4.03 kB). View file
|
|
test/ocr/__pycache__/viz.cpython-310.pyc
ADDED
Binary file (800 Bytes). View file
|
|
test/ocr/__pycache__/viz.cpython-312.pyc
ADDED
Binary file (1.02 kB). View file
|
|
test/ocr/__pycache__/words.cpython-310.pyc
ADDED
Binary file (6.05 kB). View file
|
|
test/ocr/__pycache__/words.cpython-312.pyc
ADDED
Binary file (10.8 kB). View file
|
|
test/ocr/characters.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
import os
|
3 |
+
import numpy as np
|
4 |
+
#import tensorflow as tf
|
5 |
+
import tensorflow.compat.v1 as tf
|
6 |
+
tf.disable_v2_behavior()
|
7 |
+
import cv2
|
8 |
+
import math
|
9 |
+
|
10 |
+
from .helpers import *
|
11 |
+
from .tfhelpers import Model
|
12 |
+
|
13 |
+
# Both gap classifiers are loaded once, at import time, and shared by every
# call in this module: loading is slow, so repeated loads must be avoided.
print("Loading segmentation models...")
location = os.path.dirname(os.path.abspath(__file__))

# Sliding-window sizes handed to the classifiers as `slider`
CNN_slider = (60, 30)
RNN_slider = (60, 60)

CNN_model = Model(os.path.join(location, '../../models/gap-clas/CNN-CG'))
RNN_model = Model(
    os.path.join(location, '../../models/gap-clas/RNN/Bi-RNN-new'),
    'prediction',
)
|
24 |
+
|
25 |
+
|
26 |
+
def _classify(img, step=2, RNN=False, slider=(60, 60)):
    """Slice the image and return raw output of classifier.

    A window of width ``slider[1]`` is moved over the image columns in
    increments of ``step``; every window is flattened and passed to the
    CNN or the RNN gap classifier.

    Args:
        img: 2D image of a word (assumes its height equals slider[0] - TODO confirm)
        step: horizontal distance between neighbouring windows
        RNN: use the RNN model instead of the CNN model
        slider: (height, width) of the sliding window
    Returns:
        Raw prediction of the selected model
    """
    # Number of windows that fit into the image. The stride has to be `step`;
    # it used to be hard-coded to 2, which broke every other step value.
    length = (img.shape[1] - slider[1]) // step + 1
    windows = [img[:, loc * step: loc * step + slider[1]].flatten()
               for loc in range(length)]

    if RNN:
        # The RNN takes the whole word as a single sequence of windows
        input_seq = np.zeros((1, length, slider[0]*slider[1]), dtype=np.float32)
        input_seq[0][:] = windows
        pred = RNN_model.eval_feed({'inputs:0': input_seq,
                                    'length:0': [length],
                                    'keep_prob:0': 1})[0]
    else:
        # The CNN takes the windows as a batch of independent samples
        input_seq = np.zeros((length, slider[0]*slider[1]), dtype=np.float32)
        input_seq[:] = windows
        pred = CNN_model.run(input_seq)

    return pred
|
43 |
+
|
44 |
+
|
45 |
+
def segment(img, step=2, RNN=False, debug=False):
    """Take preprocessed image of word and
    returns array of positions separating characters.

    Args:
        img: preprocessed image of a word
        step: horizontal stride of the classifier window
        RNN: use the RNN gap classifier (and its window) instead of the CNN
        debug: draw the detected gaps into the image and show it
    Returns:
        List of integer x-positions of the gaps between characters
    """
    slider = RNN_slider if RNN else CNN_slider

    # Run the classifier
    pred = _classify(img, step=step, RNN=RNN, slider=slider)

    # Finalize the gap positions from raw prediction
    half_window = slider[1] / 2
    gaps = []
    last_gap = 0                    # non-gap predictions since the last gap
    gap_count = 1
    gap_position_sum = half_window
    first_gap = True
    gap_block_first = 0
    gap_block_last = half_window

    for i, p in enumerate(pred):
        if p == 1:
            position = i * step + half_window
            gap_position_sum += position
            gap_block_last = position
            gap_count += 1
            last_gap = 0
            if gap_block_first == 0:
                gap_block_first = position
        else:
            if gap_count != 0 and last_gap >= 1:
                if first_gap:
                    # The first separator is the end of the leading gap block
                    gaps.append(int(gap_block_last))
                    first_gap = False
                else:
                    # Later separators are the mean position of their block
                    gaps.append(int(gap_position_sum // gap_count))
                gap_position_sum = 0
                gap_count = 0
            # NOTE(review): the nesting of this reset was reconstructed from a
            # listing without indentation - confirm it belongs to the `else`
            # branch and not to the `if` above.
            gap_block_first = 0
            last_gap += 1

    # Adding final gap position
    if gap_block_first != 0:
        gaps.append(int(gap_block_first))
    else:
        # Position of the last window; it must use `step`, same as the loop
        # above (it used to be hard-coded to 2).
        gap_position_sum += (len(pred) - 1) * step + half_window
        gaps.append(int(gap_position_sum / (gap_count + 1)))

    if debug:
        # Drawing lines
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        for gap in gaps:
            cv2.line(img,
                     (int(gap), 0),
                     (int(gap), slider[0]),
                     (0, 255, 0), 1)
        implt(img, t="Separated characters")

    return gaps
|
test/ocr/datahelpers.py
ADDED
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Helper functions for loading and creating datasets
|
4 |
+
"""
|
5 |
+
import numpy as np
|
6 |
+
import glob
|
7 |
+
import simplejson
|
8 |
+
import os
|
9 |
+
import cv2
|
10 |
+
import csv
|
11 |
+
import sys
|
12 |
+
import unidecode
|
13 |
+
|
14 |
+
from .helpers import implt
|
15 |
+
from .normalization import letter_normalization
|
16 |
+
from .viz import print_progress_bar
|
17 |
+
|
18 |
+
|
19 |
+
# Index 0 is the empty label; it is followed by upper-case letters,
# lower-case letters, digits and a few punctuation marks.
CHARS = [''] + list(
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abcdefghijklmnopqrstuvwxyz'
    '0123456789'
    ".-+'"
)
CHAR_SIZE = len(CHARS)
idxs = list(range(CHAR_SIZE))
# Lookup tables in both directions: index -> char and char -> index
idx_2_chars = dict(enumerate(CHARS))
chars_2_idx = {char: idx for idx, char in enumerate(CHARS)}
|
30 |
+
|
31 |
+
def char2idx(c, sequence=False):
    """Return the index of character c in CHARS.

    Args:
        c: character to look up
        sequence: shift the index by one (sequence labels are offset by +1)
    Returns:
        Index of the character
    """
    offset = 1 if sequence else 0
    return chars_2_idx[c] + offset
|
35 |
+
|
36 |
+
def idx2char(idx, sequence=False):
    """Return the character stored in CHARS under the given index.

    Args:
        idx: index to look up
        sequence: the index is shifted by one (undo the +1 of char2idx)
    Returns:
        Character for the index
    """
    key = idx - 1 if sequence else idx
    return idx_2_chars[key]
|
40 |
+
|
41 |
+
|
42 |
+
def load_words_data(dataloc='data/words/', is_csv=False, load_gaplines=False):
    """
    Load word images with corresponding labels and gaplines (if load_gaplines == True).
    Args:
        dataloc: image folder location/CSV file - can be list of multiple locations
        is_csv: using CSV files
        load_gaplines: whether or not to load gaplines positions files
    Returns:
        (images, labels (, gaplines))
    Raises:
        ValueError: gaplines are requested together with CSV input
    """
    print("Loading words...")
    if not isinstance(dataloc, list):
        dataloc = [dataloc]

    if is_csv and load_gaplines:
        # Gaplines live in txt files next to the images; the CSV branch never
        # produced them and used to end in a NameError.
        raise ValueError("Gaplines can be loaded only from image folders, not from CSV files")

    if is_csv:
        csv.field_size_limit(sys.maxsize)
        # First pass: count data rows (lines minus the header) to size arrays
        length = 0
        for loc in dataloc:
            with open(loc) as csvfile:
                length += max(sum(1 for _ in csvfile) - 1, 0)

        labels = np.empty(length, dtype=object)
        images = np.empty(length, dtype=object)
        i = 0
        for loc in dataloc:
            print(loc)
            with open(loc) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    shape = np.fromstring(
                        row['shape'],
                        sep=',',
                        dtype=int)
                    img = np.fromstring(
                        row['image'],
                        sep=', ',
                        dtype=np.uint8).reshape(shape)
                    labels[i] = row['label']
                    images[i] = img

                    print_progress_bar(i, length)
                    i += 1
    else:
        img_list = []
        tmp_labels = []
        for loc in dataloc:
            tmp_list = glob.glob(os.path.join(loc, '*.png'))
            img_list += tmp_list
            # The label is the part of the file name in front of the first
            # "_"; basename keeps this correct without a trailing slash in loc
            tmp_labels += [os.path.basename(name).split("_")[0]
                           for name in tmp_list]

        labels = np.array(tmp_labels)
        images = np.empty(len(img_list), dtype=object)

        # Load grayscaled images
        for i, img in enumerate(img_list):
            images[i] = cv2.imread(img, 0)
            print_progress_bar(i, len(img_list))

        # Load gaplines (lines separating letters) from txt files
        if load_gaplines:
            gaplines = np.empty(len(img_list), dtype=object)
            for i, name in enumerate(img_list):
                with open(name[:-3] + 'txt', 'r') as fp:
                    gaplines[i] = np.array(simplejson.load(fp))

    if load_gaplines:
        assert len(labels) == len(images) == len(gaplines)
    else:
        assert len(labels) == len(images)
    print("-> Number of words:", len(labels))

    if load_gaplines:
        return (images, labels, gaplines)
    return (images, labels)
|
117 |
+
|
118 |
+
|
119 |
+
def _words2chars(images, labels, gaplines):
    """Transform word images with gaplines into individual chars.

    Args:
        images: word images
        labels: word labels, one per image
        gaplines: positions separating the letters, one array per image
    Returns:
        (char images, list of char indexes)
    """
    # Total number of chars
    length = sum(len(word) for word in labels)

    imgs = np.empty(length, dtype=object)
    new_labels = []

    # Every char is cut using the height of the first word image
    height = images[0].shape[0]

    idx = 0
    for i, gaps in enumerate(gaplines):
        word_img = images[i]
        word = labels[i]
        # Neighbouring gaplines delimit one character
        for pos, (left, right) in enumerate(zip(gaps[:-1], gaps[1:])):
            imgs[idx] = word_img[0:height, left:right]
            new_labels.append(char2idx(word[pos]))
            idx += 1

    print("Loaded chars from words:", length)
    return imgs, new_labels
|
138 |
+
|
139 |
+
|
140 |
+
def load_chars_data(charloc='data/charclas/', wordloc='data/words/', lang='cz'):
    """
    Load chars images with corresponding labels.
    Args:
        charloc: char images FOLDER LOCATION ('' skips this source)
        wordloc: word images with gaplines FOLDER LOCATION ('' skips this source)
        lang: language subfolder of charloc; labels of words are transliterated
              by unidecode for every language except 'cz'
    Returns:
        (images, labels)
    """
    print("Loading chars...")
    # Empty float seed keeps the dtype and the (n, 4096) layout of the result
    chunks = [np.zeros((0, 4096))]
    labels = []

    if charloc != '':
        # Get subfolders with chars
        dir_list = sorted(glob.glob(os.path.join(charloc, lang, "*/")))

        # if lang == 'en':
        chars = CHARS[:53]

        # Folder names have to match the chars; folder '0' is the empty label
        assert [d[-2] if d[-2] != '0' else '' for d in dir_list] == chars

        # For every label load images and create corresponding labels
        # cv2.imread(img, 0) - for loading images in grayscale
        # Images are scaled to 64x64 = 4096 px
        for i, _ in enumerate(chars):
            img_list = glob.glob(os.path.join(dir_list[i], '*.jpg'))
            imgs = np.array([letter_normalization(cv2.imread(img, 0))
                             for img in img_list])
            chunks.append(imgs.reshape(len(imgs), 4096))
            labels.extend([i] * len(imgs))

    if wordloc != '':
        imgs, words, gaplines = load_words_data(wordloc, load_gaplines=True)
        if lang != 'cz':
            words = np.array([unidecode.unidecode(w) for w in words])
        imgs, chars = _words2chars(imgs, words, gaplines)

        labels.extend(chars)
        images2 = np.zeros((len(imgs), 4096))
        total = len(imgs)
        for i in range(total):
            print_progress_bar(i, total)
            images2[i] = letter_normalization(imgs[i]).reshape(1, 4096)

        chunks.append(images2)

    images = np.concatenate(chunks)
    labels = np.array(labels)

    print("-> Number of chars:", len(labels))
    return (images, labels)
|
191 |
+
|
192 |
+
|
193 |
+
def load_gap_data(loc='data/gapdet/large/', slider=(60, 120), seq=False, flatten=True):
    """
    Load gap data from location with corresponding labels.
    Args:
        loc: location of folder with words separated into gap data
             images have to be named as label_timestamp.jpg, label is 0 or 1
        slider: dimensions of output images; a width above 120 is
                clamped to 120
        seq: Store images from one word as a sequence
        flatten: Flatten the output images (used in seq mode)
    Returns:
        (images, labels)
    """
    print('Loading gap data...')
    dir_list = glob.glob(os.path.join(loc, "*/"))
    dir_list.sort()

    if slider[1] > 120:
        # TODO Implement for higher dimmensions
        # slider may be a tuple (the default is), so build a new one
        # instead of assigning into it.
        slider = (slider[0], 120)

    # The stored images are 120 px wide; cut the same margin from both sides
    margin = (120 - slider[1]) // 2
    cut_s = None if margin <= 0 else margin
    cut_e = None if margin <= 0 else -(120 - slider[1]) // 2

    def load_cut(path):
        """Load a grayscale image and cut it to the slider width."""
        return cv2.imread(path, 0)[:, cut_s:cut_e]

    if seq:
        images = np.empty(len(dir_list), dtype=object)
        labels = np.empty(len(dir_list), dtype=object)

        for i, word_dir in enumerate(dir_list):
            # TODO Check for empty directories
            img_list = glob.glob(os.path.join(word_dir, '*.jpg'))
            if img_list:
                # Order the images of one word by the number after "_"
                img_list = sorted(
                    img_list,
                    key=lambda x: int(os.path.basename(x).split("_")[1][:-4]))
                images[i] = np.array([load_cut(img).flatten() if flatten
                                      else load_cut(img)
                                      for img in img_list])
                labels[i] = np.array([int(os.path.basename(name).split("_")[0])
                                      for name in img_list])

    else:
        # Empty float seed keeps the dtype and layout of the result
        chunks = [np.zeros((0, slider[0]*slider[1]))]
        labels = []

        for word_dir in dir_list:
            img_list = glob.glob(os.path.join(word_dir, '*.jpg'))
            if img_list:
                imgs = np.array([load_cut(img) for img in img_list])
                chunks.append(imgs.reshape(len(imgs), slider[0]*slider[1]))
                # The label is the first character of the file name
                labels.extend([int(os.path.basename(img)[0]) for img in img_list])

        images = np.concatenate(chunks)
        labels = np.array(labels)

    if seq:
        # Empty directories leave None entries; skip them when counting
        print("-> Number of words / gaps and letters:",
              len(labels), '/', sum(len(l) for l in labels if l is not None))
    else:
        print("-> Number of gaps and letters:", len(labels))
    return (images, labels)
|
250 |
+
|
251 |
+
|
252 |
+
def corresponding_shuffle(a):
    """
    Shuffle array of numpy arrays such that
    each pair a[x][i] and a[y][i] remains the same.
    Args:
        a: array of same length numpy arrays (modified in place)
    Returns:
        Array a with shuffled numpy arrays
    """
    expected = len(a[0])
    assert all([len(arr) == expected for arr in a])

    # One permutation applied to every array keeps the rows aligned
    order = np.random.permutation(expected)
    for pos, arr in enumerate(a):
        a[pos] = arr[order]
    return a
|
266 |
+
|
267 |
+
|
268 |
+
def sequences_to_sparse(sequences):
    """
    Create a sparse representation of sequences.
    Args:
        sequences: a list of lists of type dtype where each element is a sequence
    Returns:
        A tuple with (indices, values, shape)
            indices: int64 array of (sequence number, position) pairs
            values: int32 array of all the sequence items in order
            shape: int64 array [number of sequences, longest sequence length]
    """
    indices = np.asarray(
        [(row, col)
         for row, seq in enumerate(sequences)
         for col in range(len(seq))],
        dtype=np.int64).reshape(-1, 2)   # keeps (0, 2) layout when empty
    values = np.asarray([item for seq in sequences for item in seq],
                        dtype=np.int32)

    # Empty input used to crash on max() of an empty array
    max_len = max((len(seq) for seq in sequences), default=0)
    shape = np.asarray([len(sequences), max_len], dtype=np.int64)

    return indices, values, shape
|
test/ocr/dataiterator.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""Classes for feeding data during training."""
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
from .helpers import img_extend
|
6 |
+
from .datahelpers import sequences_to_sparse
|
7 |
+
|
8 |
+
|
9 |
+
class BucketDataIterator():
    """Iterator for feeding CTC model during training.

    Images are sorted by width and split into buckets, so that one batch
    holds images of similar width and needs little padding.
    """
    def __init__(self,
                 images,
                 targets,
                 num_buckets=5,
                 slider=(60, 30),
                 augmentation=None,
                 dropout=0.0,
                 train=True):
        """Extend the images, sort them by width and split them into buckets.

        Args:
            images: word images; every item is replaced in place by its
                    extended version
            targets: target sequences, one per image
            num_buckets: number of buckets
            slider: (height, min width) the images are extended to
            augmentation: object with augment_images method used on training
                          batches; None turns the augmentation off
            dropout: stored only, not used by this class
            train: training mode (turns on the augmentation)
        """
        self.train = train
        self.slider = slider
        self.augmentation = augmentation
        self.dropout = dropout
        for i in range(len(images)):
            images[i] = img_extend(
                images[i],
                (self.slider[0],
                 max(images[i].shape[1], self.slider[1])))
        in_length = [image.shape[1] for image in images]

        # Create pandas dataFrame and sort it by images width (length)
        self.dataFrame = pd.DataFrame({
            'in_length': in_length,
            'images': images,
            'targets': targets}).sort_values('in_length').reset_index(drop=True)

        bsize = int(len(images) / num_buckets)
        self.num_buckets = num_buckets
        self.buckets = []
        for bucket in range(num_buckets-1):
            self.buckets.append(
                self.dataFrame.iloc[bucket * bsize: (bucket+1) * bsize])
        # The last bucket takes also the remainder of the division
        self.buckets.append(self.dataFrame.iloc[(num_buckets-1) * bsize:])

        self.buckets_size = [len(bucket) for bucket in self.buckets]
        self.cursor = np.array([0] * num_buckets)
        self.bucket_order = np.random.permutation(num_buckets)
        self.bucket_cursor = 0
        self.shuffle()
        print("Iterator created.")


    def shuffle(self, idx=None):
        """Shuffle idx bucket or each bucket separately."""
        for i in [idx] if idx is not None else range(self.num_buckets):
            self.buckets[i] = self.buckets[i].sample(frac=1).reset_index(drop=True)
            self.cursor[i] = 0


    def next_batch(self, batch_size):
        """Creates next training batch of size.
        Args:
            batch_size: size of next batch
        Returns:
            (images, targets, images lengths), where targets is the output
            of sequences_to_sparse for the batch
        """
        i_bucket = self.bucket_order[self.bucket_cursor]
        # Increment cursor and shuffle in case of new round
        self.bucket_cursor = (self.bucket_cursor + 1) % self.num_buckets
        if self.bucket_cursor == 0:
            self.bucket_order = np.random.permutation(self.num_buckets)

        if self.cursor[i_bucket] + batch_size > self.buckets_size[i_bucket]:
            self.shuffle(i_bucket)

        # Handle too big batch sizes
        if (batch_size > self.buckets_size[i_bucket]):
            batch_size = self.buckets_size[i_bucket]

        res = self.buckets[i_bucket].iloc[self.cursor[i_bucket]:
                                          self.cursor[i_bucket]+batch_size]
        self.cursor[i_bucket] += batch_size

        # PAD input sequence and output
        input_max = max(res['in_length'])

        input_imgs = np.zeros(
            (batch_size, self.slider[0], input_max, 1), dtype=np.uint8)
        for i, img in enumerate(res['images']):
            input_imgs[i][:, :res['in_length'].values[i], 0] = img

        # augmentation defaults to None, so it has to be checked as well
        if self.train and self.augmentation is not None:
            input_imgs = self.augmentation.augment_images(input_imgs)
        input_imgs = input_imgs.astype(np.float32)

        targets = sequences_to_sparse(res['targets'].values)
        return input_imgs, targets, res['in_length'].values
|
98 |
+
|
test/ocr/helpers.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Helper functions for ocr project
|
4 |
+
"""
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import numpy as np
|
7 |
+
import cv2
|
8 |
+
|
9 |
+
|
10 |
+
SMALL_HEIGHT = 800
|
11 |
+
|
12 |
+
|
13 |
+
def implt(img, cmp=None, t=''):
    """Show image using plt.

    Args:
        img: image to show
        cmp: colormap handed to plt.imshow
        t: title of the plot
    """
    plt.imshow(img, cmap=cmp)
    plt.title(t)
    plt.show()
|
18 |
+
|
19 |
+
|
20 |
+
def resize(img, height=SMALL_HEIGHT, always=False):
    """Resize image to given height.

    Args:
        img: image to resize
        height: height of the result
        always: resize also images which are not higher than height
    Returns:
        Resized image, or the original image when no resize is needed
    """
    if img.shape[0] <= height and not always:
        return img

    # Scale the width by the same factor as the height
    rat = height / img.shape[0]
    new_width = int(rat * img.shape[1])
    return cv2.resize(img, (new_width, height))
|
27 |
+
|
28 |
+
|
29 |
+
def ratio(img, height=SMALL_HEIGHT):
    """Getting scale ratio.

    Returns:
        Height of the image divided by the given height
    """
    img_height = img.shape[0]
    return img_height / height
|
32 |
+
|
33 |
+
|
34 |
+
def img_extend(img, shape):
    """Extend 2D image (numpy array) in vertical and horizontal direction.
    Shape of result image will match 'shape'
    Args:
        img: image to be extended
        shape: shape (tuple) of result image
    Returns:
        Extended image (uint8); the original sits in the top-left corner
        and the rest is filled with zeros
    """
    rows = img.shape[0]
    cols = img.shape[1]
    canvas = np.zeros(shape, dtype=np.uint8)
    canvas[:rows, :cols] = img
    return canvas
|
test/ocr/imgtransform.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Functions for transforming and preprocessing images for training
|
4 |
+
"""
|
5 |
+
import numpy as np
|
6 |
+
import pandas as pd
|
7 |
+
import cv2
|
8 |
+
from scipy.ndimage.interpolation import map_coordinates
|
9 |
+
|
10 |
+
|
11 |
+
def coordinates_remap(image, factor_alpha, factor_sigma):
    """Transforming image using remaping coordinates.

    Args:
        image: 2D image to transform
        factor_alpha: strength of the shift, relative to the image width
        factor_sigma: sigma of the Gaussian blur, relative to the image width
    Returns:
        Transformed image of the same shape
    """
    shape = image.shape
    width = shape[1]
    alpha = width * factor_alpha
    sigma = width * factor_sigma

    # Kernel size has to be odd
    blur_size = int(4*sigma) | 1
    kernel = (blur_size, blur_size)

    def random_shift():
        """Blurred random field in range (-alpha, alpha)."""
        noise = np.random.rand(*shape) * 2 - 1
        return alpha * cv2.GaussianBlur(noise, ksize=kernel, sigmaX=sigma)

    dx = random_shift()
    dy = random_shift()

    x, y = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]))
    rows = np.reshape(y+dy, (-1, 1))
    cols = np.reshape(x+dx, (-1, 1))
    indices = rows, cols

    # TODO use cv2.remap(image, dx, dy, interpolation=cv2.INTER_LINEAR)
    remapped = map_coordinates(image, indices, order=1, mode='constant')
    return np.array(remapped.reshape(shape))
|
test/ocr/mlhelpers.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Classes for controling machine learning processes
|
4 |
+
"""
|
5 |
+
import numpy as np
|
6 |
+
import math
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
import csv
|
9 |
+
|
10 |
+
|
11 |
+
class TrainingPlot:
    """
    Creating live plot during training
    REQUIRES notebook backend: %matplotlib notebook
    @TODO Migrate to Tensorboard
    """
    # Class-level defaults; the lists are replaced per instance in __init__
    train_loss = []
    train_acc = []
    valid_acc = []
    test_iter = 0
    loss_iter = 0
    interval = 0
    ax1 = None
    ax2 = None
    fig = None

    def __init__(self, steps, test_itr, loss_itr):
        """Create the figure with two y axes (loss and accuracy).

        Args:
            steps: stored as interval
            test_itr: number of iterations between two accuracy records
            loss_itr: number of iterations between two loss records
        """
        # Every instance needs its own lists, otherwise all the plots
        # would share (and keep appending to) the class-level ones.
        self.train_loss = []
        self.train_acc = []
        self.valid_acc = []

        self.test_iter = test_itr
        self.loss_iter = loss_itr
        self.interval = steps

        self.fig, self.ax1 = plt.subplots()
        self.ax2 = self.ax1.twinx()
        self.ax1.set_autoscaley_on(True)
        plt.ion()

        self._update_plot()

        # Description
        self.ax1.set_xlabel('Iteration')
        self.ax1.set_ylabel('Train Loss')
        self.ax2.set_ylabel('Valid. Accuracy')

        # Axes limits
        self.ax1.set_ylim([0,10])

    def _update_plot(self):
        """Redraw the figure."""
        self.fig.canvas.draw()

    def update_loss(self, loss_train, index):
        """Add training loss to the plot.

        Args:
            loss_train: value of the training loss
            index: not used
        """
        self.train_loss.append(loss_train)
        if len(self.train_loss) == 1:
            # Set the loss axis limit according to the first value
            self.ax1.set_ylim([0, min(10, math.ceil(loss_train))])
        self.ax1.plot(self.loss_iter * np.arange(len(self.train_loss)),
                      self.train_loss, 'b', linewidth=1.0)

        self._update_plot()

    def update_acc(self, acc_val, acc_train, index):
        """Add validation and training accuracy to the plot.

        Args:
            acc_val: value of the validation accuracy
            acc_train: value of the training accuracy
            index: not used
        """
        self.valid_acc.append(acc_val)
        self.train_acc.append(acc_train)

        self.ax2.plot(self.test_iter * np.arange(len(self.valid_acc)),
                      self.valid_acc, 'r', linewidth=1.0)
        self.ax2.plot(self.test_iter * np.arange(len(self.train_acc)),
                      self.train_acc, 'g',linewidth=1.0)

        self.ax2.set_title('Valid. Accuracy: {:.4f}'.format(self.valid_acc[-1]))

        self._update_plot()
|
71 |
+
|
72 |
+
|
73 |
+
class DataSet:
    """Class for training data and feeding train function."""
    images = None
    labels = None
    length = 0
    index = 0

    def __init__(self, img, lbl):
        """Store the images with labels and start at the first item."""
        self.images, self.labels = img, lbl
        self.length = len(img)
        self.index = 0

    def next_batch(self, batch_size):
        """Return the next batch from the data set.

        When the batch would run past the end of the data, the data are
        shuffled and the batch is taken from the start of the new epoch.
        """
        start = self.index
        end = start + batch_size

        if end > self.length:
            # Shuffle the data
            order = np.arange(self.length)
            np.random.shuffle(order)
            self.images = self.images[order]
            self.labels = self.labels[order]
            # Start next epoch
            start, end = 0, batch_size

        self.index = end
        return self.images[start:end], self.labels[start:end]
|
test/ocr/normalization.py
ADDED
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Include functions for normalizing images of words and letters
|
4 |
+
Main functions: word_normalization, letter_normalization, image_standardization
|
5 |
+
"""
|
6 |
+
import numpy as np
|
7 |
+
import cv2
|
8 |
+
import math
|
9 |
+
|
10 |
+
from .helpers import *
|
11 |
+
|
12 |
+
|
13 |
+
def image_standardization(image):
    """Image standardization should result in same output
    as tf.image.per_image_standardization.
    """
    # Lower bound of the divisor protects uniform images from division by zero
    min_std = 1.0/math.sqrt(image.size)
    adjusted_std = max(np.std(image), min_std)
    centered = image - np.mean(image)
    return centered / adjusted_std
|
18 |
+
|
19 |
+
|
20 |
+
def _crop_add_border(img, height, threshold=50, border=True, border_size=15):
    """Crop a word/letter image to its content and optionally pad the sides.

    Values not above `threshold` are zeroed first. The crop box is the span
    between the first and last row/column holding more than one non-zero
    value; a side with no such row/column keeps the image edge. When
    `height` is non-zero the crop is resized to that height, and when
    `border` is set a black margin of `border_size` is added left and right.
    """
    # Clear small values
    _, img = cv2.threshold(img, threshold, 255, cv2.THRESH_TOZERO)
    n_rows, n_cols = img.shape[0], img.shape[1]

    def has_content(line):
        return np.count_nonzero(line) > 1

    y0 = next((r for r in range(n_rows) if has_content(img[r, :])), 0)
    y1 = next((r + 1 for r in reversed(range(n_rows))
               if has_content(img[r, :])), n_rows)
    x0 = next((c for c in range(n_cols) if has_content(img[:, c])), 0)
    x1 = next((c + 1 for c in reversed(range(n_cols))
               if has_content(img[:, c])), n_cols)

    cropped = img[y0:y1, x0:x1]
    if height != 0:
        cropped = resize(cropped, height, True)

    if not border:
        return cropped
    return cv2.copyMakeBorder(cropped, 0, 0, border_size, border_size,
                              cv2.BORDER_CONSTANT,
                              value=[0, 0, 0])
|
58 |
+
|
59 |
+
|
60 |
+
def _word_tilt(img, height, border=True, border_size=15):
    """Detect the slant angle of a word and tilt the image to compensate.

    Hough lines are detected on the Canny edges of `img`; only lines whose
    theta is below 0.7 or above 2.6 rad are considered. If more than one
    such line exists and their mean angle is itself in that range, the image
    is sheared by that angle. The result is always cropped (and bordered
    when `border` is set) by `_crop_add_border`.
    """
    edges = cv2.Canny(img, 50, 150, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 30)

    if lines is not None:
        # Collect the valid angles once; the builtin len() replaces the
        # deprecated np.sum(<generator>) count.
        angles = [l[0][1] for l in lines if l[0][1] < 0.7 or l[0][1] > 2.6]
        # Set min number of valid lines (try higher)
        mean_angle = np.mean(angles) if len(angles) > 1 else 0

        # Look for angle with correct value
        if mean_angle != 0 and (mean_angle < 0.7 or mean_angle > 2.6):
            img = _tilt_by_angle(img, mean_angle, height)
    return _crop_add_border(img, height, 50, border, border_size)
|
76 |
+
|
77 |
+
|
78 |
+
def _tilt_by_angle(img, angle, height):
    """Shear the image horizontally by the given angle.

    The horizontal shift is tan(angle) * height; the output is widened by
    the absolute shift so the sheared content still fits.
    """
    shift = np.tan(angle) * height
    img_width = len(img[0])
    src = np.float32([[0, 0],
                      [0, height],
                      [img_width, height],
                      [img_width, 0]])

    # Shift is positive for angle < 0.7; negative for angle > 2.6
    # Image must be shifted to the right
    if shift > 0:
        dst = np.float32([[0, 0],
                          [shift, height],
                          [img_width + shift, height],
                          [img_width, 0]])
    else:
        dst = np.float32([[-shift, 0],
                          [0, height],
                          [img_width, height],
                          [img_width - shift, 0]])

    transform = cv2.getPerspectiveTransform(src, dst)
    out_size = (int(img_width + abs(shift)), height)
    return cv2.warpPerspective(img, transform, out_size)
|
99 |
+
|
100 |
+
|
101 |
+
def _sobel_detect(channel):
    """Return the Sobel gradient magnitude of one channel as uint8."""
    grad_x = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
    grad_y = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
    # Combine x, y gradient magnitudes sqrt(x^2 + y^2), saturating at 255
    magnitude = np.minimum(np.hypot(grad_x, grad_y), 255)
    return np.uint8(magnitude)
|
109 |
+
|
110 |
+
|
111 |
+
class HysterThresh:
    """Hysteresis thresholding of a single-channel image.

    The image is inverted and stretched to 0..255. Two thresholds are
    derived from the most frequent intensity (histogram peak): `high` is
    peak + 65 and `low` is peak + 45. Pixels at or above `high` become 255
    in the output; pixels in [low, high) are kept (shifted up by `diff`)
    only when connected, through 8-neighbourhood steps, to such a pixel.
    """

    def __init__(self, img):
        img = 255 - img
        lowest, highest = np.min(img), np.max(img)
        if highest == lowest:
            # Constant image: avoid 0/0; nothing can exceed the thresholds.
            img = np.zeros(img.shape, dtype=np.float64)
        else:
            img = (img - lowest) / (highest - lowest) * 255
        hist, _ = np.histogram(img.ravel(), 256, [0, 256])

        peak = np.argmax(hist)
        self.high = peak + 65
        self.low = peak + 45
        self.diff = 255 - self.high

        self.img = img
        self.im = np.zeros(img.shape, dtype=img.dtype)

    def get_image(self):
        """Run the thresholding and return the result as a uint8 array."""
        self._hyster()
        return np.uint8(self.im)

    def _hyster_rec(self, r, c):
        """Flood-fill weak pixels reachable from (r, c).

        Uses an explicit stack instead of recursion so that large connected
        regions cannot exceed the interpreter's recursion limit. Each pixel's
        output depends only on its own value, so visit order is irrelevant.
        """
        h, w = self.img.shape
        pending = [(r, c)]
        while pending:
            row, col = pending.pop()
            for ri in range(max(row - 1, 0), min(row + 2, h)):
                for ci in range(max(col - 1, 0), min(col + 2, w)):
                    if (self.im[ri, ci] == 0
                            and self.high > self.img[ri, ci] >= self.low):
                        self.im[ri, ci] = self.img[ri, ci] + self.diff
                        pending.append((ri, ci))

    def _hyster(self):
        """Mark strong pixels, then grow into their connected weak pixels."""
        strong = self.img >= self.high
        self.im[strong] = 255
        self.img[strong] = 255
        for ri, ci in np.argwhere(strong):
            self._hyster_rec(int(ri), int(ci))
|
147 |
+
|
148 |
+
|
149 |
+
def _hyst_word_norm(image):
    """Normalize a word image using hysteresis thresholding.

    The RGB image is converted to grayscale, smoothed with a bilateral
    filter and passed through HysterThresh.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    smoothed = cv2.bilateralFilter(grayscale, 10, 10, 30)
    thresholder = HysterThresh(smoothed)
    return thresholder.get_image()
|
155 |
+
|
156 |
+
|
157 |
+
def word_normalization(image, height, border=True, tilt=True, border_size=15, hyst_norm=False):
    """Preprocess a word image: resize, threshold and optionally tilt it.

    With `hyst_norm` the thresholding is done by hysteresis; otherwise the
    image is bilateral-filtered, inverted to grayscale, min-max normalized
    and values not above 50 are zeroed. The result is then either tilted
    (which also crops) or only cropped, with an optional side border.
    """
    image = resize(image, height, True)

    if not hyst_norm:
        filtered = cv2.bilateralFilter(image, 10, 30, 30)
        inverted = 255 - cv2.cvtColor(filtered, cv2.COLOR_RGB2GRAY)
        stretched = cv2.normalize(inverted, None, 0, 255, cv2.NORM_MINMAX)
        _, th = cv2.threshold(stretched, 50, 255, cv2.THRESH_TOZERO)
    else:
        th = _hyst_word_norm(image)

    if not tilt:
        return _crop_add_border(th,
                                height=height,
                                border=border,
                                border_size=border_size)
    return _word_tilt(th, height, border, border_size)
|
172 |
+
|
173 |
+
|
174 |
+
def _resize_letter(img, size=56):
    """Resize the image so that its bigger side equals `size`.

    The aspect ratio is kept. The shorter side is clamped to at least one
    pixel, because a very elongated input would otherwise round down to a
    zero-sized target, which cv2.resize rejects.
    """
    rows, cols = img.shape[0], img.shape[1]
    if rows > cols:
        scale = size / rows
        # cv2.resize takes the target as (width, height)
        target = (max(1, int(scale * cols)), size)
    else:
        scale = size / cols
        target = (size, max(1, int(scale * rows)))
    return cv2.resize(img, target)
|
183 |
+
|
184 |
+
|
185 |
+
def letter_normalization(image, is_thresh=True, dim=False):
    """Preprocess a letter: crop it, resize it and centre it on a 64x64 canvas.

    The letter is cropped when `is_thresh` is set and the image is
    non-empty, and resized when both sides are larger than one pixel. It is
    placed 4 px from the edge along its longer side and centred along the
    other. With `dim` the shape of the (cropped) letter is returned as well.
    """
    if is_thresh and image.shape[0] > 0 and image.shape[1] > 0:
        image = _crop_add_border(image, height=0, threshold=80, border=False)

    resized = image
    if image.shape[0] > 1 and image.shape[1] > 1:
        resized = _resize_letter(image)

    canvas = np.zeros((64, 64), np.uint8)
    letter_h, letter_w = resized.shape[0], resized.shape[1]
    # Centre along the smaller side, fixed 4 px margin along the bigger one
    if image.shape[0] > image.shape[1]:
        left, top = int((canvas.shape[1] - letter_w) / 2), 4
    else:
        left, top = 4, int((canvas.shape[0] - letter_h) / 2)
    # Replace zeros by image
    canvas[top:top + letter_h, left:left + letter_w] = resized

    return (canvas, image.shape) if dim else canvas
|
test/ocr/page.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Crop background and transform perspective from the photo of page
|
4 |
+
"""
|
5 |
+
import numpy as np
|
6 |
+
import cv2
|
7 |
+
|
8 |
+
from .helpers import *
|
9 |
+
|
10 |
+
def detection(image, area_thresh = 0.5):
    """Find the page in a photo and return it perspective-corrected.

    The page contour is searched on a resized copy of the image, scaled
    back to the original resolution and used to warp the original image.
    """
    small = resize(image)

    # Edge detection, then close gaps between edges
    # (double page closure => rectangular kernel)
    edges = cv2.morphologyEx(_edges_detection(small, 200, 250),
                             cv2.MORPH_CLOSE,
                             np.ones((5, 11)))

    # Contours, recalculated to the original scale
    contour = _find_page_contours(edges, small, area_thresh)
    contour = contour.dot(ratio(image, small.shape[0]))

    # Transform perspective
    return _persp_transform(image, contour)
|
28 |
+
|
29 |
+
|
30 |
+
def _edges_detection(img, minVal, maxVal):
    """Preprocess (gray, filter, threshold, blur, border) and run Canny.

    `minVal` and `maxVal` are the two Canny hysteresis thresholds. The
    returned edge map is 10 px larger in each dimension than the input
    because of the 5 px border added on every side.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    smoothed = cv2.bilateralFilter(gray, 9, 75, 75)
    binary = cv2.adaptiveThreshold(smoothed, 255,
                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 115, 4)

    # Median blur replaces the centre pixel by the median of the pixels
    # under the kernel => removes thin details
    cleaned = cv2.medianBlur(binary, 11)

    # Add black border - allows detection of pages touching the image border
    bordered = cv2.copyMakeBorder(cleaned, 5, 5, 5, 5,
                                  cv2.BORDER_CONSTANT,
                                  value=[0, 0, 0])
    return cv2.Canny(bordered, minVal, maxVal)
|
48 |
+
|
49 |
+
|
50 |
+
def _four_corners_sort(pts):
|
51 |
+
"""Sort corners in order: top-left, bot-left, bot-right, top-right."""
|
52 |
+
diff = np.diff(pts, axis=1)
|
53 |
+
summ = pts.sum(axis=1)
|
54 |
+
return np.array([pts[np.argmin(summ)],
|
55 |
+
pts[np.argmax(diff)],
|
56 |
+
pts[np.argmax(summ)],
|
57 |
+
pts[np.argmin(diff)]])
|
58 |
+
|
59 |
+
|
60 |
+
def _contour_offset(cnt, offset):
|
61 |
+
"""Offset contour because of 5px border."""
|
62 |
+
cnt += offset
|
63 |
+
cnt[cnt < 0] = 0
|
64 |
+
return cnt
|
65 |
+
|
66 |
+
|
67 |
+
def _find_page_contours(edges, img, area_thresh):
    """Find the four corner points of the page contour.

    Looks for the biggest convex quadrilateral whose area is above
    `area_thresh` times the edge-map area and below the area of the map
    minus its 5 px border. If none is found, the corners of the whole image
    are used. The corners are returned sorted and shifted by (-5, -5) to
    compensate for the border. `img` is accepted for interface
    compatibility and is not used.
    """
    # [-2] selects the contour list on both OpenCV 3.x (3 return values)
    # and OpenCV 4.x (2 return values).
    contours = cv2.findContours(edges,
                                cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Finding biggest rectangle otherwise return original corners
    height = edges.shape[0]
    width = edges.shape[1]
    min_contour_area = height * width * area_thresh
    max_contour_area = (width - 10) * (height - 10)

    max_area = min_contour_area
    page_contour = np.array([[0, 0],
                             [0, height-5],
                             [width-5, height-5],
                             [width-5, 0]])

    for cnt in contours:
        perimeter = cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, 0.03 * perimeter, True)

        # Page has 4 corners and it is convex
        if len(approx) != 4 or not cv2.isContourConvex(approx):
            continue

        area = cv2.contourArea(approx)
        if max_area < area < max_contour_area:
            max_area = area
            page_contour = approx[:, 0]

    # Sort corners and offset them
    page_contour = _four_corners_sort(page_contour)
    return _contour_offset(page_contour, (-5, -5))
|
100 |
+
|
101 |
+
|
102 |
+
def _persp_transform(img, s_points):
    """Warp the quadrilateral `s_points` of `img` onto an upright rectangle.

    The target size is the longer of each pair of opposite sides, measured
    between consecutive points of `s_points`.
    """
    def side_length(i, j):
        # Euclidean distance between two of the source corners
        return np.linalg.norm(s_points[i] - s_points[j])

    height = max(side_length(0, 1), side_length(2, 3))
    width = max(side_length(1, 2), side_length(3, 0))

    # Create target points
    t_points = np.array([[0, 0],
                         [0, height],
                         [width, height],
                         [width, 0]], np.float32)

    # getPerspectiveTransform() needs float32
    if s_points.dtype != np.float32:
        s_points = s_points.astype(np.float32)

    matrix = cv2.getPerspectiveTransform(s_points, t_points)
    return cv2.warpPerspective(img, matrix, (int(width), int(height)))
|
test/ocr/tfhelpers.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Provide functions and classes:
|
4 |
+
Model = Class for loading and using trained models from tensorflow
|
5 |
+
create_cell = function for creatting RNN cells with wrappers
|
6 |
+
"""
|
7 |
+
#import tensorflow as tf
|
8 |
+
import tensorflow.compat.v1 as tf
|
9 |
+
from tensorflow.python.ops.rnn_cell_impl import LSTMCell, ResidualWrapper, DropoutWrapper, MultiRNNCell
|
10 |
+
|
11 |
+
class Model():
    """Load a saved TensorFlow graph into its own session and run it."""

    def __init__(self, loc, operation='activation', input_name='x'):
        """
        loc: location of file containing saved model
        operation: name of operation for running the model
        input_name: name of input placeholder
        """
        self.input = input_name + ":0"
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        with self.graph.as_default():
            saver = tf.train.import_meta_graph(loc + '.meta', clear_devices=True)
            saver.restore(self.sess, loc)
            self.op = self.graph.get_operation_by_name(operation).outputs[0]

    def run(self, data):
        """Run the model's operation, feeding `data` to the input placeholder."""
        return self.eval_feed({self.input: data})

    def eval_feed(self, feed):
        """Run the model's operation with the given feed dictionary."""
        return self.sess.run(self.op, feed_dict=feed)

    def run_op(self, op, feed, output=True):
        """Run the operation named `op` with the feed.

        Returns the operation's first output when `output` is true;
        otherwise the operation is only executed and None is returned.
        """
        operation = self.graph.get_operation_by_name(op)
        if not output:
            self.sess.run(operation, feed_dict=feed)
            return None
        return self.sess.run(operation.outputs[0], feed_dict=feed)
|
45 |
+
|
46 |
+
|
47 |
+
|
48 |
+
def _create_single_cell(cell_fn, num_units, is_residual=False, is_dropout=False, keep_prob=None):
|
49 |
+
"""Create single RNN cell based on cell_fn."""
|
50 |
+
cell = cell_fn(num_units)
|
51 |
+
if is_dropout:
|
52 |
+
cell = DropoutWrapper(cell, input_keep_prob=keep_prob)
|
53 |
+
if is_residual:
|
54 |
+
cell = ResidualWrapper(cell)
|
55 |
+
return cell
|
56 |
+
|
57 |
+
|
58 |
+
def create_cell(num_units, num_layers, num_residual_layers, is_dropout=False, keep_prob=None, cell_fn=LSTMCell):
    """Create `num_layers` RNN cells with the given wrappers.

    The last `num_residual_layers` layers get a residual wrapper. A single
    layer is returned as is; several layers are combined in a MultiRNNCell.
    """
    first_residual = num_layers - num_residual_layers
    cells = [
        _create_single_cell(
            cell_fn=cell_fn,
            num_units=num_units,
            is_residual=(layer >= first_residual),
            is_dropout=is_dropout,
            keep_prob=keep_prob,
        )
        for layer in range(num_layers)
    ]

    if num_layers == 1:
        return cells[0]
    return MultiRNNCell(cells)
|
test/ocr/viz.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def print_progress_bar(iteration,
                       total,
                       prefix = '',
                       suffix = ''):
    """Call in a loop to create terminal progress bar.

    The bar is redrawn only about once per percent of `total`, because
    printing slows down the loop. Nothing is printed for a non-positive
    `total`.

    Args:
        iteration: current iteration (Int)
        total: total iterations (Int)
        prefix: prefix string (Str)
        suffix: suffix string (Str)
    """
    if total <= 0:
        return

    # Redraw every `step` iterations; at least 1 so that totals below 100
    # do not cause a modulo by zero.
    step = max(total // 100, 1)
    if iteration % step == 0:
        length = 40
        iteration += 1
        percent = (100 * iteration) // (total * 99/100)
        filled_length = int(length * percent / 100)
        bar = '█' * filled_length + '-' * (length - filled_length)
        print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = '\r')

        # Finish the line once the bar is (nearly) complete
        if iteration >= total * 99/100:
            print()
|
test/ocr/words.py
ADDED
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Detect words on the page
|
4 |
+
return array of words' bounding boxes
|
5 |
+
"""
|
6 |
+
import numpy as np
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
import cv2
|
9 |
+
|
10 |
+
from .helpers import *
|
11 |
+
|
12 |
+
|
13 |
+
def detection(image, join=False):
    """Detect the bounding boxes of words in the image.

    Return: numpy array of bounding boxes [x, y, x+w, y+h]
    """
    # Preprocess image for word detection: blur, edges, binarize, close gaps
    edges = _edge_detect(cv2.GaussianBlur(image, (5, 5), 18))
    _, binary = cv2.threshold(edges, 50, 255, cv2.THRESH_BINARY)
    closing_kernel = np.ones((15, 15), np.uint8)
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)

    return _text_detect(closed, image, join)
|
25 |
+
|
26 |
+
|
27 |
+
def sort_words(boxes):
    """Sort boxes - (x, y, x+w, y+h) from left to right, top to bottom.

    `boxes` is an N x 4 numpy array; it is reordered in place by its top
    coordinate (ties broken by the remaining columns). A new text line is
    started whenever a box begins more than the mean box height below the
    first box of the current line.

    Returns:
        list of lines, each a list of box rows sorted by x; an empty list
        when there are no boxes.
    """
    if len(boxes) == 0:
        return []

    mean_height = sum(y2 - y1 for _, y1, _, y2 in boxes) / len(boxes)

    # Sort rows by y1, then x1, x2, y2 (lexsort takes the primary key last).
    # Unlike a structured-dtype view this works for any integer dtype and
    # for non-contiguous arrays.
    order = np.lexsort((boxes[:, 3], boxes[:, 2], boxes[:, 0], boxes[:, 1]))
    boxes[:] = boxes[order]

    current_line = boxes[0][1]
    lines = []
    tmp_line = []
    for box in boxes:
        if box[1] > current_line + mean_height:
            lines.append(tmp_line)
            tmp_line = [box]
            current_line = box[1]
            continue
        tmp_line.append(box)
    lines.append(tmp_line)

    for line in lines:
        line.sort(key=lambda box: box[0])

    return lines
|
48 |
+
|
49 |
+
|
50 |
+
def _edge_detect(im):
    """
    Edge detection using the Sobel operator on each layer individually.

    The operator is applied to each of the three image layers (RGB) and the
    per-pixel maximum over the layers is returned.
    """
    per_layer = [_sobel_detect(im[:, :, layer]) for layer in range(3)]
    return np.max(np.array(per_layer), axis=0)
|
58 |
+
|
59 |
+
|
60 |
+
def _sobel_detect(channel):
    """Return the Sobel gradient magnitude of one channel as uint8."""
    grad_x = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
    grad_y = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
    # sqrt(x^2 + y^2), saturated at 255 before the cast
    magnitude = np.minimum(np.hypot(grad_x, grad_y), 255)
    return np.uint8(magnitude)
|
67 |
+
|
68 |
+
|
69 |
+
def union(a,b):
    """Return the smallest rectangle [x, y, w, h] containing both a and b.

    Both arguments are rectangles in the form [x, y, w, h].
    """
    left = min(a[0], b[0])
    top = min(a[1], b[1])
    right = max(a[0] + a[2], b[0] + b[2])
    bottom = max(a[1] + a[3], b[1] + b[3])
    return [left, top, right - left, bottom - top]
|
75 |
+
|
76 |
+
def _intersect(a,b):
|
77 |
+
x = max(a[0], b[0])
|
78 |
+
y = max(a[1], b[1])
|
79 |
+
w = min(a[0]+a[2], b[0]+b[2]) - x
|
80 |
+
h = min(a[1]+a[3], b[1]+b[3]) - y
|
81 |
+
if w<0 or h<0:
|
82 |
+
return False
|
83 |
+
return True
|
84 |
+
|
85 |
+
def _group_rectangles(rec):
    """
    Union intersecting rectangles.

    Each rectangle absorbs every later rectangle it intersects; after each
    merge the scan restarts, because the grown rectangle may now reach
    rectangles it missed before. Merged entries of `rec` are overwritten
    in place.

    Args:
        rec - list of rectangles in form [x, y, w, h]
    Return:
        list of grouped rectangles
    """
    count = len(rec)
    absorbed = [False] * count
    grouped = []
    for i in range(count):
        if absorbed[i]:
            continue
        j = i + 1
        while j < count:
            if absorbed[j] or not _intersect(rec[i], rec[j]):
                j += 1
                continue
            rec[i] = union(rec[i], rec[j])
            absorbed[j] = True
            # Restart right after i with the enlarged rectangle
            j = i + 1
        grouped.append(rec[i])

    return grouped
|
109 |
+
|
110 |
+
|
111 |
+
def _text_detect(img, image, join=False):
    """Detect text regions using contours.

    Top-level contours of the (resized) binary image `img` are filtered by
    fill ratio, size and aspect ratio. The accepted boxes are drawn and
    displayed, then scaled back to the resolution of `image`.

    Returns:
        int64 numpy array of shape (N, 4) with boxes [x, y, x+w, y+h];
        shape (0, 4) when nothing is detected.
    """
    small = resize(img, 2000)

    # Finding contours ([-2:] works on both OpenCV 3.x and 4.x)
    mask = np.zeros(small.shape, np.uint8)
    cnt, hierarchy = cv2.findContours(np.copy(small),
                                      cv2.RETR_CCOMP,
                                      cv2.CHAIN_APPROX_SIMPLE)[-2:]

    boxes = []
    # Without any contour there is no hierarchy to walk
    index = 0 if hierarchy is not None and len(cnt) > 0 else -1
    # Go through all contours in top level
    while index >= 0:
        x, y, w, h = cv2.boundingRect(cnt[index])
        cv2.drawContours(mask, cnt, index, (255, 255, 255), cv2.FILLED)
        maskROI = mask[y:y+h, x:x+w]
        # Ratio of white pixels to area of bounding rectangle
        r = cv2.countNonZero(maskROI) / (w * h)

        # Limits for text
        if (r > 0.1
                and 1600 > w > 10
                and 1600 > h > 10
                and h/w < 3
                and w/h < 10
                and (60 // h) * w < 1000):
            boxes += [[x, y, w, h]]

        # Next contour on the same hierarchy level (-1 when there is none)
        index = hierarchy[0][index][0]

    if join:
        # Need more work
        boxes = _group_rectangles(boxes)

    # image for drawing bounding boxes
    small = cv2.cvtColor(small, cv2.COLOR_GRAY2RGB)
    for (x, y, w, h) in boxes:
        cv2.rectangle(small, (x, y), (x+w, y+h), (0, 255, 0), 2)

    implt(small, t='Bounding rectangles')

    # reshape keeps the result two-dimensional even when no box was found
    bounding_boxes = np.array(
        [[x, y, x+w, y+h] for (x, y, w, h) in boxes]).reshape(-1, 4)
    return bounding_boxes.dot(ratio(image, small.shape[0])).astype(np.int64)
|
158 |
+
|
159 |
+
|
160 |
+
def textDetectWatershed(thresh):
    """NOT IN USE - Text detection using watershed algorithm.

    Based on: http://docs.opencv.org/trunk/d3/db4/tutorial_py_watershed.html

    NOTE(review): reads the module-level name IMG, which is not defined in
    the visible part of this module - confirm where it comes from.
    """
    source = cv2.imread("data/textdet/%s.jpg" % IMG)
    img = resize(cv2.cvtColor(source, cv2.COLOR_BGR2RGB), 3000)
    thresh = resize(thresh, 3000)

    # noise removal
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=3)

    # sure background area
    sure_bg = cv2.dilate(opening, kernel, iterations=3)

    # Finding sure foreground area
    distances = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
    _, sure_fg = cv2.threshold(distances, 0.01 * distances.max(), 255, 0)

    # Finding unknown region
    sure_fg = np.uint8(sure_fg)
    unknown = cv2.subtract(sure_bg, sure_fg)

    # Marker labelling
    _, markers = cv2.connectedComponents(sure_fg)

    # Add one to all labels so that sure background is not 0, but 1
    markers += 1

    # Now, mark the region of unknown with zero
    markers[unknown == 255] = 0

    markers = cv2.watershed(img, markers)
    implt(markers, t='Markers')
    image = img.copy()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    for mark in np.unique(markers):
        # mark == 0 --> background
        if mark == 0:
            continue

        # Draw it on mask and detect biggest contour
        mask = np.zeros(gray.shape, dtype="uint8")
        mask[markers == mark] = 255

        found = cv2.findContours(mask.copy(),
                                 cv2.RETR_EXTERNAL,
                                 cv2.CHAIN_APPROX_SIMPLE)[-2]
        biggest = max(found, key=cv2.contourArea)

        # Draw a bounding rectangle if it contains text
        x, y, w, h = cv2.boundingRect(biggest)
        cv2.drawContours(mask, biggest, 0, (255, 255, 255), cv2.FILLED)
        mask_roi = mask[y:y+h, x:x+w]
        # Ratio of white pixels to area of bounding rectangle
        fill_ratio = cv2.countNonZero(mask_roi) / (w * h)

        # Limits for text
        if fill_ratio > 0.2 and 2000 > w > 15 and 1500 > h > 15:
            cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)

    implt(image)
|
test/ocr_test.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import tensorflow as tf
|
6 |
+
import cv2
|
7 |
+
|
8 |
+
sys.path.append('../src')
|
9 |
+
from ocr.normalization import word_normalization, letter_normalization
|
10 |
+
from ocr import page, words, characters
|
11 |
+
from ocr.helpers import implt, resize
|
12 |
+
from ocr.tfhelpers import Model
|
13 |
+
from ocr.datahelpers import idx2char
|
14 |
+
|
15 |
+
IMG = '../data/test.jpg' # 1, 2, 3
|
16 |
+
LANG = 'en'
|
17 |
+
# You can use only one of these two
|
18 |
+
# You HAVE TO train the CTC model by yourself using word_classifier_CTC.ipynb
|
19 |
+
MODEL_LOC_CHARS = f'../models/char-clas/{LANG}/CharClassifier'
|
20 |
+
MODEL_LOC_CTC = '../models/word-clas/CTC/Classifier1'
|
21 |
+
|
22 |
+
CHARACTER_MODEL = Model(MODEL_LOC_CHARS)
|
23 |
+
CTC_MODEL = Model(MODEL_LOC_CTC, 'word_prediction')
|
24 |
+
|
25 |
+
image = cv2.cvtColor(cv2.imread(IMG), cv2.COLOR_BGR2RGB)
|
26 |
+
# implt(image)
|
27 |
+
|
28 |
+
# Crop image and get bounding boxes
|
29 |
+
crop = page.detection(image)
|
30 |
+
# implt(crop)
|
31 |
+
boxes = words.detection(crop)
|
32 |
+
lines = words.sort_words(boxes)
|
33 |
+
|
34 |
+
|
35 |
+
def recognise(img):
    """Recognise the word in an image using the CTC model.

    The image is normalized to a height of 64 without border or tilt
    correction, fed to CTC_MODEL as a single-item batch and the predicted
    indices are decoded to characters.
    """
    img = word_normalization(img, 64,
                             border=False,
                             tilt=False,
                             hyst_norm=False)
    length = img.shape[1]

    # Input has shape [batch_size, height, width, 1]
    batch = np.zeros((1, 64, length, 1), dtype=np.uint8)
    batch[0, :, :, 0] = img

    feed = {
        'inputs:0': batch,
        'inputs_length:0': [length],
        'keep_prob:0': 1,
    }
    pred = CTC_MODEL.eval_feed(feed)[0]

    return ''.join(idx2char(i + 1) for i in pred)
|
58 |
+
|
59 |
+
# implt(crop)
|
60 |
+
for line in lines:
|
61 |
+
print(" ".join([recognise(crop[y1:y2, x1:x2]) for (x1, y1, x2, y2) in line]))
|
test/openai_demo.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from openai import OpenAI
|
3 |
+
from llamaapi import LlamaAPI
|
4 |
+
|
5 |
+
# Credentials are read from the environment so that no secret is committed to
# the repository. The tokens that used to be hard-coded here must be treated
# as leaked and revoked.

# Initialize the llamaapi with your api_token (LLAMA_API_TOKEN); the client
# is created but not used further in this demo.
llama = LlamaAPI(os.environ.get("LLAMA_API_TOKEN"))

api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this demo.")
# OpenAI() reads OPENAI_API_KEY from the environment by default.
client = OpenAI()

prompt = 'hello, who are you ?'
chat_completion = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "Provide feedback on the inputted writing sample from an ESL learner. "
                                      "Focus on areas such as grammar, vocabulary usage, and overall coherence and organization of the essay. "
                                      "Offer corrective feedback on errors, suggest improvements, and highlight positive aspects to encourage "
                                      "the learner. Please ensure the feedback is constructive, clear, and supportive to help the learner "
                                      "understand and apply the suggestions. Always frame feedback in a positive, constructive manner. "
                                      "Focus on how the student can improve rather than just highlighting mistakes. Provide clear examples "
                                      "when pointing out errors or suggesting improvements. Prompt the learner to reflect on specific parts of "
                                      "their writing"},
        {"role": "user", "content": prompt},
    ]
)
print(chat_completion.choices[0].message.content.strip())
|
test/streamlit_demo.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from faker import Faker
|
3 |
+
|
4 |
+
st.title('Smart Robot')
|
5 |
+
|
6 |
+
user_input = st.chat_input('你想说什么')
|
7 |
+
fake = Faker()
|
8 |
+
|
9 |
+
|
10 |
+
def generate_response():
    """Return a placeholder bot reply: random text from the Faker instance."""
    return fake.text()
|
13 |
+
|
14 |
+
|
15 |
+
if user_input:
|
16 |
+
container = st.container(border=True)
|
17 |
+
bot_response = generate_response()
|
18 |
+
container.write("机器人:" + bot_response)
|
19 |
+
container.write("asdjkl")
|
test/test.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import webbrowser
|
3 |
+
|
4 |
+
# Define a function that redirects to Google
|
5 |
+
def redirect_to_google():
    """Open the Google home page in a new browser tab."""
    webbrowser.open_new_tab('https://www.google.com')
|
8 |
+
|
9 |
+
|
10 |
+
# # Call the redirect function from the application
|
11 |
+
# redirect_to_google()
|
12 |
+
if st.button('go'):
|
13 |
+
print('go')
|
14 |
+
redirect_to_google()
|