Parechan committed
Commit 0c84ee8 · verified · 1 Parent(s): 1b2eabd

Upload 35 files

test/bot_test.py ADDED
@@ -0,0 +1,11 @@
+ from bots import classify, feedback, vocab, assessment, speaking
+
+ # print(feedback.transcribe_handwriting('https://media.cnn.com/api/v1/images/stellar/prod/160122124623-01-national-handwriting-day.jpg?q=w_3264,h_1836,x_0,y_0,c_fill'))
+
+ # print(vocab.vocab_chat_with_model('hello', 'gpt-4'))
+
+ # print(assessment.chat_assessment_with_model('hello', 'gpt-4'))
+
+ # speaking.convert_to_mp3('sample.mp3', 'output.mp3')
+ # print(speaking.transcribe_audio('output.mp3'))
+ # speaking.text_to_speech('hello, world!', 'text2speech.mp3')
test/ocr/__init__.py ADDED
File without changes
test/ocr/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (157 Bytes)
test/ocr/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (162 Bytes)
test/ocr/__pycache__/characters.cpython-310.pyc ADDED
Binary file (2.57 kB)
test/ocr/__pycache__/characters.cpython-312.pyc ADDED
Binary file (4.25 kB)
test/ocr/__pycache__/datahelpers.cpython-310.pyc ADDED
Binary file (9.15 kB)
test/ocr/__pycache__/datahelpers.cpython-312.pyc ADDED
Binary file (15 kB)
test/ocr/__pycache__/helpers.cpython-310.pyc ADDED
Binary file (1.37 kB)
test/ocr/__pycache__/helpers.cpython-312.pyc ADDED
Binary file (1.92 kB)
test/ocr/__pycache__/normalization.cpython-310.pyc ADDED
Binary file (6.33 kB)
test/ocr/__pycache__/normalization.cpython-312.pyc ADDED
Binary file (11.6 kB)
test/ocr/__pycache__/page.cpython-310.pyc ADDED
Binary file (3.11 kB)
test/ocr/__pycache__/page.cpython-312.pyc ADDED
Binary file (5.69 kB)
test/ocr/__pycache__/tfhelpers.cpython-310.pyc ADDED
Binary file (2.77 kB)
test/ocr/__pycache__/tfhelpers.cpython-312.pyc ADDED
Binary file (4.03 kB)
test/ocr/__pycache__/viz.cpython-310.pyc ADDED
Binary file (800 Bytes)
test/ocr/__pycache__/viz.cpython-312.pyc ADDED
Binary file (1.02 kB)
test/ocr/__pycache__/words.cpython-310.pyc ADDED
Binary file (6.05 kB)
test/ocr/__pycache__/words.cpython-312.pyc ADDED
Binary file (10.8 kB)
test/ocr/characters.py ADDED
@@ -0,0 +1,102 @@
+ # -*- coding: utf-8 -*-
+ import os
+ import numpy as np
+ #import tensorflow as tf
+ import tensorflow.compat.v1 as tf
+ tf.disable_v2_behavior()
+ import cv2
+ import math
+
+ from .helpers import *
+ from .tfhelpers import Model
+
+ # Preloading trained models with activation function
+ # Loading is slow -> prevent multiple loads
+ print("Loading segmentation models...")
+ location = os.path.dirname(os.path.abspath(__file__))
+ CNN_model = Model(
+     os.path.join(location, '../../models/gap-clas/CNN-CG'))
+ CNN_slider = (60, 30)
+ RNN_model = Model(
+     os.path.join(location, '../../models/gap-clas/RNN/Bi-RNN-new'),
+     'prediction')
+ RNN_slider = (60, 60)
+
+
+ def _classify(img, step=2, RNN=False, slider=(60, 60)):
+     """Slice the image and return the raw output of the classifier."""
+     length = (img.shape[1] - slider[1]) // step + 1
+     if RNN:
+         input_seq = np.zeros((1, length, slider[0]*slider[1]), dtype=np.float32)
+         input_seq[0][:] = [img[:, loc * step: loc * step + slider[1]].flatten()
+                            for loc in range(length)]
+         pred = RNN_model.eval_feed({'inputs:0': input_seq,
+                                     'length:0': [length],
+                                     'keep_prob:0': 1})[0]
+     else:
+         input_seq = np.zeros((length, slider[0]*slider[1]), dtype=np.float32)
+         input_seq[:] = [img[:, loc * step: loc * step + slider[1]].flatten()
+                         for loc in range(length)]
+         pred = CNN_model.run(input_seq)
+
+     return pred
+
+
+ def segment(img, step=2, RNN=False, debug=False):
+     """Take a preprocessed word image and
+     return an array of positions separating characters.
+     """
+     slider = CNN_slider
+     if RNN:
+         slider = RNN_slider
+
+     # Run the classifier
+     pred = _classify(img, step=step, RNN=RNN, slider=slider)
+
+     # Finalize the gap positions from the raw prediction
+     gaps = []
+     last_gap = 0
+     gap_count = 1
+     gap_position_sum = slider[1] / 2
+     first_gap = True
+     gap_block_first = 0
+     gap_block_last = slider[1] / 2
+
+     for i, p in enumerate(pred):
+         if p == 1:
+             gap_position_sum += i * step + slider[1] / 2
+             gap_block_last = i * step + slider[1] / 2
+             gap_count += 1
+             last_gap = 0
+             if gap_block_first == 0:
+                 gap_block_first = i * step + slider[1] / 2
+         else:
+             if gap_count != 0 and last_gap >= 1:
+                 if first_gap:
+                     gaps.append(int(gap_block_last))
+                     first_gap = False
+                 else:
+                     gaps.append(int(gap_position_sum // gap_count))
+                 gap_position_sum = 0
+                 gap_count = 0
+                 gap_block_first = 0
+             last_gap += 1
+
+     # Adding the final gap position
+     if gap_block_first != 0:
+         gaps.append(int(gap_block_first))
+     else:
+         gap_position_sum += (len(pred) - 1) * step + slider[1]/2
+         gaps.append(int(gap_position_sum / (gap_count + 1)))
+
+     if debug:
+         # Drawing lines
+         img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+         for gap in gaps:
+             cv2.line(img,
+                      (int(gap), 0),
+                      (int(gap), slider[0]),
+                      (0, 255, 0), 1)
+         implt(img, t="Separated characters")
+
+     return gaps
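
A minimal usage sketch for segment() (hypothetical input path; assumes the trained gap-classifier checkpoints referenced at the top of this file exist on disk, and that the ocr package is importable as in test/ocr_test.py below):

    import cv2
    from ocr import characters
    from ocr.normalization import word_normalization

    # 'word.jpg' is a hypothetical photo of a single handwritten word
    img = cv2.imread('word.jpg')
    norm = word_normalization(img, 60, border=False, tilt=True)  # height matches the slider
    gaps = characters.segment(norm, RNN=True)  # x-positions separating the characters
    print(gaps)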
test/ocr/datahelpers.py ADDED
@@ -0,0 +1,287 @@
+ # -*- coding: utf-8 -*-
+ """
+ Helper functions for loading and creating datasets
+ """
+ import numpy as np
+ import glob
+ import simplejson
+ import os
+ import cv2
+ import csv
+ import sys
+ import unidecode
+
+ from .helpers import implt
+ from .normalization import letter_normalization
+ from .viz import print_progress_bar
+
+
+ CHARS = ['', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
+          'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
+          'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c',
+          'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+          'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
+          'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6',
+          '7', '8', '9', '.', '-', '+', "'"]
+ CHAR_SIZE = len(CHARS)
+ idxs = [i for i in range(len(CHARS))]
+ idx_2_chars = dict(zip(idxs, CHARS))
+ chars_2_idx = dict(zip(CHARS, idxs))
+
+
+ def char2idx(c, sequence=False):
+     if sequence:
+         return chars_2_idx[c] + 1
+     return chars_2_idx[c]
+
+
+ def idx2char(idx, sequence=False):
+     if sequence:
+         return idx_2_chars[idx-1]
+     return idx_2_chars[idx]
+
+
+ def load_words_data(dataloc='data/words/', is_csv=False, load_gaplines=False):
+     """
+     Load word images with corresponding labels and gaplines (if load_gaplines == True).
+     Args:
+         dataloc: image folder location/CSV file - can be a list of multiple locations
+         is_csv: using CSV files
+         load_gaplines: whether or not to load gapline position files
+     Returns:
+         (images, labels (, gaplines))
+     """
+     print("Loading words...")
+     if type(dataloc) is not list:
+         dataloc = [dataloc]
+
+     if is_csv:
+         csv.field_size_limit(sys.maxsize)
+         length = 0
+         for loc in dataloc:
+             with open(loc) as csvfile:
+                 length += max(sum(1 for row in csvfile) - 1, 0)
+
+         labels = np.empty(length, dtype=object)
+         images = np.empty(length, dtype=object)
+         i = 0
+         for loc in dataloc:
+             print(loc)
+             with open(loc) as csvfile:
+                 reader = csv.DictReader(csvfile)
+                 for row in reader:
+                     shape = np.fromstring(
+                         row['shape'],
+                         sep=',',
+                         dtype=int)
+                     img = np.fromstring(
+                         row['image'],
+                         sep=', ',
+                         dtype=np.uint8).reshape(shape)
+                     labels[i] = row['label']
+                     images[i] = img
+
+                     print_progress_bar(i, length)
+                     i += 1
+     else:
+         img_list = []
+         tmp_labels = []
+         for loc in dataloc:
+             tmp_list = glob.glob(os.path.join(loc, '*.png'))
+             img_list += tmp_list
+             tmp_labels += [name[len(loc):].split("_")[0] for name in tmp_list]
+
+         labels = np.array(tmp_labels)
+         images = np.empty(len(img_list), dtype=object)
+
+         # Load grayscale images
+         for i, img in enumerate(img_list):
+             images[i] = cv2.imread(img, 0)
+             print_progress_bar(i, len(img_list))
+
+         # Load gaplines (lines separating letters) from txt files
+         if load_gaplines:
+             gaplines = np.empty(len(img_list), dtype=object)
+             for i, name in enumerate(img_list):
+                 with open(name[:-3] + 'txt', 'r') as fp:
+                     gaplines[i] = np.array(simplejson.load(fp))
+
+     if load_gaplines:
+         assert len(labels) == len(images) == len(gaplines)
+     else:
+         assert len(labels) == len(images)
+     print("-> Number of words:", len(labels))
+
+     if load_gaplines:
+         return (images, labels, gaplines)
+     return (images, labels)
+
+
+ def _words2chars(images, labels, gaplines):
+     """Transform word images with gaplines into individual chars."""
+     # Total number of chars
+     length = sum([len(l) for l in labels])
+
+     imgs = np.empty(length, dtype=object)
+     new_labels = []
+
+     height = images[0].shape[0]
+
+     idx = 0
+     for i, gaps in enumerate(gaplines):
+         for pos in range(len(gaps) - 1):
+             imgs[idx] = images[i][0:height, gaps[pos]:gaps[pos+1]]
+             new_labels.append(char2idx(labels[i][pos]))
+             idx += 1
+
+     print("Loaded chars from words:", length)
+     return imgs, new_labels
+
+
+ def load_chars_data(charloc='data/charclas/', wordloc='data/words/', lang='cz'):
+     """
+     Load char images with corresponding labels.
+     Args:
+         charloc: char images FOLDER LOCATION
+         wordloc: word images with gaplines FOLDER LOCATION
+     Returns:
+         (images, labels)
+     """
+     print("Loading chars...")
+     images = np.zeros((1, 4096))
+     labels = []
+
+     if charloc != '':
+         # Get subfolders with chars
+         dir_list = glob.glob(os.path.join(charloc, lang, "*/"))
+         dir_list.sort()
+
+         # if lang == 'en':
+         chars = CHARS[:53]
+
+         assert [d[-2] if d[-2] != '0' else '' for d in dir_list] == chars
+
+         # For every label load images and create corresponding labels
+         # cv2.imread(img, 0) - for loading images in grayscale
+         # Images are scaled to 64x64 = 4096 px
+         for i in range(len(chars)):
+             img_list = glob.glob(os.path.join(dir_list[i], '*.jpg'))
+             imgs = np.array([letter_normalization(cv2.imread(img, 0)) for img in img_list])
+             images = np.concatenate([images, imgs.reshape(len(imgs), 4096)])
+             labels.extend([i] * len(imgs))
+
+     if wordloc != '':
+         imgs, words, gaplines = load_words_data(wordloc, load_gaplines=True)
+         if lang != 'cz':
+             words = np.array([unidecode.unidecode(w) for w in words])
+         imgs, chars = _words2chars(imgs, words, gaplines)
+
+         labels.extend(chars)
+         images2 = np.zeros((len(imgs), 4096))
+         for i in range(len(imgs)):
+             print_progress_bar(i, len(imgs))
+             images2[i] = letter_normalization(imgs[i]).reshape(1, 4096)
+
+         images = np.concatenate([images, images2])
+
+     images = images[1:]
+     labels = np.array(labels)
+
+     print("-> Number of chars:", len(labels))
+     return (images, labels)
+
+
+ def load_gap_data(loc='data/gapdet/large/', slider=(60, 120), seq=False, flatten=True):
+     """
+     Load gap data from location with corresponding labels.
+     Args:
+         loc: location of a folder with words separated into gap data;
+              images have to be named label_timestamp.jpg, where label is 0 or 1
+         slider: dimensions of the output images
+         seq: store images from one word as a sequence
+         flatten: flatten the output images
+     Returns:
+         (images, labels)
+     """
+     print('Loading gap data...')
+     dir_list = glob.glob(os.path.join(loc, "*/"))
+     dir_list.sort()
+
+     if slider[1] > 120:
+         # TODO Implement for higher dimensions
+         slider[1] = 120
+
+     cut_s = None if (120 - slider[1]) // 2 <= 0 else (120 - slider[1]) // 2
+     cut_e = None if (120 - slider[1]) // 2 <= 0 else -(120 - slider[1]) // 2
+
+     if seq:
+         images = np.empty(len(dir_list), dtype=object)
+         labels = np.empty(len(dir_list), dtype=object)
+
+         for i, loc in enumerate(dir_list):
+             # TODO Check for empty directories
+             img_list = glob.glob(os.path.join(loc, '*.jpg'))
+             if len(img_list) != 0:
+                 img_list = sorted(img_list, key=lambda x: int(x[len(loc):].split("_")[1][:-4]))
+                 images[i] = np.array([(cv2.imread(img, 0)[:, cut_s:cut_e].flatten() if flatten else
+                                        cv2.imread(img, 0)[:, cut_s:cut_e])
+                                       for img in img_list])
+                 labels[i] = np.array([int(name[len(loc):].split("_")[0]) for name in img_list])
+
+     else:
+         images = np.zeros((1, slider[0]*slider[1]))
+         labels = []
+
+         for i in range(len(dir_list)):
+             img_list = glob.glob(os.path.join(dir_list[i], '*.jpg'))
+             if len(img_list) != 0:
+                 imgs = np.array([cv2.imread(img, 0)[:, cut_s:cut_e] for img in img_list])
+                 images = np.concatenate([images, imgs.reshape(len(imgs), slider[0]*slider[1])])
+                 labels.extend([int(img[len(dir_list[i])]) for img in img_list])
+
+         images = images[1:]
+         labels = np.array(labels)
+
+     if seq:
+         print("-> Number of words / gaps and letters:",
+               len(labels), '/', sum([len(l) for l in labels]))
+     else:
+         print("-> Number of gaps and letters:", len(labels))
+     return (images, labels)
+
+
+ def corresponding_shuffle(a):
+     """
+     Shuffle an array of numpy arrays such that
+     each pair a[x][i] and a[y][i] remains matched.
+     Args:
+         a: array of same-length numpy arrays
+     Returns:
+         Array a with shuffled numpy arrays
+     """
+     assert all([len(a[0]) == len(a[i]) for i in range(len(a))])
+     p = np.random.permutation(len(a[0]))
+     for i in range(len(a)):
+         a[i] = a[i][p]
+     return a
+
+
+ def sequences_to_sparse(sequences):
+     """
+     Create a sparse representation of sequences.
+     Args:
+         sequences: a list of lists of type dtype where each element is a sequence
+     Returns:
+         A tuple with (indices, values, shape)
+     """
+     indices = []
+     values = []
+
+     for n, seq in enumerate(sequences):
+         indices.extend(zip([n]*len(seq), range(len(seq))))
+         values.extend(seq)
+
+     indices = np.asarray(indices, dtype=np.int64)
+     values = np.asarray(values, dtype=np.int32)
+     shape = np.asarray([len(sequences), np.asarray(indices).max(0)[1]+1], dtype=np.int64)
+
+     return indices, values, shape
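
To make the sparse format concrete, a small worked example (the label values are arbitrary):

    from ocr.datahelpers import sequences_to_sparse

    indices, values, shape = sequences_to_sparse([[1, 2], [3]])
    # indices -> [[0, 0], [0, 1], [1, 0]]   (sequence n, position within sequence)
    # values  -> [1, 2, 3]
    # shape   -> [2, 2]   (2 sequences, longest one has length 2)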
test/ocr/dataiterator.py ADDED
@@ -0,0 +1,98 @@
+ # -*- coding: utf-8 -*-
+ """Classes for feeding data during training."""
+ import numpy as np
+ import pandas as pd
+ from .helpers import img_extend
+ from .datahelpers import sequences_to_sparse
+
+
+ class BucketDataIterator():
+     """Iterator for feeding the CTC model during training."""
+     def __init__(self,
+                  images,
+                  targets,
+                  num_buckets=5,
+                  slider=(60, 30),
+                  augmentation=None,
+                  dropout=0.0,
+                  train=True):
+
+         self.train = train
+         self.slider = slider
+         self.augmentation = augmentation
+         self.dropout = dropout
+         for i in range(len(images)):
+             images[i] = img_extend(
+                 images[i],
+                 (self.slider[0],
+                  max(images[i].shape[1], self.slider[1])))
+         in_length = [image.shape[1] for image in images]
+
+         # Create a pandas DataFrame and sort it by image width (length)
+         self.dataFrame = pd.DataFrame({
+             'in_length': in_length,
+             'images': images,
+             'targets': targets}).sort_values('in_length').reset_index(drop=True)
+
+         bsize = int(len(images) / num_buckets)
+         self.num_buckets = num_buckets
+         self.buckets = []
+         for bucket in range(num_buckets-1):
+             self.buckets.append(
+                 self.dataFrame.iloc[bucket * bsize: (bucket+1) * bsize])
+         self.buckets.append(self.dataFrame.iloc[(num_buckets-1) * bsize:])
+
+         self.buckets_size = [len(bucket) for bucket in self.buckets]
+         self.cursor = np.array([0] * num_buckets)
+         self.bucket_order = np.random.permutation(num_buckets)
+         self.bucket_cursor = 0
+         self.shuffle()
+         print("Iterator created.")
+
+     def shuffle(self, idx=None):
+         """Shuffle the idx bucket, or each bucket separately."""
+         for i in [idx] if idx is not None else range(self.num_buckets):
+             self.buckets[i] = self.buckets[i].sample(frac=1).reset_index(drop=True)
+             self.cursor[i] = 0
+
+     def next_batch(self, batch_size):
+         """Create the next training batch of the given size.
+         Args:
+             batch_size: size of the next batch
+         Returns:
+             (images, targets, image lengths)
+         """
+         i_bucket = self.bucket_order[self.bucket_cursor]
+         # Increment the cursor and shuffle in case of a new round
+         self.bucket_cursor = (self.bucket_cursor + 1) % self.num_buckets
+         if self.bucket_cursor == 0:
+             self.bucket_order = np.random.permutation(self.num_buckets)
+
+         if self.cursor[i_bucket] + batch_size > self.buckets_size[i_bucket]:
+             self.shuffle(i_bucket)
+
+         # Handle too big batch sizes
+         if batch_size > self.buckets_size[i_bucket]:
+             batch_size = self.buckets_size[i_bucket]
+
+         res = self.buckets[i_bucket].iloc[self.cursor[i_bucket]:
+                                           self.cursor[i_bucket]+batch_size]
+         self.cursor[i_bucket] += batch_size
+
+         # PAD input sequence and output
+         input_max = max(res['in_length'])
+
+         input_imgs = np.zeros(
+             (batch_size, self.slider[0], input_max, 1), dtype=np.uint8)
+         for i, img in enumerate(res['images']):
+             input_imgs[i][:, :res['in_length'].values[i], 0] = img
+
+         if self.train:
+             input_imgs = self.augmentation.augment_images(input_imgs)
+         input_imgs = input_imgs.astype(np.float32)
+
+         targets = sequences_to_sparse(res['targets'].values)
+         return input_imgs, targets, res['in_length'].values
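
A usage sketch for BucketDataIterator on fake data (train=False so no augmenter is needed; with train=True, augmentation would have to be an object exposing augment_images, e.g. an imgaug pipeline):

    import numpy as np
    from ocr.dataiterator import BucketDataIterator

    # Ten fake word images of height 60 with random widths, plus fake label sequences
    images = [np.zeros((60, np.random.randint(30, 120)), dtype=np.uint8) for _ in range(10)]
    targets = [np.array([1, 2, 3]) for _ in range(10)]

    it = BucketDataIterator(images, targets, num_buckets=2, train=False)
    imgs, sparse_targets, lengths = it.next_batch(4)
    print(imgs.shape, lengths)  # e.g. (4, 60, <max width in batch>, 1)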
test/ocr/helpers.py ADDED
@@ -0,0 +1,45 @@
+ # -*- coding: utf-8 -*-
+ """
+ Helper functions for the OCR project
+ """
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import cv2
+
+
+ SMALL_HEIGHT = 800
+
+
+ def implt(img, cmp=None, t=''):
+     """Show an image using plt."""
+     plt.imshow(img, cmap=cmp)
+     plt.title(t)
+     plt.show()
+
+
+ def resize(img, height=SMALL_HEIGHT, always=False):
+     """Resize an image to the given height."""
+     if img.shape[0] > height or always:
+         rat = height / img.shape[0]
+         return cv2.resize(img, (int(rat * img.shape[1]), height))
+
+     return img
+
+
+ def ratio(img, height=SMALL_HEIGHT):
+     """Get the scale ratio."""
+     return img.shape[0] / height
+
+
+ def img_extend(img, shape):
+     """Extend a 2D image (numpy array) in the vertical and horizontal directions.
+     The shape of the result image will match 'shape'.
+     Args:
+         img: image to be extended
+         shape: shape (tuple) of the result image
+     Returns:
+         Extended image
+     """
+     x = np.zeros(shape, np.uint8)
+     x[:img.shape[0], :img.shape[1]] = img
+     return x
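
img_extend simply zero-pads on the bottom/right; a quick sketch:

    import numpy as np
    from ocr.helpers import img_extend

    img = np.ones((60, 30), np.uint8)
    padded = img_extend(img, (60, 100))  # original content in the top-left, zeros elsewhere
    print(padded.shape)                  # (60, 100)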
test/ocr/imgtransform.py ADDED
@@ -0,0 +1,29 @@
+ # -*- coding: utf-8 -*-
+ """
+ Functions for transforming and preprocessing images for training
+ """
+ import numpy as np
+ import cv2
+ from scipy.ndimage import map_coordinates
+
+
+ def coordinates_remap(image, factor_alpha, factor_sigma):
+     """Transform an image using elastic coordinate remapping."""
+     alpha = image.shape[1] * factor_alpha
+     sigma = image.shape[1] * factor_sigma
+     shape = image.shape
+
+     blur_size = int(4*sigma) | 1
+     dx = alpha * cv2.GaussianBlur((np.random.rand(*shape) * 2 - 1),
+                                   ksize=(blur_size, blur_size),
+                                   sigmaX=sigma)
+     dy = alpha * cv2.GaussianBlur((np.random.rand(*shape) * 2 - 1),
+                                   ksize=(blur_size, blur_size),
+                                   sigmaX=sigma)
+
+     x, y = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]))
+     indices = np.reshape(y+dy, (-1, 1)), np.reshape(x+dx, (-1, 1))
+
+     # TODO use cv2.remap(image, dx, dy, interpolation=cv2.INTER_LINEAR)
+     return np.array(map_coordinates(image, indices, order=1, mode='constant').reshape(shape))
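
A usage sketch for the elastic distortion above (the factor values are illustrative, not taken from this repo):

    import numpy as np
    from ocr.imgtransform import coordinates_remap

    img = (np.random.rand(60, 120) * 255).astype(np.uint8)  # stand-in for a word image
    warped = coordinates_remap(img, factor_alpha=0.08, factor_sigma=0.3)
    print(warped.shape)  # same shape as the input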
test/ocr/mlhelpers.py ADDED
@@ -0,0 +1,102 @@
+ # -*- coding: utf-8 -*-
+ """
+ Classes for controlling machine learning processes
+ """
+ import numpy as np
+ import math
+ import matplotlib.pyplot as plt
+ import csv
+
+
+ class TrainingPlot:
+     """
+     Create a live plot during training.
+     REQUIRES the notebook backend: %matplotlib notebook
+     @TODO Migrate to Tensorboard
+     """
+     train_loss = []
+     train_acc = []
+     valid_acc = []
+     test_iter = 0
+     loss_iter = 0
+     interval = 0
+     ax1 = None
+     ax2 = None
+     fig = None
+
+     def __init__(self, steps, test_itr, loss_itr):
+         self.test_iter = test_itr
+         self.loss_iter = loss_itr
+         self.interval = steps
+
+         self.fig, self.ax1 = plt.subplots()
+         self.ax2 = self.ax1.twinx()
+         self.ax1.set_autoscaley_on(True)
+         plt.ion()
+
+         self._update_plot()
+
+         # Description
+         self.ax1.set_xlabel('Iteration')
+         self.ax1.set_ylabel('Train Loss')
+         self.ax2.set_ylabel('Valid. Accuracy')
+
+         # Axes limits
+         self.ax1.set_ylim([0, 10])
+
+     def _update_plot(self):
+         self.fig.canvas.draw()
+
+     def update_loss(self, loss_train, index):
+         self.train_loss.append(loss_train)
+         if len(self.train_loss) == 1:
+             self.ax1.set_ylim([0, min(10, math.ceil(loss_train))])
+         self.ax1.plot(self.loss_iter * np.arange(len(self.train_loss)),
+                       self.train_loss, 'b', linewidth=1.0)
+
+         self._update_plot()
+
+     def update_acc(self, acc_val, acc_train, index):
+         self.valid_acc.append(acc_val)
+         self.train_acc.append(acc_train)
+
+         self.ax2.plot(self.test_iter * np.arange(len(self.valid_acc)),
+                       self.valid_acc, 'r', linewidth=1.0)
+         self.ax2.plot(self.test_iter * np.arange(len(self.train_acc)),
+                       self.train_acc, 'g', linewidth=1.0)
+
+         self.ax2.set_title('Valid. Accuracy: {:.4f}'.format(self.valid_acc[-1]))
+
+         self._update_plot()
+
+
+ class DataSet:
+     """Class for training data and feeding the train function."""
+     images = None
+     labels = None
+     length = 0
+     index = 0
+
+     def __init__(self, img, lbl):
+         self.images = img
+         self.labels = lbl
+         self.length = len(img)
+         self.index = 0
+
+     def next_batch(self, batch_size):
+         """Return the next batch from the data set."""
+         start = self.index
+         self.index += batch_size
+
+         if self.index > self.length:
+             # Shuffle the data
+             perm = np.arange(self.length)
+             np.random.shuffle(perm)
+             self.images = self.images[perm]
+             self.labels = self.labels[perm]
+             # Start the next epoch
+             start = 0
+             self.index = batch_size
+
+         end = self.index
+         return self.images[start:end], self.labels[start:end]
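
A usage sketch for DataSet showing the epoch wrap-around (toy arrays):

    import numpy as np
    from ocr.mlhelpers import DataSet

    data = DataSet(np.arange(10).reshape(10, 1), np.arange(10))
    for _ in range(3):
        batch_x, batch_y = data.next_batch(4)  # the third call crosses the epoch and reshuffles
        print(batch_y)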
test/ocr/normalization.py ADDED
@@ -0,0 +1,207 @@
+ # -*- coding: utf-8 -*-
+ """
+ Functions for normalizing images of words and letters
+ Main functions: word_normalization, letter_normalization, image_standardization
+ """
+ import numpy as np
+ import cv2
+ import math
+
+ from .helpers import *
+
+
+ def image_standardization(image):
+     """Image standardization should give the same output
+     as tf.image.per_image_standardization.
+     """
+     return (image - np.mean(image)) / max(np.std(image), 1.0/math.sqrt(image.size))
+
+
+ def _crop_add_border(img, height, threshold=50, border=True, border_size=15):
+     """Crop a word image and add a border for letter segmentation."""
+     # Clear small values
+     ret, img = cv2.threshold(img, threshold, 255, cv2.THRESH_TOZERO)
+
+     x0 = 0
+     y0 = 0
+     x1 = img.shape[1]
+     y1 = img.shape[0]
+
+     for i in range(img.shape[0]):
+         if np.count_nonzero(img[i, :]) > 1:
+             y0 = i
+             break
+     for i in reversed(range(img.shape[0])):
+         if np.count_nonzero(img[i, :]) > 1:
+             y1 = i+1
+             break
+     for i in range(img.shape[1]):
+         if np.count_nonzero(img[:, i]) > 1:
+             x0 = i
+             break
+     for i in reversed(range(img.shape[1])):
+         if np.count_nonzero(img[:, i]) > 1:
+             x1 = i+1
+             break
+
+     if height != 0:
+         img = resize(img[y0:y1, x0:x1], height, True)
+     else:
+         img = img[y0:y1, x0:x1]
+
+     if border:
+         return cv2.copyMakeBorder(img, 0, 0, border_size, border_size,
+                                   cv2.BORDER_CONSTANT,
+                                   value=[0, 0, 0])
+     return img
+
+
+ def _word_tilt(img, height, border=True, border_size=15):
+     """Detect the angle and tilt the image."""
+     edges = cv2.Canny(img, 50, 150, apertureSize=3)
+     lines = cv2.HoughLines(edges, 1, np.pi/180, 30)
+
+     if lines is not None:
+         meanAngle = 0
+         # Set the min number of valid lines (try higher)
+         numLines = sum(1 for l in lines if l[0][1] < 0.7 or l[0][1] > 2.6)
+         if numLines > 1:
+             meanAngle = np.mean([l[0][1] for l in lines if l[0][1] < 0.7 or l[0][1] > 2.6])
+
+         # Look for an angle with a correct value
+         if meanAngle != 0 and (meanAngle < 0.7 or meanAngle > 2.6):
+             img = _tilt_by_angle(img, meanAngle, height)
+     return _crop_add_border(img, height, 50, border, border_size)
+
+
+ def _tilt_by_angle(img, angle, height):
+     """Tilt the image by the given angle."""
+     dist = np.tan(angle) * height
+     width = len(img[0])
+     sPoints = np.float32([[0, 0], [0, height], [width, height], [width, 0]])
+
+     # Dist is positive for angle < 0.7; negative for angle > 2.6
+     # The image must be shifted to the right
+     if dist > 0:
+         tPoints = np.float32([[0, 0],
+                               [dist, height],
+                               [width+dist, height],
+                               [width, 0]])
+     else:
+         tPoints = np.float32([[-dist, 0],
+                               [0, height],
+                               [width, height],
+                               [width-dist, 0]])
+
+     M = cv2.getPerspectiveTransform(sPoints, tPoints)
+     return cv2.warpPerspective(img, M, (int(width+abs(dist)), height))
+
+
+ def _sobel_detect(channel):
+     """The Sobel operator."""
+     sobelX = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
+     sobelY = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
+     # Combine x, y gradient magnitudes sqrt(x^2 + y^2)
+     sobel = np.hypot(sobelX, sobelY)
+     sobel[sobel > 255] = 255
+     return np.uint8(sobel)
+
+
+ class HysterThresh:
+     def __init__(self, img):
+         img = 255 - img
+         img = (img - np.min(img)) / (np.max(img) - np.min(img)) * 255
+         hist, bins = np.histogram(img.ravel(), 256, [0, 256])
+
+         self.high = np.argmax(hist) + 65
+         self.low = np.argmax(hist) + 45
+         self.diff = 255 - self.high
+
+         self.img = img
+         self.im = np.zeros(img.shape, dtype=img.dtype)
+
+     def get_image(self):
+         self._hyster()
+         return np.uint8(self.im)
+
+     def _hyster_rec(self, r, c):
+         h, w = self.img.shape
+         for ri in range(r-1, r+2):
+             for ci in range(c-1, c+2):
+                 if (h > ri >= 0
+                         and w > ci >= 0
+                         and self.im[ri, ci] == 0
+                         and self.high > self.img[ri, ci] >= self.low):
+                     self.im[ri, ci] = self.img[ri, ci] + self.diff
+                     self._hyster_rec(ri, ci)
+
+     def _hyster(self):
+         r, c = self.img.shape
+         for ri in range(r):
+             for ci in range(c):
+                 if self.img[ri, ci] >= self.high:
+                     self.im[ri, ci] = 255
+                     self.img[ri, ci] = 255
+                     self._hyster_rec(ri, ci)
+
+
+ def _hyst_word_norm(image):
+     """Word normalization using hysteresis thresholding."""
+     gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+     # img = cv2.bilateralFilter(gray, 0, 10, 30)
+     img = cv2.bilateralFilter(gray, 10, 10, 30)
+     return HysterThresh(img).get_image()
+
+
+ def word_normalization(image, height, border=True, tilt=True, border_size=15, hyst_norm=False):
+     """Preprocess a word - resize, binarize, tilt the word."""
+     image = resize(image, height, True)
+
+     if hyst_norm:
+         th = _hyst_word_norm(image)
+     else:
+         img = cv2.bilateralFilter(image, 10, 30, 30)
+         gray = 255 - cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+         norm = cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX)
+         ret, th = cv2.threshold(norm, 50, 255, cv2.THRESH_TOZERO)
+
+     if tilt:
+         return _word_tilt(th, height, border, border_size)
+     return _crop_add_border(th, height=height, border=border, border_size=border_size)
+
+
+ def _resize_letter(img, size=56):
+     """Resize the bigger side of the image to the given size."""
+     if img.shape[0] > img.shape[1]:
+         rat = size / img.shape[0]
+         return cv2.resize(img, (int(rat * img.shape[1]), size))
+     else:
+         rat = size / img.shape[1]
+         return cv2.resize(img, (size, int(rat * img.shape[0])))
+
+
+ def letter_normalization(image, is_thresh=True, dim=False):
+     """Preprocess a letter - crop, resize."""
+     if is_thresh and image.shape[0] > 0 and image.shape[1] > 0:
+         image = _crop_add_border(image, height=0, threshold=80, border=False)
+
+     resized = image
+     if image.shape[0] > 1 and image.shape[1] > 1:
+         resized = _resize_letter(image)
+
+     result = np.zeros((64, 64), np.uint8)
+     # Calculate the offset for the smaller side
+     if image.shape[0] > image.shape[1]:
+         offset = [int((result.shape[1] - resized.shape[1])/2), 4]
+     else:
+         offset = [4, int((result.shape[0] - resized.shape[0])/2)]
+     # Replace zeros by the image
+     result[offset[1]:offset[1] + resized.shape[0],
+            offset[0]:offset[0] + resized.shape[1]] = resized
+
+     if dim:
+         return result, image.shape
+     return result
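
A minimal sketch tying the two main entry points together (hypothetical input path; word_normalization expects an RGB photo of a word, and the crop passed to letter_normalization is purely illustrative):

    import cv2
    from ocr.normalization import word_normalization, letter_normalization

    img = cv2.cvtColor(cv2.imread('word.jpg'), cv2.COLOR_BGR2RGB)
    norm = word_normalization(img, 64, border=False, tilt=False)  # binarized, height 64
    letter = letter_normalization(norm[:, :40])  # any single-letter crop -> 64x64 uint8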
test/ocr/page.py ADDED
@@ -0,0 +1,121 @@
+ # -*- coding: utf-8 -*-
+ """
+ Crop the background and transform the perspective from a photo of a page
+ """
+ import numpy as np
+ import cv2
+
+ from .helpers import *
+
+
+ def detection(image, area_thresh=0.5):
+     """Find the page."""
+     small = resize(image)
+     # Edge detection
+     image_edges = _edges_detection(small, 200, 250)
+
+     # Close gaps between edges (double-page close => rectangle kernel)
+     closed_edges = cv2.morphologyEx(image_edges,
+                                     cv2.MORPH_CLOSE,
+                                     np.ones((5, 11)))
+     # Contours
+     page_contour = _find_page_contours(closed_edges, small, area_thresh)
+
+     # Recalculate to the original scale
+     page_contour = page_contour.dot(ratio(image, small.shape[0]))
+     # Transform perspective
+     new_image = _persp_transform(image, page_contour)
+     return new_image
+
+
+ def _edges_detection(img, minVal, maxVal):
+     """Preprocessing (gray, thresh, filter, border) + Canny edge detection."""
+     img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+     img = cv2.bilateralFilter(img, 9, 75, 75)
+     img = cv2.adaptiveThreshold(img, 255,
+                                 cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                 cv2.THRESH_BINARY, 115, 4)
+
+     # Median blur replaces the center pixel by the median of the pixels under the kernel
+     # => removes thin details
+     img = cv2.medianBlur(img, 11)
+
+     # Add a black border - detection of border-touching pages
+     img = cv2.copyMakeBorder(img, 5, 5, 5, 5,
+                              cv2.BORDER_CONSTANT,
+                              value=[0, 0, 0])
+     return cv2.Canny(img, minVal, maxVal)
+
+
+ def _four_corners_sort(pts):
+     """Sort corners in order: top-left, bot-left, bot-right, top-right."""
+     diff = np.diff(pts, axis=1)
+     summ = pts.sum(axis=1)
+     return np.array([pts[np.argmin(summ)],
+                      pts[np.argmax(diff)],
+                      pts[np.argmax(summ)],
+                      pts[np.argmin(diff)]])
+
+
+ def _contour_offset(cnt, offset):
+     """Offset the contour because of the 5px border."""
+     cnt += offset
+     cnt[cnt < 0] = 0
+     return cnt
+
+
+ def _find_page_contours(edges, img, area_thresh):
+     """Find the corner points of the page contour."""
+     contours, hierarchy = cv2.findContours(edges,
+                                            cv2.RETR_TREE,
+                                            cv2.CHAIN_APPROX_SIMPLE)
+
+     # Find the biggest rectangle, otherwise return the original corners
+     height = edges.shape[0]
+     width = edges.shape[1]
+     MIN_CONTOUR_AREA = height * width * area_thresh
+     MAX_CONTOUR_AREA = (width - 10) * (height - 10)
+
+     max_area = MIN_CONTOUR_AREA
+     page_contour = np.array([[0, 0],
+                              [0, height-5],
+                              [width-5, height-5],
+                              [width-5, 0]])
+
+     for cnt in contours:
+         perimeter = cv2.arcLength(cnt, True)
+         approx = cv2.approxPolyDP(cnt, 0.03 * perimeter, True)
+
+         # The page has 4 corners and is convex
+         if (len(approx) == 4 and
+                 cv2.isContourConvex(approx) and
+                 max_area < cv2.contourArea(approx) < MAX_CONTOUR_AREA):
+
+             max_area = cv2.contourArea(approx)
+             page_contour = approx[:, 0]
+
+     # Sort the corners and offset them
+     page_contour = _four_corners_sort(page_contour)
+     return _contour_offset(page_contour, (-5, -5))
+
+
+ def _persp_transform(img, s_points):
+     """Transform the perspective from start points to target points."""
+     # Euclidean distance - calculate maximum height and width
+     height = max(np.linalg.norm(s_points[0] - s_points[1]),
+                  np.linalg.norm(s_points[2] - s_points[3]))
+     width = max(np.linalg.norm(s_points[1] - s_points[2]),
+                 np.linalg.norm(s_points[3] - s_points[0]))
+
+     # Create target points
+     t_points = np.array([[0, 0],
+                          [0, height],
+                          [width, height],
+                          [width, 0]], np.float32)
+
+     # getPerspectiveTransform() needs float32
+     if s_points.dtype != np.float32:
+         s_points = s_points.astype(np.float32)
+
+     M = cv2.getPerspectiveTransform(s_points, t_points)
+     return cv2.warpPerspective(img, M, (int(width), int(height)))
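
A usage sketch for the page detector (hypothetical image path; the module works on RGB images, as in test/ocr_test.py below):

    import cv2
    from ocr import page

    image = cv2.cvtColor(cv2.imread('photo.jpg'), cv2.COLOR_BGR2RGB)
    cropped = page.detection(image)  # perspective-corrected page region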
test/ocr/tfhelpers.py ADDED
@@ -0,0 +1,73 @@
+ # -*- coding: utf-8 -*-
+ """
+ Provide functions and classes:
+     Model = class for loading and using trained TensorFlow models
+     create_cell = function for creating RNN cells with wrappers
+ """
+ #import tensorflow as tf
+ import tensorflow.compat.v1 as tf
+ from tensorflow.python.ops.rnn_cell_impl import LSTMCell, ResidualWrapper, DropoutWrapper, MultiRNNCell
+
+
+ class Model():
+     """Load and run an isolated tf graph."""
+     def __init__(self, loc, operation='activation', input_name='x'):
+         """
+         loc: location of the file containing the saved model
+         operation: name of the operation for running the model
+         input_name: name of the input placeholder
+         """
+         self.input = input_name + ":0"
+         self.graph = tf.Graph()
+         self.sess = tf.Session(graph=self.graph)
+         with self.graph.as_default():
+             saver = tf.train.import_meta_graph(loc + '.meta', clear_devices=True)
+             saver.restore(self.sess, loc)
+             self.op = self.graph.get_operation_by_name(operation).outputs[0]
+
+     def run(self, data):
+         """Run the specified operation on the given data."""
+         return self.sess.run(self.op, feed_dict={self.input: data})
+
+     def eval_feed(self, feed):
+         """Run the specified operation with the given feed."""
+         return self.sess.run(self.op, feed_dict=feed)
+
+     def run_op(self, op, feed, output=True):
+         """Run the given operation with the feed."""
+         if output:
+             return self.sess.run(
+                 self.graph.get_operation_by_name(op).outputs[0],
+                 feed_dict=feed)
+         else:
+             self.sess.run(
+                 self.graph.get_operation_by_name(op),
+                 feed_dict=feed)
+
+
+ def _create_single_cell(cell_fn, num_units, is_residual=False, is_dropout=False, keep_prob=None):
+     """Create a single RNN cell based on cell_fn."""
+     cell = cell_fn(num_units)
+     if is_dropout:
+         cell = DropoutWrapper(cell, input_keep_prob=keep_prob)
+     if is_residual:
+         cell = ResidualWrapper(cell)
+     return cell
+
+
+ def create_cell(num_units, num_layers, num_residual_layers, is_dropout=False, keep_prob=None, cell_fn=LSTMCell):
+     """Create the corresponding number of RNN cells with the given wrappers."""
+     cell_list = []
+
+     for i in range(num_layers):
+         cell_list.append(_create_single_cell(
+             cell_fn=cell_fn,
+             num_units=num_units,
+             is_residual=(i >= num_layers - num_residual_layers),
+             is_dropout=is_dropout,
+             keep_prob=keep_prob
+         ))
+
+     if num_layers == 1:
+         return cell_list[0]
+     return MultiRNNCell(cell_list)
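
A usage sketch for the Model wrapper (the checkpoint prefix below matches the path used in test/ocr_test.py; the loader itself appends '.meta'):

    from ocr.tfhelpers import Model

    model = Model('../models/char-clas/en/CharClassifier')
    # 'batch' must match the graph's 'x' placeholder, e.g. flattened 64x64 letters:
    # pred = model.run(batch)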
test/ocr/viz.py ADDED
@@ -0,0 +1,22 @@
+ def print_progress_bar(iteration,
+                        total,
+                        prefix='',
+                        suffix=''):
+     """Call in a loop to create a terminal progress bar.
+     Args:
+         iteration: current iteration (Int)
+         total: total iterations (Int)
+         prefix: prefix string (Str)
+         suffix: suffix string (Str)
+     """
+     # Printing slows down the loop, so only update roughly once per percent
+     if iteration % max(total // 100, 1) == 0:
+         length = 40
+         iteration += 1
+         percent = (100 * iteration) // (total * 99/100)
+         filled_length = int(length * percent / 100)
+         bar = '█' * filled_length + '-' * (length - filled_length)
+         print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r')
+
+     if iteration >= total * 99/100:
+         print()
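
Typical call pattern (the bar rewrites itself in place on one terminal line):

    from ocr.viz import print_progress_bar

    total = 500
    for i in range(total):
        print_progress_bar(i, total, prefix='Loading')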
test/ocr/words.py ADDED
@@ -0,0 +1,223 @@
+ # -*- coding: utf-8 -*-
+ """
+ Detect words on the page and
+ return an array of words' bounding boxes
+ """
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import cv2
+
+ from .helpers import *
+
+
+ def detection(image, join=False):
+     """Detect the words' bounding boxes.
+     Return: numpy array of bounding boxes [x, y, x+w, y+h]
+     """
+     # Preprocess the image for word detection
+     blurred = cv2.GaussianBlur(image, (5, 5), 18)
+     edge_img = _edge_detect(blurred)
+     ret, edge_img = cv2.threshold(edge_img, 50, 255, cv2.THRESH_BINARY)
+     bw_img = cv2.morphologyEx(edge_img, cv2.MORPH_CLOSE,
+                               np.ones((15, 15), np.uint8))
+
+     return _text_detect(bw_img, image, join)
+
+
+ def sort_words(boxes):
+     """Sort boxes - (x, y, x+w, y+h) from left to right, top to bottom."""
+     mean_height = sum([y2 - y1 for _, y1, _, y2 in boxes]) / len(boxes)
+
+     boxes.view('i8,i8,i8,i8').sort(order=['f1'], axis=0)
+     current_line = boxes[0][1]
+     lines = []
+     tmp_line = []
+     for box in boxes:
+         if box[1] > current_line + mean_height:
+             lines.append(tmp_line)
+             tmp_line = [box]
+             current_line = box[1]
+             continue
+         tmp_line.append(box)
+     lines.append(tmp_line)
+
+     for line in lines:
+         line.sort(key=lambda box: box[0])
+
+     return lines
+
+
+ def _edge_detect(im):
+     """
+     Edge detection using the Sobel operator on each layer individually.
+     The Sobel operator is applied to each image layer (RGB).
+     """
+     return np.max(np.array([_sobel_detect(im[:, :, 0]),
+                             _sobel_detect(im[:, :, 1]),
+                             _sobel_detect(im[:, :, 2])]), axis=0)
+
+
+ def _sobel_detect(channel):
+     """Sobel operator."""
+     sobelX = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
+     sobelY = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
+     sobel = np.hypot(sobelX, sobelY)
+     sobel[sobel > 255] = 255
+     return np.uint8(sobel)
+
+
+ def union(a, b):
+     x = min(a[0], b[0])
+     y = min(a[1], b[1])
+     w = max(a[0]+a[2], b[0]+b[2]) - x
+     h = max(a[1]+a[3], b[1]+b[3]) - y
+     return [x, y, w, h]
+
+
+ def _intersect(a, b):
+     x = max(a[0], b[0])
+     y = max(a[1], b[1])
+     w = min(a[0]+a[2], b[0]+b[2]) - x
+     h = min(a[1]+a[3], b[1]+b[3]) - y
+     if w < 0 or h < 0:
+         return False
+     return True
+
+
+ def _group_rectangles(rec):
+     """
+     Union intersecting rectangles.
+     Args:
+         rec - list of rectangles in the form [x, y, w, h]
+     Return:
+         list of grouped rectangles
+     """
+     tested = [False for i in range(len(rec))]
+     final = []
+     i = 0
+     while i < len(rec):
+         if not tested[i]:
+             j = i+1
+             while j < len(rec):
+                 if not tested[j] and _intersect(rec[i], rec[j]):
+                     rec[i] = union(rec[i], rec[j])
+                     tested[j] = True
+                     j = i
+                 j += 1
+             final += [rec[i]]
+         i += 1
+
+     return final
+
+
+ def _text_detect(img, image, join=False):
+     """Text detection using contours."""
+     small = resize(img, 2000)
+
+     # Finding contours
+     mask = np.zeros(small.shape, np.uint8)
+     cnt, hierarchy = cv2.findContours(np.copy(small),
+                                       cv2.RETR_CCOMP,
+                                       cv2.CHAIN_APPROX_SIMPLE)
+
+     index = 0
+     boxes = []
+     # Go through all contours in the top level
+     while (index >= 0):
+         x, y, w, h = cv2.boundingRect(cnt[index])
+         cv2.drawContours(mask, cnt, index, (255, 255, 255), cv2.FILLED)
+         maskROI = mask[y:y+h, x:x+w]
+         # Ratio of white pixels to the area of the bounding rectangle
+         r = cv2.countNonZero(maskROI) / (w * h)
+
+         # Limits for text
+         if (r > 0.1
+                 and 1600 > w > 10
+                 and 1600 > h > 10
+                 and h/w < 3
+                 and w/h < 10
+                 and (60 // h) * w < 1000):
+             boxes += [[x, y, w, h]]
+
+         index = hierarchy[0][index][0]
+
+     if join:
+         # Needs more work
+         boxes = _group_rectangles(boxes)
+
+     # Image for drawing bounding boxes
+     small = cv2.cvtColor(small, cv2.COLOR_GRAY2RGB)
+     bounding_boxes = np.array([0, 0, 0, 0])
+     for (x, y, w, h) in boxes:
+         cv2.rectangle(small, (x, y), (x+w, y+h), (0, 255, 0), 2)
+         bounding_boxes = np.vstack((bounding_boxes,
+                                     np.array([x, y, x+w, y+h])))
+
+     implt(small, t='Bounding rectangles')
+
+     boxes = bounding_boxes.dot(ratio(image, small.shape[0])).astype(np.int64)
+     return boxes[1:]
+
+
+ def textDetectWatershed(thresh):
+     """NOT IN USE - Text detection using the watershed algorithm.
+     Based on: http://docs.opencv.org/trunk/d3/db4/tutorial_py_watershed.html
+     NOTE: relies on a global IMG name defined by the caller.
+     """
+     img = cv2.cvtColor(cv2.imread("data/textdet/%s.jpg" % IMG),
+                        cv2.COLOR_BGR2RGB)
+     img = resize(img, 3000)
+     thresh = resize(thresh, 3000)
+     # Noise removal
+     kernel = np.ones((3, 3), np.uint8)
+     opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=3)
+
+     # Sure background area
+     sure_bg = cv2.dilate(opening, kernel, iterations=3)
+
+     # Finding the sure foreground area
+     dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
+     ret, sure_fg = cv2.threshold(dist_transform,
+                                  0.01*dist_transform.max(), 255, 0)
+
+     # Finding the unknown region
+     sure_fg = np.uint8(sure_fg)
+     unknown = cv2.subtract(sure_bg, sure_fg)
+
+     # Marker labelling
+     ret, markers = cv2.connectedComponents(sure_fg)
+
+     # Add one to all labels so that the sure background is not 0, but 1
+     markers += 1
+
+     # Now mark the unknown region with zero
+     markers[unknown == 255] = 0
+
+     markers = cv2.watershed(img, markers)
+     implt(markers, t='Markers')
+     image = img.copy()
+     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+     for mark in np.unique(markers):
+         # mark == 0 --> background
+         if mark == 0:
+             continue
+
+         # Draw it on the mask and detect the biggest contour
+         mask = np.zeros(gray.shape, dtype="uint8")
+         mask[markers == mark] = 255
+
+         cnts = cv2.findContours(mask.copy(),
+                                 cv2.RETR_EXTERNAL,
+                                 cv2.CHAIN_APPROX_SIMPLE)[-2]
+         c = max(cnts, key=cv2.contourArea)
+
+         # Draw a bounding rectangle if it contains text
+         x, y, w, h = cv2.boundingRect(c)
+         cv2.drawContours(mask, c, 0, (255, 255, 255), cv2.FILLED)
+         maskROI = mask[y:y+h, x:x+w]
+         # Ratio of white pixels to the area of the bounding rectangle
+         r = cv2.countNonZero(maskROI) / (w * h)
+
+         # Limits for text
+         if r > 0.2 and 2000 > w > 15 and 1500 > h > 15:
+             cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)
+
+     implt(image)
test/ocr_test.py ADDED
@@ -0,0 +1,61 @@
+ import sys
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import tensorflow as tf
+ import cv2
+
+ sys.path.append('../src')
+ from ocr.normalization import word_normalization, letter_normalization
+ from ocr import page, words, characters
+ from ocr.helpers import implt, resize
+ from ocr.tfhelpers import Model
+ from ocr.datahelpers import idx2char
+
+ IMG = '../data/test.jpg'    # 1, 2, 3
+ LANG = 'en'
+ # You can use only one of these two models
+ # You HAVE TO train the CTC model yourself using word_classifier_CTC.ipynb
+ MODEL_LOC_CHARS = f'../models/char-clas/{LANG}/CharClassifier'
+ MODEL_LOC_CTC = '../models/word-clas/CTC/Classifier1'
+
+ CHARACTER_MODEL = Model(MODEL_LOC_CHARS)
+ CTC_MODEL = Model(MODEL_LOC_CTC, 'word_prediction')
+
+ image = cv2.cvtColor(cv2.imread(IMG), cv2.COLOR_BGR2RGB)
+ # implt(image)
+
+ # Crop the image and get bounding boxes
+ crop = page.detection(image)
+ # implt(crop)
+ boxes = words.detection(crop)
+ lines = words.sort_words(boxes)
+
+
+ def recognise(img):
+     """Recognise words using the CTC model."""
+     img = word_normalization(
+         img,
+         64,
+         border=False,
+         tilt=False,
+         hyst_norm=False)
+     length = img.shape[1]
+     # Input has shape [batch_size, height, width, 1]
+     input_imgs = np.zeros(
+         (1, 64, length, 1), dtype=np.uint8)
+     input_imgs[0][:, :length, 0] = img
+
+     pred = CTC_MODEL.eval_feed({
+         'inputs:0': input_imgs,
+         'inputs_length:0': [length],
+         'keep_prob:0': 1})[0]
+
+     word = ''
+     for i in pred:
+         word += idx2char(i + 1)
+     return word
+
+
+ # implt(crop)
+ for line in lines:
+     print(" ".join([recognise(crop[y1:y2, x1:x2]) for (x1, y1, x2, y2) in line]))
test/openai_demo.py ADDED
@@ -0,0 +1,27 @@
+ import os
+ from openai import OpenAI
+ from llamaapi import LlamaAPI
+
+ # Initialize the llamaapi with your api_token
+ llama = LlamaAPI("LL-AirERHEk0jLIE1yEPvMXeobNfLsqLWJWcxLRS53obrZ3XyqMTfZc4EAuOs7r3wso")
+
+ api_key = "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"
+ os.environ["OPENAI_API_KEY"] = api_key
+ client = OpenAI()
+
+ prompt = 'hello, who are you ?'
+ chat_completion = client.chat.completions.create(
+     model="gpt-4",
+     messages=[
+         {"role": "system", "content": "Provide feedback on the inputted writing sample from an ESL learner. "
+             "Focus on areas such as grammar, vocabulary usage, and overall coherence and organization of the essay. "
+             "Offer corrective feedback on errors, suggest improvements, and highlight positive aspects to encourage "
+             "the learner. Please ensure the feedback is constructive, clear, and supportive to help the learner "
+             "understand and apply the suggestions. Always frame feedback in a positive, constructive manner. "
+             "Focus on how the student can improve rather than just highlighting mistakes. Provide clear examples "
+             "when pointing out errors or suggesting improvements. Prompt the learner to reflect on specific parts of "
+             "their writing"},
+         {"role": "user", "content": prompt},
+     ]
+ )
+ print(chat_completion.choices[0].message.content.strip())
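
A safer variant of the client setup above, sketched rather than applied to the demo: read the key from the environment instead of hardcoding it in the source.

    import os
    from openai import OpenAI

    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])  # export OPENAI_API_KEY beforehand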
test/streamlit_demo.py ADDED
@@ -0,0 +1,19 @@
+ import streamlit as st
+ from faker import Faker
+
+ st.title('Smart Robot')
+
+ user_input = st.chat_input('What do you want to say?')
+ fake = Faker()
+
+
+ def generate_response():
+     output = fake.text()
+     return output
+
+
+ if user_input:
+     container = st.container(border=True)
+     bot_response = generate_response()
+     container.write("Robot: " + bot_response)
+     container.write("asdjkl")
test/test.py ADDED
@@ -0,0 +1,14 @@
+ import streamlit as st
+ import webbrowser
+
+ # Define a function that redirects to Google
+ def redirect_to_google():
+     url = 'https://www.google.com'
+     webbrowser.open_new_tab(url)
+
+
+ # # Call the redirect function in the app
+ # redirect_to_google()
+ if st.button('go'):
+     print('go')
+     redirect_to_google()