|
|
|
import os |
|
import numpy as np |
|
|
|
import tensorflow.compat.v1 as tf |
|
tf.disable_v2_behavior() |
|
import cv2 |
|
import math |
|
|
|
from .helpers import * |
|
from .tfhelpers import Model |
|
|
|
|
|
|
|
print("Loading segmentation models...") |
|
location = os.path.dirname(os.path.abspath(__file__)) |
|
CNN_model = Model( |
|
os.path.join(location, '../../models/gap-clas/CNN-CG')) |
|
CNN_slider = (60, 30) |
|
RNN_model = Model( |
|
os.path.join(location, '../../models/gap-clas/RNN/Bi-RNN-new'), |
|
'prediction') |
|
RNN_slider = (60, 60) |
|
|
|
|
|
def _classify(img, step=2, RNN=False, slider=(60, 60)): |
|
"""Slice the image and return raw output of classifier.""" |
|
length = (img.shape[1] - slider[1]) // 2 + 1 |
|
if RNN: |
|
input_seq = np.zeros((1, length, slider[0]*slider[1]), dtype=np.float32) |
|
input_seq[0][:] = [img[:, loc * step: loc * step + slider[1]].flatten() |
|
for loc in range(length)] |
|
pred = RNN_model.eval_feed({'inputs:0': input_seq, |
|
'length:0': [length], |
|
'keep_prob:0': 1})[0] |
|
else: |
|
input_seq = np.zeros((length, slider[0]*slider[1]), dtype=np.float32) |
|
input_seq[:] = [img[:, loc * step: loc * step + slider[1]].flatten() |
|
for loc in range(length)] |
|
pred = CNN_model.run(input_seq) |
|
|
|
return pred |
|
|
|
|
|
def segment(img, step=2, RNN=False, debug=False): |
|
"""Take preprocessed image of word and |
|
returns array of positions separating characters. |
|
""" |
|
slider = CNN_slider |
|
if RNN: |
|
slider = RNN_slider |
|
|
|
|
|
pred = _classify(img, step=step, RNN=RNN, slider=slider) |
|
|
|
|
|
gaps = [] |
|
last_gap = 0 |
|
gap_count = 1 |
|
gap_position_sum = slider[1] / 2 |
|
first_gap = True |
|
gap_block_first = 0 |
|
gap_block_last = slider[1] / 2 |
|
|
|
for i, p in enumerate(pred): |
|
if p == 1: |
|
gap_position_sum += i * step + slider[1] / 2 |
|
gap_block_last = i * step + slider[1] / 2 |
|
gap_count += 1 |
|
last_gap = 0 |
|
if gap_block_first == 0: |
|
gap_block_first = i * step + slider[1] / 2 |
|
else: |
|
if gap_count != 0 and last_gap >= 1: |
|
if first_gap: |
|
gaps.append(int(gap_block_last)) |
|
first_gap = False |
|
else: |
|
gaps.append(int(gap_position_sum // gap_count)) |
|
gap_position_sum = 0 |
|
gap_count = 0 |
|
gap_block_first = 0 |
|
last_gap += 1 |
|
|
|
|
|
if gap_block_first != 0: |
|
gaps.append(int(gap_block_first)) |
|
else: |
|
gap_position_sum += (len(pred) - 1) * 2 + slider[1]/2 |
|
gaps.append(int(gap_position_sum / (gap_count + 1))) |
|
|
|
if debug: |
|
|
|
img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) |
|
for gap in gaps: |
|
cv2.line(img, |
|
((int)(gap), 0), |
|
((int)(gap), slider[0]), |
|
(0, 255, 0), 1) |
|
implt(img, t="Separated characters") |
|
|
|
return gaps |