Spaces:

Parechan
/

lanpip

Runtime error

App Files Files Community

Parechan committed on Apr 4, 2024

Commit

0c84ee8

verified ·

1 Parent(s): 1b2eabd

Upload 35 files

Browse files

Files changed (35) hide show

test/bot_test.py +11 -0
test/ocr/__init__.py +0 -0
test/ocr/__pycache__/__init__.cpython-310.pyc +0 -0
test/ocr/__pycache__/__init__.cpython-312.pyc +0 -0
test/ocr/__pycache__/characters.cpython-310.pyc +0 -0
test/ocr/__pycache__/characters.cpython-312.pyc +0 -0
test/ocr/__pycache__/datahelpers.cpython-310.pyc +0 -0
test/ocr/__pycache__/datahelpers.cpython-312.pyc +0 -0
test/ocr/__pycache__/helpers.cpython-310.pyc +0 -0
test/ocr/__pycache__/helpers.cpython-312.pyc +0 -0
test/ocr/__pycache__/normalization.cpython-310.pyc +0 -0
test/ocr/__pycache__/normalization.cpython-312.pyc +0 -0
test/ocr/__pycache__/page.cpython-310.pyc +0 -0
test/ocr/__pycache__/page.cpython-312.pyc +0 -0
test/ocr/__pycache__/tfhelpers.cpython-310.pyc +0 -0
test/ocr/__pycache__/tfhelpers.cpython-312.pyc +0 -0
test/ocr/__pycache__/viz.cpython-310.pyc +0 -0
test/ocr/__pycache__/viz.cpython-312.pyc +0 -0
test/ocr/__pycache__/words.cpython-310.pyc +0 -0
test/ocr/__pycache__/words.cpython-312.pyc +0 -0
test/ocr/characters.py +102 -0
test/ocr/datahelpers.py +287 -0
test/ocr/dataiterator.py +98 -0
test/ocr/helpers.py +45 -0
test/ocr/imgtransform.py +29 -0
test/ocr/mlhelpers.py +102 -0
test/ocr/normalization.py +207 -0
test/ocr/page.py +121 -0
test/ocr/tfhelpers.py +73 -0
test/ocr/viz.py +22 -0
test/ocr/words.py +223 -0
test/ocr_test.py +61 -0
test/openai_demo.py +27 -0
test/streamlit_demo.py +19 -0
test/test.py +14 -0

test/bot_test.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from bots import classify, feedback, vocab, assessment, speaking
+# print(feedback.transcribe_handwriting('https://media.cnn.com/api/v1/images/stellar/prod/160122124623-01-national-handwriting-day.jpg?q=w_3264,h_1836,x_0,y_0,c_fill'))
+# print(vocab.vocab_chat_with_model('hello', 'gpt-4'))
+# print(assessment.chat_assessment_with_model('hello', 'gpt-4'))
+# speaking.convert_to_mp3('sample.mp3', 'output.mp3')
+# print(speaking.transcribe_audio('output.mp3'))
+# speaking.text_to_speech('hello, world!', 'text2speech.mp3')

test/ocr/__init__.py ADDED Viewed

File without changes

test/ocr/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (157 Bytes). View file

test/ocr/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (162 Bytes). View file

test/ocr/__pycache__/characters.cpython-310.pyc ADDED Viewed

Binary file (2.57 kB). View file

test/ocr/__pycache__/characters.cpython-312.pyc ADDED Viewed

Binary file (4.25 kB). View file

test/ocr/__pycache__/datahelpers.cpython-310.pyc ADDED Viewed

Binary file (9.15 kB). View file

test/ocr/__pycache__/datahelpers.cpython-312.pyc ADDED Viewed

Binary file (15 kB). View file

test/ocr/__pycache__/helpers.cpython-310.pyc ADDED Viewed

Binary file (1.37 kB). View file

test/ocr/__pycache__/helpers.cpython-312.pyc ADDED Viewed

Binary file (1.92 kB). View file

test/ocr/__pycache__/normalization.cpython-310.pyc ADDED Viewed

Binary file (6.33 kB). View file

test/ocr/__pycache__/normalization.cpython-312.pyc ADDED Viewed

Binary file (11.6 kB). View file

test/ocr/__pycache__/page.cpython-310.pyc ADDED Viewed

Binary file (3.11 kB). View file

test/ocr/__pycache__/page.cpython-312.pyc ADDED Viewed

Binary file (5.69 kB). View file

test/ocr/__pycache__/tfhelpers.cpython-310.pyc ADDED Viewed

Binary file (2.77 kB). View file

test/ocr/__pycache__/tfhelpers.cpython-312.pyc ADDED Viewed

Binary file (4.03 kB). View file

test/ocr/__pycache__/viz.cpython-310.pyc ADDED Viewed

Binary file (800 Bytes). View file

test/ocr/__pycache__/viz.cpython-312.pyc ADDED Viewed

Binary file (1.02 kB). View file

test/ocr/__pycache__/words.cpython-310.pyc ADDED Viewed

Binary file (6.05 kB). View file

test/ocr/__pycache__/words.cpython-312.pyc ADDED Viewed

Binary file (10.8 kB). View file

test/ocr/characters.py ADDED Viewed

	@@ -0,0 +1,102 @@

+# -*- coding: utf-8 -*-
+import os
+import numpy as np
+#import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
+import cv2
+import math
+from .helpers import *
+from .tfhelpers import Model
# Preloading trained model with activation function
# Loading is slow -> prevent multiple loads (done once, at import time)
print("Loading segmentation models...")
# Model paths are resolved relative to this file, not the working directory.
location = os.path.dirname(os.path.abspath(__file__))
CNN_model = Model(
    os.path.join(location, '../../models/gap-clas/CNN-CG'))
# Sliding-window size used with CNN_model (read as slider[0], slider[1]).
CNN_slider = (60, 30)
# NOTE(review): the second argument is presumably the name of the output
# operation -- confirm against tfhelpers.Model.
RNN_model = Model(
    os.path.join(location, '../../models/gap-clas/RNN/Bi-RNN-new'),
    'prediction')
# Sliding-window size used with RNN_model.
RNN_slider = (60, 60)
def _classify(img, step=2, RNN=False, slider=(60, 60)):
    """Slice the image and return raw output of classifier.

    Args:
        img: 2D word image. Windows are cut along axis 1 and flattened into
            vectors of ``slider[0] * slider[1]`` values, so the image height
            has to equal ``slider[0]``.
        step: horizontal distance in pixels between consecutive windows.
        RNN: use ``RNN_model`` instead of ``CNN_model``.
        slider: ``(height, width)`` of the sliding window.

    Returns:
        Raw prediction of the selected model for every window position.
    """
    # Number of window positions that fit into the image.  The stride must be
    # ``step`` (it used to be hard-coded to 2), otherwise any other step cuts
    # windows past the right edge or leaves part of the word unclassified.
    length = (img.shape[1] - slider[1]) // step + 1
    windows = [img[:, loc * step: loc * step + slider[1]].flatten()
               for loc in range(length)]
    if RNN:
        # The recurrent model takes one batch holding the whole sequence.
        input_seq = np.zeros((1, length, slider[0] * slider[1]),
                             dtype=np.float32)
        input_seq[0][:] = windows
        pred = RNN_model.eval_feed({'inputs:0': input_seq,
                                    'length:0': [length],
                                    'keep_prob:0': 1})[0]
    else:
        # The CNN classifies every window independently.
        input_seq = np.zeros((length, slider[0] * slider[1]),
                             dtype=np.float32)
        input_seq[:] = windows
        pred = CNN_model.run(input_seq)
    return pred
def segment(img, step=2, RNN=False, debug=False):
    """Take preprocessed image of word and
    return array of positions separating characters.

    Args:
        img: preprocessed word image handed to the gap classifier.
        step: horizontal stride in pixels of the sliding window.
        RNN: use the recurrent classifier (and its window size) instead of
            the CNN one.
        debug: draw the detected gap lines over the image and display it.

    Returns:
        List of integer x-coordinates of the detected gaps.
    """
    slider = RNN_slider if RNN else CNN_slider
    half_window = slider[1] / 2

    # Run the classifier
    pred = _classify(img, step=step, RNN=RNN, slider=slider)

    # Finalize the gap positions from raw prediction.  The accumulators start
    # as if one gap window centred at ``half_window`` had already been seen.
    gaps = []
    last_gap = 0                      # non-gap windows since the last gap
    gap_count = 1
    gap_position_sum = half_window
    first_gap = True
    gap_block_first = 0               # centre of first window in current run
    gap_block_last = half_window      # centre of last gap window seen
    for i, p in enumerate(pred):
        if p == 1:
            center = i * step + half_window
            gap_position_sum += center
            gap_block_last = center
            gap_count += 1
            last_gap = 0
            if gap_block_first == 0:
                gap_block_first = center
        else:
            # A run of gap windows is closed by the second non-gap window.
            if gap_count != 0 and last_gap >= 1:
                if first_gap:
                    # The leading run contributes its last position
                    gaps.append(int(gap_block_last))
                    first_gap = False
                else:
                    # Later runs contribute their mean position
                    gaps.append(int(gap_position_sum // gap_count))
                gap_position_sum = 0
                gap_count = 0
            gap_block_first = 0
            last_gap += 1

    # Adding final gap position
    if gap_block_first != 0:
        gaps.append(int(gap_block_first))
    else:
        # Centre of the last window; uses ``step`` (was hard-coded to 2) so
        # the position stays correct for any stride.
        gap_position_sum += (len(pred) - 1) * step + half_window
        gaps.append(int(gap_position_sum / (gap_count + 1)))

    if debug:
        # Drawing lines
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        for gap in gaps:
            cv2.line(img,
                     (int(gap), 0),
                     (int(gap), slider[0]),
                     (0, 255, 0), 1)
        implt(img, t="Separated characters")
    return gaps

test/ocr/datahelpers.py ADDED Viewed

	@@ -0,0 +1,287 @@

+# -*- coding: utf-8 -*-
+"""
+Helper functions for loading and creating datasets
+"""
+import numpy as np
+import glob
+import simplejson
+import os
+import cv2
+import csv
+import sys
+import unidecode
+from .helpers import implt
+from .normalization import letter_normalization
+from .viz import print_progress_bar
# Character set of the recogniser; index 0 is the empty string.
CHARS = ([''] +
         list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') +
         list('abcdefghijklmnopqrstuvwxyz') +
         list('0123456789') +
         ['.', '-', '+', "'"])
CHAR_SIZE = len(CHARS)
# Lookup tables between characters and their position in CHARS.
idxs = list(range(CHAR_SIZE))
idx_2_chars = dict(enumerate(CHARS))
chars_2_idx = {char: idx for idx, char in enumerate(CHARS)}
def char2idx(c, sequence=False):
    """Return the index of character ``c`` in CHARS.

    With ``sequence`` set the index is shifted up by one.
    """
    shift = 1 if sequence else 0
    return chars_2_idx[c] + shift
def idx2char(idx, sequence=False):
    """Return the character stored at index ``idx`` of CHARS.

    With ``sequence`` set, ``idx`` is first shifted down by one (the inverse
    of ``char2idx(..., sequence=True)``).
    """
    key = idx - 1 if sequence else idx
    return idx_2_chars[key]
def load_words_data(dataloc='data/words/', is_csv=False, load_gaplines=False):
    """
    Load word images with corresponding labels and gaplines (if load_gaplines == True).
    Args:
        dataloc: image folder location/CSV file - can be list of multiple locations
        is_csv: using CSV files
        load_gaplines: whether or not to load gapline position files
                       (only available for image folders)
    Returns:
        (images, labels (, gaplines))
    Raises:
        ValueError: if gaplines are requested together with CSV input
    """
    print("Loading words...")
    if not isinstance(dataloc, (list, tuple)):
        dataloc = [dataloc]
    if is_csv and load_gaplines:
        # Gaplines live in txt files next to the images; the CSV branch never
        # produced them and used to fail later with a NameError.
        raise ValueError("gaplines can only be loaded from image folders, "
                         "not from CSV files")

    gaplines = None
    if is_csv:
        csv.field_size_limit(sys.maxsize)
        # First pass: count data rows (lines minus header) to size the arrays
        length = 0
        for loc in dataloc:
            with open(loc) as csvfile:
                length += max(sum(1 for row in csvfile)-1, 0)

        labels = np.empty(length, dtype=object)
        images = np.empty(length, dtype=object)
        i = 0
        for loc in dataloc:
            print(loc)
            with open(loc) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    # 'shape' and 'image' columns hold comma separated numbers
                    shape = np.fromstring(
                        row['shape'],
                        sep=',',
                        dtype=int)
                    img = np.fromstring(
                        row['image'],
                        sep=', ',
                        dtype=np.uint8).reshape(shape)
                    labels[i] = row['label']
                    images[i] = img

                    print_progress_bar(i, length)
                    i += 1
    else:
        img_list = []
        tmp_labels = []
        for loc in dataloc:
            tmp_list = glob.glob(os.path.join(loc, '*.png'))
            img_list += tmp_list
            # Label is the file name part before the first underscore;
            # basename keeps this correct when loc has no trailing slash.
            tmp_labels += [os.path.basename(name).split("_")[0]
                           for name in tmp_list]

        labels = np.array(tmp_labels)
        images = np.empty(len(img_list), dtype=object)

        # Load grayscaled images
        for i, img in enumerate(img_list):
            images[i] = cv2.imread(img, 0)
            print_progress_bar(i, len(img_list))

        # Load gaplines (lines separating letters) from txt files
        if load_gaplines:
            gaplines = np.empty(len(img_list), dtype=object)
            for i, name in enumerate(img_list):
                # Same path as the image with the 'png' extension swapped
                with open(name[:-3] + 'txt', 'r') as fp:
                    gaplines[i] = np.array(simplejson.load(fp))

    if load_gaplines:
        assert len(labels) == len(images) == len(gaplines)
    else:
        assert len(labels) == len(images)
    print("-> Number of words:", len(labels))

    if load_gaplines:
        return (images, labels, gaplines)
    return (images, labels)
def _words2chars(images, labels, gaplines):
    """Cut word images into single character images along their gaplines.

    Returns the character images (object array) and a list with the index
    of every character as given by ``char2idx``.
    """
    # Total number of chars
    total = sum(len(word) for word in labels)

    char_imgs = np.empty(total, dtype=object)
    char_labels = []
    height = images[0].shape[0]

    cursor = 0
    for word_no, gaps in enumerate(gaplines):
        # Neighbouring gaplines delimit one character
        for pos, (left, right) in enumerate(zip(gaps[:-1], gaps[1:])):
            char_imgs[cursor] = images[word_no][0:height, left:right]
            char_labels.append(char2idx(labels[word_no][pos]))
            cursor += 1

    print("Loaded chars from words:", total)
    return char_imgs, char_labels
def load_chars_data(charloc='data/charclas/', wordloc='data/words/', lang='cz'):
    """
    Load character images with their labels.
    Args:
        charloc: FOLDER LOCATION of char images ('' skips this source)
        wordloc: FOLDER LOCATION of word images with gaplines ('' skips it)
        lang: name of the subfolder of charloc; any value other than 'cz'
              also transliterates the word labels with unidecode
    Returns:
        (images, labels)
    """
    print("Loading chars...")
    # The first chunk is a placeholder row which is stripped at the end.
    chunks = [np.zeros((1, 4096))]
    labels = []

    if charloc != '':
        # Get subfolders with chars
        dir_list = sorted(glob.glob(os.path.join(charloc, lang, "*/")))

        # if lang == 'en':
        chars = CHARS[:53]

        # Folder names must match the characters (folder '0' = empty char)
        assert [d[-2] if d[-2] != '0' else '' for d in dir_list] == chars

        # Every folder holds the images of one label.
        # cv2.imread(img, 0) loads in grayscale; images are scaled to
        # 64x64 = 4096 px
        for label in range(len(chars)):
            paths = glob.glob(os.path.join(dir_list[label], '*.jpg'))
            letters = np.array(
                [letter_normalization(cv2.imread(path, 0)) for path in paths])
            chunks.append(letters.reshape(len(letters), 4096))
            labels.extend([label] * len(letters))

    if wordloc != '':
        word_imgs, words, gaplines = load_words_data(wordloc, load_gaplines=True)
        if lang != 'cz':
            words = np.array([unidecode.unidecode(w) for w in words])
        char_imgs, char_labels = _words2chars(word_imgs, words, gaplines)

        labels.extend(char_labels)
        normalized = np.zeros((len(char_imgs), 4096))
        for pos, char_img in enumerate(char_imgs):
            print_progress_bar(pos, len(char_imgs))
            normalized[pos] = letter_normalization(char_img).reshape(1, 4096)
        chunks.append(normalized)

    images = np.concatenate(chunks)[1:]
    labels = np.array(labels)

    print("-> Number of chars:", len(labels))
    return (images, labels)
def load_gap_data(loc='data/gapdet/large/', slider=(60, 120), seq=False, flatten=True):
    """
    Load gap data from location with corresponding labels.
    Args:
        loc: location of folder with words separated into gap data
             images have to be named as label_timestamp.jpg, label is 0 or 1
        slider: dimensions of output images; a width above 120 is
                clipped to 120
        seq: Store images from one word as a sequence
        flatten: Flatten the output images (used with seq=True)
    Returns:
        (images, labels)
    """
    print('Loading gap data...')
    dir_list = sorted(glob.glob(os.path.join(loc, "*/")))

    # TODO Implement for higher dimensions
    # ``slider`` may be a tuple, so clip into locals instead of assigning
    # into it.
    height = slider[0]
    width = min(slider[1], 120)

    # Columns to drop from the 120 px wide source crops.  An odd surplus
    # removes the extra column on the right; ``or None`` turns a zero cut
    # into an open slice bound.
    surplus = 120 - width
    cut_s = surplus // 2 or None
    cut_e = -(surplus - surplus // 2) or None

    if seq:
        images = np.empty(len(dir_list), dtype=object)
        labels = np.empty(len(dir_list), dtype=object)

        for i, word_dir in enumerate(dir_list):
            img_list = glob.glob(os.path.join(word_dir, '*.jpg'))
            # Empty directories keep ``None`` in both arrays
            if img_list:
                # Order the images of a word by the number after the
                # underscore in their file name
                img_list.sort(
                    key=lambda path: int(
                        os.path.basename(path).split("_")[1][:-4]))
                crops = [cv2.imread(img, 0)[:, cut_s:cut_e]
                         for img in img_list]
                images[i] = np.array(
                    [crop.flatten() for crop in crops] if flatten else crops)
                labels[i] = np.array(
                    [int(os.path.basename(name).split("_")[0])
                     for name in img_list])
    else:
        # Placeholder first row, stripped after the loop
        images = np.zeros((1, height * width))
        labels = []

        for word_dir in dir_list:
            img_list = glob.glob(os.path.join(word_dir, '*.jpg'))
            if img_list:
                imgs = np.array([cv2.imread(img, 0)[:, cut_s:cut_e]
                                 for img in img_list])
                images = np.concatenate(
                    [images, imgs.reshape(len(imgs), height * width)])
                # Label is the first character of the file name
                labels.extend(
                    [int(os.path.basename(img)[0]) for img in img_list])

        images = images[1:]
        labels = np.array(labels)

    if seq:
        print("-> Number of words / gaps and letters:",
              len(labels), '/',
              sum(len(l) for l in labels if l is not None))
    else:
        print("-> Number of gaps and letters:", len(labels))
    return (images, labels)
def corresponding_shuffle(a):
    """
    Apply one common random permutation to every array in ``a``, so that
    elements sharing a position stay paired (a[x][i] with a[y][i]).
    Args:
        a: array of same length numpy arrays (modified in place)
    Returns:
        Array a with shuffled numpy arrays
    """
    size = len(a[0])
    assert all([len(item) == size for item in a])
    order = np.random.permutation(size)

    for pos, item in enumerate(a):
        a[pos] = item[order]
    return a
def sequences_to_sparse(sequences):
    """
    Create a sparse representation of sequences.
    Args:
        sequences: a list of lists where each element is a sequence
    Returns:
        A tuple with (indices, values, shape):
        indices is an int64 array of (sequence number, position) pairs,
        values is an int32 array of the corresponding items and
        shape is [number of sequences, length of the longest sequence]
    """
    indices = []
    values = []

    for n, seq in enumerate(sequences):
        indices.extend(zip([n]*len(seq), range(len(seq))))
        values.extend(seq)

    # reshape keeps the (N, 2) layout when there is no item at all
    indices = np.asarray(indices, dtype=np.int64).reshape(-1, 2)
    values = np.asarray(values, dtype=np.int32)
    # Longest sequence; taken from the lengths instead of indices.max() so
    # input made of empty sequences does not fail on a zero-size reduction.
    max_len = max((len(seq) for seq in sequences), default=0)
    shape = np.asarray([len(sequences), max_len], dtype=np.int64)
    return indices, values, shape

test/ocr/dataiterator.py ADDED Viewed

	@@ -0,0 +1,98 @@

+# -*- coding: utf-8 -*-
+"""Classes for feeding data during training."""
+import numpy as np
+import pandas as pd
+from .helpers import img_extend
+from .datahelpers import sequences_to_sparse
class BucketDataIterator():
    """Iterator for feeding CTC model during training.

    Images are sorted by width and split into ``num_buckets`` groups, so a
    batch (always drawn from a single bucket) holds images of similar width.
    """

    def __init__(self,
                 images,
                 targets,
                 num_buckets=5,
                 slider=(60, 30),
                 augmentation=None,
                 dropout=0.0,
                 train=True):
        """
        Args:
            images: sequence of 2D images
            targets: target sequence for every image
            num_buckets: number of width buckets
            slider: (height, minimal width) every image is extended to
            augmentation: object with an ``augment_images(batch)`` method,
                applied to batches when ``train`` is set; None disables it
            dropout: stored on the instance, not used by this class
            train: whether batches are augmented
        """
        self.train = train
        self.slider = slider
        self.augmentation = augmentation
        self.dropout = dropout

        # Extend into a new array so the caller's images stay untouched
        padded = np.empty(len(images), dtype=object)
        for i, image in enumerate(images):
            padded[i] = img_extend(
                image,
                (self.slider[0],
                 max(image.shape[1], self.slider[1])))
        in_length = [image.shape[1] for image in padded]

        # Create pandas dataFrame and sort it by images width (length)
        self.dataFrame = pd.DataFrame({
            'in_length': in_length,
            'images': padded,
            'targets': targets}).sort_values('in_length').reset_index(drop=True)

        bsize = len(padded) // num_buckets
        self.num_buckets = num_buckets
        self.buckets = []
        for bucket in range(num_buckets-1):
            self.buckets.append(
                self.dataFrame.iloc[bucket * bsize: (bucket+1) * bsize])
        # The last bucket also takes the remainder of the division
        self.buckets.append(self.dataFrame.iloc[(num_buckets-1) * bsize:])

        self.buckets_size = [len(bucket) for bucket in self.buckets]
        self.cursor = np.array([0] * num_buckets)
        self.bucket_order = np.random.permutation(num_buckets)
        self.bucket_cursor = 0
        self.shuffle()
        print("Iterator created.")

    def shuffle(self, idx=None):
        """Shuffle idx bucket or each bucket separately."""
        for i in [idx] if idx is not None else range(self.num_buckets):
            self.buckets[i] = self.buckets[i].sample(frac=1).reset_index(drop=True)
            self.cursor[i] = 0

    def next_batch(self, batch_size):
        """Creates next training batch of size.
        Args:
            batch_size: size of next batch
        Returns:
            (images, sparse targets, images lengths)
        """
        i_bucket = self.bucket_order[self.bucket_cursor]
        # Increment cursor and shuffle in case of new round
        self.bucket_cursor = (self.bucket_cursor + 1) % self.num_buckets
        if self.bucket_cursor == 0:
            self.bucket_order = np.random.permutation(self.num_buckets)

        # Not enough unseen items left in this bucket -> reshuffle, restart
        if self.cursor[i_bucket] + batch_size > self.buckets_size[i_bucket]:
            self.shuffle(i_bucket)

        # Handle too big batch sizes
        if (batch_size > self.buckets_size[i_bucket]):
            batch_size = self.buckets_size[i_bucket]

        res = self.buckets[i_bucket].iloc[self.cursor[i_bucket]:
                                          self.cursor[i_bucket]+batch_size]
        self.cursor[i_bucket] += batch_size

        # PAD input sequence and output
        input_max = max(res['in_length'])

        input_imgs = np.zeros(
            (batch_size, self.slider[0], input_max, 1), dtype=np.uint8)
        for i, img in enumerate(res['images']):
            input_imgs[i][:, :res['in_length'].values[i], 0] = img

        # ``augmentation`` defaults to None, so guard the call
        if self.train and self.augmentation is not None:
            input_imgs = self.augmentation.augment_images(input_imgs)
        input_imgs = input_imgs.astype(np.float32)

        targets = sequences_to_sparse(res['targets'].values)
        return input_imgs, targets, res['in_length'].values

test/ocr/helpers.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# -*- coding: utf-8 -*-
+"""
+Helper functions for ocr project
+"""
+import matplotlib.pyplot as plt
+import numpy as np
+import cv2
# Default target height (px) used by resize() and ratio().
SMALL_HEIGHT = 800
def implt(img, cmp=None, t=''):
    """Show image using plt.

    Args:
        img: image to display
        cmp: colormap forwarded to ``plt.imshow`` as ``cmap``
        t: title of the plot
    """
    plt.imshow(img, cmap=cmp)
    plt.title(t)
    plt.show()
def resize(img, height=SMALL_HEIGHT, always=False):
    """Scale the image to the given height, keeping its aspect ratio.

    Images that are not taller than ``height`` are returned untouched
    unless ``always`` is set.
    """
    if not always and img.shape[0] <= height:
        return img
    scale = height / img.shape[0]
    new_width = int(scale * img.shape[1])
    return cv2.resize(img, (new_width, height))
def ratio(img, height=SMALL_HEIGHT):
    """Getting scale ratio.

    Returns the image height (``img.shape[0]``) divided by ``height``.
    """
    return img.shape[0] / height
def img_extend(img, shape):
    """Place a 2D image into the top-left corner of a zero-filled canvas.

    Args:
        img: image (numpy array) to be extended
        shape: shape (tuple) of the resulting image
    Returns:
        uint8 array of the given shape holding ``img``, padded with zeros
        at the bottom and on the right
    """
    rows, cols = img.shape[0], img.shape[1]
    canvas = np.zeros(shape, dtype=np.uint8)
    canvas[:rows, :cols] = img
    return canvas

test/ocr/imgtransform.py ADDED Viewed

	@@ -0,0 +1,29 @@

+# -*- coding: utf-8 -*-
+"""
+Functions for transforming and preprocessing images for training
+"""
+import numpy as np
+import pandas as pd
+import cv2
+from scipy.ndimage.interpolation import map_coordinates
def coordinates_remap(image, factor_alpha, factor_sigma):
    """Distort the image by sampling it at randomly displaced coordinates.

    Two smoothed random fields, scaled by ``alpha`` (image width times
    ``factor_alpha``), displace the x and y coordinates.  The blur sigma is
    the image width times ``factor_sigma``.
    """
    shape = image.shape
    alpha = shape[1] * factor_alpha
    sigma = shape[1] * factor_sigma
    # ``| 1`` forces an odd kernel size
    kernel = int(4*sigma) | 1

    def _displacement():
        # Uniform noise in [-1, 1) smoothed by a Gaussian blur
        noise = np.random.rand(*shape) * 2 - 1
        return alpha * cv2.GaussianBlur(noise,
                                        ksize=(kernel, kernel),
                                        sigmaX=sigma)

    dx = _displacement()
    dy = _displacement()

    cols, rows = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]))
    coords = (np.reshape(rows + dy, (-1, 1)),
              np.reshape(cols + dx, (-1, 1)))

    # TODO use cv2.remap(image, dx, dy, interpolation=cv2.INTER_LINEAR)
    warped = map_coordinates(image, coords, order=1, mode='constant')
    return np.array(warped.reshape(shape))

test/ocr/mlhelpers.py ADDED Viewed

	@@ -0,0 +1,102 @@

+# -*- coding: utf-8 -*-
+"""
+Classes for controling machine learning processes
+"""
+import numpy as np
+import math
+import matplotlib.pyplot as plt
+import csv
class TrainingPlot:
    """
    Creating live plot during training
    REQUIRES notebook backend: %matplotlib notebook
    @TODO Migrate to Tensorboard

    Train loss is drawn on the left axis, train and validation accuracy
    on the right one.
    """
    # Class-level defaults; every instance overrides them in __init__.
    test_iter = 0
    loss_iter = 0
    interval = 0
    ax1 = None
    ax2 = None
    fig = None

    def __init__(self, steps, test_itr, loss_itr):
        """
        Args:
            steps: stored as ``interval``
            test_itr: number of iterations between two accuracy updates
            loss_itr: number of iterations between two loss updates
        """
        # Histories are created per instance; as class attributes the lists
        # would be shared by all plots.
        self.train_loss = []
        self.train_acc = []
        self.valid_acc = []

        self.test_iter = test_itr
        self.loss_iter = loss_itr
        self.interval = steps

        self.fig, self.ax1 = plt.subplots()
        self.ax2 = self.ax1.twinx()
        self.ax1.set_autoscaley_on(True)
        plt.ion()

        self._update_plot()

        # Description
        self.ax1.set_xlabel('Iteration')
        self.ax1.set_ylabel('Train Loss')
        self.ax2.set_ylabel('Valid. Accuracy')

        # Axes limits
        self.ax1.set_ylim([0,10])

    def _update_plot(self):
        """Redraw the figure."""
        self.fig.canvas.draw()

    def update_loss(self, loss_train, index):
        """Append a train loss value and redraw the loss curve.

        ``index`` is accepted for interface compatibility and is not used.
        """
        self.train_loss.append(loss_train)
        if len(self.train_loss) == 1:
            # The first value sets the scale of the loss axis (at most 10)
            self.ax1.set_ylim([0, min(10, math.ceil(loss_train))])
        self.ax1.plot(self.loss_iter * np.arange(len(self.train_loss)),
                      self.train_loss, 'b', linewidth=1.0)

        self._update_plot()

    def update_acc(self, acc_val, acc_train, index):
        """Append validation/train accuracy and redraw both curves.

        ``index`` is accepted for interface compatibility and is not used.
        """
        self.valid_acc.append(acc_val)
        self.train_acc.append(acc_train)

        self.ax2.plot(self.test_iter * np.arange(len(self.valid_acc)),
                      self.valid_acc, 'r', linewidth=1.0)
        self.ax2.plot(self.test_iter * np.arange(len(self.train_acc)),
                      self.train_acc, 'g',linewidth=1.0)

        self.ax2.set_title('Valid. Accuracy: {:.4f}'.format(self.valid_acc[-1]))

        self._update_plot()
class DataSet:
    """Holds images with labels and serves them in consecutive batches."""
    images = None
    labels = None
    length = 0
    index = 0

    def __init__(self, img, lbl):
        self.images = img
        self.labels = lbl
        self.length = len(img)
        self.index = 0

    def next_batch(self, batch_size):
        """Return the next batch of (images, labels).

        When the batch would run past the end of the data, images and
        labels are shuffled with one common permutation and the batch is
        taken from the start of the new epoch.
        """
        begin = self.index
        stop = begin + batch_size

        if stop > self.length:
            # Shuffle the data
            order = np.arange(self.length)
            np.random.shuffle(order)
            self.images = self.images[order]
            self.labels = self.labels[order]
            # Start next epoch
            begin, stop = 0, batch_size

        self.index = stop
        return self.images[begin:stop], self.labels[begin:stop]

test/ocr/normalization.py ADDED Viewed

	@@ -0,0 +1,207 @@

+# -*- coding: utf-8 -*-
+"""
+Include functions for normalizing images of words and letters
+Main functions: word_normalization, letter_normalization, image_standardization
+"""
+import numpy as np
+import cv2
+import math
+from .helpers import *
def image_standardization(image):
    """Shift the image to zero mean and scale it by its standard deviation.

    The deviation is bounded from below by 1/sqrt(number of elements); the
    result should match tf.image.per_image_standardization.
    """
    min_std = 1.0/math.sqrt(image.size)
    spread = max(np.std(image), min_std)
    centered = image - np.mean(image)
    return centered / spread
def _crop_add_border(img, height, threshold=50, border=True, border_size=15):
    """Crop the image to its content and optionally add side borders.

    Pixels below ``threshold`` are zeroed first.  The crop box runs from the
    first to the last row/column holding more than one non-zero pixel.  A
    non-zero ``height`` resizes the crop to that height; the border is added
    on the left and right only.
    """
    # Clear small values
    _, img = cv2.threshold(img, threshold, 255, cv2.THRESH_TOZERO)

    rows, cols = img.shape[0], img.shape[1]

    def _row_used(i):
        return np.count_nonzero(img[i, :]) > 1

    def _col_used(i):
        return np.count_nonzero(img[:, i]) > 1

    # Without any used row/column the box keeps the full extent
    y0 = next((i for i in range(rows) if _row_used(i)), 0)
    y1 = next((i + 1 for i in reversed(range(rows)) if _row_used(i)), rows)
    x0 = next((i for i in range(cols) if _col_used(i)), 0)
    x1 = next((i + 1 for i in reversed(range(cols)) if _col_used(i)), cols)

    cropped = img[y0:y1, x0:x1]
    if height != 0:
        cropped = resize(cropped, height, True)

    if not border:
        return cropped
    return cv2.copyMakeBorder(cropped, 0, 0, border_size, border_size,
                              cv2.BORDER_CONSTANT,
                              value=[0, 0, 0])
def _word_tilt(img, height, border=True, border_size=15):
    """Detect the angle and tilt the image.

    The angle is the mean angle of the Hough lines whose angle is below 0.7
    or above 2.6 rad.  The image is tilted only when more than one such line
    exists and the mean falls into the same range.  The result is always
    passed through ``_crop_add_border``.
    """
    edges = cv2.Canny(img, 50, 150, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi/180, 30)

    if lines is not None:
        # Collect the valid angles once.  The previous np.sum() over a
        # generator is deprecated in NumPy and evaluated the filter twice.
        angles = [line[0][1] for line in lines
                  if line[0][1] < 0.7 or line[0][1] > 2.6]

        # Set min number of valid lines (try higher)
        if len(angles) > 1:
            mean_angle = np.mean(angles)
            # Look for angle with correct value
            if mean_angle != 0 and (mean_angle < 0.7 or mean_angle > 2.6):
                img = _tilt_by_angle(img, mean_angle, height)
    return _crop_add_border(img, height, 50, border, border_size)
def _tilt_by_angle(img, angle, height):
    """Shear the image horizontally according to the given angle.

    The output is widened by the absolute shift so nothing is cut off.
    """
    shift = np.tan(angle) * height
    width = len(img[0])
    src = np.float32([[0, 0], [0, height], [width, height], [width, 0]])

    # Shift is positive for angle < 0.7; negative for angle > 2.6.
    # A positive shift moves the bottom edge to the right, otherwise the
    # top edge is moved to the right.
    if shift > 0:
        top, bottom = 0, shift
    else:
        top, bottom = -shift, 0
    dst = np.float32([[top, 0],
                      [bottom, height],
                      [width + bottom, height],
                      [width + top, 0]])

    transform = cv2.getPerspectiveTransform(src, dst)
    out_size = (int(width+abs(shift)), height)
    return cv2.warpPerspective(img, transform, out_size)
def _sobel_detect(channel):
    """Return the Sobel gradient magnitude of a channel as uint8.

    Magnitudes above 255 are clipped to 255.
    """
    grad_x = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
    grad_y = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
    # Combine x, y gradient magnitudes sqrt(x^2 + y^2)
    magnitude = np.hypot(grad_x, grad_y)
    return np.uint8(np.minimum(magnitude, 255))
class HysterThresh:
    """Hysteresis thresholding of a grayscale image.

    The image is inverted and stretched to 0-255.  Both thresholds are
    derived from the most frequent intensity: ``high`` lies 65 and ``low``
    45 levels above it.  Pixels at or above ``high`` become 255; pixels in
    [low, high) are kept (raised by ``255 - high``) only when they are
    8-connected to such a pixel through other [low, high) pixels.  All
    remaining pixels are 0.
    """

    def __init__(self, img):
        img = 255 - img
        lowest, highest = np.min(img), np.max(img)
        if highest == lowest:
            # A constant image has no contrast to stretch; avoid the 0/0
            # division (the result is an all-zero image either way).
            img = np.zeros(img.shape, dtype=np.float64)
        else:
            img = (img - lowest) / (highest - lowest) * 255

        hist, bins = np.histogram(img.ravel(), 256, [0,256])

        self.high = np.argmax(hist) + 65
        self.low = np.argmax(hist) + 45
        self.diff = 255 - self.high

        self.img = img
        self.im = np.zeros(img.shape, dtype=img.dtype)

    def get_image(self):
        """Run the thresholding and return the result as uint8 image."""
        self._hyster()
        return np.uint8(self.im)

    def _hyster_rec(self, r, c):
        """Mark all weak pixels connected to the pixel (r, c).

        Implemented with an explicit stack instead of recursion, so long
        connected regions cannot exceed the interpreter recursion limit.
        """
        h, w = self.img.shape
        stack = [(r, c)]
        while stack:
            row, col = stack.pop()
            # 3x3 neighbourhood clipped to the image
            for ri in range(max(row - 1, 0), min(row + 2, h)):
                for ci in range(max(col - 1, 0), min(col + 2, w)):
                    if (self.im[ri, ci] == 0
                            and self.high > self.img[ri, ci] >= self.low):
                        # The written value is non-zero, which also marks
                        # the pixel as visited
                        self.im[ri, ci] = self.img[ri, ci] + self.diff
                        stack.append((ri, ci))

    def _hyster(self):
        """Set strong pixels to 255 and grow the result from each of them."""
        strong = self.img >= self.high
        self.im[strong] = 255
        self.img[strong] = 255
        for ri, ci in np.argwhere(strong):
            self._hyster_rec(ri, ci)
def _hyst_word_norm(image):
    """Word normalization using hysteresis thresholding.

    Converts the RGB image to grayscale, smooths it with a bilateral filter
    and returns the result of ``HysterThresh``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # Previously tried filter setting, kept for reference:
#     img = cv2.bilateralFilter(gray, 0, 10, 30)
    img = cv2.bilateralFilter(gray, 10, 10, 30)
    return HysterThresh(img).get_image()
def word_normalization(image, height, border=True, tilt=True, border_size=15, hyst_norm=False):
    """Preprocess a word image - resize, threshold and optionally tilt it.

    With ``hyst_norm`` the thresholding is done by ``_hyst_word_norm``;
    otherwise the image is smoothed, inverted, stretched to 0-255 and values
    below 50 are zeroed.  The result is cropped (and bordered) either via
    ``_word_tilt`` or directly by ``_crop_add_border``.
    """
    image = resize(image, height, True)

    if hyst_norm:
        thresholded = _hyst_word_norm(image)
    else:
        smooth = cv2.bilateralFilter(image, 10, 30, 30)
        inverted = 255 - cv2.cvtColor(smooth, cv2.COLOR_RGB2GRAY)
        stretched = cv2.normalize(inverted, None, 0, 255, cv2.NORM_MINMAX)
        _, thresholded = cv2.threshold(stretched, 50, 255, cv2.THRESH_TOZERO)

    if not tilt:
        return _crop_add_border(thresholded,
                                height=height,
                                border=border,
                                border_size=border_size)
    return _word_tilt(thresholded, height, border, border_size)
def _resize_letter(img, size = 56):
    """Resize bigger side of the image to given size.

    The smaller side is scaled by the same ratio, but never below 1 px:
    for very elongated images the scaled side would round down to 0, which
    is not a valid target size for cv2.resize.
    """
    if (img.shape[0] > img.shape[1]):
        rat = size / img.shape[0]
        return cv2.resize(img, (max(1, int(rat * img.shape[1])), size))
    rat = size / img.shape[1]
    return cv2.resize(img, (size, max(1, int(rat * img.shape[0]))))
+def letter_normalization(image, is_thresh=True, dim=False):
+    """Preprocess a letter - crop, resize"""
+    if is_thresh and image.shape[0] > 0 and image.shape[1] > 0:
+        image = _crop_add_border(image, height=0, threshold=80, border=False)
+    resized = image
+    if image.shape[0] > 1 and image.shape[1] > 1:
+        resized = _resize_letter(image)
+    result = np.zeros((64, 64), np.uint8)
+    offset = [0, 0]
+    # Calculate offset for smaller size
+    if image.shape[0] > image.shape[1]:
+        offset = [int((result.shape[1] - resized.shape[1])/2), 4]
+    else:
+        offset = [4, int((result.shape[0] - resized.shape[0])/2)]
+    # Replace zeros by image
+    result[offset[1]:offset[1] + resized.shape[0],
+           offset[0]:offset[0] + resized.shape[1]] = resized
+    if dim:
+        return result, image.shape
+    return result

test/ocr/page.py ADDED Viewed

	@@ -0,0 +1,121 @@

+# -*- coding: utf-8 -*-
+"""
+Crop background and transform perspective from the photo of page
+"""
+import numpy as np
+import cv2
+from .helpers import *
+def detection(image, area_thresh = 0.5):
+    """Finding Page."""
+    small = resize(image)
+    # Edge detection
+    image_edges = _edges_detection(small, 200, 250)
+    # Close gaps between edges (double page clouse => rectangle kernel)
+    closed_edges = cv2.morphologyEx(image_edges,
+                                    cv2.MORPH_CLOSE,
+                                    np.ones((5, 11)))
+    # Countours
+    page_contour = _find_page_contours(closed_edges, small, area_thresh)
+    # Recalculate to original scale
+    page_contour = page_contour.dot(ratio(image, small.shape[0]))
+    # Transform prespective
+    new_image = _persp_transform(image, page_contour)
+    return new_image
+def _edges_detection(img, minVal, maxVal):
+    """Preprocessing (gray, thresh, filter, border) + Canny edge detection."""
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    img = cv2.bilateralFilter(img, 9, 75, 75)
+    img = cv2.adaptiveThreshold(img, 255,
+                                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                cv2.THRESH_BINARY, 115, 4)
+    # Median blur replace center pixel by median of pixels under kelner
+    # => removes thin details
+    img = cv2.medianBlur(img, 11)
+    # Add black border - detection of border touching pages
+    img = cv2.copyMakeBorder(img, 5, 5, 5, 5,
+                             cv2.BORDER_CONSTANT,
+                             value=[0, 0, 0])
+    return cv2.Canny(img, minVal, maxVal)
+def _four_corners_sort(pts):
+    """Sort corners in order: top-left, bot-left, bot-right, top-right."""
+    diff = np.diff(pts, axis=1)
+    summ = pts.sum(axis=1)
+    return np.array([pts[np.argmin(summ)],
+                     pts[np.argmax(diff)],
+                     pts[np.argmax(summ)],
+                     pts[np.argmin(diff)]])
+def _contour_offset(cnt, offset):
+    """Offset contour because of 5px border."""
+    cnt += offset
+    cnt[cnt < 0] = 0
+    return cnt
+def _find_page_contours(edges, img, area_thresh):
+    """Finding corner points of page contour."""
+    contours, hierarchy = cv2.findContours(edges,
+                                                cv2.RETR_TREE,
+                                                cv2.CHAIN_APPROX_SIMPLE)
+    # Finding biggest rectangle otherwise return original corners
+    height = edges.shape[0]
+    width = edges.shape[1]
+    MIN_COUNTOUR_AREA = height * width * area_thresh
+    MAX_COUNTOUR_AREA = (width - 10) * (height - 10)
+    max_area = MIN_COUNTOUR_AREA
+    page_contour = np.array([[0, 0],
+                             [0, height-5],
+                             [width-5, height-5],
+                             [width-5, 0]])
+    for cnt in contours:
+        perimeter = cv2.arcLength(cnt, True)
+        approx = cv2.approxPolyDP(cnt, 0.03 * perimeter, True)
+        # Page has 4 corners and it is convex
+        if (len(approx) == 4 and
+                cv2.isContourConvex(approx) and
+                max_area < cv2.contourArea(approx) < MAX_COUNTOUR_AREA):
+            max_area = cv2.contourArea(approx)
+            page_contour = approx[:, 0]
+    # Sort corners and offset them
+    page_contour = _four_corners_sort(page_contour)
+    return _contour_offset(page_contour, (-5, -5))
+def _persp_transform(img, s_points):
+    """Transform perspective from start points to target points."""
+    # Euclidean distance - calculate maximum height and width
+    height = max(np.linalg.norm(s_points[0] - s_points[1]),
+                 np.linalg.norm(s_points[2] - s_points[3]))
+    width = max(np.linalg.norm(s_points[1] - s_points[2]),
+                 np.linalg.norm(s_points[3] - s_points[0]))
+    # Create target points
+    t_points = np.array([[0, 0],
+                        [0, height],
+                        [width, height],
+                        [width, 0]], np.float32)
+    # getPerspectiveTransform() needs float32
+    if s_points.dtype != np.float32:
+        s_points = s_points.astype(np.float32)
+    M = cv2.getPerspectiveTransform(s_points, t_points)
+    return cv2.warpPerspective(img, M, (int(width), int(height)))

test/ocr/tfhelpers.py ADDED Viewed

	@@ -0,0 +1,73 @@

+# -*- coding: utf-8 -*-
+"""
+Provide functions and classes:
+Model       = Class for loading and using trained models from tensorflow
+create_cell = function for creating RNN cells with wrappers
+"""
+#import tensorflow as tf
+import tensorflow.compat.v1 as tf
+from tensorflow.python.ops.rnn_cell_impl import LSTMCell, ResidualWrapper, DropoutWrapper, MultiRNNCell
+class Model():
+    """Loading and running isolated tf graph."""
+    def __init__(self, loc, operation='activation', input_name='x'):
+        """
+        loc: location of file containing saved model
+        operation: name of operation for running the model
+        input_name: name of input placeholder
+        """
+        self.input = input_name + ":0"
+        self.graph = tf.Graph()
+        self.sess = tf.Session(graph=self.graph)
+        with self.graph.as_default():
+            saver = tf.train.import_meta_graph(loc + '.meta', clear_devices=True)
+            saver.restore(self.sess, loc)
+            self.op = self.graph.get_operation_by_name(operation).outputs[0]
+    def run(self, data):
+        """Run the specified operation on given data."""
+        return self.sess.run(self.op, feed_dict={self.input: data})
+    def eval_feed(self, feed):
+        """Run the specified operation with given feed."""
+        return self.sess.run(self.op, feed_dict=feed)
+    def run_op(self, op, feed, output=True):
+        """Run given operation with the feed."""
+        if output:
+            return self.sess.run(
+                self.graph.get_operation_by_name(op).outputs[0],
+                feed_dict=feed)
+        else:
+            self.sess.run(
+                self.graph.get_operation_by_name(op),
+                feed_dict=feed)
+def _create_single_cell(cell_fn, num_units, is_residual=False, is_dropout=False, keep_prob=None):
+    """Create single RNN cell based on cell_fn."""
+    cell = cell_fn(num_units)
+    if is_dropout:
+        cell = DropoutWrapper(cell, input_keep_prob=keep_prob)
+    if is_residual:
+        cell = ResidualWrapper(cell)
+    return cell
+def create_cell(num_units, num_layers, num_residual_layers, is_dropout=False, keep_prob=None, cell_fn=LSTMCell):
+    """Create corresponding number of RNN cells with given wrappers."""
+    cell_list = []
+    for i in range(num_layers):
+        cell_list.append(_create_single_cell(
+            cell_fn=cell_fn,
+            num_units=num_units,
+            is_residual=(i >= num_layers - num_residual_layers),
+            is_dropout=is_dropout,
+            keep_prob=keep_prob
+        ))
+    if num_layers == 1:
+        return cell_list[0]
+    return MultiRNNCell(cell_list)

test/ocr/viz.py ADDED Viewed

	@@ -0,0 +1,22 @@

+def print_progress_bar(iteration,
+                       total,
+                       prefix = '',
+                       suffix = ''):
+    """Call in a loop to create terminal progress bar.
+    Args:
+        iteration: current iteration (Int)
+        total: total iterations (Int)
+        prefix: prefix string (Str)
+        suffix: suffix string (Str)
+    """
+    # Printing slowes down the loop
+    if iteration % (total // 100) == 0:
+        length = 40
+        iteration += 1
+        percent = (100 * iteration) // (total * 99/100)
+        filled_length = int(length * percent / 100)
+        bar = '█' * filled_length + '-' * (length - filled_length)
+        print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = '\r')
+        if iteration >= total * 99/100:
+            print()

test/ocr/words.py ADDED Viewed

	@@ -0,0 +1,223 @@

+# -*- coding: utf-8 -*-
+"""
+Detect words on the page
+return array of words' bounding boxes
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+import cv2
+from .helpers import *
+def detection(image, join=False):
+    """Detecting the words bounding boxes.
+    Return: numpy array of bounding boxes [x, y, x+w, y+h]
+    """
+    # Preprocess image for word detection
+    blurred = cv2.GaussianBlur(image, (5, 5), 18)
+    edge_img = _edge_detect(blurred)
+    ret, edge_img = cv2.threshold(edge_img, 50, 255, cv2.THRESH_BINARY)
+    bw_img = cv2.morphologyEx(edge_img, cv2.MORPH_CLOSE,
+                              np.ones((15,15), np.uint8))
+    return _text_detect(bw_img, image, join)
+def sort_words(boxes):
+    """Sort boxes - (x, y, x+w, y+h) from left to right, top to bottom."""
+    mean_height = sum([y2 - y1 for _, y1, _, y2 in boxes]) / len(boxes)
+    boxes.view('i8,i8,i8,i8').sort(order=['f1'], axis=0)
+    current_line = boxes[0][1]
+    lines = []
+    tmp_line = []
+    for box in boxes:
+        if box[1] > current_line + mean_height:
+            lines.append(tmp_line)
+            tmp_line = [box]
+            current_line = box[1]
+            continue
+        tmp_line.append(box)
+    lines.append(tmp_line)
+    for line in lines:
+        line.sort(key=lambda box: box[0])
+    return lines
+def _edge_detect(im):
+    """
+    Edge detection using sobel operator on each layer individually.
+    Sobel operator is applied for each image layer (RGB)
+    """
+    return np.max(np.array([_sobel_detect(im[:,:, 0]),
+                            _sobel_detect(im[:,:, 1]),
+                            _sobel_detect(im[:,:, 2])]), axis=0)
+def _sobel_detect(channel):
+    """Sobel operator."""
+    sobelX = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
+    sobelY = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
+    sobel = np.hypot(sobelX, sobelY)
+    sobel[sobel > 255] = 255
+    return np.uint8(sobel)
+def union(a,b):
+    x = min(a[0], b[0])
+    y = min(a[1], b[1])
+    w = max(a[0]+a[2], b[0]+b[2]) - x
+    h = max(a[1]+a[3], b[1]+b[3]) - y
+    return [x, y, w, h]
+def _intersect(a,b):
+    x = max(a[0], b[0])
+    y = max(a[1], b[1])
+    w = min(a[0]+a[2], b[0]+b[2]) - x
+    h = min(a[1]+a[3], b[1]+b[3]) - y
+    if w<0 or h<0:
+        return False
+    return True
+def _group_rectangles(rec):
+    """
+    Uion intersecting rectangles.
+    Args:
+        rec - list of rectangles in form [x, y, w, h]
+    Return:
+        list of grouped ractangles
+    """
+    tested = [False for i in range(len(rec))]
+    final = []
+    i = 0
+    while i < len(rec):
+        if not tested[i]:
+            j = i+1
+            while j < len(rec):
+                if not tested[j] and _intersect(rec[i], rec[j]):
+                    rec[i] = union(rec[i], rec[j])
+                    tested[j] = True
+                    j = i
+                j += 1
+            final += [rec[i]]
+        i += 1
+    return final
+def _text_detect(img, image, join=False):
+    """Text detection using contours."""
+    small = resize(img, 2000)
+    # Finding contours
+    mask = np.zeros(small.shape, np.uint8)
+    cnt, hierarchy = cv2.findContours(np.copy(small),
+                                           cv2.RETR_CCOMP,
+                                           cv2.CHAIN_APPROX_SIMPLE)
+    index = 0
+    boxes = []
+    # Go through all contours in top level
+    while (index >= 0):
+        x,y,w,h = cv2.boundingRect(cnt[index])
+        cv2.drawContours(mask, cnt, index, (255, 255, 255), cv2.FILLED)
+        maskROI = mask[y:y+h, x:x+w]
+        # Ratio of white pixels to area of bounding rectangle
+        r = cv2.countNonZero(maskROI) / (w * h)
+        # Limits for text
+        if (r > 0.1
+            and 1600 > w > 10
+            and 1600 > h > 10
+            and h/w < 3
+            and w/h < 10
+            and (60 // h) * w < 1000):
+            boxes += [[x, y, w, h]]
+        index = hierarchy[0][index][0]
+    if join:
+        # Need more work
+        boxes = _group_rectangles(boxes)
+    # image for drawing bounding boxes
+    small = cv2.cvtColor(small, cv2.COLOR_GRAY2RGB)
+    bounding_boxes = np.array([0,0,0,0])
+    for (x, y, w, h) in boxes:
+        cv2.rectangle(small, (x, y),(x+w,y+h), (0, 255, 0), 2)
+        bounding_boxes = np.vstack((bounding_boxes,
+                                    np.array([x, y, x+w, y+h])))
+    implt(small, t='Bounding rectangles')
+    boxes = bounding_boxes.dot(ratio(image, small.shape[0])).astype(np.int64)
+    return boxes[1:]
+def textDetectWatershed(thresh):
+    """NOT IN USE - Text detection using watershed algorithm.
+    Based on: http://docs.opencv.org/trunk/d3/db4/tutorial_py_watershed.html
+
+    Draws bounding rectangles of detected regions on a copy of the image and
+    shows it with implt(); nothing is returned.
+
+    Args:
+        thresh: thresholded image used to derive the watershed markers
+    """
+    # NOTE(review): IMG is not defined anywhere in the visible part of this
+    # module - presumably a leftover notebook global; confirm before use.
+    img = cv2.cvtColor(cv2.imread("data/textdet/%s.jpg" % IMG),
+                       cv2.COLOR_BGR2RGB)
+    img = resize(img, 3000)
+    thresh = resize(thresh, 3000)
+    # noise removal
+    kernel = np.ones((3,3),np.uint8)
+    opening = cv2.morphologyEx(thresh,cv2.MORPH_OPEN,kernel, iterations = 3)
+    # sure background area
+    sure_bg = cv2.dilate(opening,kernel,iterations=3)
+    # Finding sure foreground area
+    dist_transform = cv2.distanceTransform(opening,cv2.DIST_L2,5)
+    ret, sure_fg = cv2.threshold(dist_transform,
+                                 0.01*dist_transform.max(), 255, 0)
+    # Finding unknown region
+    sure_fg = np.uint8(sure_fg)
+    unknown = cv2.subtract(sure_bg,sure_fg)
+    # Marker labelling
+    ret, markers = cv2.connectedComponents(sure_fg)
+    # Add one to all labels so that sure background is not 0, but 1
+    markers += 1
+    # Now, mark the region of unknown with zero
+    markers[unknown == 255] = 0
+    markers = cv2.watershed(img, markers)
+    implt(markers, t='Markers')
+    image = img.copy()
+    # NOTE(review): img was converted to RGB above but is treated as BGR here;
+    # only the shape of `gray` is used below, so the result is unaffected.
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    for mark in np.unique(markers):
+        # mark == 0 --> background
+        if mark == 0:
+            continue
+        # Draw it on mask and detect biggest contour
+        mask = np.zeros(gray.shape, dtype="uint8")
+        mask[markers == mark] = 255
+        # [-2] picks the contour list on both OpenCV 3 and OpenCV 4
+        cnts = cv2.findContours(mask.copy(),
+                                cv2.RETR_EXTERNAL,
+                                cv2.CHAIN_APPROX_SIMPLE)[-2]
+        c = max(cnts, key=cv2.contourArea)
+        # Draw a bounding rectangle if it contains text
+        x,y,w,h = cv2.boundingRect(c)
+        # NOTE(review): a single contour is passed where drawContours takes a
+        # list of contours - verify this draws what is intended.
+        cv2.drawContours(mask, c, 0, (255, 255, 255), cv2.FILLED)
+        maskROI = mask[y:y+h, x:x+w]
+        # Ratio of white pixels to area of bounding rectangle
+        r = cv2.countNonZero(maskROI) / (w * h)
+        # Limits for text
+        if r > 0.2 and 2000 > w > 15 and 1500 > h > 15:
+            cv2.rectangle(image, (x, y),(x+w,y+h), (0, 255, 0), 2)
+    implt(image)

test/ocr_test.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import sys
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import tensorflow as tf
+import cv2
+sys.path.append('../src')
+from ocr.normalization import word_normalization, letter_normalization
+from ocr import page, words, characters
+from ocr.helpers import implt, resize
+from ocr.tfhelpers import Model
+from ocr.datahelpers import idx2char
+# Path of the page photo to recognise
+IMG = '../data/test.jpg'    # 1, 2, 3
+# Language sub-folder of the character classifier model
+LANG = 'en'
+# You can use only one of these two
+# You HAVE TO train the CTC model by yourself using word_classifier_CTC.ipynb
+MODEL_LOC_CHARS = f'../models/char-clas/{LANG}/CharClassifier'
+MODEL_LOC_CTC = '../models/word-clas/CTC/Classifier1'
+# Both models are restored from disk at import time
+CHARACTER_MODEL = Model(MODEL_LOC_CHARS)
+CTC_MODEL = Model(MODEL_LOC_CTC, 'word_prediction')
+# OpenCV loads images as BGR; the pipeline below works on RGB
+image = cv2.cvtColor(cv2.imread(IMG), cv2.COLOR_BGR2RGB)
+# implt(image)
+# Crop image and get bounding boxes
+crop = page.detection(image)
+# implt(crop)
+boxes = words.detection(crop)
+# Group the boxes into text lines, each ordered left to right
+lines = words.sort_words(boxes)
+def recognise(img):
+    """Recognising words using CTC Model."""
+    img = word_normalization(
+        img,
+        64,
+        border=False,
+        tilt=False,
+        hyst_norm=False)
+    length = img.shape[1]
+    # Input has shape [batch_size, height, width, 1]
+    input_imgs = np.zeros(
+            (1, 64, length, 1), dtype=np.uint8)
+    input_imgs[0][:, :length, 0] = img
+    pred = CTC_MODEL.eval_feed({
+        'inputs:0': input_imgs,
+        'inputs_length:0': [length],
+        'keep_prob:0': 1})[0]
+    word = ''
+    for i in pred:
+        word += idx2char(i + 1)
+    return word
+# implt(crop)
+for line in lines:
+    print(" ".join([recognise(crop[y1:y2, x1:x2]) for (x1, y1, x2, y2) in line]))

test/openai_demo.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import os
+from openai import OpenAI
+from llamaapi import LlamaAPI
+# Initialize the llamaapi with your api_token
+llama = LlamaAPI("LL-AirERHEk0jLIE1yEPvMXeobNfLsqLWJWcxLRS53obrZ3XyqMTfZc4EAuOs7r3wso")
+api_key = "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"
+os.environ["OPENAI_API_KEY"] = api_key
+client = OpenAI()
+prompt = 'hello, who are you ?'
+chat_completion = client.chat.completions.create(
+    model="gpt-4",
+    messages=[
+        {"role": "system", "content": "Provide feedback on the inputted writing sample from an ESL learner. "
+                                      "Focus on areas such as grammar, vocabulary usage, and overall coherence and organization of the essay. "
+                                      "Offer corrective feedback on errors, suggest improvements, and highlight positive aspects to encourage "
+                                      "the learner. Please ensure the feedback is constructive, clear, and supportive to help the learner "
+                                      "understand and apply the suggestions. Always frame feedback in a positive, constructive manner. "
+                                      "Focus on how the student can improve rather than just highlighting mistakes. Provide clear examples "
+                                      "when pointing out errors or suggesting improvements. Prompt the learner to reflect on specific parts of "
+                                      "their writing"},
+        {"role": "user", "content": prompt},
+    ]
+)
+print(chat_completion.choices[0].message.content.strip())

test/streamlit_demo.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import streamlit as st
+from faker import Faker
+st.title('Smart Robot')
+user_input = st.chat_input('你想说什么')
+fake = Faker()
+def generate_response():
+    output = fake.text()
+    return output
+if user_input:
+    container = st.container(border=True)
+    bot_response = generate_response()
+    container.write("机器人：" + bot_response)
+    container.write("asdjkl")

test/test.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import streamlit as st
+import webbrowser
+# 定义跳转到 Google 的函数
+def redirect_to_google():
+    url = 'https://www.google.com'
+    webbrowser.open_new_tab(url)
+# # 在应用程序中调用跳转函数
+# redirect_to_google()
+if st.button('go'):
+    print('go')
+    redirect_to_google()