|
import os |
|
import cv2 |
|
import tensorflow as tf |
|
import numpy as np |
|
import yaml |
|
|
|
from typing import List |
|
|
|
# Load the project configuration once, at import time. 'config.yml' is a
# relative path, so it is resolved against the current working directory.
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# platform's locale default.
with open('config.yml', 'r', encoding='utf-8') as config_file_obj:
    yaml_config = yaml.safe_load(config_file_obj)

# Dataset locations, taken from the `datasets` section of the configuration.
dataset_config = yaml_config['datasets']
VIDEO_DIR = dataset_config['video_dir']
ALIGNMENTS_DIR = dataset_config['alignments_dir']
|
|
|
# Characters that make up the lookup vocabulary: lowercase letters, the
# marks ' ? !, the digits 1-9 and the space character.
vocab = list("abcdefghijklmnopqrstuvwxyz'?!123456789 ")

# Forward lookup layer: character -> integer id. Characters outside the
# vocabulary map to the out-of-vocabulary token "".
char_to_num = tf.keras.layers.StringLookup(oov_token="", vocabulary=vocab)

# Inverse lookup layer: integer id -> character. It reuses the forward
# layer's vocabulary so both directions agree on the id assignment.
num_to_char = tf.keras.layers.StringLookup(
    invert=True,
    oov_token="",
    vocabulary=char_to_num.get_vocabulary(),
)
|
|
|
|
|
def load_video(path: str) -> tf.Tensor:
    """
    Read every frame of a video, crop it and standardise the pixel values.

    adapted original loading code using this tutorial about openCV
    https://learnopencv.com/read-write-and-display-a-video-using-opencv-cpp-python/

    Each frame is converted to a single grayscale channel and cropped to
    rows 190-235 and columns 80-219. The stacked frames are then shifted by
    their overall mean and divided by their overall standard deviation.

    Args:
        path: Location of the video file handed to ``cv2.VideoCapture``.

    Returns:
        A float32 tensor holding the standardised, cropped frames.

    Raises:
        ValueError: If the video cannot be opened or yields no frames.
    """
    cap = cv2.VideoCapture(path)
    frames = []

    # Release the capture handle even if frame processing fails part-way.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a read error): stop collecting frames.
                break

            # NOTE(review): OpenCV normally decodes frames in BGR order while
            # this conversion assumes RGB - left as-is so the output matches
            # what existing models were trained on; confirm before changing.
            frame = tf.image.rgb_to_grayscale(frame)
            frames.append(frame[190:236, 80:220, :])
    finally:
        cap.release()

    if not frames:
        # Without this guard an unreadable file would silently produce an
        # empty result instead of an error the caller can report.
        raise ValueError(f'no frames could be read from video: {path}')

    # NOTE(review): the mean is computed and subtracted before the cast to
    # float32, so if the frames have an integer dtype this is integer
    # arithmetic. Kept unchanged to preserve the existing numeric output -
    # confirm whether that is intended.
    mean = tf.math.reduce_mean(frames)
    std = tf.math.reduce_std(tf.cast(frames, tf.float32))
    return tf.cast((frames - mean), tf.float32) / std
|
|
|
|
|
def load_alignments(path: str) -> tf.Tensor:
    """
    Read an alignment file and encode its words as character ids.

    Every non-blank line is split on whitespace and its third field is taken
    as the word. Words equal to 'sil' are dropped. The remaining words are
    separated by single spaces, split into characters and mapped to integer
    ids with ``char_to_num``.

    Args:
        path: Location of the alignment text file.

    Returns:
        A tensor of character ids for the words, without a leading space.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
    """
    tokens = []
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at the end
                # of the file) instead of failing on the field lookup.
                continue

            word = fields[2]
            if word != 'sil':
                # Each word is preceded by a separator; extending in place
                # avoids rebuilding the whole list for every word.
                tokens.extend((' ', word))

    characters = tf.reshape(
        tf.strings.unicode_split(tokens, input_encoding='UTF-8'), (-1)
    )
    # Drop the first element: the separator placed before the first word.
    return char_to_num(characters)[1:]
|
|
|
|
|
def load_data(tf_path):
    """
    Load the video frames and the encoded alignments for one sample.

    The sample is identified by the last directory component and the file
    stem of ``tf_path``. The video is read from
    ``VIDEO_DIR/<directory>/<stem>.mpg`` and the alignments from
    ``ALIGNMENTS_DIR/<directory>/<stem>.align``.

    Args:
        tf_path: Object whose ``.numpy()`` yields the UTF-8 encoded sample
            path - presumably a scalar string tensor; confirm with callers.

    Returns:
        A ``(frames, alignments)`` pair as produced by ``load_video`` and
        ``load_alignments``.

    Raises:
        Exception: Whatever ``load_video`` raises is re-raised after the
            offending video path has been printed.
    """
    sample_path = tf_path.numpy().decode('utf-8')

    # Keep only "<parent directory name>/<file name without extension>".
    parent_dir, file_name = os.path.split(sample_path)
    group_name = os.path.basename(parent_dir)
    stem, _extension = os.path.splitext(file_name)
    relative_stem = os.path.join(group_name, stem)

    video_path = os.path.join(VIDEO_DIR, f'{relative_stem}.mpg')
    alignment_path = os.path.join(ALIGNMENTS_DIR, f'{relative_stem}.align')

    try:
        frames = load_video(video_path)
    except Exception as exc:
        # Report which file failed before propagating the error.
        print('BAD_VIDEO', video_path)
        raise exc

    return frames, load_alignments(alignment_path)
|
|
|
|
|
def mappable_function(path:str) -> List[str]:
    """
    Run ``load_data`` on ``path`` through ``tf.py_function``.

    The wrapped call declares two outputs with dtypes ``tf.float32`` and
    ``tf.int64``, in that order.

    NOTE(review): the annotations say ``str`` in and ``List[str]`` out, but
    the value is forwarded to ``tf.py_function`` and its result is returned
    unchanged - confirm the real types and correct the annotations.
    """
    return tf.py_function(
        func=load_data,
        inp=[path],
        Tout=(tf.float32, tf.int64),
    )