"""Filter the LRS2 split lists against a list of valid entries.

Loads the set of valid entries from ``valid_lrs2_filepath`` (one entry per
line), then for every split file named in ``filenames`` keeps only the
entries that are in that set, writes them sorted to
``../data/LRS2_CTC{CTC_SCALE}_<split file name>`` and prints how many
entries were kept next to the total number of lines read.
"""
import os

# Selects which valid-pairs file is read and tags the exported file names.
# NOTE(review): what the scale value itself controls is not visible here.
CTC_SCALE = 2
lrs2_dirpath = '/media/milselarch/47FC4BC577667AAD/LRS2'
valid_lrs2_filepath = f'../data/LRS2-CTC{CTC_SCALE}-valid-pairs.txt'
filenames = ['train.txt', 'test.txt', 'val.txt']

# A context manager closes the handle deterministically, even on error.
with open(valid_lrs2_filepath) as valid_file:
    valid_lrs2_pairs = {line.strip() for line in valid_file}

for filename in filenames:
    filepath = os.path.join(lrs2_dirpath, filename)
    with open(filepath, 'r') as split_file:
        # Kept as a list because the total line count is reported below.
        lines = split_file.readlines()

    valid_lines = []
    for line in lines:
        # Only the text before the first space is compared; when the line
        # has no space the whole line is used. strip() drops the newline.
        line = line.partition(' ')[0].strip()
        if line in valid_lrs2_pairs:
            valid_lines.append(line)

    valid_lines = sorted(valid_lines)
    export_filename = f'../data/LRS2_CTC{CTC_SCALE}_{filename}'
    # Closing the handle guarantees the output is flushed to disk before
    # the next split is processed. No trailing newline is written.
    with open(export_filename, 'w') as export_file:
        export_file.write('\n'.join(valid_lines))

    print(f'<<< {filename} >>>')
    print(f'VALID: {len(valid_lines)}')
    print(f'TOTAL: {len(lines)}')