# Training / dataset options, expressed as plain module-level constants.
#
# NOTE(review): each line of this file previously ended in a stray `|`
# token, which made the module a SyntaxError on import. Those tokens are
# removed here; every name, value and the assignment order are unchanged.

# --- Run environment --------------------------------------------------------

# Presumably the CUDA device id(s) to expose, kept as a string - TODO confirm
# against the code that reads it.
gpu = '0'

random_seed = 0

# --- Data lists -------------------------------------------------------------

# Tag interpolated into the train/val list paths and into `save_prefix`.
data_type = 'LRS2_CTC2'

video_path = ''

train_list = f'data/{data_type}_train.txt'

val_list = f'data/{data_type}_val.txt'

# NOTE(review): the value names GRID while `dataset` below is 'LRS2';
# confirm whether this option is still read.
anno_path = 'GRID_align_txt'

# --- Batch shaping and optimisation -----------------------------------------

# Presumably the lengths that video / text sequences are padded to - TODO
# confirm units (frames / tokens) against the dataset loader.
vid_padding = 100

txt_padding = 200

batch_size = 32

base_lr = 8e-5

num_workers = 8

max_epoch = 10000

# Presumably intervals (in iterations) for logging and for evaluation -
# TODO confirm against the training loop.
display = 10

test_step = 1000

# Path prefix built from `data_type`; presumably where checkpoints are
# written - verify against the training script.
save_prefix = f'weights/LipNet_{data_type}'

is_optimize = True

# --- Run identification and dataset root ------------------------------------

run_name = 'phonemes-lrs2'

# Root directory that all LRS2 paths below are derived from.
# NOTE(review): this is a machine-specific absolute path, and the name reads
# 'lsr2' rather than 'lrs2'. The name is kept as-is because other modules may
# import it.
lsr2_dir = '/home/milselarch/projects/SUTD/50-035/LRS2'

# --- Model variants ---------------------------------------------------------

pre_gru_repeats = 1

frame_doubling = False

# --- Dataset directories (all derived from `lsr2_dir`) ----------------------

video_dir = f'{lsr2_dir}/lrs2_v1/main'

# NOTE(review): relative GRID path in an otherwise LRS2 configuration;
# confirm whether this option is still read.
audio_dir = 'lip/GRID_wavs'

alignments_dir = f'{lsr2_dir}/lrs2_v1/mvlrs_v1/main'

crop_images_dir = f'{lsr2_dir}/lrs2_v1/mvlrs_v1/main_images'

# Alias: the image directory currently points at the cropped-images directory.
images_dir = crop_images_dir

dataset = 'LRS2'

phonemes_dir = f'{lsr2_dir}/lrs2_v1/mvlrs_v1/main_phonemes'

cache_videos = False

use_lip_crops = True

# --- Label vocabularies -----------------------------------------------------

# Presumably keys selecting a character map and a phoneme map defined
# elsewhere in the project - TODO confirm where these names are resolved.
text_char_map = 'lsr2_text'

char_map = 'cmu_phonemes'