{
  "dataset_name": "mozilla-foundation/common_voice_13_0",
  "model_name_or_path": "facebook/wav2vec2-large-xlsr-53",
  "dataset_config_name": "eo",
  "output_dir": "./wav2vec2-common_voice_13_0-eo-10",
  "train_split_name": "train",
  "eval_split_name": "validation",
  "eval_metrics": ["cer", "wer"],
  "overwrite_output_dir": false,
  "preprocessing_num_workers": 1,
  "num_train_epochs": 5,
  "per_device_train_batch_size": 16,
  "gradient_accumulation_steps": 2,
  "gradient_checkpointing": true,
  "learning_rate": 3e-5,
  "warmup_steps": 500,
  "evaluation_strategy": "steps",
  "text_column_name": "sentence",
  "length_column_name": "input_length",
  "save_steps": 1000,
  "eval_steps": 1000,
  "layerdrop": 0.2,
  "save_total_limit": 3,
  "freeze_feature_encoder": true,
  "chars_to_ignore": "-!\"'(),.:;=?_`¨«¸»ʼ‑–—‘’“”„…‹›♫?",
  "chars_to_substitute": {
    "przy": "pŝe",
    "byn": "bin",
    "cx": "ĉ",
    "sx": "ŝ",
    "\ufb01": "fi",
    "\ufb02": "fl",
    "ǔ": "ŭ",
    "ñ": "nj",
    "á": "a",
    "é": "e",
    "ü": "ŭ",
    "y": "j",
    "qu": "ku"
  },
  "fp16": true,
  "group_by_length": true,
  "push_to_hub": true,
  "do_train": true,
  "do_eval": true
}