---
# Training/model hyperparameters for a "unified_metric" regression model
# (COMET/LENS-style, judging by class_identifier and pretrained_model).
# Reconstructed into block YAML — the original file had all pairs collapsed
# onto one line, which is not parseable YAML.

# --- model architecture ---
activations: Tanh
class_identifier: unified_metric
dropout: 0.15
encoder_model: RoBERTa
final_activation: null
hidden_sizes:
  - 384
# NOTE(review): key spelling "initalize" (sic) kept as-is — the consuming
# code presumably looks up this exact key; do not "fix" without checking.
initalize_pretrained_unified_weights: true
layer: mix
layer_norm: true
layer_transformation: sparsemax
load_pretrained_weights: true
pool: avg
pretrained_model: roberta-large
sent_layer: mix
word_layer: 24

# --- optimization ---
batch_size: 4
encoder_learning_rate: 1.0e-05
keep_embeddings_frozen: true
layerwise_decay: 0.95
learning_rate: 3.1e-05
loss: mse
loss_lambda: 0.9
nr_frozen_epochs: 0.3
optimizer: AdamW
warmup_steps: 0

# --- targets / word-level supervision ---
continuous_word_labels: false
input_segments:
  - edit_id_simplified
  - edit_id_original
score_target: lens_score
span_targets:
  - edit_id_simplified
  - edit_id_original
span_tokens:
  - bad
word_level_training: true
word_weights:
  - 0.1
  - 0.9