End of training
d81becf
verified
-
attn_loss_fn=mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=0.0001, warmup_ratio=0.1
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=0.0001, warmup_ratio=0
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=0.0004, warmup_ratio=0
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=0.004, warmup_ratio=0
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=cos, hs_weight=10.0, learning_rate=0.001, warmup_ratio=0
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=cos, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0.1
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=cos, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=kl, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0
End of training
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=0.001, warmup_ratio=0.1
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=0.001, warmup_ratio=0
End of training
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0.1
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.0001, warmup_ratio=0
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.0004, warmup_ratio=0.1
End of training
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.0004, warmup_ratio=0
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.001, warmup_ratio=0.1
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.001, warmup_ratio=0
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0.1
Training in progress, step 61875
-
attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0
Training in progress, step 61875
-
attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=mse, learning_rate=0.0004
Training in progress, step 15469
-
attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=mse, learning_rate=4e-05
Training in progress, step 15469
-
attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=mse, learning_rate=4e-06
Training in progress, step 15469
-
attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=0.0004
Training in progress, step 15469
-
attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=4e-05
Training in progress, step 15469
-
attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=4e-06
Training in progress, step 15469
-
attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=cos, hs_weight=10.0, learning_rate=4e-05
Training in progress, step 61875
-
attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=0.0004
Training in progress, step 61875
-
attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=4e-05
Training in progress, step 61875
-
attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=4e-06
Training in progress, step 61875
-
attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.0004
Training in progress, step 15469
-
attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=4e-05
Training in progress, step 15469
-
attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=4e-06
Training in progress, step 15469