abdiharyadi commited on
Commit
159f2b3
·
verified ·
1 Parent(s): be4477e

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8ae5ab01ccbb566cd3dd8ee6dc1252a85b1c7f271331643ea03050b0e8ec9d3
3
  size 1575259780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a32de7d5357341b365094d4b0bdeccdb3aae0d77fcd2fe97c3e7e0b29c7aab4e
3
  size 1575259780
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acc7dc7d28456c78f8d6080cf32977d4aebc2e514871c0bf819e045e28972103
3
  size 3150397656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b65ad8a4d4408d8ea2d47cfca03bb0202226b3d1773d79cdbcc48402654bb48a
3
  size 3150397656
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e8361756a3864b69a416a28667f28ee22be0ff5d84048a54680e7de8b87f656
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0982c65fadbf6bf7482db7062c965549f8158635957f4bdc41338317eca289
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dede00403a11b2b14e170429444fee20dcef7db14bc3c7a358467f448966c579
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fe116f764987ad933a40001607fac90b24f963059ef9f0559216685859936f0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.0747,
3
- "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-7739",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 7739,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2349,6 +2349,1180 @@
2349
  "eval_samples_per_second": 0.876,
2350
  "eval_steps_per_second": 0.438,
2351
  "step": 7739
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2352
  }
2353
  ],
2354
  "logging_steps": 20,
@@ -2368,7 +3542,7 @@
2368
  "attributes": {}
2369
  }
2370
  },
2371
- "total_flos": 5.079387107794944e+16,
2372
  "train_batch_size": 2,
2373
  "trial_name": null,
2374
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.0795,
3
+ "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-11608",
4
+ "epoch": 2.9998707843390617,
5
  "eval_steps": 500,
6
+ "global_step": 11608,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2349
  "eval_samples_per_second": 0.876,
2350
  "eval_steps_per_second": 0.438,
2351
  "step": 7739
2352
+ },
2353
+ {
2354
+ "epoch": 2.000258431321876,
2355
+ "learning_rate": 1.8778037080254114e-07,
2356
+ "loss": 2.8846,
2357
+ "step": 7740
2358
+ },
2359
+ {
2360
+ "epoch": 2.0054270577594004,
2361
+ "learning_rate": 1.8774795799299882e-07,
2362
+ "loss": 2.954,
2363
+ "step": 7760
2364
+ },
2365
+ {
2366
+ "epoch": 2.010595684196925,
2367
+ "learning_rate": 1.8771554518345649e-07,
2368
+ "loss": 2.9074,
2369
+ "step": 7780
2370
+ },
2371
+ {
2372
+ "epoch": 2.0157643106344487,
2373
+ "learning_rate": 1.8768313237391415e-07,
2374
+ "loss": 2.8973,
2375
+ "step": 7800
2376
+ },
2377
+ {
2378
+ "epoch": 2.020932937071973,
2379
+ "learning_rate": 1.8765071956437184e-07,
2380
+ "loss": 2.9155,
2381
+ "step": 7820
2382
+ },
2383
+ {
2384
+ "epoch": 2.0261015635094974,
2385
+ "learning_rate": 1.876183067548295e-07,
2386
+ "loss": 2.9162,
2387
+ "step": 7840
2388
+ },
2389
+ {
2390
+ "epoch": 2.031270189947022,
2391
+ "learning_rate": 1.8758589394528716e-07,
2392
+ "loss": 2.8391,
2393
+ "step": 7860
2394
+ },
2395
+ {
2396
+ "epoch": 2.0364388163845457,
2397
+ "learning_rate": 1.8755348113574485e-07,
2398
+ "loss": 2.8963,
2399
+ "step": 7880
2400
+ },
2401
+ {
2402
+ "epoch": 2.04160744282207,
2403
+ "learning_rate": 1.8752106832620248e-07,
2404
+ "loss": 2.9148,
2405
+ "step": 7900
2406
+ },
2407
+ {
2408
+ "epoch": 2.0467760692595944,
2409
+ "learning_rate": 1.8748865551666017e-07,
2410
+ "loss": 2.9378,
2411
+ "step": 7920
2412
+ },
2413
+ {
2414
+ "epoch": 2.0519446956971183,
2415
+ "learning_rate": 1.8745624270711783e-07,
2416
+ "loss": 2.8385,
2417
+ "step": 7940
2418
+ },
2419
+ {
2420
+ "epoch": 2.0571133221346427,
2421
+ "learning_rate": 1.874238298975755e-07,
2422
+ "loss": 2.9649,
2423
+ "step": 7960
2424
+ },
2425
+ {
2426
+ "epoch": 2.062281948572167,
2427
+ "learning_rate": 1.8739141708803318e-07,
2428
+ "loss": 2.8578,
2429
+ "step": 7980
2430
+ },
2431
+ {
2432
+ "epoch": 2.067450575009691,
2433
+ "learning_rate": 1.8735900427849085e-07,
2434
+ "loss": 2.8477,
2435
+ "step": 8000
2436
+ },
2437
+ {
2438
+ "epoch": 2.0726192014472153,
2439
+ "learning_rate": 1.873265914689485e-07,
2440
+ "loss": 2.8336,
2441
+ "step": 8020
2442
+ },
2443
+ {
2444
+ "epoch": 2.0777878278847397,
2445
+ "learning_rate": 1.872941786594062e-07,
2446
+ "loss": 2.9005,
2447
+ "step": 8040
2448
+ },
2449
+ {
2450
+ "epoch": 2.082956454322264,
2451
+ "learning_rate": 1.8726176584986386e-07,
2452
+ "loss": 2.9426,
2453
+ "step": 8060
2454
+ },
2455
+ {
2456
+ "epoch": 2.088125080759788,
2457
+ "learning_rate": 1.8722935304032152e-07,
2458
+ "loss": 2.8823,
2459
+ "step": 8080
2460
+ },
2461
+ {
2462
+ "epoch": 2.0932937071973123,
2463
+ "learning_rate": 1.871969402307792e-07,
2464
+ "loss": 2.8766,
2465
+ "step": 8100
2466
+ },
2467
+ {
2468
+ "epoch": 2.0984623336348367,
2469
+ "learning_rate": 1.8716452742123684e-07,
2470
+ "loss": 2.8903,
2471
+ "step": 8120
2472
+ },
2473
+ {
2474
+ "epoch": 2.1036309600723606,
2475
+ "learning_rate": 1.8713211461169453e-07,
2476
+ "loss": 2.9383,
2477
+ "step": 8140
2478
+ },
2479
+ {
2480
+ "epoch": 2.108799586509885,
2481
+ "learning_rate": 1.870997018021522e-07,
2482
+ "loss": 2.8702,
2483
+ "step": 8160
2484
+ },
2485
+ {
2486
+ "epoch": 2.1139682129474093,
2487
+ "learning_rate": 1.8706728899260986e-07,
2488
+ "loss": 2.8984,
2489
+ "step": 8180
2490
+ },
2491
+ {
2492
+ "epoch": 2.1191368393849332,
2493
+ "learning_rate": 1.8703487618306754e-07,
2494
+ "loss": 2.934,
2495
+ "step": 8200
2496
+ },
2497
+ {
2498
+ "epoch": 2.1243054658224576,
2499
+ "learning_rate": 1.870024633735252e-07,
2500
+ "loss": 2.8964,
2501
+ "step": 8220
2502
+ },
2503
+ {
2504
+ "epoch": 2.129474092259982,
2505
+ "learning_rate": 1.8697005056398287e-07,
2506
+ "loss": 2.887,
2507
+ "step": 8240
2508
+ },
2509
+ {
2510
+ "epoch": 2.1346427186975063,
2511
+ "learning_rate": 1.8693763775444056e-07,
2512
+ "loss": 2.8915,
2513
+ "step": 8260
2514
+ },
2515
+ {
2516
+ "epoch": 2.1398113451350302,
2517
+ "learning_rate": 1.8690522494489822e-07,
2518
+ "loss": 2.9201,
2519
+ "step": 8280
2520
+ },
2521
+ {
2522
+ "epoch": 2.1449799715725546,
2523
+ "learning_rate": 1.8687281213535588e-07,
2524
+ "loss": 2.8616,
2525
+ "step": 8300
2526
+ },
2527
+ {
2528
+ "epoch": 2.150148598010079,
2529
+ "learning_rate": 1.8684039932581354e-07,
2530
+ "loss": 2.8828,
2531
+ "step": 8320
2532
+ },
2533
+ {
2534
+ "epoch": 2.155317224447603,
2535
+ "learning_rate": 1.868079865162712e-07,
2536
+ "loss": 2.8614,
2537
+ "step": 8340
2538
+ },
2539
+ {
2540
+ "epoch": 2.1604858508851272,
2541
+ "learning_rate": 1.867755737067289e-07,
2542
+ "loss": 2.8295,
2543
+ "step": 8360
2544
+ },
2545
+ {
2546
+ "epoch": 2.1656544773226516,
2547
+ "learning_rate": 1.8674316089718655e-07,
2548
+ "loss": 2.8375,
2549
+ "step": 8380
2550
+ },
2551
+ {
2552
+ "epoch": 2.170823103760176,
2553
+ "learning_rate": 1.8671074808764422e-07,
2554
+ "loss": 2.844,
2555
+ "step": 8400
2556
+ },
2557
+ {
2558
+ "epoch": 2.1759917301977,
2559
+ "learning_rate": 1.866783352781019e-07,
2560
+ "loss": 2.8151,
2561
+ "step": 8420
2562
+ },
2563
+ {
2564
+ "epoch": 2.1811603566352242,
2565
+ "learning_rate": 1.8664592246855957e-07,
2566
+ "loss": 2.8761,
2567
+ "step": 8440
2568
+ },
2569
+ {
2570
+ "epoch": 2.1863289830727486,
2571
+ "learning_rate": 1.8661350965901723e-07,
2572
+ "loss": 2.9129,
2573
+ "step": 8460
2574
+ },
2575
+ {
2576
+ "epoch": 2.1914976095102725,
2577
+ "learning_rate": 1.8658109684947492e-07,
2578
+ "loss": 2.8907,
2579
+ "step": 8480
2580
+ },
2581
+ {
2582
+ "epoch": 2.196666235947797,
2583
+ "learning_rate": 1.8654868403993255e-07,
2584
+ "loss": 2.9062,
2585
+ "step": 8500
2586
+ },
2587
+ {
2588
+ "epoch": 2.2018348623853212,
2589
+ "learning_rate": 1.8651627123039024e-07,
2590
+ "loss": 2.9185,
2591
+ "step": 8520
2592
+ },
2593
+ {
2594
+ "epoch": 2.207003488822845,
2595
+ "learning_rate": 1.864838584208479e-07,
2596
+ "loss": 2.9334,
2597
+ "step": 8540
2598
+ },
2599
+ {
2600
+ "epoch": 2.2121721152603695,
2601
+ "learning_rate": 1.8645144561130556e-07,
2602
+ "loss": 2.8598,
2603
+ "step": 8560
2604
+ },
2605
+ {
2606
+ "epoch": 2.217340741697894,
2607
+ "learning_rate": 1.8641903280176325e-07,
2608
+ "loss": 2.8341,
2609
+ "step": 8580
2610
+ },
2611
+ {
2612
+ "epoch": 2.222509368135418,
2613
+ "learning_rate": 1.8638661999222091e-07,
2614
+ "loss": 2.9006,
2615
+ "step": 8600
2616
+ },
2617
+ {
2618
+ "epoch": 2.227677994572942,
2619
+ "learning_rate": 1.8635420718267858e-07,
2620
+ "loss": 2.8851,
2621
+ "step": 8620
2622
+ },
2623
+ {
2624
+ "epoch": 2.2328466210104665,
2625
+ "learning_rate": 1.8632179437313626e-07,
2626
+ "loss": 2.8807,
2627
+ "step": 8640
2628
+ },
2629
+ {
2630
+ "epoch": 2.238015247447991,
2631
+ "learning_rate": 1.8628938156359393e-07,
2632
+ "loss": 2.8064,
2633
+ "step": 8660
2634
+ },
2635
+ {
2636
+ "epoch": 2.2431838738855148,
2637
+ "learning_rate": 1.862569687540516e-07,
2638
+ "loss": 2.89,
2639
+ "step": 8680
2640
+ },
2641
+ {
2642
+ "epoch": 2.248352500323039,
2643
+ "learning_rate": 1.8622455594450928e-07,
2644
+ "loss": 2.8585,
2645
+ "step": 8700
2646
+ },
2647
+ {
2648
+ "epoch": 2.2535211267605635,
2649
+ "learning_rate": 1.861921431349669e-07,
2650
+ "loss": 2.8825,
2651
+ "step": 8720
2652
+ },
2653
+ {
2654
+ "epoch": 2.2586897531980874,
2655
+ "learning_rate": 1.861597303254246e-07,
2656
+ "loss": 2.8311,
2657
+ "step": 8740
2658
+ },
2659
+ {
2660
+ "epoch": 2.2638583796356118,
2661
+ "learning_rate": 1.8612731751588226e-07,
2662
+ "loss": 2.8857,
2663
+ "step": 8760
2664
+ },
2665
+ {
2666
+ "epoch": 2.269027006073136,
2667
+ "learning_rate": 1.8609490470633992e-07,
2668
+ "loss": 2.8278,
2669
+ "step": 8780
2670
+ },
2671
+ {
2672
+ "epoch": 2.2741956325106605,
2673
+ "learning_rate": 1.860624918967976e-07,
2674
+ "loss": 2.8549,
2675
+ "step": 8800
2676
+ },
2677
+ {
2678
+ "epoch": 2.2793642589481844,
2679
+ "learning_rate": 1.8603007908725527e-07,
2680
+ "loss": 2.8433,
2681
+ "step": 8820
2682
+ },
2683
+ {
2684
+ "epoch": 2.2845328853857088,
2685
+ "learning_rate": 1.8599766627771294e-07,
2686
+ "loss": 2.8928,
2687
+ "step": 8840
2688
+ },
2689
+ {
2690
+ "epoch": 2.289701511823233,
2691
+ "learning_rate": 1.8596525346817062e-07,
2692
+ "loss": 2.909,
2693
+ "step": 8860
2694
+ },
2695
+ {
2696
+ "epoch": 2.294870138260757,
2697
+ "learning_rate": 1.8593284065862829e-07,
2698
+ "loss": 2.8203,
2699
+ "step": 8880
2700
+ },
2701
+ {
2702
+ "epoch": 2.3000387646982814,
2703
+ "learning_rate": 1.8590042784908595e-07,
2704
+ "loss": 2.8176,
2705
+ "step": 8900
2706
+ },
2707
+ {
2708
+ "epoch": 2.3052073911358058,
2709
+ "learning_rate": 1.858680150395436e-07,
2710
+ "loss": 2.8396,
2711
+ "step": 8920
2712
+ },
2713
+ {
2714
+ "epoch": 2.31037601757333,
2715
+ "learning_rate": 1.8583560223000127e-07,
2716
+ "loss": 2.8307,
2717
+ "step": 8940
2718
+ },
2719
+ {
2720
+ "epoch": 2.315544644010854,
2721
+ "learning_rate": 1.8580318942045896e-07,
2722
+ "loss": 2.895,
2723
+ "step": 8960
2724
+ },
2725
+ {
2726
+ "epoch": 2.3207132704483784,
2727
+ "learning_rate": 1.8577077661091662e-07,
2728
+ "loss": 2.8683,
2729
+ "step": 8980
2730
+ },
2731
+ {
2732
+ "epoch": 2.3258818968859027,
2733
+ "learning_rate": 1.8573836380137428e-07,
2734
+ "loss": 2.8272,
2735
+ "step": 9000
2736
+ },
2737
+ {
2738
+ "epoch": 2.3310505233234267,
2739
+ "learning_rate": 1.8570595099183197e-07,
2740
+ "loss": 2.8701,
2741
+ "step": 9020
2742
+ },
2743
+ {
2744
+ "epoch": 2.336219149760951,
2745
+ "learning_rate": 1.8567353818228963e-07,
2746
+ "loss": 2.9526,
2747
+ "step": 9040
2748
+ },
2749
+ {
2750
+ "epoch": 2.3413877761984754,
2751
+ "learning_rate": 1.856411253727473e-07,
2752
+ "loss": 2.8748,
2753
+ "step": 9060
2754
+ },
2755
+ {
2756
+ "epoch": 2.3465564026359993,
2757
+ "learning_rate": 1.8560871256320498e-07,
2758
+ "loss": 2.8858,
2759
+ "step": 9080
2760
+ },
2761
+ {
2762
+ "epoch": 2.3517250290735237,
2763
+ "learning_rate": 1.8557629975366262e-07,
2764
+ "loss": 2.9003,
2765
+ "step": 9100
2766
+ },
2767
+ {
2768
+ "epoch": 2.356893655511048,
2769
+ "learning_rate": 1.855438869441203e-07,
2770
+ "loss": 2.8281,
2771
+ "step": 9120
2772
+ },
2773
+ {
2774
+ "epoch": 2.362062281948572,
2775
+ "learning_rate": 1.8551147413457797e-07,
2776
+ "loss": 2.81,
2777
+ "step": 9140
2778
+ },
2779
+ {
2780
+ "epoch": 2.3672309083860963,
2781
+ "learning_rate": 1.8547906132503563e-07,
2782
+ "loss": 2.8814,
2783
+ "step": 9160
2784
+ },
2785
+ {
2786
+ "epoch": 2.3723995348236206,
2787
+ "learning_rate": 1.8544664851549332e-07,
2788
+ "loss": 2.9104,
2789
+ "step": 9180
2790
+ },
2791
+ {
2792
+ "epoch": 2.377568161261145,
2793
+ "learning_rate": 1.8541423570595098e-07,
2794
+ "loss": 2.8546,
2795
+ "step": 9200
2796
+ },
2797
+ {
2798
+ "epoch": 2.382736787698669,
2799
+ "learning_rate": 1.8538182289640864e-07,
2800
+ "loss": 2.8443,
2801
+ "step": 9220
2802
+ },
2803
+ {
2804
+ "epoch": 2.3879054141361933,
2805
+ "learning_rate": 1.8534941008686633e-07,
2806
+ "loss": 2.8323,
2807
+ "step": 9240
2808
+ },
2809
+ {
2810
+ "epoch": 2.3930740405737176,
2811
+ "learning_rate": 1.85316997277324e-07,
2812
+ "loss": 2.8195,
2813
+ "step": 9260
2814
+ },
2815
+ {
2816
+ "epoch": 2.3982426670112416,
2817
+ "learning_rate": 1.8528458446778166e-07,
2818
+ "loss": 2.7998,
2819
+ "step": 9280
2820
+ },
2821
+ {
2822
+ "epoch": 2.403411293448766,
2823
+ "learning_rate": 1.8525217165823934e-07,
2824
+ "loss": 2.85,
2825
+ "step": 9300
2826
+ },
2827
+ {
2828
+ "epoch": 2.4085799198862903,
2829
+ "learning_rate": 1.8521975884869698e-07,
2830
+ "loss": 2.8015,
2831
+ "step": 9320
2832
+ },
2833
+ {
2834
+ "epoch": 2.4137485463238146,
2835
+ "learning_rate": 1.8518734603915467e-07,
2836
+ "loss": 2.8752,
2837
+ "step": 9340
2838
+ },
2839
+ {
2840
+ "epoch": 2.4189171727613386,
2841
+ "learning_rate": 1.8515493322961233e-07,
2842
+ "loss": 2.7702,
2843
+ "step": 9360
2844
+ },
2845
+ {
2846
+ "epoch": 2.424085799198863,
2847
+ "learning_rate": 1.8512252042007e-07,
2848
+ "loss": 2.9099,
2849
+ "step": 9380
2850
+ },
2851
+ {
2852
+ "epoch": 2.4292544256363873,
2853
+ "learning_rate": 1.8509010761052768e-07,
2854
+ "loss": 2.8563,
2855
+ "step": 9400
2856
+ },
2857
+ {
2858
+ "epoch": 2.434423052073911,
2859
+ "learning_rate": 1.8505769480098534e-07,
2860
+ "loss": 2.7581,
2861
+ "step": 9420
2862
+ },
2863
+ {
2864
+ "epoch": 2.4395916785114355,
2865
+ "learning_rate": 1.85025281991443e-07,
2866
+ "loss": 2.9199,
2867
+ "step": 9440
2868
+ },
2869
+ {
2870
+ "epoch": 2.44476030494896,
2871
+ "learning_rate": 1.849928691819007e-07,
2872
+ "loss": 2.8404,
2873
+ "step": 9460
2874
+ },
2875
+ {
2876
+ "epoch": 2.4499289313864843,
2877
+ "learning_rate": 1.8496045637235835e-07,
2878
+ "loss": 2.8321,
2879
+ "step": 9480
2880
+ },
2881
+ {
2882
+ "epoch": 2.455097557824008,
2883
+ "learning_rate": 1.8492804356281602e-07,
2884
+ "loss": 2.781,
2885
+ "step": 9500
2886
+ },
2887
+ {
2888
+ "epoch": 2.4602661842615325,
2889
+ "learning_rate": 1.8489563075327368e-07,
2890
+ "loss": 2.9004,
2891
+ "step": 9520
2892
+ },
2893
+ {
2894
+ "epoch": 2.465434810699057,
2895
+ "learning_rate": 1.8486321794373134e-07,
2896
+ "loss": 2.8439,
2897
+ "step": 9540
2898
+ },
2899
+ {
2900
+ "epoch": 2.470603437136581,
2901
+ "learning_rate": 1.8483080513418903e-07,
2902
+ "loss": 2.9077,
2903
+ "step": 9560
2904
+ },
2905
+ {
2906
+ "epoch": 2.475772063574105,
2907
+ "learning_rate": 1.847983923246467e-07,
2908
+ "loss": 2.8844,
2909
+ "step": 9580
2910
+ },
2911
+ {
2912
+ "epoch": 2.4809406900116295,
2913
+ "learning_rate": 1.8476597951510435e-07,
2914
+ "loss": 2.8135,
2915
+ "step": 9600
2916
+ },
2917
+ {
2918
+ "epoch": 2.4861093164491535,
2919
+ "learning_rate": 1.8473356670556204e-07,
2920
+ "loss": 2.8818,
2921
+ "step": 9620
2922
+ },
2923
+ {
2924
+ "epoch": 2.491277942886678,
2925
+ "learning_rate": 1.847011538960197e-07,
2926
+ "loss": 2.8716,
2927
+ "step": 9640
2928
+ },
2929
+ {
2930
+ "epoch": 2.496446569324202,
2931
+ "learning_rate": 1.8466874108647736e-07,
2932
+ "loss": 2.8514,
2933
+ "step": 9660
2934
+ },
2935
+ {
2936
+ "epoch": 2.501615195761726,
2937
+ "learning_rate": 1.8463632827693505e-07,
2938
+ "loss": 2.8514,
2939
+ "step": 9680
2940
+ },
2941
+ {
2942
+ "epoch": 2.5067838221992504,
2943
+ "learning_rate": 1.846039154673927e-07,
2944
+ "loss": 2.8991,
2945
+ "step": 9700
2946
+ },
2947
+ {
2948
+ "epoch": 2.511952448636775,
2949
+ "learning_rate": 1.8457150265785038e-07,
2950
+ "loss": 2.8639,
2951
+ "step": 9720
2952
+ },
2953
+ {
2954
+ "epoch": 2.517121075074299,
2955
+ "learning_rate": 1.8453908984830804e-07,
2956
+ "loss": 2.839,
2957
+ "step": 9740
2958
+ },
2959
+ {
2960
+ "epoch": 2.522289701511823,
2961
+ "learning_rate": 1.845066770387657e-07,
2962
+ "loss": 2.8364,
2963
+ "step": 9760
2964
+ },
2965
+ {
2966
+ "epoch": 2.5274583279493474,
2967
+ "learning_rate": 1.844742642292234e-07,
2968
+ "loss": 2.8632,
2969
+ "step": 9780
2970
+ },
2971
+ {
2972
+ "epoch": 2.532626954386872,
2973
+ "learning_rate": 1.8444185141968105e-07,
2974
+ "loss": 2.8573,
2975
+ "step": 9800
2976
+ },
2977
+ {
2978
+ "epoch": 2.5377955808243957,
2979
+ "learning_rate": 1.844094386101387e-07,
2980
+ "loss": 2.869,
2981
+ "step": 9820
2982
+ },
2983
+ {
2984
+ "epoch": 2.54296420726192,
2985
+ "learning_rate": 1.843770258005964e-07,
2986
+ "loss": 2.8879,
2987
+ "step": 9840
2988
+ },
2989
+ {
2990
+ "epoch": 2.5481328336994444,
2991
+ "learning_rate": 1.8434461299105406e-07,
2992
+ "loss": 2.8887,
2993
+ "step": 9860
2994
+ },
2995
+ {
2996
+ "epoch": 2.553301460136969,
2997
+ "learning_rate": 1.8431220018151172e-07,
2998
+ "loss": 2.817,
2999
+ "step": 9880
3000
+ },
3001
+ {
3002
+ "epoch": 2.5584700865744927,
3003
+ "learning_rate": 1.842797873719694e-07,
3004
+ "loss": 2.8758,
3005
+ "step": 9900
3006
+ },
3007
+ {
3008
+ "epoch": 2.563638713012017,
3009
+ "learning_rate": 1.8424737456242705e-07,
3010
+ "loss": 2.8466,
3011
+ "step": 9920
3012
+ },
3013
+ {
3014
+ "epoch": 2.5688073394495414,
3015
+ "learning_rate": 1.8421496175288474e-07,
3016
+ "loss": 2.8203,
3017
+ "step": 9940
3018
+ },
3019
+ {
3020
+ "epoch": 2.5739759658870653,
3021
+ "learning_rate": 1.841825489433424e-07,
3022
+ "loss": 2.8138,
3023
+ "step": 9960
3024
+ },
3025
+ {
3026
+ "epoch": 2.5791445923245897,
3027
+ "learning_rate": 1.8415013613380006e-07,
3028
+ "loss": 2.8538,
3029
+ "step": 9980
3030
+ },
3031
+ {
3032
+ "epoch": 2.584313218762114,
3033
+ "learning_rate": 1.8411772332425775e-07,
3034
+ "loss": 2.779,
3035
+ "step": 10000
3036
+ },
3037
+ {
3038
+ "epoch": 2.5894818451996384,
3039
+ "learning_rate": 1.840853105147154e-07,
3040
+ "loss": 2.8533,
3041
+ "step": 10020
3042
+ },
3043
+ {
3044
+ "epoch": 2.5946504716371623,
3045
+ "learning_rate": 1.8405289770517307e-07,
3046
+ "loss": 2.8389,
3047
+ "step": 10040
3048
+ },
3049
+ {
3050
+ "epoch": 2.5998190980746867,
3051
+ "learning_rate": 1.8402048489563076e-07,
3052
+ "loss": 2.9132,
3053
+ "step": 10060
3054
+ },
3055
+ {
3056
+ "epoch": 2.6049877245122106,
3057
+ "learning_rate": 1.8398807208608842e-07,
3058
+ "loss": 2.8213,
3059
+ "step": 10080
3060
+ },
3061
+ {
3062
+ "epoch": 2.610156350949735,
3063
+ "learning_rate": 1.8395565927654608e-07,
3064
+ "loss": 2.7963,
3065
+ "step": 10100
3066
+ },
3067
+ {
3068
+ "epoch": 2.6153249773872593,
3069
+ "learning_rate": 1.8392324646700374e-07,
3070
+ "loss": 2.7789,
3071
+ "step": 10120
3072
+ },
3073
+ {
3074
+ "epoch": 2.6204936038247837,
3075
+ "learning_rate": 1.838908336574614e-07,
3076
+ "loss": 2.8891,
3077
+ "step": 10140
3078
+ },
3079
+ {
3080
+ "epoch": 2.625662230262308,
3081
+ "learning_rate": 1.838584208479191e-07,
3082
+ "loss": 2.8305,
3083
+ "step": 10160
3084
+ },
3085
+ {
3086
+ "epoch": 2.630830856699832,
3087
+ "learning_rate": 1.8382600803837676e-07,
3088
+ "loss": 2.8333,
3089
+ "step": 10180
3090
+ },
3091
+ {
3092
+ "epoch": 2.6359994831373563,
3093
+ "learning_rate": 1.8379359522883442e-07,
3094
+ "loss": 2.8771,
3095
+ "step": 10200
3096
+ },
3097
+ {
3098
+ "epoch": 2.6411681095748802,
3099
+ "learning_rate": 1.837611824192921e-07,
3100
+ "loss": 2.8126,
3101
+ "step": 10220
3102
+ },
3103
+ {
3104
+ "epoch": 2.6463367360124046,
3105
+ "learning_rate": 1.8372876960974977e-07,
3106
+ "loss": 2.8727,
3107
+ "step": 10240
3108
+ },
3109
+ {
3110
+ "epoch": 2.651505362449929,
3111
+ "learning_rate": 1.8369635680020743e-07,
3112
+ "loss": 2.8323,
3113
+ "step": 10260
3114
+ },
3115
+ {
3116
+ "epoch": 2.6566739888874533,
3117
+ "learning_rate": 1.8366394399066512e-07,
3118
+ "loss": 2.7787,
3119
+ "step": 10280
3120
+ },
3121
+ {
3122
+ "epoch": 2.6618426153249772,
3123
+ "learning_rate": 1.8363153118112275e-07,
3124
+ "loss": 2.8147,
3125
+ "step": 10300
3126
+ },
3127
+ {
3128
+ "epoch": 2.6670112417625016,
3129
+ "learning_rate": 1.8359911837158044e-07,
3130
+ "loss": 2.7373,
3131
+ "step": 10320
3132
+ },
3133
+ {
3134
+ "epoch": 2.672179868200026,
3135
+ "learning_rate": 1.835667055620381e-07,
3136
+ "loss": 2.8365,
3137
+ "step": 10340
3138
+ },
3139
+ {
3140
+ "epoch": 2.67734849463755,
3141
+ "learning_rate": 1.8353429275249577e-07,
3142
+ "loss": 2.7914,
3143
+ "step": 10360
3144
+ },
3145
+ {
3146
+ "epoch": 2.6825171210750742,
3147
+ "learning_rate": 1.8350187994295346e-07,
3148
+ "loss": 2.8569,
3149
+ "step": 10380
3150
+ },
3151
+ {
3152
+ "epoch": 2.6876857475125986,
3153
+ "learning_rate": 1.8346946713341112e-07,
3154
+ "loss": 2.8851,
3155
+ "step": 10400
3156
+ },
3157
+ {
3158
+ "epoch": 2.692854373950123,
3159
+ "learning_rate": 1.8343705432386878e-07,
3160
+ "loss": 2.8061,
3161
+ "step": 10420
3162
+ },
3163
+ {
3164
+ "epoch": 2.698023000387647,
3165
+ "learning_rate": 1.8340464151432647e-07,
3166
+ "loss": 2.8493,
3167
+ "step": 10440
3168
+ },
3169
+ {
3170
+ "epoch": 2.7031916268251712,
3171
+ "learning_rate": 1.8337222870478413e-07,
3172
+ "loss": 2.7935,
3173
+ "step": 10460
3174
+ },
3175
+ {
3176
+ "epoch": 2.7083602532626956,
3177
+ "learning_rate": 1.833398158952418e-07,
3178
+ "loss": 2.8418,
3179
+ "step": 10480
3180
+ },
3181
+ {
3182
+ "epoch": 2.7135288797002195,
3183
+ "learning_rate": 1.8330740308569948e-07,
3184
+ "loss": 2.7704,
3185
+ "step": 10500
3186
+ },
3187
+ {
3188
+ "epoch": 2.718697506137744,
3189
+ "learning_rate": 1.8327499027615711e-07,
3190
+ "loss": 2.87,
3191
+ "step": 10520
3192
+ },
3193
+ {
3194
+ "epoch": 2.723866132575268,
3195
+ "learning_rate": 1.832425774666148e-07,
3196
+ "loss": 2.8444,
3197
+ "step": 10540
3198
+ },
3199
+ {
3200
+ "epoch": 2.7290347590127926,
3201
+ "learning_rate": 1.8321016465707246e-07,
3202
+ "loss": 2.7719,
3203
+ "step": 10560
3204
+ },
3205
+ {
3206
+ "epoch": 2.7342033854503165,
3207
+ "learning_rate": 1.8317775184753013e-07,
3208
+ "loss": 2.823,
3209
+ "step": 10580
3210
+ },
3211
+ {
3212
+ "epoch": 2.739372011887841,
3213
+ "learning_rate": 1.8314533903798782e-07,
3214
+ "loss": 2.8562,
3215
+ "step": 10600
3216
+ },
3217
+ {
3218
+ "epoch": 2.7445406383253648,
3219
+ "learning_rate": 1.8311292622844548e-07,
3220
+ "loss": 2.8903,
3221
+ "step": 10620
3222
+ },
3223
+ {
3224
+ "epoch": 2.749709264762889,
3225
+ "learning_rate": 1.8308051341890314e-07,
3226
+ "loss": 2.8069,
3227
+ "step": 10640
3228
+ },
3229
+ {
3230
+ "epoch": 2.7548778912004135,
3231
+ "learning_rate": 1.8304810060936083e-07,
3232
+ "loss": 2.7696,
3233
+ "step": 10660
3234
+ },
3235
+ {
3236
+ "epoch": 2.760046517637938,
3237
+ "learning_rate": 1.830156877998185e-07,
3238
+ "loss": 2.7616,
3239
+ "step": 10680
3240
+ },
3241
+ {
3242
+ "epoch": 2.765215144075462,
3243
+ "learning_rate": 1.8298327499027615e-07,
3244
+ "loss": 2.7976,
3245
+ "step": 10700
3246
+ },
3247
+ {
3248
+ "epoch": 2.770383770512986,
3249
+ "learning_rate": 1.829508621807338e-07,
3250
+ "loss": 2.8024,
3251
+ "step": 10720
3252
+ },
3253
+ {
3254
+ "epoch": 2.7755523969505105,
3255
+ "learning_rate": 1.8291844937119147e-07,
3256
+ "loss": 2.8195,
3257
+ "step": 10740
3258
+ },
3259
+ {
3260
+ "epoch": 2.7807210233880344,
3261
+ "learning_rate": 1.8288603656164916e-07,
3262
+ "loss": 2.8792,
3263
+ "step": 10760
3264
+ },
3265
+ {
3266
+ "epoch": 2.7858896498255588,
3267
+ "learning_rate": 1.8285362375210682e-07,
3268
+ "loss": 2.8563,
3269
+ "step": 10780
3270
+ },
3271
+ {
3272
+ "epoch": 2.791058276263083,
3273
+ "learning_rate": 1.8282121094256449e-07,
3274
+ "loss": 2.7792,
3275
+ "step": 10800
3276
+ },
3277
+ {
3278
+ "epoch": 2.7962269027006075,
3279
+ "learning_rate": 1.8278879813302218e-07,
3280
+ "loss": 2.8382,
3281
+ "step": 10820
3282
+ },
3283
+ {
3284
+ "epoch": 2.8013955291381314,
3285
+ "learning_rate": 1.8275638532347984e-07,
3286
+ "loss": 2.7697,
3287
+ "step": 10840
3288
+ },
3289
+ {
3290
+ "epoch": 2.8065641555756558,
3291
+ "learning_rate": 1.827239725139375e-07,
3292
+ "loss": 2.8915,
3293
+ "step": 10860
3294
+ },
3295
+ {
3296
+ "epoch": 2.81173278201318,
3297
+ "learning_rate": 1.826915597043952e-07,
3298
+ "loss": 2.8202,
3299
+ "step": 10880
3300
+ },
3301
+ {
3302
+ "epoch": 2.816901408450704,
3303
+ "learning_rate": 1.8265914689485282e-07,
3304
+ "loss": 2.7699,
3305
+ "step": 10900
3306
+ },
3307
+ {
3308
+ "epoch": 2.8220700348882284,
3309
+ "learning_rate": 1.826267340853105e-07,
3310
+ "loss": 2.8795,
3311
+ "step": 10920
3312
+ },
3313
+ {
3314
+ "epoch": 2.8272386613257527,
3315
+ "learning_rate": 1.8259432127576817e-07,
3316
+ "loss": 2.7777,
3317
+ "step": 10940
3318
+ },
3319
+ {
3320
+ "epoch": 2.832407287763277,
3321
+ "learning_rate": 1.8256190846622583e-07,
3322
+ "loss": 2.8015,
3323
+ "step": 10960
3324
+ },
3325
+ {
3326
+ "epoch": 2.837575914200801,
3327
+ "learning_rate": 1.8252949565668352e-07,
3328
+ "loss": 2.8306,
3329
+ "step": 10980
3330
+ },
3331
+ {
3332
+ "epoch": 2.8427445406383254,
3333
+ "learning_rate": 1.8249708284714118e-07,
3334
+ "loss": 2.986,
3335
+ "step": 11000
3336
+ },
3337
+ {
3338
+ "epoch": 2.8479131670758497,
3339
+ "learning_rate": 1.8246467003759885e-07,
3340
+ "loss": 2.785,
3341
+ "step": 11020
3342
+ },
3343
+ {
3344
+ "epoch": 2.8530817935133737,
3345
+ "learning_rate": 1.8243225722805653e-07,
3346
+ "loss": 2.7796,
3347
+ "step": 11040
3348
+ },
3349
+ {
3350
+ "epoch": 2.858250419950898,
3351
+ "learning_rate": 1.823998444185142e-07,
3352
+ "loss": 2.7851,
3353
+ "step": 11060
3354
+ },
3355
+ {
3356
+ "epoch": 2.8634190463884224,
3357
+ "learning_rate": 1.8236743160897186e-07,
3358
+ "loss": 2.8133,
3359
+ "step": 11080
3360
+ },
3361
+ {
3362
+ "epoch": 2.8685876728259467,
3363
+ "learning_rate": 1.8233501879942955e-07,
3364
+ "loss": 2.7767,
3365
+ "step": 11100
3366
+ },
3367
+ {
3368
+ "epoch": 2.8737562992634706,
3369
+ "learning_rate": 1.8230260598988718e-07,
3370
+ "loss": 2.8456,
3371
+ "step": 11120
3372
+ },
3373
+ {
3374
+ "epoch": 2.878924925700995,
3375
+ "learning_rate": 1.8227019318034487e-07,
3376
+ "loss": 2.7697,
3377
+ "step": 11140
3378
+ },
3379
+ {
3380
+ "epoch": 2.884093552138519,
3381
+ "learning_rate": 1.8223778037080253e-07,
3382
+ "loss": 2.7501,
3383
+ "step": 11160
3384
+ },
3385
+ {
3386
+ "epoch": 2.8892621785760433,
3387
+ "learning_rate": 1.822053675612602e-07,
3388
+ "loss": 2.8102,
3389
+ "step": 11180
3390
+ },
3391
+ {
3392
+ "epoch": 2.8944308050135676,
3393
+ "learning_rate": 1.8217295475171788e-07,
3394
+ "loss": 2.8303,
3395
+ "step": 11200
3396
+ },
3397
+ {
3398
+ "epoch": 2.899599431451092,
3399
+ "learning_rate": 1.8214054194217554e-07,
3400
+ "loss": 2.8061,
3401
+ "step": 11220
3402
+ },
3403
+ {
3404
+ "epoch": 2.9047680578886164,
3405
+ "learning_rate": 1.821081291326332e-07,
3406
+ "loss": 2.853,
3407
+ "step": 11240
3408
+ },
3409
+ {
3410
+ "epoch": 2.9099366843261403,
3411
+ "learning_rate": 1.820757163230909e-07,
3412
+ "loss": 2.8419,
3413
+ "step": 11260
3414
+ },
3415
+ {
3416
+ "epoch": 2.9151053107636646,
3417
+ "learning_rate": 1.8204330351354856e-07,
3418
+ "loss": 2.8818,
3419
+ "step": 11280
3420
+ },
3421
+ {
3422
+ "epoch": 2.9202739372011886,
3423
+ "learning_rate": 1.8201089070400622e-07,
3424
+ "loss": 2.7992,
3425
+ "step": 11300
3426
+ },
3427
+ {
3428
+ "epoch": 2.925442563638713,
3429
+ "learning_rate": 1.8197847789446388e-07,
3430
+ "loss": 2.8553,
3431
+ "step": 11320
3432
+ },
3433
+ {
3434
+ "epoch": 2.9306111900762373,
3435
+ "learning_rate": 1.8194606508492154e-07,
3436
+ "loss": 2.7901,
3437
+ "step": 11340
3438
+ },
3439
+ {
3440
+ "epoch": 2.9357798165137616,
3441
+ "learning_rate": 1.8191365227537923e-07,
3442
+ "loss": 2.7795,
3443
+ "step": 11360
3444
+ },
3445
+ {
3446
+ "epoch": 2.9409484429512855,
3447
+ "learning_rate": 1.818812394658369e-07,
3448
+ "loss": 2.7977,
3449
+ "step": 11380
3450
+ },
3451
+ {
3452
+ "epoch": 2.94611706938881,
3453
+ "learning_rate": 1.8184882665629455e-07,
3454
+ "loss": 2.7824,
3455
+ "step": 11400
3456
+ },
3457
+ {
3458
+ "epoch": 2.9512856958263343,
3459
+ "learning_rate": 1.8181641384675224e-07,
3460
+ "loss": 2.7671,
3461
+ "step": 11420
3462
+ },
3463
+ {
3464
+ "epoch": 2.956454322263858,
3465
+ "learning_rate": 1.817840010372099e-07,
3466
+ "loss": 2.8553,
3467
+ "step": 11440
3468
+ },
3469
+ {
3470
+ "epoch": 2.9616229487013825,
3471
+ "learning_rate": 1.8175158822766757e-07,
3472
+ "loss": 2.7253,
3473
+ "step": 11460
3474
+ },
3475
+ {
3476
+ "epoch": 2.966791575138907,
3477
+ "learning_rate": 1.8171917541812525e-07,
3478
+ "loss": 2.8338,
3479
+ "step": 11480
3480
+ },
3481
+ {
3482
+ "epoch": 2.9719602015764313,
3483
+ "learning_rate": 1.816867626085829e-07,
3484
+ "loss": 2.7843,
3485
+ "step": 11500
3486
+ },
3487
+ {
3488
+ "epoch": 2.977128828013955,
3489
+ "learning_rate": 1.8165434979904058e-07,
3490
+ "loss": 2.8181,
3491
+ "step": 11520
3492
+ },
3493
+ {
3494
+ "epoch": 2.9822974544514795,
3495
+ "learning_rate": 1.8162193698949824e-07,
3496
+ "loss": 2.7918,
3497
+ "step": 11540
3498
+ },
3499
+ {
3500
+ "epoch": 2.987466080889004,
3501
+ "learning_rate": 1.815895241799559e-07,
3502
+ "loss": 2.8051,
3503
+ "step": 11560
3504
+ },
3505
+ {
3506
+ "epoch": 2.992634707326528,
3507
+ "learning_rate": 1.815571113704136e-07,
3508
+ "loss": 2.8276,
3509
+ "step": 11580
3510
+ },
3511
+ {
3512
+ "epoch": 2.997803333764052,
3513
+ "learning_rate": 1.8152469856087125e-07,
3514
+ "loss": 2.8109,
3515
+ "step": 11600
3516
+ },
3517
+ {
3518
+ "epoch": 2.9998707843390617,
3519
+ "eval_bleu": 0.0795,
3520
+ "eval_gen_len": 111.475,
3521
+ "eval_loss": 2.76475191116333,
3522
+ "eval_runtime": 1941.3663,
3523
+ "eval_samples_per_second": 0.887,
3524
+ "eval_steps_per_second": 0.444,
3525
+ "step": 11608
3526
  }
3527
  ],
3528
  "logging_steps": 20,
 
3542
  "attributes": {}
3543
  }
3544
  },
3545
+ "total_flos": 7.619114628911923e+16,
3546
  "train_batch_size": 2,
3547
  "trial_name": null,
3548
  "trial_params": null