abdiharyadi committed (verified)
Commit b0981b7 · Parent: 20ec1df

Training in progress, epoch 2, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:57b71df27bbff184f858f60fa4c02ae0d6834cea36b5bbaef3fc795b3fb84c00
+ oid sha256:e8ae5ab01ccbb566cd3dd8ee6dc1252a85b1c7f271331643ea03050b0e8ec9d3
  size 1575259780
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ca53e5c4b589bb05918e6d459fce2f8dd863e3cb2c154c610132eb3575de22aa
+ oid sha256:acc7dc7d28456c78f8d6080cf32977d4aebc2e514871c0bf819e045e28972103
  size 3150397656
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:48f3f58ed4eabd409cc401f71f645e608c74737dcced18ce35f8fb5e96e138ce
+ oid sha256:1e8361756a3864b69a416a28667f28ee22be0ff5d84048a54680e7de8b87f656
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:edd8e642695cbd529619d314964203d96be720c003fedc73e966fd2d06b703c4
+ oid sha256:dede00403a11b2b14e170429444fee20dcef7db14bc3c7a358467f448966c579
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.0741,
- "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-3869",
- "epoch": 0.9998707843390618,
+ "best_metric": 0.0747,
+ "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-7739",
+ "epoch": 2.0,
  "eval_steps": 500,
- "global_step": 3869,
+ "global_step": 7739,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1181,6 +1181,1174 @@
  "eval_samples_per_second": 0.864,
  "eval_steps_per_second": 0.432,
  "step": 3869
1184
+ },
1185
+ {
1186
+ "epoch": 1.0027135288797002,
1187
+ "learning_rate": 1.9403604304421105e-07,
1188
+ "loss": 3.0219,
1189
+ "step": 3880
1190
+ },
1191
+ {
1192
+ "epoch": 1.0078821553172244,
1193
+ "learning_rate": 1.9400363023466873e-07,
1194
+ "loss": 3.0378,
1195
+ "step": 3900
1196
+ },
1197
+ {
1198
+ "epoch": 1.0130507817547487,
1199
+ "learning_rate": 1.939712174251264e-07,
1200
+ "loss": 3.058,
1201
+ "step": 3920
1202
+ },
1203
+ {
1204
+ "epoch": 1.0182194081922729,
1205
+ "learning_rate": 1.9393880461558406e-07,
1206
+ "loss": 3.0479,
1207
+ "step": 3940
1208
+ },
1209
+ {
1210
+ "epoch": 1.0233880346297972,
1211
+ "learning_rate": 1.9390639180604175e-07,
1212
+ "loss": 3.0733,
1213
+ "step": 3960
1214
+ },
1215
+ {
1216
+ "epoch": 1.0285566610673214,
1217
+ "learning_rate": 1.938739789964994e-07,
1218
+ "loss": 3.0543,
1219
+ "step": 3980
1220
+ },
1221
+ {
1222
+ "epoch": 1.0337252875048455,
1223
+ "learning_rate": 1.9384156618695707e-07,
1224
+ "loss": 2.9607,
1225
+ "step": 4000
1226
+ },
1227
+ {
1228
+ "epoch": 1.0388939139423699,
1229
+ "learning_rate": 1.9380915337741476e-07,
1230
+ "loss": 2.9848,
1231
+ "step": 4020
1232
+ },
1233
+ {
1234
+ "epoch": 1.044062540379894,
1235
+ "learning_rate": 1.937767405678724e-07,
1236
+ "loss": 3.0765,
1237
+ "step": 4040
1238
+ },
1239
+ {
1240
+ "epoch": 1.0492311668174183,
1241
+ "learning_rate": 1.9374432775833008e-07,
1242
+ "loss": 2.9827,
1243
+ "step": 4060
1244
+ },
1245
+ {
1246
+ "epoch": 1.0543997932549425,
1247
+ "learning_rate": 1.9371191494878777e-07,
1248
+ "loss": 3.0336,
1249
+ "step": 4080
1250
+ },
1251
+ {
1252
+ "epoch": 1.0595684196924666,
1253
+ "learning_rate": 1.936795021392454e-07,
1254
+ "loss": 3.0382,
1255
+ "step": 4100
1256
+ },
1257
+ {
1258
+ "epoch": 1.064737046129991,
1259
+ "learning_rate": 1.936470893297031e-07,
1260
+ "loss": 2.9706,
1261
+ "step": 4120
1262
+ },
1263
+ {
1264
+ "epoch": 1.0699056725675151,
1265
+ "learning_rate": 1.9361467652016076e-07,
1266
+ "loss": 2.9261,
1267
+ "step": 4140
1268
+ },
1269
+ {
1270
+ "epoch": 1.0750742990050395,
1271
+ "learning_rate": 1.9358226371061842e-07,
1272
+ "loss": 3.0358,
1273
+ "step": 4160
1274
+ },
1275
+ {
1276
+ "epoch": 1.0802429254425636,
1277
+ "learning_rate": 1.935498509010761e-07,
1278
+ "loss": 3.0442,
1279
+ "step": 4180
1280
+ },
1281
+ {
1282
+ "epoch": 1.085411551880088,
1283
+ "learning_rate": 1.9351743809153377e-07,
1284
+ "loss": 2.9493,
1285
+ "step": 4200
1286
+ },
1287
+ {
1288
+ "epoch": 1.0905801783176121,
1289
+ "learning_rate": 1.9348502528199143e-07,
1290
+ "loss": 2.9963,
1291
+ "step": 4220
1292
+ },
1293
+ {
1294
+ "epoch": 1.0957488047551363,
1295
+ "learning_rate": 1.9345261247244912e-07,
1296
+ "loss": 3.0102,
1297
+ "step": 4240
1298
+ },
1299
+ {
1300
+ "epoch": 1.1009174311926606,
1301
+ "learning_rate": 1.9342019966290675e-07,
1302
+ "loss": 2.9861,
1303
+ "step": 4260
1304
+ },
1305
+ {
1306
+ "epoch": 1.1060860576301847,
1307
+ "learning_rate": 1.9338778685336444e-07,
1308
+ "loss": 3.0209,
1309
+ "step": 4280
1310
+ },
1311
+ {
1312
+ "epoch": 1.111254684067709,
1313
+ "learning_rate": 1.933553740438221e-07,
1314
+ "loss": 2.9864,
1315
+ "step": 4300
1316
+ },
1317
+ {
1318
+ "epoch": 1.1164233105052332,
1319
+ "learning_rate": 1.9332296123427977e-07,
1320
+ "loss": 2.9766,
1321
+ "step": 4320
1322
+ },
1323
+ {
1324
+ "epoch": 1.1215919369427574,
1325
+ "learning_rate": 1.9329054842473745e-07,
1326
+ "loss": 3.0403,
1327
+ "step": 4340
1328
+ },
1329
+ {
1330
+ "epoch": 1.1267605633802817,
1331
+ "learning_rate": 1.9325813561519512e-07,
1332
+ "loss": 2.9861,
1333
+ "step": 4360
1334
+ },
1335
+ {
1336
+ "epoch": 1.1319291898178059,
1337
+ "learning_rate": 1.9322572280565278e-07,
1338
+ "loss": 3.0244,
1339
+ "step": 4380
1340
+ },
1341
+ {
1342
+ "epoch": 1.1370978162553302,
1343
+ "learning_rate": 1.9319330999611047e-07,
1344
+ "loss": 2.9827,
1345
+ "step": 4400
1346
+ },
1347
+ {
1348
+ "epoch": 1.1422664426928544,
1349
+ "learning_rate": 1.931608971865681e-07,
1350
+ "loss": 3.0343,
1351
+ "step": 4420
1352
+ },
1353
+ {
1354
+ "epoch": 1.1474350691303785,
1355
+ "learning_rate": 1.931284843770258e-07,
1356
+ "loss": 2.9305,
1357
+ "step": 4440
1358
+ },
1359
+ {
1360
+ "epoch": 1.1526036955679029,
1361
+ "learning_rate": 1.9309607156748348e-07,
1362
+ "loss": 2.9797,
1363
+ "step": 4460
1364
+ },
1365
+ {
1366
+ "epoch": 1.157772322005427,
1367
+ "learning_rate": 1.930636587579411e-07,
1368
+ "loss": 2.9846,
1369
+ "step": 4480
1370
+ },
1371
+ {
1372
+ "epoch": 1.1629409484429514,
1373
+ "learning_rate": 1.930312459483988e-07,
1374
+ "loss": 3.0176,
1375
+ "step": 4500
1376
+ },
1377
+ {
1378
+ "epoch": 1.1681095748804755,
1379
+ "learning_rate": 1.9299883313885646e-07,
1380
+ "loss": 2.9897,
1381
+ "step": 4520
1382
+ },
1383
+ {
1384
+ "epoch": 1.1732782013179996,
1385
+ "learning_rate": 1.9296642032931413e-07,
1386
+ "loss": 3.0404,
1387
+ "step": 4540
1388
+ },
1389
+ {
1390
+ "epoch": 1.178446827755524,
1391
+ "learning_rate": 1.9293400751977181e-07,
1392
+ "loss": 2.9724,
1393
+ "step": 4560
1394
+ },
1395
+ {
1396
+ "epoch": 1.1836154541930481,
1397
+ "learning_rate": 1.9290159471022948e-07,
1398
+ "loss": 3.019,
1399
+ "step": 4580
1400
+ },
1401
+ {
1402
+ "epoch": 1.1887840806305725,
1403
+ "learning_rate": 1.9286918190068714e-07,
1404
+ "loss": 2.9637,
1405
+ "step": 4600
1406
+ },
1407
+ {
1408
+ "epoch": 1.1939527070680966,
1409
+ "learning_rate": 1.9283676909114483e-07,
1410
+ "loss": 3.0281,
1411
+ "step": 4620
1412
+ },
1413
+ {
1414
+ "epoch": 1.1991213335056208,
1415
+ "learning_rate": 1.9280435628160246e-07,
1416
+ "loss": 2.9964,
1417
+ "step": 4640
1418
+ },
1419
+ {
1420
+ "epoch": 1.2042899599431451,
1421
+ "learning_rate": 1.9277194347206015e-07,
1422
+ "loss": 3.0762,
1423
+ "step": 4660
1424
+ },
1425
+ {
1426
+ "epoch": 1.2094585863806693,
1427
+ "learning_rate": 1.9273953066251784e-07,
1428
+ "loss": 3.035,
1429
+ "step": 4680
1430
+ },
1431
+ {
1432
+ "epoch": 1.2146272128181936,
1433
+ "learning_rate": 1.9270711785297547e-07,
1434
+ "loss": 2.9578,
1435
+ "step": 4700
1436
+ },
1437
+ {
1438
+ "epoch": 1.2197958392557178,
1439
+ "learning_rate": 1.9267470504343316e-07,
1440
+ "loss": 2.9568,
1441
+ "step": 4720
1442
+ },
1443
+ {
1444
+ "epoch": 1.2249644656932421,
1445
+ "learning_rate": 1.9264229223389082e-07,
1446
+ "loss": 3.0495,
1447
+ "step": 4740
1448
+ },
1449
+ {
1450
+ "epoch": 1.2301330921307663,
1451
+ "learning_rate": 1.9260987942434848e-07,
1452
+ "loss": 3.0054,
1453
+ "step": 4760
1454
+ },
1455
+ {
1456
+ "epoch": 1.2353017185682904,
1457
+ "learning_rate": 1.9257746661480617e-07,
1458
+ "loss": 3.0101,
1459
+ "step": 4780
1460
+ },
1461
+ {
1462
+ "epoch": 1.2404703450058148,
1463
+ "learning_rate": 1.9254505380526384e-07,
1464
+ "loss": 2.9621,
1465
+ "step": 4800
1466
+ },
1467
+ {
1468
+ "epoch": 1.245638971443339,
1469
+ "learning_rate": 1.925126409957215e-07,
1470
+ "loss": 3.0161,
1471
+ "step": 4820
1472
+ },
1473
+ {
1474
+ "epoch": 1.250807597880863,
1475
+ "learning_rate": 1.9248022818617919e-07,
1476
+ "loss": 2.9672,
1477
+ "step": 4840
1478
+ },
1479
+ {
1480
+ "epoch": 1.2559762243183874,
1481
+ "learning_rate": 1.9244781537663682e-07,
1482
+ "loss": 2.9749,
1483
+ "step": 4860
1484
+ },
1485
+ {
1486
+ "epoch": 1.2611448507559115,
1487
+ "learning_rate": 1.924154025670945e-07,
1488
+ "loss": 2.9898,
1489
+ "step": 4880
1490
+ },
1491
+ {
1492
+ "epoch": 1.266313477193436,
1493
+ "learning_rate": 1.9238298975755217e-07,
1494
+ "loss": 2.9917,
1495
+ "step": 4900
1496
+ },
1497
+ {
1498
+ "epoch": 1.27148210363096,
1499
+ "learning_rate": 1.9235057694800983e-07,
1500
+ "loss": 2.9675,
1501
+ "step": 4920
1502
+ },
1503
+ {
1504
+ "epoch": 1.2766507300684844,
1505
+ "learning_rate": 1.9231816413846752e-07,
1506
+ "loss": 2.9495,
1507
+ "step": 4940
1508
+ },
1509
+ {
1510
+ "epoch": 1.2818193565060085,
1511
+ "learning_rate": 1.9228575132892518e-07,
1512
+ "loss": 3.0443,
1513
+ "step": 4960
1514
+ },
1515
+ {
1516
+ "epoch": 1.2869879829435327,
1517
+ "learning_rate": 1.9225333851938284e-07,
1518
+ "loss": 2.9756,
1519
+ "step": 4980
1520
+ },
1521
+ {
1522
+ "epoch": 1.292156609381057,
1523
+ "learning_rate": 1.9222092570984053e-07,
1524
+ "loss": 2.9824,
1525
+ "step": 5000
1526
+ },
1527
+ {
1528
+ "epoch": 1.2973252358185812,
1529
+ "learning_rate": 1.9218851290029817e-07,
1530
+ "loss": 3.0607,
1531
+ "step": 5020
1532
+ },
1533
+ {
1534
+ "epoch": 1.3024938622561053,
1535
+ "learning_rate": 1.9215610009075586e-07,
1536
+ "loss": 3.0266,
1537
+ "step": 5040
1538
+ },
1539
+ {
1540
+ "epoch": 1.3076624886936297,
1541
+ "learning_rate": 1.9212368728121355e-07,
1542
+ "loss": 2.9212,
1543
+ "step": 5060
1544
+ },
1545
+ {
1546
+ "epoch": 1.312831115131154,
1547
+ "learning_rate": 1.9209127447167118e-07,
1548
+ "loss": 3.0226,
1549
+ "step": 5080
1550
+ },
1551
+ {
1552
+ "epoch": 1.3179997415686782,
1553
+ "learning_rate": 1.9205886166212887e-07,
1554
+ "loss": 2.9282,
1555
+ "step": 5100
1556
+ },
1557
+ {
1558
+ "epoch": 1.3231683680062023,
1559
+ "learning_rate": 1.9202644885258653e-07,
1560
+ "loss": 2.9725,
1561
+ "step": 5120
1562
+ },
1563
+ {
1564
+ "epoch": 1.3283369944437267,
1565
+ "learning_rate": 1.919940360430442e-07,
1566
+ "loss": 2.9777,
1567
+ "step": 5140
1568
+ },
1569
+ {
1570
+ "epoch": 1.3335056208812508,
1571
+ "learning_rate": 1.9196162323350188e-07,
1572
+ "loss": 2.9426,
1573
+ "step": 5160
1574
+ },
1575
+ {
1576
+ "epoch": 1.338674247318775,
1577
+ "learning_rate": 1.9192921042395954e-07,
1578
+ "loss": 2.9584,
1579
+ "step": 5180
1580
+ },
1581
+ {
1582
+ "epoch": 1.3438428737562993,
1583
+ "learning_rate": 1.918967976144172e-07,
1584
+ "loss": 2.9244,
1585
+ "step": 5200
1586
+ },
1587
+ {
1588
+ "epoch": 1.3490115001938234,
1589
+ "learning_rate": 1.918643848048749e-07,
1590
+ "loss": 3.0344,
1591
+ "step": 5220
1592
+ },
1593
+ {
1594
+ "epoch": 1.3541801266313478,
1595
+ "learning_rate": 1.9183197199533253e-07,
1596
+ "loss": 2.9877,
1597
+ "step": 5240
1598
+ },
1599
+ {
1600
+ "epoch": 1.359348753068872,
1601
+ "learning_rate": 1.9179955918579022e-07,
1602
+ "loss": 2.9763,
1603
+ "step": 5260
1604
+ },
1605
+ {
1606
+ "epoch": 1.3645173795063963,
1607
+ "learning_rate": 1.917671463762479e-07,
1608
+ "loss": 2.9684,
1609
+ "step": 5280
1610
+ },
1611
+ {
1612
+ "epoch": 1.3696860059439204,
1613
+ "learning_rate": 1.9173473356670554e-07,
1614
+ "loss": 3.0331,
1615
+ "step": 5300
1616
+ },
1617
+ {
1618
+ "epoch": 1.3748546323814446,
1619
+ "learning_rate": 1.9170232075716323e-07,
1620
+ "loss": 3.0052,
1621
+ "step": 5320
1622
+ },
1623
+ {
1624
+ "epoch": 1.380023258818969,
1625
+ "learning_rate": 1.916699079476209e-07,
1626
+ "loss": 2.9511,
1627
+ "step": 5340
1628
+ },
1629
+ {
1630
+ "epoch": 1.385191885256493,
1631
+ "learning_rate": 1.9163749513807855e-07,
1632
+ "loss": 2.9444,
1633
+ "step": 5360
1634
+ },
1635
+ {
1636
+ "epoch": 1.3903605116940172,
1637
+ "learning_rate": 1.9160508232853624e-07,
1638
+ "loss": 2.9757,
1639
+ "step": 5380
1640
+ },
1641
+ {
1642
+ "epoch": 1.3955291381315416,
1643
+ "learning_rate": 1.915726695189939e-07,
1644
+ "loss": 3.0247,
1645
+ "step": 5400
1646
+ },
1647
+ {
1648
+ "epoch": 1.4006977645690657,
1649
+ "learning_rate": 1.9154025670945156e-07,
1650
+ "loss": 3.0173,
1651
+ "step": 5420
1652
+ },
1653
+ {
1654
+ "epoch": 1.40586639100659,
1655
+ "learning_rate": 1.9150784389990925e-07,
1656
+ "loss": 2.9848,
1657
+ "step": 5440
1658
+ },
1659
+ {
1660
+ "epoch": 1.4110350174441142,
1661
+ "learning_rate": 1.914754310903669e-07,
1662
+ "loss": 2.9544,
1663
+ "step": 5460
1664
+ },
1665
+ {
1666
+ "epoch": 1.4162036438816386,
1667
+ "learning_rate": 1.9144301828082458e-07,
1668
+ "loss": 2.9586,
1669
+ "step": 5480
1670
+ },
1671
+ {
1672
+ "epoch": 1.4213722703191627,
1673
+ "learning_rate": 1.9141060547128224e-07,
1674
+ "loss": 2.9663,
1675
+ "step": 5500
1676
+ },
1677
+ {
1678
+ "epoch": 1.4265408967566868,
1679
+ "learning_rate": 1.913781926617399e-07,
1680
+ "loss": 2.9736,
1681
+ "step": 5520
1682
+ },
1683
+ {
1684
+ "epoch": 1.4317095231942112,
1685
+ "learning_rate": 1.913457798521976e-07,
1686
+ "loss": 2.9392,
1687
+ "step": 5540
1688
+ },
1689
+ {
1690
+ "epoch": 1.4368781496317353,
1691
+ "learning_rate": 1.9131336704265525e-07,
1692
+ "loss": 2.9724,
1693
+ "step": 5560
1694
+ },
1695
+ {
1696
+ "epoch": 1.4420467760692595,
1697
+ "learning_rate": 1.912809542331129e-07,
1698
+ "loss": 2.9819,
1699
+ "step": 5580
1700
+ },
1701
+ {
1702
+ "epoch": 1.4472154025067838,
1703
+ "learning_rate": 1.912485414235706e-07,
1704
+ "loss": 2.8861,
1705
+ "step": 5600
1706
+ },
1707
+ {
1708
+ "epoch": 1.4523840289443082,
1709
+ "learning_rate": 1.9121612861402824e-07,
1710
+ "loss": 2.9408,
1711
+ "step": 5620
1712
+ },
1713
+ {
1714
+ "epoch": 1.4575526553818323,
1715
+ "learning_rate": 1.9118371580448592e-07,
1716
+ "loss": 3.0236,
1717
+ "step": 5640
1718
+ },
1719
+ {
1720
+ "epoch": 1.4627212818193565,
1721
+ "learning_rate": 1.911513029949436e-07,
1722
+ "loss": 2.9481,
1723
+ "step": 5660
1724
+ },
1725
+ {
1726
+ "epoch": 1.4678899082568808,
1727
+ "learning_rate": 1.9111889018540125e-07,
1728
+ "loss": 2.9892,
1729
+ "step": 5680
1730
+ },
1731
+ {
1732
+ "epoch": 1.473058534694405,
1733
+ "learning_rate": 1.9108647737585894e-07,
1734
+ "loss": 2.9723,
1735
+ "step": 5700
1736
+ },
1737
+ {
1738
+ "epoch": 1.478227161131929,
1739
+ "learning_rate": 1.910540645663166e-07,
1740
+ "loss": 2.8791,
1741
+ "step": 5720
1742
+ },
1743
+ {
1744
+ "epoch": 1.4833957875694535,
1745
+ "learning_rate": 1.9102165175677426e-07,
1746
+ "loss": 2.9381,
1747
+ "step": 5740
1748
+ },
1749
+ {
1750
+ "epoch": 1.4885644140069776,
1751
+ "learning_rate": 1.9098923894723195e-07,
1752
+ "loss": 2.8873,
1753
+ "step": 5760
1754
+ },
1755
+ {
1756
+ "epoch": 1.493733040444502,
1757
+ "learning_rate": 1.909568261376896e-07,
1758
+ "loss": 2.8586,
1759
+ "step": 5780
1760
+ },
1761
+ {
1762
+ "epoch": 1.498901666882026,
1763
+ "learning_rate": 1.9092441332814727e-07,
1764
+ "loss": 2.9308,
1765
+ "step": 5800
1766
+ },
1767
+ {
1768
+ "epoch": 1.5040702933195504,
1769
+ "learning_rate": 1.9089200051860496e-07,
1770
+ "loss": 2.9222,
1771
+ "step": 5820
1772
+ },
1773
+ {
1774
+ "epoch": 1.5092389197570746,
1775
+ "learning_rate": 1.908595877090626e-07,
1776
+ "loss": 2.9787,
1777
+ "step": 5840
1778
+ },
1779
+ {
1780
+ "epoch": 1.5144075461945987,
1781
+ "learning_rate": 1.9082717489952028e-07,
1782
+ "loss": 2.9207,
1783
+ "step": 5860
1784
+ },
1785
+ {
1786
+ "epoch": 1.519576172632123,
1787
+ "learning_rate": 1.9079476208997797e-07,
1788
+ "loss": 2.9304,
1789
+ "step": 5880
1790
+ },
1791
+ {
1792
+ "epoch": 1.5247447990696472,
1793
+ "learning_rate": 1.907623492804356e-07,
1794
+ "loss": 2.942,
1795
+ "step": 5900
1796
+ },
1797
+ {
1798
+ "epoch": 1.5299134255071714,
1799
+ "learning_rate": 1.907299364708933e-07,
1800
+ "loss": 2.9672,
1801
+ "step": 5920
1802
+ },
1803
+ {
1804
+ "epoch": 1.5350820519446957,
1805
+ "learning_rate": 1.9069752366135096e-07,
1806
+ "loss": 2.9461,
1807
+ "step": 5940
1808
+ },
1809
+ {
1810
+ "epoch": 1.54025067838222,
1811
+ "learning_rate": 1.9066511085180862e-07,
1812
+ "loss": 2.9206,
1813
+ "step": 5960
1814
+ },
1815
+ {
1816
+ "epoch": 1.545419304819744,
1817
+ "learning_rate": 1.906326980422663e-07,
1818
+ "loss": 3.0094,
1819
+ "step": 5980
1820
+ },
1821
+ {
1822
+ "epoch": 1.5505879312572683,
1823
+ "learning_rate": 1.9060028523272397e-07,
1824
+ "loss": 2.9333,
1825
+ "step": 6000
1826
+ },
1827
+ {
1828
+ "epoch": 1.5557565576947927,
1829
+ "learning_rate": 1.9056787242318163e-07,
1830
+ "loss": 2.8769,
1831
+ "step": 6020
1832
+ },
1833
+ {
1834
+ "epoch": 1.5609251841323168,
1835
+ "learning_rate": 1.9053545961363932e-07,
1836
+ "loss": 2.8884,
1837
+ "step": 6040
1838
+ },
1839
+ {
1840
+ "epoch": 1.566093810569841,
1841
+ "learning_rate": 1.9050304680409696e-07,
1842
+ "loss": 2.9742,
1843
+ "step": 6060
1844
+ },
1845
+ {
1846
+ "epoch": 1.5712624370073653,
1847
+ "learning_rate": 1.9047063399455464e-07,
1848
+ "loss": 3.0092,
1849
+ "step": 6080
1850
+ },
1851
+ {
1852
+ "epoch": 1.5764310634448895,
1853
+ "learning_rate": 1.904382211850123e-07,
1854
+ "loss": 2.9553,
1855
+ "step": 6100
1856
+ },
1857
+ {
1858
+ "epoch": 1.5815996898824136,
1859
+ "learning_rate": 1.9040580837546997e-07,
1860
+ "loss": 2.8705,
1861
+ "step": 6120
1862
+ },
1863
+ {
1864
+ "epoch": 1.586768316319938,
1865
+ "learning_rate": 1.9037339556592766e-07,
1866
+ "loss": 2.9877,
1867
+ "step": 6140
1868
+ },
1869
+ {
1870
+ "epoch": 1.5919369427574623,
1871
+ "learning_rate": 1.9034098275638532e-07,
1872
+ "loss": 2.966,
1873
+ "step": 6160
1874
+ },
1875
+ {
1876
+ "epoch": 1.5971055691949865,
1877
+ "learning_rate": 1.9030856994684298e-07,
1878
+ "loss": 2.9414,
1879
+ "step": 6180
1880
+ },
1881
+ {
1882
+ "epoch": 1.6022741956325106,
1883
+ "learning_rate": 1.9027615713730067e-07,
1884
+ "loss": 2.9618,
1885
+ "step": 6200
1886
+ },
1887
+ {
1888
+ "epoch": 1.607442822070035,
1889
+ "learning_rate": 1.902437443277583e-07,
1890
+ "loss": 2.9328,
1891
+ "step": 6220
1892
+ },
1893
+ {
1894
+ "epoch": 1.612611448507559,
1895
+ "learning_rate": 1.90211331518216e-07,
1896
+ "loss": 2.8855,
1897
+ "step": 6240
1898
+ },
1899
+ {
1900
+ "epoch": 1.6177800749450832,
1901
+ "learning_rate": 1.9017891870867368e-07,
1902
+ "loss": 2.9168,
1903
+ "step": 6260
1904
+ },
1905
+ {
1906
+ "epoch": 1.6229487013826076,
1907
+ "learning_rate": 1.9014650589913132e-07,
1908
+ "loss": 2.9443,
1909
+ "step": 6280
1910
+ },
1911
+ {
1912
+ "epoch": 1.628117327820132,
1913
+ "learning_rate": 1.90114093089589e-07,
1914
+ "loss": 2.9139,
1915
+ "step": 6300
1916
+ },
1917
+ {
1918
+ "epoch": 1.6332859542576559,
1919
+ "learning_rate": 1.9008168028004667e-07,
1920
+ "loss": 2.9343,
1921
+ "step": 6320
1922
+ },
1923
+ {
1924
+ "epoch": 1.6384545806951802,
1925
+ "learning_rate": 1.9004926747050433e-07,
1926
+ "loss": 2.9279,
1927
+ "step": 6340
1928
+ },
1929
+ {
1930
+ "epoch": 1.6436232071327046,
1931
+ "learning_rate": 1.9001685466096202e-07,
1932
+ "loss": 2.8902,
1933
+ "step": 6360
1934
+ },
1935
+ {
1936
+ "epoch": 1.6487918335702287,
1937
+ "learning_rate": 1.8998444185141968e-07,
1938
+ "loss": 2.9188,
1939
+ "step": 6380
1940
+ },
1941
+ {
1942
+ "epoch": 1.6539604600077529,
1943
+ "learning_rate": 1.8995202904187734e-07,
1944
+ "loss": 2.9314,
1945
+ "step": 6400
1946
+ },
1947
+ {
1948
+ "epoch": 1.6591290864452772,
1949
+ "learning_rate": 1.8991961623233503e-07,
1950
+ "loss": 2.9729,
1951
+ "step": 6420
1952
+ },
1953
+ {
1954
+ "epoch": 1.6642977128828014,
1955
+ "learning_rate": 1.8988720342279266e-07,
1956
+ "loss": 2.9336,
1957
+ "step": 6440
1958
+ },
1959
+ {
1960
+ "epoch": 1.6694663393203255,
1961
+ "learning_rate": 1.8985479061325035e-07,
1962
+ "loss": 2.9242,
1963
+ "step": 6460
1964
+ },
1965
+ {
1966
+ "epoch": 1.6746349657578499,
1967
+ "learning_rate": 1.8982237780370804e-07,
1968
+ "loss": 3.0066,
1969
+ "step": 6480
1970
+ },
1971
+ {
1972
+ "epoch": 1.6798035921953742,
1973
+ "learning_rate": 1.8978996499416568e-07,
1974
+ "loss": 2.9272,
1975
+ "step": 6500
1976
+ },
1977
+ {
1978
+ "epoch": 1.6849722186328981,
1979
+ "learning_rate": 1.8975755218462336e-07,
1980
+ "loss": 2.8841,
1981
+ "step": 6520
1982
+ },
1983
+ {
1984
+ "epoch": 1.6901408450704225,
1985
+ "learning_rate": 1.8972513937508103e-07,
1986
+ "loss": 2.9365,
1987
+ "step": 6540
1988
+ },
1989
+ {
1990
+ "epoch": 1.6953094715079469,
1991
+ "learning_rate": 1.896927265655387e-07,
1992
+ "loss": 2.9948,
1993
+ "step": 6560
1994
+ },
1995
+ {
1996
+ "epoch": 1.700478097945471,
1997
+ "learning_rate": 1.8966031375599638e-07,
1998
+ "loss": 2.9,
1999
+ "step": 6580
2000
+ },
2001
+ {
2002
+ "epoch": 1.7056467243829951,
2003
+ "learning_rate": 1.8962790094645404e-07,
2004
+ "loss": 2.9318,
2005
+ "step": 6600
2006
+ },
2007
+ {
2008
+ "epoch": 1.7108153508205195,
2009
+ "learning_rate": 1.895954881369117e-07,
2010
+ "loss": 2.8761,
2011
+ "step": 6620
2012
+ },
2013
+ {
2014
+ "epoch": 1.7159839772580436,
2015
+ "learning_rate": 1.895630753273694e-07,
2016
+ "loss": 2.9628,
2017
+ "step": 6640
2018
+ },
2019
+ {
2020
+ "epoch": 1.7211526036955678,
2021
+ "learning_rate": 1.8953066251782702e-07,
2022
+ "loss": 2.9114,
2023
+ "step": 6660
2024
+ },
2025
+ {
2026
+ "epoch": 1.7263212301330921,
2027
+ "learning_rate": 1.894982497082847e-07,
2028
+ "loss": 2.9338,
2029
+ "step": 6680
2030
+ },
2031
+ {
2032
+ "epoch": 1.7314898565706165,
2033
+ "learning_rate": 1.8946583689874237e-07,
2034
+ "loss": 2.9956,
2035
+ "step": 6700
2036
+ },
2037
+ {
2038
+ "epoch": 1.7366584830081406,
2039
+ "learning_rate": 1.8943342408920004e-07,
2040
+ "loss": 2.9089,
2041
+ "step": 6720
2042
+ },
2043
+ {
2044
+ "epoch": 1.7418271094456648,
2045
+ "learning_rate": 1.8940101127965772e-07,
2046
+ "loss": 2.9213,
2047
+ "step": 6740
2048
+ },
2049
+ {
2050
+ "epoch": 1.7469957358831891,
2051
+ "learning_rate": 1.8936859847011539e-07,
2052
+ "loss": 2.9461,
2053
+ "step": 6760
2054
+ },
2055
+ {
2056
+ "epoch": 1.7521643623207133,
2057
+ "learning_rate": 1.8933618566057305e-07,
2058
+ "loss": 2.9103,
2059
+ "step": 6780
2060
+ },
2061
+ {
2062
+ "epoch": 1.7573329887582374,
2063
+ "learning_rate": 1.8930377285103074e-07,
2064
+ "loss": 2.9025,
2065
+ "step": 6800
2066
+ },
2067
+ {
2068
+ "epoch": 1.7625016151957618,
2069
+ "learning_rate": 1.8927136004148837e-07,
2070
+ "loss": 2.9723,
2071
+ "step": 6820
2072
+ },
2073
+ {
2074
+ "epoch": 1.767670241633286,
2075
+ "learning_rate": 1.8923894723194606e-07,
2076
+ "loss": 2.9768,
2077
+ "step": 6840
2078
+ },
2079
+ {
2080
+ "epoch": 1.77283886807081,
2081
+ "learning_rate": 1.8920653442240375e-07,
2082
+ "loss": 2.8984,
2083
+ "step": 6860
2084
+ },
2085
+ {
2086
+ "epoch": 1.7780074945083344,
2087
+ "learning_rate": 1.8917412161286138e-07,
2088
+ "loss": 2.9167,
2089
+ "step": 6880
2090
+ },
2091
+ {
2092
+ "epoch": 1.7831761209458588,
2093
+ "learning_rate": 1.8914170880331907e-07,
2094
+ "loss": 2.9561,
2095
+ "step": 6900
2096
+ },
2097
+ {
2098
+ "epoch": 1.788344747383383,
2099
+ "learning_rate": 1.8910929599377673e-07,
2100
+ "loss": 2.8689,
2101
+ "step": 6920
2102
+ },
2103
+ {
2104
+ "epoch": 1.793513373820907,
2105
+ "learning_rate": 1.890768831842344e-07,
2106
+ "loss": 2.9081,
2107
+ "step": 6940
2108
+ },
2109
+ {
2110
+ "epoch": 1.7986820002584314,
2111
+ "learning_rate": 1.8904447037469208e-07,
2112
+ "loss": 2.8818,
2113
+ "step": 6960
2114
+ },
2115
+ {
2116
+ "epoch": 1.8038506266959555,
2117
+ "learning_rate": 1.8901205756514975e-07,
2118
+ "loss": 2.9503,
2119
+ "step": 6980
2120
+ },
2121
+ {
2122
+ "epoch": 1.8090192531334797,
2123
+ "learning_rate": 1.889796447556074e-07,
2124
+ "loss": 2.985,
2125
+ "step": 7000
2126
+ },
2127
+ {
2128
+ "epoch": 1.814187879571004,
2129
+ "learning_rate": 1.889472319460651e-07,
2130
+ "loss": 2.9812,
2131
+ "step": 7020
2132
+ },
2133
+ {
2134
+ "epoch": 1.8193565060085284,
2135
+ "learning_rate": 1.8891481913652273e-07,
2136
+ "loss": 2.9244,
2137
+ "step": 7040
2138
+ },
2139
+ {
2140
+ "epoch": 1.8245251324460523,
2141
+ "learning_rate": 1.8888240632698042e-07,
2142
+ "loss": 2.9199,
2143
+ "step": 7060
2144
+ },
2145
+ {
2146
+ "epoch": 1.8296937588835767,
2147
+ "learning_rate": 1.8884999351743808e-07,
2148
+ "loss": 2.9121,
2149
+ "step": 7080
2150
+ },
2151
+ {
2152
+ "epoch": 1.834862385321101,
2153
+ "learning_rate": 1.8881758070789574e-07,
2154
+ "loss": 2.9476,
2155
+ "step": 7100
2156
+ },
2157
+ {
2158
+ "epoch": 1.8400310117586252,
2159
+ "learning_rate": 1.8878516789835343e-07,
2160
+ "loss": 2.9117,
2161
+ "step": 7120
2162
+ },
2163
+ {
2164
+ "epoch": 1.8451996381961493,
2165
+ "learning_rate": 1.887527550888111e-07,
2166
+ "loss": 2.8593,
2167
+ "step": 7140
2168
+ },
2169
+ {
2170
+ "epoch": 1.8503682646336737,
2171
+ "learning_rate": 1.8872034227926876e-07,
2172
+ "loss": 2.9515,
2173
+ "step": 7160
2174
+ },
2175
+ {
2176
+ "epoch": 1.8555368910711978,
2177
+ "learning_rate": 1.8868792946972644e-07,
2178
+ "loss": 2.8816,
2179
+ "step": 7180
2180
+ },
2181
+ {
2182
+ "epoch": 1.860705517508722,
2183
+ "learning_rate": 1.8865551666018408e-07,
2184
+ "loss": 2.8811,
2185
+ "step": 7200
2186
+ },
2187
+ {
2188
+ "epoch": 1.8658741439462463,
2189
+ "learning_rate": 1.8862310385064177e-07,
2190
+ "loss": 2.8955,
2191
+ "step": 7220
2192
+ },
2193
+ {
2194
+ "epoch": 1.8710427703837706,
2195
+ "learning_rate": 1.8859069104109943e-07,
2196
+ "loss": 2.8895,
2197
+ "step": 7240
2198
+ },
2199
+ {
2200
+ "epoch": 1.8762113968212948,
2201
+ "learning_rate": 1.885582782315571e-07,
2202
+ "loss": 2.8621,
2203
+ "step": 7260
2204
+ },
2205
+ {
2206
+ "epoch": 1.881380023258819,
2207
+ "learning_rate": 1.8852586542201478e-07,
2208
+ "loss": 2.8738,
2209
+ "step": 7280
2210
+ },
2211
+ {
2212
+ "epoch": 1.8865486496963433,
2213
+ "learning_rate": 1.8849345261247244e-07,
2214
+ "loss": 2.9246,
2215
+ "step": 7300
2216
+ },
2217
+ {
2218
+ "epoch": 1.8917172761338674,
2219
+ "learning_rate": 1.884610398029301e-07,
2220
+ "loss": 2.898,
2221
+ "step": 7320
2222
+ },
2223
+ {
2224
+ "epoch": 1.8968859025713916,
2225
+ "learning_rate": 1.884286269933878e-07,
2226
+ "loss": 2.9362,
2227
+ "step": 7340
2228
+ },
2229
+ {
2230
+ "epoch": 1.902054529008916,
2231
+ "learning_rate": 1.8839621418384545e-07,
2232
+ "loss": 2.9173,
2233
+ "step": 7360
2234
+ },
2235
+ {
2236
+ "epoch": 1.90722315544644,
2237
+ "learning_rate": 1.8836380137430312e-07,
2238
+ "loss": 2.9209,
2239
+ "step": 7380
2240
+ },
2241
+ {
2242
+ "epoch": 1.9123917818839642,
2243
+ "learning_rate": 1.883313885647608e-07,
2244
+ "loss": 2.9294,
2245
+ "step": 7400
2246
+ },
2247
+ {
2248
+ "epoch": 1.9175604083214886,
2249
+ "learning_rate": 1.8829897575521844e-07,
2250
+ "loss": 2.9321,
2251
+ "step": 7420
2252
+ },
2253
+ {
2254
+ "epoch": 1.922729034759013,
2255
+ "learning_rate": 1.8826656294567613e-07,
2256
+ "loss": 2.8453,
2257
+ "step": 7440
2258
+ },
2259
+ {
2260
+ "epoch": 1.927897661196537,
2261
+ "learning_rate": 1.882341501361338e-07,
2262
+ "loss": 2.8941,
2263
+ "step": 7460
2264
+ },
2265
+ {
2266
+ "epoch": 1.9330662876340612,
2267
+ "learning_rate": 1.8820173732659145e-07,
2268
+ "loss": 2.932,
2269
+ "step": 7480
2270
+ },
2271
+ {
2272
+ "epoch": 1.9382349140715855,
2273
+ "learning_rate": 1.8816932451704914e-07,
2274
+ "loss": 2.9291,
2275
+ "step": 7500
2276
+ },
2277
+ {
2278
+ "epoch": 1.9434035405091097,
2279
+ "learning_rate": 1.881369117075068e-07,
2280
+ "loss": 2.9375,
2281
+ "step": 7520
2282
+ },
2283
+ {
2284
+ "epoch": 1.9485721669466338,
2285
+ "learning_rate": 1.8810449889796446e-07,
2286
+ "loss": 2.9291,
2287
+ "step": 7540
2288
+ },
2289
+ {
2290
+ "epoch": 1.9537407933841582,
2291
+ "learning_rate": 1.8807208608842215e-07,
2292
+ "loss": 2.8834,
2293
+ "step": 7560
2294
+ },
2295
+ {
2296
+ "epoch": 1.9589094198216825,
2297
+ "learning_rate": 1.880396732788798e-07,
2298
+ "loss": 2.9606,
2299
+ "step": 7580
2300
+ },
2301
+ {
2302
+ "epoch": 1.9640780462592065,
2303
+ "learning_rate": 1.8800726046933748e-07,
2304
+ "loss": 2.8769,
2305
+ "step": 7600
2306
+ },
2307
+ {
2308
+ "epoch": 1.9692466726967308,
2309
+ "learning_rate": 1.8797484765979514e-07,
2310
+ "loss": 2.9195,
2311
+ "step": 7620
2312
+ },
2313
+ {
2314
+ "epoch": 1.9744152991342552,
2315
+ "learning_rate": 1.879424348502528e-07,
2316
+ "loss": 2.9,
2317
+ "step": 7640
2318
+ },
2319
+ {
2320
+ "epoch": 1.9795839255717793,
2321
+ "learning_rate": 1.879100220407105e-07,
2322
+ "loss": 2.9407,
2323
+ "step": 7660
2324
+ },
2325
+ {
2326
+ "epoch": 1.9847525520093035,
2327
+ "learning_rate": 1.8787760923116815e-07,
2328
+ "loss": 2.9094,
2329
+ "step": 7680
2330
+ },
2331
+ {
2332
+ "epoch": 1.9899211784468278,
2333
+ "learning_rate": 1.878451964216258e-07,
2334
+ "loss": 2.9574,
2335
+ "step": 7700
2336
+ },
2337
+ {
2338
+ "epoch": 1.995089804884352,
2339
+ "learning_rate": 1.878127836120835e-07,
2340
+ "loss": 2.8978,
2341
+ "step": 7720
2342
+ },
2343
+ {
2344
+ "epoch": 2.0,
2345
+ "eval_bleu": 0.0747,
2346
+ "eval_gen_len": 113.0081,
2347
+ "eval_loss": 2.8610005378723145,
2348
+ "eval_runtime": 1965.2378,
2349
+ "eval_samples_per_second": 0.876,
2350
+ "eval_steps_per_second": 0.438,
2351
+ "step": 7739
  }
  ],
  "logging_steps": 20,
 
@@ -1200,7 +2368,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.541046422749184e+16,
+ "total_flos": 5.079387107794944e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null