End of training
Files changed:
- .ipynb_checkpoints/vocab-checkpoint.json +1 -0
- all_results.json +14 -0
- eval_results.json +9 -0
- pytorch_model.bin +1 -1
- runs/Jan27_15-07-11_job-fa775f5b-8438-4c3d-95fa-ed70ddaee577/events.out.tfevents.1643296094.job-fa775f5b-8438-4c3d-95fa-ed70ddaee577.936763.0 +2 -2
- runs/Jan27_15-07-11_job-fa775f5b-8438-4c3d-95fa-ed70ddaee577/events.out.tfevents.1643407175.job-fa775f5b-8438-4c3d-95fa-ed70ddaee577.936763.2 +3 -0
- train_results.json +8 -0
- trainer_state.json +430 -0
.ipynb_checkpoints/vocab-checkpoint.json
ADDED
@@ -0,0 +1 @@
{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "a": 5, "á": 6, "à": 7, "ả": 8, "ã": 9, "ạ": 10, "e": 11, "é": 12, "è": 13, "ẻ": 14, "ẽ": 15, "ẹ": 16, "ê": 17, "ế": 18, "ề": 19, "ể": 20, "ễ": 21, "ệ": 22, "i": 23, "í": 24, "ì": 25, "ỉ": 26, "ĩ": 27, "ị": 28, "o": 29, "ó": 30, "ò": 31, "ỏ": 32, "õ": 33, "ọ": 34, "ơ": 35, "ớ": 36, "ờ": 37, "ở": 38, "ỡ": 39, "ợ": 40, "ô": 41, "ố": 42, "ồ": 43, "ổ": 44, "ỗ": 45, "ộ": 46, "u": 47, "ú": 48, "ù": 49, "ủ": 50, "ũ": 51, "ụ": 52, "ư": 53, "ứ": 54, "ừ": 55, "ử": 56, "ữ": 57, "ự": 58, "y": 59, "ỳ": 60, "ý": 61, "ỷ": 62, "ỹ": 63, "ỵ": 64, "ă": 65, "ắ": 66, "ằ": 67, "ẳ": 68, "ẵ": 69, "ặ": 70, "â": 71, "ấ": 72, "ầ": 73, "ẩ": 74, "ẫ": 75, "ậ": 76, "đ": 77, "q": 78, "w": 79, "r": 80, "t": 81, "p": 82, "s": 83, "d": 84, "f": 85, "g": 86, "h": 87, "j": 88, "k": 89, "l": 90, "z": 91, "x": 92, "c": 93, "v": 94, "b": 95, "n": 96, "m": 97, "th": 98, "ch": 99, "kh": 100, "ph": 101, "nh": 102, "gh": 103, "qu": 104, "ng": 105, "ngh": 106, "tr": 107, "ác": 108, "ạc": 109, "ai": 110, "ái": 111, "ài": 112, "ải": 113, "ãi": 114, "ại": 115, "am": 116, "ám": 117, "àm": 118, "ảm": 119, "ãm": 120, "ạm": 121, "an": 122, "án": 123, "àn": 124, "ản": 125, "ãn": 126, "ạn": 127, "ao": 128, "áo": 129, "ào": 130, "ảo": 131, "ão": 132, "ạo": 133, "au": 134, "áu": 135, "àu": 136, "ảu": 137, "ãu": 138, "ạu": 139, "áp": 140, "ạp": 141, "át": 142, "ạt": 143, "ay": 144, "áy": 145, "ày": 146, "ảy": 147, "ãy": 148, "ạy": 149, "ắc": 150, "ặc": 151, "ăm": 152, "ằm": 153, "ắm": 154, "ẳm": 155, "ẵm": 156, "ặm": 157, "ăn": 158, "ắn": 159, "ằn": 160, "ẳn": 161, "ẵn": 162, "ặn": 163, "ắp": 164, "ặp": 165, "ắt": 166, "ặt": 167, "ấc": 168, "ậc": 169, "âm": 170, "ấm": 171, "ầm": 172, "ẩm": 173, "ẫm": 174, "ậm": 175, "ân": 176, "ấn": 177, "ần": 178, "ẩn": 179, "ẫn": 180, "ận": 181, "ấp": 182, "ập": 183, "ất": 184, "ật": 185, "âu": 186, "ấu": 187, "ầu": 188, "ẩu": 189, "ẫu": 190, "ậu": 191, "ây": 192, "ấy": 193, "ầy": 194, "ẩy": 195, "ẫy": 196, "ậy": 197, "éc": 198, "ẹc": 199, "em": 200, "ém": 201, "èm": 202, "ẻm": 203, "ẽm": 204, "ẹm": 205, "en": 206, "én": 207, "èn": 208, "ẻn": 209, "ẽn": 210, "ẹn": 211, "eo": 212, "éo": 213, "èo": 214, "ẻo": 215, "ẽo": 216, "ẹo": 217, "ép": 218, "ẹp": 219, "ét": 220, "ẹt": 221, "êm": 222, "ếm": 223, "ềm": 224, "ễm": 225, "ệm": 226, "ên": 227, "ến": 228, "ền": 229, "ển": 230, "ện": 231, "ếp": 232, "ệp": 233, "ết": 234, "ệt": 235, "êu": 236, "ếu": 237, "ều": 238, "ểu": 239, "ễu": 240, "ệu": 241, "ia": 242, "ía": 243, "ìa": 244, "ỉa": 245, "ĩa": 246, "ịa": 247, "im": 248, "ím": 249, "ìm": 250, "ỉm": 251, "ĩm": 252, "ịm": 253, "in": 254, "ín": 255, "ìn": 256, "ỉn": 257, "ịn": 258, "íp": 259, "ịp": 260, "ít": 261, "ịt": 262, "iu": 263, "íu": 264, "ìu": 265, "ỉu": 266, "ĩu": 267, "ịu": 268, "oa": 269, "óa": 270, "òa": 271, "ỏa": 272, "õa": 273, "ọa": 274, "oà": 275, "óc": 276, "ọc": 277, "oe": 278, "óe": 279, "òe": 280, "ỏe": 281, "ọe": 282, "oẹ": 283, "oi": 284, "ói": 285, "òi": 286, "ỏi": 287, "õi": 288, "ọi": 289, "om": 290, "óm": 291, "òm": 292, "ỏm": 293, "õm": 294, "ọm": 295, "on": 296, "ón": 297, "òn": 298, "ỏn": 299, "õn": 300, "ọn": 301, "óp": 302, "ọp": 303, "ót": 304, "ọt": 305, "ốc": 306, "ộc": 307, "ôi": 308, "ối": 309, "ồi": 310, "ổi": 311, "ỗi": 312, "ội": 313, "ôm": 314, "ốm": 315, "ồm": 316, "ổm": 317, "ỗm": 318, "ộm": 319, "ôn": 320, "ốn": 321, "ồn": 322, "ổn": 323, "ỗn": 324, "ộn": 325, "ốp": 326, "ộp": 327, "ốt": 328, "ột": 329, "ơi": 330, "ới": 331, "ời": 332, "ởi": 333, "ỡi": 334, "ợi": 335, "ơm": 336, "ớm": 337, "ờm": 338, "ởm": 339, "ỡm": 
340, "ợm": 341, "ơn": 342, "ớn": 343, "ờn": 344, "ởn": 345, "ỡn": 346, "ợn": 347, "ớp": 348, "ợp": 349, "ớt": 350, "ợt": 351, "ua": 352, "úa": 353, "ùa": 354, "ủa": 355, "ũa": 356, "ụa": 357, "úc": 358, "ục": 359, "uê": 360, "uế": 361, "uề": 362, "uể": 363, "uệ": 364, "ui": 365, "úi": 366, "ùi": 367, "ủi": 368, "ũi": 369, "ụi": 370, "um": 371, "úm": 372, "ùm": 373, "ủm": 374, "ũm": 375, "ụm": 376, "un": 377, "ún": 378, "ùn": 379, "ủn": 380, "ũn": 381, "ụn": 382, "úp": 383, "ụp": 384, "út": 385, "ụt": 386, "uy": 387, "úy": 388, "ùy": 389, "ủy": 390, "ũy": 391, "ụy": 392, "ưa": 393, "ứa": 394, "ừa": 395, "ửa": 396, "ữa": 397, "ựa": 398, "ức": 399, "ực": 400, "ửi": 401, "ừm": 402, "uơ": 403, "uở": 404, "ứt": 405, "ựt": 406, "ưu": 407, "ứu": 408, "ừu": 409, "ửu": 410, "ữu": 411, "ựu": 412, "sh": 413, "aw": 414, "ee": 415, "ea": 416, "ei": 417, "ew": 418, "eu": 419, "ie": 420, "oo": 421, "ou": 422, "ow": 423, "oy": 424, "ue": 425, "io": 426, "ách": 427, "ạch": 428, "ang": 429, "áng": 430, "àng": 431, "ảng": 432, "ãng": 433, "ạng": 434, "anh": 435, "ánh": 436, "ành": 437, "ảnh": 438, "ãnh": 439, "ạnh": 440, "ăng": 441, "ắng": 442, "ằng": 443, "ẳng": 444, "ẵng": 445, "ặng": 446, "âng": 447, "ấng": 448, "ầng": 449, "ẩng": 450, "ẫng": 451, "ậng": 452, "eng": 453, "éng": 454, "èng": 455, "ẻng": 456, "ếch": 457, "ệch": 458, "ênh": 459, "ếnh": 460, "ềnh": 461, "ểnh": 462, "ễnh": 463, "ệnh": 464, "ích": 465, "ịch": 466, "iếc": 467, "iệc": 468, "iêm": 469, "iếm": 470, "iềm": 471, "iểm": 472, "iễm": 473, "iệm": 474, "iên": 475, "iến": 476, "iền": 477, "iển": 478, "iễn": 479, "iện": 480, "iếp": 481, "iệp": 482, "iết": 483, "iệt": 484, "iêu": 485, "iếu": 486, "iều": 487, "iểu": 488, "iễu": 489, "iệu": 490, "inh": 491, "ính": 492, "ình": 493, "ỉnh": 494, "ĩnh": 495, "ịnh": 496, "oác": 497, "oạc": 498, "oai": 499, "oái": 500, "oài": 501, "oải": 502, "oãi": 503, "oại": 504, "oàm": 505, "oan": 506, "oán": 507, "oàn": 508, "oản": 509, "oãn": 510, "oạn": 511, "oao": 512, "oáo": 513, "oáp": 514, "oạp": 515, "oát": 516, "oạt": 517, "oay": 518, "oáy": 519, "oảy": 520, "oắc": 521, "oặc": 522, "oăm": 523, "oăn": 524, "oẳn": 525, "oắn": 526, "oằn": 527, "oắt": 528, "oặt": 529, "oen": 530, "oẻn": 531, "oeo": 532, "oéo": 533, "oèo": 534, "oẻo": 535, "oét": 536, "oẹt": 537, "ong": 538, "óng": 539, "òng": 540, "ỏng": 541, "õng": 542, "ọng": 543, "oóc": 544, "oọc": 545, "ông": 546, "ống": 547, "ồng": 548, "ổng": 549, "ỗng": 550, "ộng": 551, "uân": 552, "uấn": 553, "uần": 554, "uẩn": 555, "uẫn": 556, "uận": 557, "uất": 558, "uật": 559, "uây": 560, "uấy": 561, "uầy": 562, "ung": 563, "úng": 564, "ùng": 565, "ủng": 566, "ũng": 567, "ụng": 568, "uốc": 569, "uộc": 570, "uôi": 571, "uối": 572, "uồi": 573, "uổi": 574, "uỗi": 575, "uội": 576, "uôm": 577, "uốm": 578, "uồm": 579, "uỗm": 580, "uộm": 581, "uôn": 582, "uốn": 583, "uồn": 584, "uỗn": 585, "uộn": 586, "uốt": 587, "uột": 588, "uýt": 589, "uỵt": 590, "uya": 591, "uỷu": 592, "ưng": 593, "ứng": 594, "ừng": 595, "ửng": 596, "ững": 597, "ựng": 598, "ước": 599, "ược": 600, "ươi": 601, "ưới": 602, "ười": 603, "ưởi": 604, "ưỡi": 605, "ượi": 606, "ươm": 607, "ướm": 608, "ườm": 609, "ượm": 610, "ươn": 611, "ướn": 612, "ườn": 613, "ưỡn": 614, "ượn": 615, "ướp": 616, "ượp": 617, "ướt": 618, "ượt": 619, "ươu": 620, "ướu": 621, "ượu": 622, "yêm": 623, "yếm": 624, "yểm": 625, "yên": 626, "yến": 627, "yêu": 628, "yếu": 629, "yểu": 630, "yết": 631, "iêng": 632, "iếng": 633, "iềng": 634, "iểng": 635, "iễng": 636, "iệng": 637, "oách": 638, "oạch": 639, "oang": 640, "oáng": 641, "oàng": 642, 
"oảng": 643, "oãng": 644, "oạng": 645, "oanh": 646, "oánh": 647, "oành": 648, "oạnh": 649, "oảnh": 650, "oăng": 651, "oắng": 652, "oằng": 653, "oẳng": 654, "oong": 655, "uếch": 656, "uênh": 657, "uông": 658, "uống": 659, "uồng": 660, "uổng": 661, "uỗng": 662, "uộng": 663, "uých": 664, "uỵch": 665, "uyên": 666, "uyến": 667, "uyền": 668, "uyển": 669, "uyễn": 670, "uyện": 671, "uyết": 672, "uyệt": 673, "uynh": 674, "uỳnh": 675, "uýnh": 676, "uỷnh": 677, "ương": 678, "ướng": 679, "ường": 680, "ưởng": 681, "ưỡng": 682, "ượng": 683, "op": 684, "ot": 685, "gi": 686, "ap": 687, "at": 688, "ac": 689, "it": 690, "ip": 691, "ic": 692, "ep": 693, "et": 694, "ec": 695}
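The checkpointed vocab maps each token (single characters plus Vietnamese onset/rhyme clusters such as "ngh" or "iêng") to an integer id, with <pad>, <s>, </s>, <unk> specials and "|" as the word delimiter — the layout a CTC tokenizer expects, with <pad> typically serving as the CTC blank in wav2vec2-style setups. A minimal sketch of reading it with plain json and doing a greedy CTC decode over predicted ids; the local file name and the toy id sequence are assumptions, not part of this commit:

```python
import json

# Assumed local copy; in this commit the file lives under .ipynb_checkpoints/.
with open("vocab.json", encoding="utf-8") as f:
    vocab = json.load(f)                      # token -> id, e.g. {"<pad>": 0, ..., "ec": 695}

id_to_token = {i: t for t, i in vocab.items()}  # id -> token, for decoding model outputs

def ctc_ids_to_text(ids, blank_id=0):
    """Greedy CTC decode: collapse repeated ids, drop the blank (<pad>), map '|' to spaces."""
    out, prev = [], None
    for i in ids:
        if i != prev and i != blank_id:
            out.append(id_to_token[i])
        prev = i
    return "".join(out).replace("|", " ").strip()

print(len(vocab))                                   # 696 entries, ids 0..695
print(ctc_ids_to_text([98, 98, 0, 11, 29, 4, 84, 11]))  # "theo de" (toy id sequence)
```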
all_results.json
ADDED
@@ -0,0 +1,14 @@
{
    "epoch": 50.0,
    "eval_loss": 4.169058322906494,
    "eval_runtime": 34.4023,
    "eval_samples": 761,
    "eval_samples_per_second": 22.121,
    "eval_steps_per_second": 1.395,
    "eval_wer": 0.4132525828286427,
    "train_loss": 0.4784129307122878,
    "train_runtime": 111036.8702,
    "train_samples": 25915,
    "train_samples_per_second": 11.67,
    "train_steps_per_second": 0.365
}
eval_results.json
ADDED
@@ -0,0 +1,9 @@
{
    "epoch": 50.0,
    "eval_loss": 4.169058322906494,
    "eval_runtime": 34.4023,
    "eval_samples": 761,
    "eval_samples_per_second": 22.121,
    "eval_steps_per_second": 1.395,
    "eval_wer": 0.4132525828286427
}
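all_results.json and eval_results.json are the flat metric summaries the Trainer writes at the end of the run; eval_wer is stored as a fraction. A short sketch of reading them back and reporting the headline numbers (paths assumed relative to the repository root):

```python
import json

with open("all_results.json", encoding="utf-8") as f:
    results = json.load(f)

# eval_wer is a fraction, so 0.4132... corresponds to roughly a 41.3% word error rate.
print(f"epochs:     {results['epoch']:.0f}")
print(f"eval WER:   {results['eval_wer'] * 100:.2f}%")
print(f"eval loss:  {results['eval_loss']:.3f}")
print(f"train loss: {results['train_loss']:.3f}")
print(f"train time: {results['train_runtime'] / 3600:.1f} h")  # 111036.87 s ≈ 30.8 h
```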
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c05448377a54c2003bbb8a101bfa860a234ccc179698df82f58d8267e8d6d758
 size 3853879025
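pytorch_model.bin is stored through Git LFS, so the commit only touches the three-line pointer file (spec version, object oid, size); the ~3.85 GB weights themselves live in LFS storage. A sketch of checking a locally downloaded copy against the new pointer, assuming the file has already been fetched (e.g. via `git lfs pull`):

```python
import hashlib
import os

EXPECTED_OID = "c05448377a54c2003bbb8a101bfa860a234ccc179698df82f58d8267e8d6d758"
EXPECTED_SIZE = 3853879025

def sha256_of(path, chunk_size=1 << 20):
    """Stream the file so the 3.85 GB checkpoint never has to fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

path = "pytorch_model.bin"
assert os.path.getsize(path) == EXPECTED_SIZE, "size does not match the LFS pointer"
assert sha256_of(path) == EXPECTED_OID, "sha256 does not match the LFS pointer"
print("checkpoint matches the committed LFS pointer")
```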
runs/Jan27_15-07-11_job-fa775f5b-8438-4c3d-95fa-ed70ddaee577/events.out.tfevents.1643296094.job-fa775f5b-8438-4c3d-95fa-ed70ddaee577.936763.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:d2be5c7956ad26cb9eada2947b4856f86d34bddfcbe8e39ea894aa76262d294e
+size 18082
runs/Jan27_15-07-11_job-fa775f5b-8438-4c3d-95fa-ed70ddaee577/events.out.tfevents.1643407175.job-fa775f5b-8438-4c3d-95fa-ed70ddaee577.936763.2
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1cb93f5c9c99b4792687041e26c92c95b4edfd96760988faa451327c4d9fe9a2
size 364
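The runs/ directory holds TensorBoard event files, also committed as LFS pointers. Once they are pulled locally, the logged scalars can be read back with TensorBoard's event accumulator; a sketch, assuming tensorboard is installed locally and that the Trainer logged under the usual tag names (which may differ):

```python
from tensorboard.backend.event_processing import event_accumulator

# Local path to the run directory committed above.
run_dir = "runs/Jan27_15-07-11_job-fa775f5b-8438-4c3d-95fa-ed70ddaee577"

acc = event_accumulator.EventAccumulator(run_dir)
acc.Reload()                    # parse every events.out.tfevents.* file in the directory

print(acc.Tags()["scalars"])    # e.g. train/loss, eval/loss, eval/wer (assumed tag names)
for event in acc.Scalars("eval/wer"):
    print(event.step, event.value)
```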
train_results.json
ADDED
@@ -0,0 +1,8 @@
{
    "epoch": 50.0,
    "train_loss": 0.4784129307122878,
    "train_runtime": 111036.8702,
    "train_samples": 25915,
    "train_samples_per_second": 11.67,
    "train_steps_per_second": 0.365
}
trainer_state.json
ADDED
@@ -0,0 +1,430 @@
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 50.0,
  "global_step": 40500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.85,
      "learning_rate": 4.9800000000000004e-05,
      "loss": 4.6222,
      "step": 1500
    },
    {
      "epoch": 1.85,
      "eval_loss": 5.947904586791992,
      "eval_runtime": 35.3873,
      "eval_samples_per_second": 21.505,
      "eval_steps_per_second": 1.356,
      "eval_wer": 0.5473815461346634,
      "step": 1500
    },
    {
      "epoch": 3.7,
      "learning_rate": 4.8084615384615386e-05,
      "loss": 1.1362,
      "step": 3000
    },
    {
      "epoch": 3.7,
      "eval_loss": 7.979872226715088,
      "eval_runtime": 34.3508,
      "eval_samples_per_second": 22.154,
      "eval_steps_per_second": 1.397,
      "eval_wer": 0.509440684004275,
      "step": 3000
    },
    {
      "epoch": 5.56,
      "learning_rate": 4.616153846153846e-05,
      "loss": 0.7814,
      "step": 4500
    },
    {
      "epoch": 5.56,
      "eval_loss": 5.032960891723633,
      "eval_runtime": 33.9113,
      "eval_samples_per_second": 22.441,
      "eval_steps_per_second": 1.415,
      "eval_wer": 0.47239045244032773,
      "step": 4500
    },
    {
      "epoch": 7.41,
      "learning_rate": 4.423974358974359e-05,
      "loss": 0.6281,
      "step": 6000
    },
    {
      "epoch": 7.41,
      "eval_loss": 2.3483684062957764,
      "eval_runtime": 35.8392,
      "eval_samples_per_second": 21.234,
      "eval_steps_per_second": 1.339,
      "eval_wer": 0.5019593872461703,
      "step": 6000
    },
    {
      "epoch": 9.26,
      "learning_rate": 4.2316666666666674e-05,
      "loss": 0.5472,
      "step": 7500
    },
    {
      "epoch": 9.26,
      "eval_loss": 2.249516487121582,
      "eval_runtime": 35.6948,
      "eval_samples_per_second": 21.32,
      "eval_steps_per_second": 1.345,
      "eval_wer": 0.47933737085856787,
      "step": 7500
    },
    {
      "epoch": 11.11,
      "learning_rate": 4.039358974358974e-05,
      "loss": 0.4827,
      "step": 9000
    },
    {
      "epoch": 11.11,
      "eval_loss": 1.1529797315597534,
      "eval_runtime": 35.6048,
      "eval_samples_per_second": 21.373,
      "eval_steps_per_second": 1.348,
      "eval_wer": 0.47684360527253294,
      "step": 9000
    },
    {
      "epoch": 12.96,
      "learning_rate": 3.847051282051282e-05,
      "loss": 0.4327,
      "step": 10500
    },
    {
      "epoch": 12.96,
      "eval_loss": 1.6159653663635254,
      "eval_runtime": 34.1129,
      "eval_samples_per_second": 22.308,
      "eval_steps_per_second": 1.407,
      "eval_wer": 0.4645529034556466,
      "step": 10500
    },
    {
      "epoch": 14.81,
      "learning_rate": 3.6548717948717956e-05,
      "loss": 0.3989,
      "step": 12000
    },
    {
      "epoch": 14.81,
      "eval_loss": 3.263315439224243,
      "eval_runtime": 34.7016,
      "eval_samples_per_second": 21.93,
      "eval_steps_per_second": 1.383,
      "eval_wer": 0.47025293908086924,
      "step": 12000
    },
    {
      "epoch": 16.67,
      "learning_rate": 3.4625641025641024e-05,
      "loss": 0.3522,
      "step": 13500
    },
    {
      "epoch": 16.67,
      "eval_loss": 2.2337419986724854,
      "eval_runtime": 35.6982,
      "eval_samples_per_second": 21.318,
      "eval_steps_per_second": 1.345,
      "eval_wer": 0.4707873174207339,
      "step": 13500
    },
    {
      "epoch": 18.52,
      "learning_rate": 3.2702564102564105e-05,
      "loss": 0.3201,
      "step": 15000
    },
    {
      "epoch": 18.52,
      "eval_loss": 3.6878626346588135,
      "eval_runtime": 36.4839,
      "eval_samples_per_second": 20.859,
      "eval_steps_per_second": 1.316,
      "eval_wer": 0.45653722835767724,
      "step": 15000
    },
    {
      "epoch": 20.37,
      "learning_rate": 3.078076923076923e-05,
      "loss": 0.2899,
      "step": 16500
    },
    {
      "epoch": 20.37,
      "eval_loss": 5.438948631286621,
      "eval_runtime": 34.4996,
      "eval_samples_per_second": 22.058,
      "eval_steps_per_second": 1.391,
      "eval_wer": 0.45992162451015317,
      "step": 16500
    },
    {
      "epoch": 22.22,
      "learning_rate": 2.885897435897436e-05,
      "loss": 0.2776,
      "step": 18000
    },
    {
      "epoch": 22.22,
      "eval_loss": 3.528372049331665,
      "eval_runtime": 35.9097,
      "eval_samples_per_second": 21.192,
      "eval_steps_per_second": 1.337,
      "eval_wer": 0.4536872105450659,
      "step": 18000
    },
    {
      "epoch": 24.07,
      "learning_rate": 2.6935897435897438e-05,
      "loss": 0.2574,
      "step": 19500
    },
    {
      "epoch": 24.07,
      "eval_loss": 2.1759419441223145,
      "eval_runtime": 34.313,
      "eval_samples_per_second": 22.178,
      "eval_steps_per_second": 1.399,
      "eval_wer": 0.464909155682223,
      "step": 19500
    },
    {
      "epoch": 25.93,
      "learning_rate": 2.5012820512820513e-05,
      "loss": 0.2378,
      "step": 21000
    },
    {
      "epoch": 25.93,
      "eval_loss": 3.390052080154419,
      "eval_runtime": 34.4471,
      "eval_samples_per_second": 22.092,
      "eval_steps_per_second": 1.393,
      "eval_wer": 0.4447809048806555,
      "step": 21000
    },
    {
      "epoch": 27.78,
      "learning_rate": 2.3092307692307694e-05,
      "loss": 0.217,
      "step": 22500
    },
    {
      "epoch": 27.78,
      "eval_loss": 1.163241982460022,
      "eval_runtime": 36.0254,
      "eval_samples_per_second": 21.124,
      "eval_steps_per_second": 1.332,
      "eval_wer": 0.45653722835767724,
      "step": 22500
    },
    {
      "epoch": 29.63,
      "learning_rate": 2.1169230769230768e-05,
      "loss": 0.2115,
      "step": 24000
    },
    {
      "epoch": 29.63,
      "eval_loss": 1.7441022396087646,
      "eval_runtime": 35.1297,
      "eval_samples_per_second": 21.663,
      "eval_steps_per_second": 1.366,
      "eval_wer": 0.42322764517278233,
      "step": 24000
    },
    {
      "epoch": 31.48,
      "learning_rate": 1.9246153846153846e-05,
      "loss": 0.1959,
      "step": 25500
    },
    {
      "epoch": 31.48,
      "eval_loss": 3.4991888999938965,
      "eval_runtime": 36.7374,
      "eval_samples_per_second": 20.715,
      "eval_steps_per_second": 1.307,
      "eval_wer": 0.4303526897043107,
      "step": 25500
    },
    {
      "epoch": 33.33,
      "learning_rate": 1.7323076923076924e-05,
      "loss": 0.187,
      "step": 27000
    },
    {
      "epoch": 33.33,
      "eval_loss": 3.6162784099578857,
      "eval_runtime": 34.8093,
      "eval_samples_per_second": 21.862,
      "eval_steps_per_second": 1.379,
      "eval_wer": 0.43694335589597433,
      "step": 27000
    },
    {
      "epoch": 35.19,
      "learning_rate": 1.540128205128205e-05,
      "loss": 0.1748,
      "step": 28500
    },
    {
      "epoch": 35.19,
      "eval_loss": 3.603774309158325,
      "eval_runtime": 35.9258,
      "eval_samples_per_second": 21.183,
      "eval_steps_per_second": 1.336,
      "eval_wer": 0.4467402921268258,
      "step": 28500
    },
    {
      "epoch": 37.04,
      "learning_rate": 1.347820512820513e-05,
      "loss": 0.17,
      "step": 30000
    },
    {
      "epoch": 37.04,
      "eval_loss": 2.970829486846924,
      "eval_runtime": 35.2981,
      "eval_samples_per_second": 21.559,
      "eval_steps_per_second": 1.36,
      "eval_wer": 0.43623085144282153,
      "step": 30000
    },
    {
      "epoch": 38.89,
      "learning_rate": 1.1557692307692308e-05,
      "loss": 0.159,
      "step": 31500
    },
    {
      "epoch": 38.89,
      "eval_loss": 3.2044625282287598,
      "eval_runtime": 34.6143,
      "eval_samples_per_second": 21.985,
      "eval_steps_per_second": 1.387,
      "eval_wer": 0.42785892411827575,
      "step": 31500
    },
    {
      "epoch": 40.74,
      "learning_rate": 9.635897435897436e-06,
      "loss": 0.153,
      "step": 33000
    },
    {
      "epoch": 40.74,
      "eval_loss": 3.2426888942718506,
      "eval_runtime": 35.08,
      "eval_samples_per_second": 21.693,
      "eval_steps_per_second": 1.368,
      "eval_wer": 0.42874955468471676,
      "step": 33000
    },
    {
      "epoch": 42.59,
      "learning_rate": 7.712820512820514e-06,
      "loss": 0.1463,
      "step": 34500
    },
    {
      "epoch": 42.59,
      "eval_loss": 3.5439305305480957,
      "eval_runtime": 36.6846,
      "eval_samples_per_second": 20.744,
      "eval_steps_per_second": 1.308,
      "eval_wer": 0.4269682935518347,
      "step": 34500
    },
    {
      "epoch": 44.44,
      "learning_rate": 5.78974358974359e-06,
      "loss": 0.139,
      "step": 36000
    },
    {
      "epoch": 44.44,
      "eval_loss": 3.938081741333008,
      "eval_runtime": 34.7219,
      "eval_samples_per_second": 21.917,
      "eval_steps_per_second": 1.382,
      "eval_wer": 0.41503384396152476,
      "step": 36000
    },
    {
      "epoch": 46.3,
      "learning_rate": 3.867948717948718e-06,
      "loss": 0.1352,
      "step": 37500
    },
    {
      "epoch": 46.3,
      "eval_loss": 4.174356937408447,
      "eval_runtime": 36.3496,
      "eval_samples_per_second": 20.936,
      "eval_steps_per_second": 1.321,
      "eval_wer": 0.4091556822230139,
      "step": 37500
    },
    {
      "epoch": 48.15,
      "learning_rate": 1.9461538461538464e-06,
      "loss": 0.1369,
      "step": 39000
    },
    {
      "epoch": 48.15,
      "eval_loss": 4.227924823760986,
      "eval_runtime": 34.5902,
      "eval_samples_per_second": 22.0,
      "eval_steps_per_second": 1.388,
      "eval_wer": 0.4153900961881012,
      "step": 39000
    },
    {
      "epoch": 50.0,
      "learning_rate": 2.3076923076923076e-08,
      "loss": 0.1273,
      "step": 40500
    },
    {
      "epoch": 50.0,
      "eval_loss": 4.169058322906494,
      "eval_runtime": 34.2804,
      "eval_samples_per_second": 22.199,
      "eval_steps_per_second": 1.4,
      "eval_wer": 0.4132525828286427,
      "step": 40500
    },
    {
      "epoch": 50.0,
      "step": 40500,
      "total_flos": 5.1003805267852526e+20,
      "train_loss": 0.4784129307122878,
      "train_runtime": 111036.8702,
      "train_samples_per_second": 11.67,
      "train_steps_per_second": 0.365
    }
  ],
  "max_steps": 40500,
  "num_train_epochs": 50,
  "total_flos": 5.1003805267852526e+20,
  "trial_name": null,
  "trial_params": null
}
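trainer_state.json carries the full log_history, so the WER trajectory can be recovered without TensorBoard. A small sketch that pulls eval_wer per evaluation step and reports the lowest value (file path assumed local):

```python
import json

with open("trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

# Keep only the evaluation entries; training-loss entries have no eval_wer key.
evals = [e for e in state["log_history"] if "eval_wer" in e]

for e in evals:
    print(f"step {e['step']:>6}  epoch {e['epoch']:>5}  WER {e['eval_wer']:.4f}")

best = min(evals, key=lambda e: e["eval_wer"])
print(f"best WER {best['eval_wer']:.4f} at step {best['step']}")  # 0.4092 at step 37500
```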