------------> log file ==runs2/rte/1/log_bs32_lr3e-05_20221118_060236_793692.txt
Namespace(aug_train=False, data_dir='/home.local/jianwei/datasets/nlp/glue_data/RTE', do_eval=False, early_stop=True, early_stop_metric='accuracy', eval_step=120, gradient_accumulation_steps=1, learning_rate=3e-05, local_rank=0, lr_scheduler_type=<SchedulerType.CONSTANT_WITH_WARMUP: 'constant_with_warmup'>, max_length=128, max_train_steps=None, model_name_or_path='/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5', num_train_epochs=30, num_warmup_steps=0, output_dir='runs2/rte/1', pad_to_max_length=False, per_device_eval_batch_size=32, per_device_train_batch_size=32, print_step=5, save_last=False, seed=None, task_name='rte', train_file=None, use_slow_tokenizer=False, validation_file=None, weight_decay=0.0)
Distributed environment: NO
Num processes: 1
Process index: 0
Local process index: 0
Device: cuda
Mixed precision type: fp16
Sample 595 of the training set: (tensor([101, 11929, 1010, 5553, 1012, 2570, 1006, 8418, 25311, 13860, 3388, 1007, 1011, 1011, 2019, 18410, 2140, 6187, 24887, 2080, 11183, 1010, 1037, 2280, 3539, 2704, 1010, 2180, 5978, 1005, 1055, 4883, 2602, 2006, 4465, 1012, 102, 2047, 5077, 3539, 2704, 2003, 2700, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)).
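Each training sample dumped here (and the two below) is the 4-tuple (input_ids, attention_mask, token_type_ids, label): input_ids are WordPiece ids for "[CLS] premise [SEP] hypothesis [SEP]" plus zero padding, attention_mask is 1 over real tokens and 0 over padding, and token_type_ids flip from 0 to 1 over the hypothesis segment. A minimal sketch of how such a tuple is built, assuming a stock BERT-style tokenizer; the checkpoint name and example strings are illustrative, not taken from this run, and the run itself pads dynamically (pad_to_max_length=False) while padding="max_length" below reproduces the fixed-length dumps:

from transformers import AutoTokenizer

# Hypothetical premise/hypothesis pair; the script reads real pairs from the RTE data_dir.
premise = "A former foreign minister won Sunday's presidential election."
hypothesis = "A new foreign minister was chosen."

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed vocabulary
enc = tokenizer(
    premise,
    hypothesis,
    truncation=True,
    max_length=128,        # matches max_length=128 in the Namespace above
    padding="max_length",  # zero-pad to 128, as in the sample dumps
)

# enc["input_ids"]      -> [CLS] premise [SEP] hypothesis [SEP], then 0s
# enc["attention_mask"] -> 1 over real tokens, 0 over padding
# enc["token_type_ids"] -> 0 for the premise segment, 1 for the hypothesis segment
label = 1  # RTE stores a 0/1 entailment label as the final tensor in each tuple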
Sample 2375 of the training set: (tensor([101, 1996, 5611, 2390, 2749, 3344, 2041, 1010, 2006, 5095, 1010, 1037, 6923, 2510, 3169, 2046, 1996, 2225, 2924, 2237, 1997, 15419, 2378, 1998, 2049, 13141, 3409, 1010, 2334, 9302, 4216, 2056, 1012, 102, 1996, 5611, 2390, 3344, 2041, 1037, 6923, 3169, 1999, 15419, 2378, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(0)).
Sample 149 of the training set: (tensor([101, 2048, 9767, 8461, 2379, 2019, 5499, 2082, 1999, 4501, 2730, 2809, 2111, 1998, 5229, 4413, 2500, 7483, 1999, 1996, 6745, 8293, 1997, 4808, 13940, 1996, 2670, 3417, 1997, 15381, 1012, 102, 2809, 2111, 8461, 2048, 9767, 2379, 2019, 5499, 2082, 1999, 4501, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)).
***** Running training *****
Num examples = 2490
Num Epochs = 30
Instantaneous batch size per device = 32
Total train batch size (w. parallel, distributed & accumulation) = 32
Gradient Accumulation steps = 1
Total optimization steps = 2340
000005/002340, loss: 0.694824, avg_loss: 0.691177
000010/002340, loss: 0.707565, avg_loss: 0.693715
000015/002340, loss: 0.699615, avg_loss: 0.693022
000020/002340, loss: 0.699615, avg_loss: 0.693939
000025/002340, loss: 0.699310, avg_loss: 0.694436
000030/002340, loss: 0.698532, avg_loss: 0.694941
000035/002340, loss: 0.686935, avg_loss: 0.694372
000040/002340, loss: 0.696411, avg_loss: 0.694273
000045/002340, loss: 0.692871, avg_loss: 0.693708
000050/002340, loss: 0.687256, avg_loss: 0.693756
000055/002340, loss: 0.701004, avg_loss: 0.693827
000060/002340, loss: 0.691040, avg_loss: 0.693579
000065/002340, loss: 0.689056, avg_loss: 0.693324
000070/002340, loss: 0.696518, avg_loss: 0.693440
000075/002340, loss: 0.696930, avg_loss: 0.693460
000080/002340, loss: 0.693802, avg_loss: 0.693340
000085/002340, loss: 0.688171, avg_loss: 0.693318
000090/002340, loss: 0.698029, avg_loss: 0.693154
000095/002340, loss: 0.689453, avg_loss: 0.692949
000100/002340, loss: 0.690857, avg_loss: 0.692921
000105/002340, loss: 0.689819, avg_loss: 0.692827
000110/002340, loss: 0.682220, avg_loss: 0.692768
000115/002340, loss: 0.700806, avg_loss: 0.692803
000120/002340, loss: 0.701385, avg_loss: 0.692652
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 1, step 120/2340: {'accuracy': 0.5523465703971119}
000125/002340, loss: 0.693527, avg_loss: 0.692706
000130/002340, loss: 0.689957, avg_loss: 0.692658
000135/002340, loss: 0.685425, avg_loss: 0.692536
000140/002340, loss: 0.690201, avg_loss: 0.692434
000145/002340, loss: 0.686600, avg_loss: 0.692396
000150/002340, loss: 0.678986, avg_loss: 0.692177
000155/002340, loss: 0.679138, avg_loss: 0.691975
000160/002340, loss: 0.694275, avg_loss: 0.691769
000165/002340, loss: 0.692368, avg_loss: 0.691443
000170/002340, loss: 0.680664, avg_loss: 0.691252
000175/002340, loss: 0.666016, avg_loss: 0.690698
000180/002340, loss: 0.671844, avg_loss: 0.690296
000185/002340, loss: 0.651184, avg_loss: 0.689748
000190/002340, loss: 0.659752, avg_loss: 0.688919
000195/002340, loss: 0.662926, avg_loss: 0.688697
000200/002340, loss: 0.643776, avg_loss: 0.688136
000205/002340, loss: 0.693794, avg_loss: 0.687406
000210/002340, loss: 0.716675, avg_loss: 0.686937
000215/002340, loss: 0.665474, avg_loss: 0.686136
000220/002340, loss: 0.625298, avg_loss: 0.685308
000225/002340, loss: 0.656639, avg_loss: 0.685019
000230/002340, loss: 0.673508, avg_loss: 0.684550
000235/002340, loss: 0.575394, avg_loss: 0.682954
000240/002340, loss: 0.615173, avg_loss: 0.681390
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 3, step 240/2340: {'accuracy': 0.5884476534296029}
000245/002340, loss: 0.566116, avg_loss: 0.679216
000250/002340, loss: 0.662231, avg_loss: 0.677990
000255/002340, loss: 0.742844, avg_loss: 0.677457
000260/002340, loss: 0.744896, avg_loss: 0.677289
000265/002340, loss: 0.524788, avg_loss: 0.675974
000270/002340, loss: 0.573128, avg_loss: 0.674871
000275/002340, loss: 0.698616, avg_loss: 0.674028
000280/002340, loss: 0.661125, avg_loss: 0.672997
000285/002340, loss: 0.577705, avg_loss: 0.671527
000290/002340, loss: 0.529144, avg_loss: 0.669498
000295/002340, loss: 0.548820, avg_loss: 0.668429
000300/002340, loss: 0.533775, avg_loss: 0.667589
000305/002340, loss: 0.724682, avg_loss: 0.666549
000310/002340, loss: 0.618702, avg_loss: 0.667052
000315/002340, loss: 0.600662, avg_loss: 0.666212
000320/002340, loss: 0.560127, avg_loss: 0.665015
000325/002340, loss: 0.667423, avg_loss: 0.663344
000330/002340, loss: 0.520096, avg_loss: 0.661692
000335/002340, loss: 0.589901, avg_loss: 0.659812
000340/002340, loss: 0.718616, avg_loss: 0.658405
000345/002340, loss: 0.523731, avg_loss: 0.657693
000350/002340, loss: 0.597912, avg_loss: 0.656364
000355/002340, loss: 0.510841, avg_loss: 0.654704
000360/002340, loss: 0.598392, avg_loss: 0.652629
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 4, step 360/2340: {'accuracy': 0.6137184115523465}
000365/002340, loss: 0.509396, avg_loss: 0.650652
000370/002340, loss: 0.625957, avg_loss: 0.649372
000375/002340, loss: 0.632420, avg_loss: 0.648425
000380/002340, loss: 0.562641, avg_loss: 0.647222
000385/002340, loss: 0.649609, avg_loss: 0.645501
000390/002340, loss: 0.361694, avg_loss: 0.643182
000395/002340, loss: 0.425430, avg_loss: 0.642246
000400/002340, loss: 0.577938, avg_loss: 0.640067
000405/002340, loss: 0.554668, avg_loss: 0.638333
000410/002340, loss: 0.505466, avg_loss: 0.636457
000415/002340, loss: 0.531124, avg_loss: 0.634969
000420/002340, loss: 0.425911, avg_loss: 0.633147
000425/002340, loss: 0.532368, avg_loss: 0.632082
000430/002340, loss: 0.569756, avg_loss: 0.630961
000435/002340, loss: 0.451645, avg_loss: 0.629107
000440/002340, loss: 0.459530, avg_loss: 0.627486
000445/002340, loss: 0.380501, avg_loss: 0.625123
000450/002340, loss: 0.565880, avg_loss: 0.624122
000455/002340, loss: 0.422201, avg_loss: 0.621911
000460/002340, loss: 0.671333, avg_loss: 0.620993
000465/002340, loss: 0.427799, avg_loss: 0.618575
000470/002340, loss: 0.301590, avg_loss: 0.616753
000475/002340, loss: 0.517204, avg_loss: 0.614735
000480/002340, loss: 0.473822, avg_loss: 0.612666
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 6, step 480/2340: {'accuracy': 0.6209386281588448}
000485/002340, loss: 0.235840, avg_loss: 0.610187
000490/002340, loss: 0.535803, avg_loss: 0.608769
000495/002340, loss: 0.447842, avg_loss: 0.606833
000500/002340, loss: 0.359915, avg_loss: 0.604468
000505/002340, loss: 0.473944, avg_loss: 0.601928
000510/002340, loss: 0.487707, avg_loss: 0.600405
000515/002340, loss: 0.280029, avg_loss: 0.599008
000520/002340, loss: 0.509848, avg_loss: 0.597484
000525/002340, loss: 0.646320, avg_loss: 0.596454
000530/002340, loss: 0.350674, avg_loss: 0.594710
000535/002340, loss: 0.480106, avg_loss: 0.593436
000540/002340, loss: 0.560251, avg_loss: 0.593214
000545/002340, loss: 0.387239, avg_loss: 0.591432
000550/002340, loss: 0.277430, avg_loss: 0.589320
000555/002340, loss: 0.280695, avg_loss: 0.587417
000560/002340, loss: 0.330351, avg_loss: 0.585310
000565/002340, loss: 0.391579, avg_loss: 0.583662
000570/002340, loss: 0.280355, avg_loss: 0.582107
000575/002340, loss: 0.359081, avg_loss: 0.580171
000580/002340, loss: 0.367201, avg_loss: 0.578450
000585/002340, loss: 0.430851, avg_loss: 0.577231
000590/002340, loss: 0.331879, avg_loss: 0.575557
000595/002340, loss: 0.333700, avg_loss: 0.573829
000600/002340, loss: 0.309275, avg_loss: 0.571686
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 7, step 600/2340: {'accuracy': 0.6425992779783394}
000605/002340, loss: 0.461454, avg_loss: 0.570168
000610/002340, loss: 0.434152, avg_loss: 0.568408
000615/002340, loss: 0.565701, avg_loss: 0.567013
000620/002340, loss: 0.281487, avg_loss: 0.564378
000625/002340, loss: 0.183996, avg_loss: 0.562576
000630/002340, loss: 0.308249, avg_loss: 0.560548
000635/002340, loss: 0.492087, avg_loss: 0.558905
000640/002340, loss: 0.276144, avg_loss: 0.556907
000645/002340, loss: 0.379016, avg_loss: 0.555011
000650/002340, loss: 0.257240, avg_loss: 0.553119
000655/002340, loss: 0.260510, avg_loss: 0.550735
000660/002340, loss: 0.482807, avg_loss: 0.549067
000665/002340, loss: 0.313425, avg_loss: 0.547653
000670/002340, loss: 0.244961, avg_loss: 0.545744
000675/002340, loss: 0.386663, avg_loss: 0.544380
000680/002340, loss: 0.137331, avg_loss: 0.541812
000685/002340, loss: 0.301256, avg_loss: 0.539778
000690/002340, loss: 0.284186, avg_loss: 0.537928
000695/002340, loss: 0.521972, avg_loss: 0.536261
000700/002340, loss: 0.718600, avg_loss: 0.535717
000705/002340, loss: 0.237306, avg_loss: 0.534266
000710/002340, loss: 0.164028, avg_loss: 0.532027
000715/002340, loss: 0.235560, avg_loss: 0.530920
000720/002340, loss: 0.224425, avg_loss: 0.529428
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 9, step 720/2340: {'accuracy': 0.6462093862815884}
000725/002340, loss: 0.250054, avg_loss: 0.527996
000730/002340, loss: 0.213790, avg_loss: 0.526521
000735/002340, loss: 0.339844, avg_loss: 0.525346
000740/002340, loss: 0.192316, avg_loss: 0.523399
000745/002340, loss: 0.322181, avg_loss: 0.521820
000750/002340, loss: 0.114270, avg_loss: 0.519722
000755/002340, loss: 0.242498, avg_loss: 0.517846
000760/002340, loss: 0.234197, avg_loss: 0.515497
000765/002340, loss: 0.332447, avg_loss: 0.513969
000770/002340, loss: 0.163693, avg_loss: 0.512496
000775/002340, loss: 0.260910, avg_loss: 0.511088
000780/002340, loss: 0.236919, avg_loss: 0.509495
000785/002340, loss: 0.151022, avg_loss: 0.507580
000790/002340, loss: 0.489914, avg_loss: 0.506298
000795/002340, loss: 0.175525, avg_loss: 0.504419
000800/002340, loss: 0.274471, avg_loss: 0.502310
000805/002340, loss: 0.308759, avg_loss: 0.500468
000810/002340, loss: 0.227170, avg_loss: 0.498888
000815/002340, loss: 0.112951, avg_loss: 0.496910
000820/002340, loss: 0.168542, avg_loss: 0.495333
000825/002340, loss: 0.163078, avg_loss: 0.493526
000830/002340, loss: 0.208418, avg_loss: 0.492144
000835/002340, loss: 0.204179, avg_loss: 0.490463
000840/002340, loss: 0.262290, avg_loss: 0.488488
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 10, step 840/2340: {'accuracy': 0.6245487364620939}
000845/002340, loss: 0.166388, avg_loss: 0.486870
000850/002340, loss: 0.221429, avg_loss: 0.485510
000855/002340, loss: 0.376082, avg_loss: 0.484030
000860/002340, loss: 0.083231, avg_loss: 0.482307
000865/002340, loss: 0.161541, avg_loss: 0.480355
000870/002340, loss: 0.180701, avg_loss: 0.478405
000875/002340, loss: 0.175531, avg_loss: 0.476498
000880/002340, loss: 0.148172, avg_loss: 0.475174
000885/002340, loss: 0.110148, avg_loss: 0.473676
000890/002340, loss: 0.177225, avg_loss: 0.472175
000895/002340, loss: 0.051785, avg_loss: 0.470479
000900/002340, loss: 0.239419, avg_loss: 0.469122
000905/002340, loss: 0.294643, avg_loss: 0.467460
000910/002340, loss: 0.372546, avg_loss: 0.466119
000915/002340, loss: 0.160401, avg_loss: 0.464562
000920/002340, loss: 0.389829, avg_loss: 0.463444
000925/002340, loss: 0.461596, avg_loss: 0.462050
000930/002340, loss: 0.169349, avg_loss: 0.460443
000935/002340, loss: 0.274192, avg_loss: 0.459206
000940/002340, loss: 0.245536, avg_loss: 0.457409
000945/002340, loss: 0.124900, avg_loss: 0.455669
000950/002340, loss: 0.258810, avg_loss: 0.453951
000955/002340, loss: 0.328007, avg_loss: 0.452289
000960/002340, loss: 0.243825, avg_loss: 0.450600
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 12, step 960/2340: {'accuracy': 0.6389891696750902}
000965/002340, loss: 0.201036, avg_loss: 0.449321
000970/002340, loss: 0.091728, avg_loss: 0.447797
000975/002340, loss: 0.182425, avg_loss: 0.446324
000980/002340, loss: 0.159452, avg_loss: 0.444909
000985/002340, loss: 0.142912, avg_loss: 0.443522
000990/002340, loss: 0.304327, avg_loss: 0.442004
000995/002340, loss: 0.117483, avg_loss: 0.440452
001000/002340, loss: 0.156437, avg_loss: 0.438837
001005/002340, loss: 0.032182, avg_loss: 0.437682
001010/002340, loss: 0.063084, avg_loss: 0.436744
001015/002340, loss: 0.258552, avg_loss: 0.435504
001020/002340, loss: 0.091414, avg_loss: 0.434340
001025/002340, loss: 0.100409, avg_loss: 0.432843
001030/002340, loss: 0.064708, avg_loss: 0.431516
001035/002340, loss: 0.459350, avg_loss: 0.430340
001040/002340, loss: 0.195770, avg_loss: 0.428896
001045/002340, loss: 0.101108, avg_loss: 0.427430
001050/002340, loss: 0.162723, avg_loss: 0.425868
001055/002340, loss: 0.170199, avg_loss: 0.424800
001060/002340, loss: 0.066082, avg_loss: 0.423415
001065/002340, loss: 0.139599, avg_loss: 0.422219
001070/002340, loss: 0.089475, avg_loss: 0.420665
001075/002340, loss: 0.115157, avg_loss: 0.419250
001080/002340, loss: 0.085939, avg_loss: 0.417821
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 13, step 1080/2340: {'accuracy': 0.6173285198555957}
001085/002340, loss: 0.138964, avg_loss: 0.416740
001090/002340, loss: 0.385725, avg_loss: 0.415552
001095/002340, loss: 0.173466, avg_loss: 0.414612
001100/002340, loss: 0.101382, avg_loss: 0.413397
001105/002340, loss: 0.098917, avg_loss: 0.412091
001110/002340, loss: 0.088198, avg_loss: 0.410518
001115/002340, loss: 0.039977, avg_loss: 0.409207
001120/002340, loss: 0.126413, avg_loss: 0.407805
001125/002340, loss: 0.154641, avg_loss: 0.406540
001130/002340, loss: 0.221717, avg_loss: 0.405238
001135/002340, loss: 0.155590, avg_loss: 0.403870
001140/002340, loss: 0.072533, avg_loss: 0.402521
001145/002340, loss: 0.148947, avg_loss: 0.401401
001150/002340, loss: 0.202878, avg_loss: 0.400165
001155/002340, loss: 0.054971, avg_loss: 0.399305
001160/002340, loss: 0.058926, avg_loss: 0.398088
001165/002340, loss: 0.187665, avg_loss: 0.396901
001170/002340, loss: 0.091442, avg_loss: 0.395624
001175/002340, loss: 0.339817, avg_loss: 0.394529
001180/002340, loss: 0.029183, avg_loss: 0.393430
001185/002340, loss: 0.052091, avg_loss: 0.392348
001190/002340, loss: 0.175309, avg_loss: 0.391464
001195/002340, loss: 0.269615, avg_loss: 0.390438
001200/002340, loss: 0.042982, avg_loss: 0.389416
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 15, step 1200/2340: {'accuracy': 0.6353790613718412}
001205/002340, loss: 0.029362, avg_loss: 0.388045
001210/002340, loss: 0.106356, avg_loss: 0.386842
001215/002340, loss: 0.055282, avg_loss: 0.385720
001220/002340, loss: 0.025587, avg_loss: 0.384474
001225/002340, loss: 0.017830, avg_loss: 0.383314
001230/002340, loss: 0.156192, avg_loss: 0.382166
001235/002340, loss: 0.017268, avg_loss: 0.381167
001240/002340, loss: 0.015908, avg_loss: 0.379919
001245/002340, loss: 0.024442, avg_loss: 0.378661
001250/002340, loss: 0.016508, avg_loss: 0.377585
001255/002340, loss: 0.021355, avg_loss: 0.376479
001260/002340, loss: 0.024076, avg_loss: 0.375165
001265/002340, loss: 0.202033, avg_loss: 0.374116
001270/002340, loss: 0.027793, avg_loss: 0.372882
001275/002340, loss: 0.027369, avg_loss: 0.372247
001280/002340, loss: 0.021813, avg_loss: 0.371052
001285/002340, loss: 0.021163, avg_loss: 0.370046
001290/002340, loss: 0.046603, avg_loss: 0.369336
001295/002340, loss: 0.076338, avg_loss: 0.368328
001300/002340, loss: 0.183380, avg_loss: 0.367225
001305/002340, loss: 0.169317, avg_loss: 0.366140
001310/002340, loss: 0.020987, avg_loss: 0.365018
001315/002340, loss: 0.169484, avg_loss: 0.364127
001320/002340, loss: 0.044023, avg_loss: 0.363106
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 16, step 1320/2340: {'accuracy': 0.6462093862815884}
001325/002340, loss: 0.146640, avg_loss: 0.361943
001330/002340, loss: 0.053370, avg_loss: 0.360778
001335/002340, loss: 0.024849, avg_loss: 0.359785
001340/002340, loss: 0.040356, avg_loss: 0.358545
001345/002340, loss: 0.216520, avg_loss: 0.357564
001350/002340, loss: 0.020188, avg_loss: 0.356442
001355/002340, loss: 0.050854, avg_loss: 0.355434
001360/002340, loss: 0.013922, avg_loss: 0.354336
001365/002340, loss: 0.034302, avg_loss: 0.353537
001370/002340, loss: 0.083984, avg_loss: 0.352530
001375/002340, loss: 0.044313, avg_loss: 0.351671
001380/002340, loss: 0.197178, avg_loss: 0.350656
001385/002340, loss: 0.087372, avg_loss: 0.349721
001390/002340, loss: 0.122292, avg_loss: 0.348657
001395/002340, loss: 0.161705, avg_loss: 0.347780
001400/002340, loss: 0.014310, avg_loss: 0.346943
001405/002340, loss: 0.096345, avg_loss: 0.345930
001410/002340, loss: 0.142292, avg_loss: 0.345120
001415/002340, loss: 0.016984, avg_loss: 0.344193
001420/002340, loss: 0.014843, avg_loss: 0.343171
001425/002340, loss: 0.054250, avg_loss: 0.342329
001430/002340, loss: 0.049341, avg_loss: 0.341417
001435/002340, loss: 0.033567, avg_loss: 0.340340
001440/002340, loss: 0.108241, avg_loss: 0.339508
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 18, step 1440/2340: {'accuracy': 0.6137184115523465}
001445/002340, loss: 0.148780, avg_loss: 0.338643
001450/002340, loss: 0.121979, avg_loss: 0.337871
001455/002340, loss: 0.015762, avg_loss: 0.337010
001460/002340, loss: 0.197943, avg_loss: 0.336178
001465/002340, loss: 0.019593, avg_loss: 0.335371
001470/002340, loss: 0.129545, avg_loss: 0.334404
001475/002340, loss: 0.015238, avg_loss: 0.333483
001480/002340, loss: 0.016869, avg_loss: 0.332625
001485/002340, loss: 0.011418, avg_loss: 0.331565
001490/002340, loss: 0.338315, avg_loss: 0.330893
001495/002340, loss: 0.288740, avg_loss: 0.330484
001500/002340, loss: 0.148870, avg_loss: 0.329575
001505/002340, loss: 0.013757, avg_loss: 0.328768
001510/002340, loss: 0.016786, avg_loss: 0.327894
001515/002340, loss: 0.013239, avg_loss: 0.326989
001520/002340, loss: 0.024581, avg_loss: 0.326006
001525/002340, loss: 0.017539, avg_loss: 0.325226
001530/002340, loss: 0.067678, avg_loss: 0.324287
001535/002340, loss: 0.024253, avg_loss: 0.323389
001540/002340, loss: 0.077925, avg_loss: 0.322495
001545/002340, loss: 0.024680, avg_loss: 0.321567
001550/002340, loss: 0.012920, avg_loss: 0.320824
001555/002340, loss: 0.023837, avg_loss: 0.320000
001560/002340, loss: 0.221982, avg_loss: 0.319304
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 19, step 1560/2340: {'accuracy': 0.6137184115523465}
001565/002340, loss: 0.013699, avg_loss: 0.318449
001570/002340, loss: 0.011844, avg_loss: 0.317610
001575/002340, loss: 0.012580, avg_loss: 0.316855
001580/002340, loss: 0.037540, avg_loss: 0.316005
001585/002340, loss: 0.019229, avg_loss: 0.315232
001590/002340, loss: 0.048232, avg_loss: 0.314477
001595/002340, loss: 0.141452, avg_loss: 0.313963
001600/002340, loss: 0.015298, avg_loss: 0.313133
001605/002340, loss: 0.013662, avg_loss: 0.312229
001610/002340, loss: 0.160849, avg_loss: 0.311404
001615/002340, loss: 0.012301, avg_loss: 0.310524
001620/002340, loss: 0.063877, avg_loss: 0.309759
001625/002340, loss: 0.032892, avg_loss: 0.309026
001630/002340, loss: 0.177563, avg_loss: 0.308279
001635/002340, loss: 0.157313, avg_loss: 0.307644
001640/002340, loss: 0.130090, avg_loss: 0.306819
001645/002340, loss: 0.021889, avg_loss: 0.306081
001650/002340, loss: 0.152882, avg_loss: 0.305300
001655/002340, loss: 0.009122, avg_loss: 0.304627
001660/002340, loss: 0.015140, avg_loss: 0.303849
001665/002340, loss: 0.164985, avg_loss: 0.303089
001670/002340, loss: 0.008990, avg_loss: 0.302396
001675/002340, loss: 0.010757, avg_loss: 0.301671
001680/002340, loss: 0.009137, avg_loss: 0.300904
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 21, step 1680/2340: {'accuracy': 0.6173285198555957}
001685/002340, loss: 0.053387, avg_loss: 0.300194
001690/002340, loss: 0.022511, avg_loss: 0.299502
001695/002340, loss: 0.105420, avg_loss: 0.298722
001700/002340, loss: 0.013549, avg_loss: 0.297988
001705/002340, loss: 0.073981, avg_loss: 0.297318
001710/002340, loss: 0.014491, avg_loss: 0.296600
001715/002340, loss: 0.154422, avg_loss: 0.295955
001720/002340, loss: 0.163267, avg_loss: 0.295310
001725/002340, loss: 0.136114, avg_loss: 0.294759
001730/002340, loss: 0.015310, avg_loss: 0.294064
001735/002340, loss: 0.087005, avg_loss: 0.293422
001740/002340, loss: 0.020296, avg_loss: 0.292756
001745/002340, loss: 0.018787, avg_loss: 0.292135
001750/002340, loss: 0.034191, avg_loss: 0.291526
001755/002340, loss: 0.045470, avg_loss: 0.290987
001760/002340, loss: 0.014372, avg_loss: 0.290662
001765/002340, loss: 0.015767, avg_loss: 0.289942
001770/002340, loss: 0.039629, avg_loss: 0.289302
001775/002340, loss: 0.016410, avg_loss: 0.288527
001780/002340, loss: 0.038289, avg_loss: 0.287933
001785/002340, loss: 0.017720, avg_loss: 0.287493
001790/002340, loss: 0.033570, avg_loss: 0.286735
001795/002340, loss: 0.012522, avg_loss: 0.286079
001800/002340, loss: 0.053891, avg_loss: 0.285344
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 23, step 1800/2340: {'accuracy': 0.6245487364620939}
001805/002340, loss: 0.126177, avg_loss: 0.284716
001810/002340, loss: 0.011923, avg_loss: 0.284070
001815/002340, loss: 0.142181, avg_loss: 0.283613
001820/002340, loss: 0.010828, avg_loss: 0.282998
001825/002340, loss: 0.025087, avg_loss: 0.282492
001830/002340, loss: 0.273915, avg_loss: 0.281916
001835/002340, loss: 0.016827, avg_loss: 0.281382
001840/002340, loss: 0.010785, avg_loss: 0.280767
001845/002340, loss: 0.015339, avg_loss: 0.280337
001850/002340, loss: 0.020906, avg_loss: 0.279696
001855/002340, loss: 0.165239, avg_loss: 0.279069
001860/002340, loss: 0.053642, avg_loss: 0.278450
001865/002340, loss: 0.133574, avg_loss: 0.277862
001870/002340, loss: 0.097644, avg_loss: 0.277226
001875/002340, loss: 0.059441, avg_loss: 0.276570
001880/002340, loss: 0.016699, avg_loss: 0.275948
001885/002340, loss: 0.146401, avg_loss: 0.275488
001890/002340, loss: 0.011636, avg_loss: 0.274799
001895/002340, loss: 0.018686, avg_loss: 0.274214
001900/002340, loss: 0.026965, avg_loss: 0.273611
001905/002340, loss: 0.013933, avg_loss: 0.272935
001910/002340, loss: 0.125580, avg_loss: 0.272318
001915/002340, loss: 0.129783, avg_loss: 0.271802
001920/002340, loss: 0.116678, avg_loss: 0.271278
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 24, step 1920/2340: {'accuracy': 0.6173285198555957}
001925/002340, loss: 0.254784, avg_loss: 0.270806
001930/002340, loss: 0.157526, avg_loss: 0.270238
001935/002340, loss: 0.031608, avg_loss: 0.269644
001940/002340, loss: 0.009236, avg_loss: 0.269169
001945/002340, loss: 0.009980, avg_loss: 0.268799
001950/002340, loss: 0.033835, avg_loss: 0.268168
001955/002340, loss: 0.051771, avg_loss: 0.267547
001960/002340, loss: 0.142184, avg_loss: 0.267055
001965/002340, loss: 0.046325, avg_loss: 0.266676
001970/002340, loss: 0.041966, avg_loss: 0.266192
001975/002340, loss: 0.020202, avg_loss: 0.265597
001980/002340, loss: 0.125195, avg_loss: 0.265071
001985/002340, loss: 0.019307, avg_loss: 0.264558
001990/002340, loss: 0.011511, avg_loss: 0.263954
001995/002340, loss: 0.092994, avg_loss: 0.263384
002000/002340, loss: 0.098703, avg_loss: 0.262809
002005/002340, loss: 0.017836, avg_loss: 0.262371
002010/002340, loss: 0.047947, avg_loss: 0.261831
002015/002340, loss: 0.157151, avg_loss: 0.261291
002020/002340, loss: 0.063095, avg_loss: 0.260695
002025/002340, loss: 0.239691, avg_loss: 0.260198
002030/002340, loss: 0.008953, avg_loss: 0.259652
002035/002340, loss: 0.008303, avg_loss: 0.259056
002040/002340, loss: 0.133496, avg_loss: 0.258505
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 26, step 2040/2340: {'accuracy': 0.6173285198555957}
002045/002340, loss: 0.070495, avg_loss: 0.258069
002050/002340, loss: 0.082666, avg_loss: 0.257558
002055/002340, loss: 0.036117, avg_loss: 0.257011
002060/002340, loss: 0.018446, avg_loss: 0.256447
002065/002340, loss: 0.019938, avg_loss: 0.255982
002070/002340, loss: 0.010070, avg_loss: 0.255545
002075/002340, loss: 0.010592, avg_loss: 0.254990
002080/002340, loss: 0.047749, avg_loss: 0.254418
002085/002340, loss: 0.157273, avg_loss: 0.253991
002090/002340, loss: 0.012268, avg_loss: 0.253488
002095/002340, loss: 0.010397, avg_loss: 0.252964
002100/002340, loss: 0.152166, avg_loss: 0.252516
002105/002340, loss: 0.149034, avg_loss: 0.252077
002110/002340, loss: 0.022406, avg_loss: 0.251554
002115/002340, loss: 0.050635, avg_loss: 0.251001
002120/002340, loss: 0.101384, avg_loss: 0.250624
002125/002340, loss: 0.019535, avg_loss: 0.250064
002130/002340, loss: 0.017638, avg_loss: 0.249509
002135/002340, loss: 0.007454, avg_loss: 0.249097
002140/002340, loss: 0.170886, avg_loss: 0.248638
002145/002340, loss: 0.008658, avg_loss: 0.248148
002150/002340, loss: 0.018784, avg_loss: 0.247731
002155/002340, loss: 0.006945, avg_loss: 0.247294
002160/002340, loss: 0.149141, avg_loss: 0.246973
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 27, step 2160/2340: {'accuracy': 0.6173285198555957}
002165/002340, loss: 0.070260, avg_loss: 0.246627
002170/002340, loss: 0.018735, avg_loss: 0.246110
002175/002340, loss: 0.011750, avg_loss: 0.245641
002180/002340, loss: 0.024557, avg_loss: 0.245194
002185/002340, loss: 0.022439, avg_loss: 0.244675
002190/002340, loss: 0.009183, avg_loss: 0.244218
002195/002340, loss: 0.147473, avg_loss: 0.243797
002200/002340, loss: 0.008439, avg_loss: 0.243311
002205/002340, loss: 0.009392, avg_loss: 0.242842
002210/002340, loss: 0.007260, avg_loss: 0.242363
002215/002340, loss: 0.006505, avg_loss: 0.241869
002220/002340, loss: 0.036663, avg_loss: 0.241415
002225/002340, loss: 0.010591, avg_loss: 0.240936
002230/002340, loss: 0.008057, avg_loss: 0.240418
002235/002340, loss: 0.005135, avg_loss: 0.240005
002240/002340, loss: 0.009763, avg_loss: 0.239661
002245/002340, loss: 0.009173, avg_loss: 0.239206
002250/002340, loss: 0.015700, avg_loss: 0.238819
002255/002340, loss: 0.021340, avg_loss: 0.238346
002260/002340, loss: 0.060185, avg_loss: 0.237882
002265/002340, loss: 0.038913, avg_loss: 0.237484
002270/002340, loss: 0.016376, avg_loss: 0.237112
002275/002340, loss: 0.010828, avg_loss: 0.236714
002280/002340, loss: 0.129731, avg_loss: 0.236370
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
epoch 29, step 2280/2340: {'accuracy': 0.6064981949458483}
002285/002340, loss: 0.044581, avg_loss: 0.235897
002290/002340, loss: 0.008923, avg_loss: 0.235524
002295/002340, loss: 0.011697, avg_loss: 0.235179
002300/002340, loss: 0.020234, avg_loss: 0.234708
002305/002340, loss: 0.024606, avg_loss: 0.234225
002310/002340, loss: 0.007431, avg_loss: 0.233798
002315/002340, loss: 0.006717, avg_loss: 0.233382
002320/002340, loss: 0.017990, avg_loss: 0.232940
002325/002340, loss: 0.145197, avg_loss: 0.232597
002330/002340, loss: 0.013951, avg_loss: 0.232139
002335/002340, loss: 0.014238, avg_loss: 0.231719
002340/002340, loss: 0.019154, avg_loss: 0.231268
***** Running train evaluation *****
Num examples = 2490
Instantaneous batch size per device = 32
Train Dataset Result: {'accuracy': 0.9955823293172691}
***** Running dev evaluation *****
Num examples = 277
Instantaneous batch size per device = 32
Dev Dataset Result: {'accuracy': 0.6101083032490975}
DEV Best Result: accuracy, 0.6462093862815884
Training time 0:02:36
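The closing numbers line up with the config above: 2490 training examples at a per-device batch size of 32 give ceil(2490/32) = 78 optimizer steps per epoch, and 78 x 30 epochs = 2340 total optimization steps, with a dev evaluation every eval_step = 120 steps. The gap between final train accuracy (0.9956) and dev accuracy (0.6101) shows heavy overfitting, which is why the reported "DEV Best Result" of 0.6462 comes from mid-training (first reached at step 720): with early_stop=True and early_stop_metric='accuracy', the script keeps the best dev score seen across the periodic evaluations rather than the last one. A minimal sketch of that bookkeeping, with hypothetical names rather than the actual script's code:

import math

# Step arithmetic from the Namespace above.
steps_per_epoch = math.ceil(2490 / 32)  # 78
total_steps = steps_per_epoch * 30      # 2340, matching "Total optimization steps = 2340"

# Hypothetical best-metric tracking behind the "DEV Best Result" line.
best = {"accuracy": 0.0, "step": 0}

def record_dev_result(step: int, accuracy: float) -> None:
    """Call after each periodic dev evaluation (every eval_step = 120 steps)."""
    if accuracy > best["accuracy"]:
        best["accuracy"], best["step"] = accuracy, step
        # A real script would also checkpoint the model to output_dir here,
        # so the saved weights match the best dev score rather than the final step.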