diff --git "a/train.ipynb" "b/train.ipynb" --- "a/train.ipynb" +++ "b/train.ipynb" @@ -289,9 +289,10 @@ "text": [ "/Users/andrewmayes/Openclassroom/CanineNet/env/lib/python3.12/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", - "Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:\n", + "Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration. Please open a PR/issue to update `preprocessor_config.json` to use `image_processor_type` instead of `feature_extractor_type`. This warning will be removed in v4.40.\n", + "Some weights of Swinv2ForImageClassification were not initialized from the model checkpoint at microsoft/swinv2-base-patch4-window16-256 and are newly initialized because the shapes did not match:\n", + "- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([120, 1024]) in the model instantiated\n", "- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([120]) in the model instantiated\n", - "- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([120, 768]) in the model instantiated\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "max_steps is given, it will override any value given in num_train_epochs\n" ] @@ -299,7 +300,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6c962c11c994495fb8155c7005d82523", + "model_id": "3f11443236f74e3aa0d3ea41fcaaaf91", "version_major": 2, "version_minor": 0 }, @@ -322,13 +323,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 4.8453, 'grad_norm': 3.2187986373901367, 'learning_rate': 4.9500000000000004e-05, 'epoch': 0.08}\n" + "{'loss': 4.7451, 'grad_norm': 7.392679691314697, 'learning_rate': 4.9500000000000004e-05, 'epoch': 0.08}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6021bcb13579469fb0394ebda25f4bce", + "model_id": "b8f571126879442b9bb8bf00fbf6c1a8", "version_major": 2, "version_minor": 0 }, @@ -351,7 +352,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 4.63408899307251, 'eval_accuracy': 0.03547133138969873, 'eval_f1': 0.03035870635022939, 'eval_precision': 0.031125096822983367, 'eval_recall': 0.03642496088050857, 'eval_runtime': 51.1034, 'eval_samples_per_second': 80.543, 'eval_steps_per_second': 2.524, 'epoch': 0.08}\n" + "{'eval_loss': 4.6182661056518555, 'eval_accuracy': 0.07167152575315841, 'eval_f1': 0.06175833391362469, 'eval_precision': 0.06693832428157515, 'eval_recall': 0.06805161257279692, 'eval_runtime': 118.9863, 'eval_samples_per_second': 34.592, 'eval_steps_per_second': 1.084, 'epoch': 0.08}\n" ] }, { @@ -366,13 +367,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 4.5433, 'grad_norm': 3.1309454441070557, 'learning_rate': 4.9e-05, 'epoch': 0.16}\n" + "{'loss': 4.5204, 'grad_norm': 17.792699813842773, 'learning_rate': 4.9e-05, 'epoch': 0.16}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3c6a94375def4988b006147b103f088e", + "model_id": "b29d38bfa09546ad8384d3f8d9d90109", "version_major": 2, "version_minor": 0 }, @@ -395,7 +396,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 4.31070613861084, 'eval_accuracy': 0.12463556851311954, 'eval_f1': 0.09815189378488118, 'eval_precision': 0.126318491286059, 'eval_recall': 0.12250067902004853, 'eval_runtime': 50.7554, 'eval_samples_per_second': 81.095, 'eval_steps_per_second': 2.542, 'epoch': 0.16}\n" + "{'eval_loss': 4.324223518371582, 'eval_accuracy': 0.20238095238095238, 'eval_f1': 0.14930686647738964, 'eval_precision': 0.18576762567564203, 'eval_recall': 0.1827081374434747, 'eval_runtime': 119.253, 'eval_samples_per_second': 34.515, 'eval_steps_per_second': 1.082, 'epoch': 0.16}\n" ] }, { @@ -410,13 +411,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 4.2752, 'grad_norm': 3.1881861686706543, 'learning_rate': 4.85e-05, 'epoch': 0.23}\n" + "{'loss': 4.2163, 'grad_norm': 27.42233657836914, 'learning_rate': 4.85e-05, 'epoch': 0.23}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f0178dfde45c4f588a96b8eea341a396", + "model_id": "7403045592cf4234aba9e2cf16811a7c", "version_major": 2, "version_minor": 0 }, @@ -439,7 +440,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 3.9696836471557617, 'eval_accuracy': 0.271865889212828, 'eval_f1': 0.217632428246865, 'eval_precision': 0.2517716495124851, 'eval_recall': 0.26318141054054833, 'eval_runtime': 50.0122, 'eval_samples_per_second': 82.3, 'eval_steps_per_second': 2.579, 'epoch': 0.23}\n" + "{'eval_loss': 3.85136079788208, 'eval_accuracy': 0.3816812439261419, 'eval_f1': 0.3107746196937497, 'eval_precision': 0.3754529363764648, 'eval_recall': 0.3598087581195972, 'eval_runtime': 116.3653, 'eval_samples_per_second': 35.371, 'eval_steps_per_second': 1.109, 'epoch': 0.23}\n" ] }, { @@ -454,13 +455,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 3.9872, 'grad_norm': 3.366978406906128, 'learning_rate': 4.8e-05, 'epoch': 0.31}\n" + "{'loss': 3.5996, 'grad_norm': 39.71358871459961, 'learning_rate': 4.8e-05, 'epoch': 0.31}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e83c39a8bf8a48a28c1cc1382bda022b", + "model_id": "ad05537e9c5046b89224fdde600fc61e", "version_major": 2, "version_minor": 0 }, @@ -483,7 +484,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 3.6401774883270264, 'eval_accuracy': 0.4273566569484937, 'eval_f1': 0.3660895297115911, 'eval_precision': 0.42644619182693877, 'eval_recall': 0.4167238077350393, 'eval_runtime': 49.7203, 'eval_samples_per_second': 82.783, 'eval_steps_per_second': 2.595, 'epoch': 0.31}\n" + "{'eval_loss': 2.993649959564209, 'eval_accuracy': 0.6025267249757046, 'eval_f1': 0.5396699491275935, 'eval_precision': 0.5985274492543927, 'eval_recall': 0.5851786479410009, 'eval_runtime': 116.097, 'eval_samples_per_second': 35.453, 'eval_steps_per_second': 1.111, 'epoch': 0.31}\n" ] }, { @@ -498,13 +499,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 3.7182, 'grad_norm': 3.2500784397125244, 'learning_rate': 4.75e-05, 'epoch': 0.39}\n" + "{'loss': 2.7565, 'grad_norm': 35.98662567138672, 'learning_rate': 4.75e-05, 'epoch': 0.39}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1a13f0bea0b445708a3fffb55aaa0c40", + "model_id": "4c50344bb3c4447cb362115a99ce7108", "version_major": 2, "version_minor": 0 }, @@ -527,7 +528,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 3.3251121044158936, 'eval_accuracy': 0.5362001943634597, 'eval_f1': 0.4888484189687165, 'eval_precision': 0.5816706888112184, 'eval_recall': 0.5247186429593306, 'eval_runtime': 49.5066, 'eval_samples_per_second': 83.14, 'eval_steps_per_second': 2.606, 'epoch': 0.39}\n" + "{'eval_loss': 1.8901652097702026, 'eval_accuracy': 0.7738095238095238, 'eval_f1': 0.7419140762746178, 'eval_precision': 0.80018778271747, 'eval_recall': 0.759897512543306, 'eval_runtime': 115.975, 'eval_samples_per_second': 35.49, 'eval_steps_per_second': 1.112, 'epoch': 0.39}\n" ] }, { @@ -542,13 +543,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 3.473, 'grad_norm': 3.3794054985046387, 'learning_rate': 4.7e-05, 'epoch': 0.47}\n" + "{'loss': 1.9695, 'grad_norm': 69.04105377197266, 'learning_rate': 4.7e-05, 'epoch': 0.47}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8d1448df4ed74b8d8f060360641d38f9", + "model_id": "0ead9a11bd9e479f9a84c69fb687c541", "version_major": 2, "version_minor": 0 }, @@ -559,19 +560,11 @@ "metadata": {}, "output_type": "display_data" }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/andrewmayes/Openclassroom/CanineNet/env/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1509: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 3.0453436374664307, 'eval_accuracy': 0.6219630709426628, 'eval_f1': 0.5814523710729809, 'eval_precision': 0.6516315278477566, 'eval_recall': 0.611536920880998, 'eval_runtime': 49.5824, 'eval_samples_per_second': 83.013, 'eval_steps_per_second': 2.602, 'epoch': 0.47}\n" + "{'eval_loss': 1.2026917934417725, 'eval_accuracy': 0.8644314868804664, 'eval_f1': 0.851211822050322, 'eval_precision': 0.8810420986229455, 'eval_recall': 0.8584653488560972, 'eval_runtime': 116.5016, 'eval_samples_per_second': 35.33, 'eval_steps_per_second': 1.107, 'epoch': 0.47}\n" ] }, { @@ -586,13 +579,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 3.2252, 'grad_norm': 3.5271167755126953, 'learning_rate': 4.6500000000000005e-05, 'epoch': 0.54}\n" + "{'loss': 1.4292, 'grad_norm': 35.54287338256836, 'learning_rate': 4.6500000000000005e-05, 'epoch': 0.54}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6fb4d3ce9a4943059082ccf1bc48d6c3", + "model_id": "cfb89a2dbe894a7bb3111134ad7560ca", "version_major": 2, "version_minor": 0 }, @@ -603,19 +596,11 @@ "metadata": {}, "output_type": "display_data" }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/andrewmayes/Openclassroom/CanineNet/env/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1509: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 2.773921489715576, 'eval_accuracy': 0.6817298347910593, 'eval_f1': 0.6505739550950606, 'eval_precision': 0.7193505945909296, 'eval_recall': 0.6712955927826942, 'eval_runtime': 49.4649, 'eval_samples_per_second': 83.21, 'eval_steps_per_second': 2.608, 'epoch': 0.54}\n" + "{'eval_loss': 0.8375147581100464, 'eval_accuracy': 0.8901846452866861, 'eval_f1': 0.8767897170357445, 'eval_precision': 0.9034288303237173, 'eval_recall': 0.8852697852467155, 'eval_runtime': 115.813, 'eval_samples_per_second': 35.54, 'eval_steps_per_second': 1.114, 'epoch': 0.54}\n" ] }, { @@ -630,13 +615,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 2.9976, 'grad_norm': 3.7335126399993896, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.62}\n" + "{'loss': 1.1191, 'grad_norm': 36.4020881652832, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.62}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "71436b6f8c46408f852e8c58275680ea", + "model_id": "026f809b056f4c8883256767246fb3a2", "version_major": 2, "version_minor": 0 }, @@ -647,19 +632,11 @@ "metadata": {}, "output_type": "display_data" }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/andrewmayes/Openclassroom/CanineNet/env/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1509: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 2.5391345024108887, 'eval_accuracy': 0.7045675413022352, 'eval_f1': 0.6756150478428508, 'eval_precision': 0.7285806531908359, 'eval_recall': 0.6953532664643869, 'eval_runtime': 49.5802, 'eval_samples_per_second': 83.017, 'eval_steps_per_second': 2.602, 'epoch': 0.62}\n" + "{'eval_loss': 0.5400001406669617, 'eval_accuracy': 0.9139941690962099, 'eval_f1': 0.9084552011702601, 'eval_precision': 0.9209161574159062, 'eval_recall': 0.9114069285375231, 'eval_runtime': 117.1034, 'eval_samples_per_second': 35.148, 'eval_steps_per_second': 1.102, 'epoch': 0.62}\n" ] }, { @@ -674,13 +651,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 2.762, 'grad_norm': 3.7279648780822754, 'learning_rate': 4.55e-05, 'epoch': 0.7}\n" + "{'loss': 0.9249, 'grad_norm': 37.288429260253906, 'learning_rate': 4.55e-05, 'epoch': 0.7}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c81a8a0d5c534a5080d7a53def87c955", + "model_id": "16da153d2d6a4f82ae0188589b477e11", "version_major": 2, "version_minor": 0 }, @@ -695,7 +672,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 2.2989985942840576, 'eval_accuracy': 0.750485908649174, 'eval_f1': 0.725800496636549, 'eval_precision': 0.7646354734126305, 'eval_recall': 0.7420594975688783, 'eval_runtime': 48.8518, 'eval_samples_per_second': 84.255, 'eval_steps_per_second': 2.641, 'epoch': 0.7}\n" + "{'eval_loss': 0.41832435131073, 'eval_accuracy': 0.9193391642371235, 'eval_f1': 0.913590633986456, 'eval_precision': 0.9283911611083989, 'eval_recall': 0.9169393031418399, 'eval_runtime': 116.4522, 'eval_samples_per_second': 35.345, 'eval_steps_per_second': 1.108, 'epoch': 0.7}\n" ] }, { @@ -710,13 +687,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 2.5763, 'grad_norm': 3.7486424446105957, 'learning_rate': 4.5e-05, 'epoch': 0.78}\n" + "{'loss': 0.7701, 'grad_norm': 36.70619201660156, 'learning_rate': 4.5e-05, 'epoch': 0.78}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "469c5d7e3d64453eacba1e1a0657dce7", + "model_id": "6eb5e91b0ec64038a6b485bdc7b6c513", "version_major": 2, "version_minor": 0 }, @@ -727,11 +704,19 @@ "metadata": {}, "output_type": "display_data" }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/andrewmayes/Openclassroom/CanineNet/env/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1509: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 2.107466459274292, 'eval_accuracy': 0.7645772594752187, 'eval_f1': 0.7433831500294813, 'eval_precision': 0.7792831030046864, 'eval_recall': 0.75564353300614, 'eval_runtime': 49.0856, 'eval_samples_per_second': 83.854, 'eval_steps_per_second': 2.628, 'epoch': 0.78}\n" + "{'eval_loss': 0.3423353433609009, 'eval_accuracy': 0.923712342079689, 'eval_f1': 0.9167317908696295, 'eval_precision': 0.9265458073145066, 'eval_recall': 0.9207090056403667, 'eval_runtime': 116.9797, 'eval_samples_per_second': 35.186, 'eval_steps_per_second': 1.103, 'epoch': 0.78}\n" ] }, { @@ -746,13 +731,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 2.4357, 'grad_norm': 3.8511111736297607, 'learning_rate': 4.4500000000000004e-05, 'epoch': 0.85}\n" + "{'loss': 0.7036, 'grad_norm': 30.09217643737793, 'learning_rate': 4.4500000000000004e-05, 'epoch': 0.85}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "19f32c96f133401db23c3ef7c51831b1", + "model_id": "1ccbb1ea839f4560a70d353b6e370a4e", "version_major": 2, "version_minor": 0 }, @@ -763,11 +748,19 @@ "metadata": {}, "output_type": "display_data" }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/andrewmayes/Openclassroom/CanineNet/env/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1509: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 1.9225727319717407, 'eval_accuracy': 0.7849854227405247, 'eval_f1': 0.7652021489489449, 'eval_precision': 0.8027206093648191, 'eval_recall': 0.7768373145126328, 'eval_runtime': 49.2931, 'eval_samples_per_second': 83.501, 'eval_steps_per_second': 2.617, 'epoch': 0.85}\n" + "{'eval_loss': 0.3141166865825653, 'eval_accuracy': 0.9258989310009719, 'eval_f1': 0.9199295975579974, 'eval_precision': 0.9270010191480259, 'eval_recall': 0.9228040093614015, 'eval_runtime': 116.9652, 'eval_samples_per_second': 35.19, 'eval_steps_per_second': 1.103, 'epoch': 0.85}\n" ] }, { @@ -782,13 +775,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 2.2669, 'grad_norm': 3.930382490158081, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.93}\n" + "{'loss': 0.7279, 'grad_norm': 25.417821884155273, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.93}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4523bb9fc8f34257ab3bc6b06bef09a3", + "model_id": "d3cc2129e1d44615a2d2301cf30c9642", "version_major": 2, "version_minor": 0 }, @@ -803,7 +796,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 1.7672972679138184, 'eval_accuracy': 0.8007774538386784, 'eval_f1': 0.7837747422166718, 'eval_precision': 0.8149458770309199, 'eval_recall': 0.7937617881931025, 'eval_runtime': 48.9548, 'eval_samples_per_second': 84.078, 'eval_steps_per_second': 2.635, 'epoch': 0.93}\n" + "{'eval_loss': 0.2813890278339386, 'eval_accuracy': 0.9261418853255587, 'eval_f1': 0.9200313705925975, 'eval_precision': 0.9300531792015672, 'eval_recall': 0.9234535506220213, 'eval_runtime': 116.2712, 'eval_samples_per_second': 35.4, 'eval_steps_per_second': 1.109, 'epoch': 0.93}\n" ] }, { @@ -818,13 +811,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 2.1459, 'grad_norm': 3.9222986698150635, 'learning_rate': 4.35e-05, 'epoch': 1.01}\n" + "{'loss': 0.6732, 'grad_norm': 37.04437255859375, 'learning_rate': 4.35e-05, 'epoch': 1.01}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a9154903a0fe418da37a7918a5f811f0", + "model_id": "3dd1623d6f7a4dd191113836acea426f", "version_major": 2, "version_minor": 0 }, @@ -839,7 +832,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 1.6338553428649902, 'eval_accuracy': 0.8175413022351797, 'eval_f1': 0.8058247212224944, 'eval_precision': 0.8291157786332387, 'eval_recall': 0.8109660231401558, 'eval_runtime': 48.8096, 'eval_samples_per_second': 84.328, 'eval_steps_per_second': 2.643, 'epoch': 1.01}\n" + "{'eval_loss': 0.2583376467227936, 'eval_accuracy': 0.9278425655976676, 'eval_f1': 0.9257827956681505, 'eval_precision': 0.9336659702817446, 'eval_recall': 0.9264134713855884, 'eval_runtime': 116.0872, 'eval_samples_per_second': 35.456, 'eval_steps_per_second': 1.111, 'epoch': 1.01}\n" ] }, { @@ -854,13 +847,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.9822, 'grad_norm': 3.697866678237915, 'learning_rate': 4.3e-05, 'epoch': 1.09}\n" + "{'loss': 0.5251, 'grad_norm': 29.01424789428711, 'learning_rate': 4.3e-05, 'epoch': 1.09}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "67f1c3edb1f34f69ba07c99543af05f3", + "model_id": "6ae9a429aeed497ba37cd62926ab41ea", "version_major": 2, "version_minor": 0 }, @@ -875,7 +868,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 1.5203596353530884, 'eval_accuracy': 0.8214285714285714, 'eval_f1': 0.811394046732835, 'eval_precision': 0.8365573111749929, 'eval_recall': 0.8150928251002161, 'eval_runtime': 49.0932, 'eval_samples_per_second': 83.841, 'eval_steps_per_second': 2.628, 'epoch': 1.09}\n" + "{'eval_loss': 0.24332156777381897, 'eval_accuracy': 0.9387755102040817, 'eval_f1': 0.9343495020560317, 'eval_precision': 0.9400195362137737, 'eval_recall': 0.9364590575108477, 'eval_runtime': 118.0813, 'eval_samples_per_second': 34.857, 'eval_steps_per_second': 1.092, 'epoch': 1.09}\n" ] }, { @@ -890,13 +883,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.8701, 'grad_norm': 3.714578151702881, 'learning_rate': 4.25e-05, 'epoch': 1.17}\n" + "{'loss': 0.506, 'grad_norm': 24.758153915405273, 'learning_rate': 4.25e-05, 'epoch': 1.17}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d43f0e82a2db44cfb713fb933acad02d", + "model_id": "e65c57c63acd4174a08e1ca23f77573f", "version_major": 2, "version_minor": 0 }, @@ -911,7 +904,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 1.4219428300857544, 'eval_accuracy': 0.8172983479105929, 'eval_f1': 0.8090784041709039, 'eval_precision': 0.8330024657306648, 'eval_recall': 0.8116546751922475, 'eval_runtime': 49.3628, 'eval_samples_per_second': 83.383, 'eval_steps_per_second': 2.613, 'epoch': 1.17}\n" + "{'eval_loss': 0.2486099898815155, 'eval_accuracy': 0.9293002915451894, 'eval_f1': 0.923673480868727, 'eval_precision': 0.9392627242326875, 'eval_recall': 0.9284078296144684, 'eval_runtime': 117.2673, 'eval_samples_per_second': 35.099, 'eval_steps_per_second': 1.1, 'epoch': 1.17}\n" ] }, { @@ -926,13 +919,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.8007, 'grad_norm': 3.973750591278076, 'learning_rate': 4.2e-05, 'epoch': 1.24}\n" + "{'loss': 0.4941, 'grad_norm': 30.309085845947266, 'learning_rate': 4.2e-05, 'epoch': 1.24}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2ebda73cf4f94eb7a9ec251c4e25bbbc", + "model_id": "34e037fa487a493e87ddb4f175491798", "version_major": 2, "version_minor": 0 }, @@ -947,7 +940,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 1.3224109411239624, 'eval_accuracy': 0.8292031098153547, 'eval_f1': 0.8205404948387138, 'eval_precision': 0.8390297018418361, 'eval_recall': 0.8233368890551767, 'eval_runtime': 48.8834, 'eval_samples_per_second': 84.2, 'eval_steps_per_second': 2.639, 'epoch': 1.24}\n" + "{'eval_loss': 0.24892301857471466, 'eval_accuracy': 0.9295432458697764, 'eval_f1': 0.9275860219532449, 'eval_precision': 0.9340224064275343, 'eval_recall': 0.9276101852749326, 'eval_runtime': 116.9083, 'eval_samples_per_second': 35.207, 'eval_steps_per_second': 1.103, 'epoch': 1.24}\n" ] }, { @@ -962,13 +955,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.8004, 'grad_norm': 4.3374433517456055, 'learning_rate': 4.15e-05, 'epoch': 1.32}\n" + "{'loss': 0.493, 'grad_norm': 31.011947631835938, 'learning_rate': 4.15e-05, 'epoch': 1.32}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7a067a0488d543419b3fbc09718fe0d4", + "model_id": "e7338155c71343d09d29b0c2b0a89e1c", "version_major": 2, "version_minor": 0 }, @@ -983,7 +976,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 1.255250334739685, 'eval_accuracy': 0.8323615160349854, 'eval_f1': 0.8243100776010878, 'eval_precision': 0.8413215716962761, 'eval_recall': 0.827061188971052, 'eval_runtime': 48.8143, 'eval_samples_per_second': 84.32, 'eval_steps_per_second': 2.643, 'epoch': 1.32}\n" + "{'eval_loss': 0.22557905316352844, 'eval_accuracy': 0.9361030126336248, 'eval_f1': 0.9336891277999396, 'eval_precision': 0.9401808677720161, 'eval_recall': 0.9344423711587737, 'eval_runtime': 118.5567, 'eval_samples_per_second': 34.718, 'eval_steps_per_second': 1.088, 'epoch': 1.32}\n" ] }, { @@ -998,13 +991,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.6511, 'grad_norm': 4.235231876373291, 'learning_rate': 4.1e-05, 'epoch': 1.4}\n" + "{'loss': 0.4975, 'grad_norm': 31.7733154296875, 'learning_rate': 4.1e-05, 'epoch': 1.4}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "aad38f3918c8490a83ecc4aa2c3e3f1a", + "model_id": "2117f169a61b478ca0c28e3d34504780", "version_major": 2, "version_minor": 0 }, @@ -1015,19 +1008,11 @@ "metadata": {}, "output_type": "display_data" }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/andrewmayes/Openclassroom/CanineNet/env/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1509: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 1.1728312969207764, 'eval_accuracy': 0.8372206025267249, 'eval_f1': 0.8281689029224051, 'eval_precision': 0.8466546065690131, 'eval_recall': 0.8313627834890125, 'eval_runtime': 48.8413, 'eval_samples_per_second': 84.273, 'eval_steps_per_second': 2.641, 'epoch': 1.4}\n" + "{'eval_loss': 0.2236272543668747, 'eval_accuracy': 0.9390184645286687, 'eval_f1': 0.9352200153615517, 'eval_precision': 0.9430236036085446, 'eval_recall': 0.9376522647842572, 'eval_runtime': 114.3593, 'eval_samples_per_second': 35.992, 'eval_steps_per_second': 1.128, 'epoch': 1.4}\n" ] }, { @@ -1042,13 +1027,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.548, 'grad_norm': 4.652594089508057, 'learning_rate': 4.05e-05, 'epoch': 1.48}\n" + "{'loss': 0.4742, 'grad_norm': 44.587825775146484, 'learning_rate': 4.05e-05, 'epoch': 1.48}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9f2c27b410e143bba1b35dd19d3c2590", + "model_id": "02daf2752fd740a182450a096ebf7ff1", "version_major": 2, "version_minor": 0 }, @@ -1063,7 +1048,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 1.109053373336792, 'eval_accuracy': 0.8394071914480078, 'eval_f1': 0.8300341562938096, 'eval_precision': 0.8499928349581373, 'eval_recall': 0.8339710868607537, 'eval_runtime': 50.0745, 'eval_samples_per_second': 82.197, 'eval_steps_per_second': 2.576, 'epoch': 1.48}\n" + "{'eval_loss': 0.22906745970249176, 'eval_accuracy': 0.9390184645286687, 'eval_f1': 0.9348672227662737, 'eval_precision': 0.9443221376291268, 'eval_recall': 0.9368278532845314, 'eval_runtime': 114.4144, 'eval_samples_per_second': 35.974, 'eval_steps_per_second': 1.127, 'epoch': 1.48}\n" ] }, { @@ -1078,13 +1063,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.5634, 'grad_norm': 4.48358678817749, 'learning_rate': 4e-05, 'epoch': 1.55}\n" + "{'loss': 0.4788, 'grad_norm': 51.10420608520508, 'learning_rate': 4e-05, 'epoch': 1.55}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cd700beaf90c44769bfef4b7d4db5c43", + "model_id": "9a2263ab9f3146df8d7b4ee491f818b7", "version_major": 2, "version_minor": 0 }, @@ -1099,7 +1084,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 1.0561015605926514, 'eval_accuracy': 0.8345481049562682, 'eval_f1': 0.826287548167397, 'eval_precision': 0.8444342550131346, 'eval_recall': 0.8287289494101967, 'eval_runtime': 50.026, 'eval_samples_per_second': 82.277, 'eval_steps_per_second': 2.579, 'epoch': 1.55}\n" + "{'eval_loss': 0.21873104572296143, 'eval_accuracy': 0.9385325558794947, 'eval_f1': 0.9348021784060464, 'eval_precision': 0.9429207736971593, 'eval_recall': 0.9358784149851511, 'eval_runtime': 114.5674, 'eval_samples_per_second': 35.926, 'eval_steps_per_second': 1.126, 'epoch': 1.55}\n" ] }, { @@ -1114,13 +1099,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.5163, 'grad_norm': 4.588488578796387, 'learning_rate': 3.9500000000000005e-05, 'epoch': 1.63}\n" + "{'loss': 0.4817, 'grad_norm': 29.57049560546875, 'learning_rate': 3.9500000000000005e-05, 'epoch': 1.63}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9361f96f0fff4e9d88d8471610800659", + "model_id": "5e82313185004fe8abca126ec2257bf6", "version_major": 2, "version_minor": 0 }, @@ -1135,7 +1120,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.9983496069908142, 'eval_accuracy': 0.8457240038872692, 'eval_f1': 0.8382174768545753, 'eval_precision': 0.8511547753297591, 'eval_recall': 0.8409440776548832, 'eval_runtime': 52.3351, 'eval_samples_per_second': 78.647, 'eval_steps_per_second': 2.465, 'epoch': 1.63}\n" + "{'eval_loss': 0.2193629890680313, 'eval_accuracy': 0.9382896015549077, 'eval_f1': 0.9365648816530497, 'eval_precision': 0.9438250894351351, 'eval_recall': 0.9370270150771032, 'eval_runtime': 114.6083, 'eval_samples_per_second': 35.914, 'eval_steps_per_second': 1.126, 'epoch': 1.63}\n" ] }, { @@ -1150,13 +1135,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.3883, 'grad_norm': 4.083033084869385, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.71}\n" + "{'loss': 0.425, 'grad_norm': 20.471195220947266, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.71}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "48180e876ddc4a79a4e1b2b9a5c6934c", + "model_id": "3599276492a6452681c73dc614934a3b", "version_major": 2, "version_minor": 0 }, @@ -1171,7 +1156,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.9574113488197327, 'eval_accuracy': 0.8498542274052479, 'eval_f1': 0.8424611801873888, 'eval_precision': 0.8545480234681159, 'eval_recall': 0.8452103929509444, 'eval_runtime': 54.0583, 'eval_samples_per_second': 76.14, 'eval_steps_per_second': 2.386, 'epoch': 1.71}\n" + "{'eval_loss': 0.21454617381095886, 'eval_accuracy': 0.9395043731778425, 'eval_f1': 0.9365201289942966, 'eval_precision': 0.9419287910877474, 'eval_recall': 0.9373934984216807, 'eval_runtime': 114.4861, 'eval_samples_per_second': 35.952, 'eval_steps_per_second': 1.127, 'epoch': 1.71}\n" ] }, { @@ -1186,13 +1171,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.3161, 'grad_norm': 3.867316484451294, 'learning_rate': 3.85e-05, 'epoch': 1.79}\n" + "{'loss': 0.4392, 'grad_norm': 30.670284271240234, 'learning_rate': 3.85e-05, 'epoch': 1.79}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "dc7b1140028d4bd6a4d169919a5ae08d", + "model_id": "af272ced345a4bd49ef0660206a87bdd", "version_major": 2, "version_minor": 0 }, @@ -1207,7 +1192,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.9129320979118347, 'eval_accuracy': 0.8510689990281827, 'eval_f1': 0.842499864625979, 'eval_precision': 0.8563830616439544, 'eval_recall': 0.8457464024015375, 'eval_runtime': 50.3958, 'eval_samples_per_second': 81.673, 'eval_steps_per_second': 2.56, 'epoch': 1.79}\n" + "{'eval_loss': 0.21058662235736847, 'eval_accuracy': 0.9404761904761905, 'eval_f1': 0.9367420698571843, 'eval_precision': 0.9472776166631209, 'eval_recall': 0.9389728915687214, 'eval_runtime': 114.8313, 'eval_samples_per_second': 35.844, 'eval_steps_per_second': 1.123, 'epoch': 1.79}\n" ] }, { @@ -1222,13 +1207,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.304, 'grad_norm': 4.2902374267578125, 'learning_rate': 3.8e-05, 'epoch': 1.86}\n" + "{'loss': 0.4295, 'grad_norm': 23.996227264404297, 'learning_rate': 3.8e-05, 'epoch': 1.86}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "fec69a88122a4f64a91c84217201f685", + "model_id": "98117d4b70cc469fbb4eb50cd3326fdc", "version_major": 2, "version_minor": 0 }, @@ -1243,7 +1228,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.8726516962051392, 'eval_accuracy': 0.8534985422740525, 'eval_f1': 0.8454465001707215, 'eval_precision': 0.8569985990010426, 'eval_recall': 0.8487166838712229, 'eval_runtime': 50.6061, 'eval_samples_per_second': 81.334, 'eval_steps_per_second': 2.549, 'epoch': 1.86}\n" + "{'eval_loss': 0.20313242077827454, 'eval_accuracy': 0.9426627793974732, 'eval_f1': 0.9415231910984045, 'eval_precision': 0.9460733827301256, 'eval_recall': 0.9419062362633588, 'eval_runtime': 114.4366, 'eval_samples_per_second': 35.968, 'eval_steps_per_second': 1.127, 'epoch': 1.86}\n" ] }, { @@ -1258,13 +1243,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.3268, 'grad_norm': 4.475445747375488, 'learning_rate': 3.7500000000000003e-05, 'epoch': 1.94}\n" + "{'loss': 0.447, 'grad_norm': 29.65323257446289, 'learning_rate': 3.7500000000000003e-05, 'epoch': 1.94}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6bb29ec8592c46569b186d1117110255", + "model_id": "8bee82efa39b4275b471a34c4ab4bc21", "version_major": 2, "version_minor": 0 }, @@ -1279,7 +1264,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.8411638736724854, 'eval_accuracy': 0.8510689990281827, 'eval_f1': 0.8440658519317673, 'eval_precision': 0.8572039674559415, 'eval_recall': 0.8472696555447555, 'eval_runtime': 50.1137, 'eval_samples_per_second': 82.133, 'eval_steps_per_second': 2.574, 'epoch': 1.94}\n" + "{'eval_loss': 0.20734070241451263, 'eval_accuracy': 0.9373177842565598, 'eval_f1': 0.934085851023942, 'eval_precision': 0.9405809641296471, 'eval_recall': 0.935499789776851, 'eval_runtime': 114.6373, 'eval_samples_per_second': 35.905, 'eval_steps_per_second': 1.125, 'epoch': 1.94}\n" ] }, { @@ -1294,13 +1279,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.2388, 'grad_norm': 4.488099575042725, 'learning_rate': 3.7e-05, 'epoch': 2.02}\n" + "{'loss': 0.4718, 'grad_norm': 25.023006439208984, 'learning_rate': 3.7e-05, 'epoch': 2.02}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "583e36931c9a45179a394bc95a485cdc", + "model_id": "503f18cd37ea4436af2bec5e8d90e89a", "version_major": 2, "version_minor": 0 }, @@ -1315,7 +1300,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.810386598110199, 'eval_accuracy': 0.8568999028182702, 'eval_f1': 0.8482250401180048, 'eval_precision': 0.8608009175717143, 'eval_recall': 0.8522420627356607, 'eval_runtime': 50.7442, 'eval_samples_per_second': 81.113, 'eval_steps_per_second': 2.542, 'epoch': 2.02}\n" + "{'eval_loss': 0.20732612907886505, 'eval_accuracy': 0.9416909620991254, 'eval_f1': 0.939770108961802, 'eval_precision': 0.9436139851292384, 'eval_recall': 0.9395972540954411, 'eval_runtime': 114.8017, 'eval_samples_per_second': 35.853, 'eval_steps_per_second': 1.124, 'epoch': 2.02}\n" ] }, { @@ -1330,13 +1315,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.1333, 'grad_norm': 4.516467094421387, 'learning_rate': 3.65e-05, 'epoch': 2.1}\n" + "{'loss': 0.4528, 'grad_norm': 29.38068199157715, 'learning_rate': 3.65e-05, 'epoch': 2.1}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8c925c5effbc45ddac06c813f367fda1", + "model_id": "ba6795e422984e49872674ffe95ea296", "version_major": 2, "version_minor": 0 }, @@ -1351,7 +1336,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.7920050024986267, 'eval_accuracy': 0.8556851311953353, 'eval_f1': 0.8486107665476558, 'eval_precision': 0.859555442251791, 'eval_recall': 0.851584767005035, 'eval_runtime': 50.4707, 'eval_samples_per_second': 81.552, 'eval_steps_per_second': 2.556, 'epoch': 2.1}\n" + "{'eval_loss': 0.20109473168849945, 'eval_accuracy': 0.9426627793974732, 'eval_f1': 0.9402546407567826, 'eval_precision': 0.9447143456273354, 'eval_recall': 0.9401327475466873, 'eval_runtime': 115.0331, 'eval_samples_per_second': 35.781, 'eval_steps_per_second': 1.121, 'epoch': 2.1}\n" ] }, { @@ -1366,13 +1351,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.1305, 'grad_norm': 3.9769482612609863, 'learning_rate': 3.6e-05, 'epoch': 2.17}\n" + "{'loss': 0.3958, 'grad_norm': 19.475000381469727, 'learning_rate': 3.6e-05, 'epoch': 2.17}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "510552b78d434bbba789c829af651a60", + "model_id": "3856d7fc28bb40a08477f017d6e465b5", "version_major": 2, "version_minor": 0 }, @@ -1387,7 +1372,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.7564593553543091, 'eval_accuracy': 0.857871720116618, 'eval_f1': 0.850534421437704, 'eval_precision': 0.8629770792513701, 'eval_recall': 0.8533865540611241, 'eval_runtime': 50.4855, 'eval_samples_per_second': 81.528, 'eval_steps_per_second': 2.555, 'epoch': 2.17}\n" + "{'eval_loss': 0.1979122757911682, 'eval_accuracy': 0.9438775510204082, 'eval_f1': 0.9401571777817359, 'eval_precision': 0.9466559993183812, 'eval_recall': 0.941823625617955, 'eval_runtime': 114.5986, 'eval_samples_per_second': 35.917, 'eval_steps_per_second': 1.126, 'epoch': 2.17}\n" ] }, { @@ -1402,13 +1387,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.1849, 'grad_norm': 5.161864757537842, 'learning_rate': 3.55e-05, 'epoch': 2.25}\n" + "{'loss': 0.4325, 'grad_norm': 23.977619171142578, 'learning_rate': 3.55e-05, 'epoch': 2.25}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1d5dc9b61953499ba761cd6cd53d0fa6", + "model_id": "cff02076da5a41b497372e4dbffe4610", "version_major": 2, "version_minor": 0 }, @@ -1423,7 +1408,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.7497532367706299, 'eval_accuracy': 0.85932944606414, 'eval_f1': 0.8535738637582512, 'eval_precision': 0.8646493754082539, 'eval_recall': 0.8548654745509918, 'eval_runtime': 51.6133, 'eval_samples_per_second': 79.747, 'eval_steps_per_second': 2.499, 'epoch': 2.25}\n" + "{'eval_loss': 0.19925996661186218, 'eval_accuracy': 0.9421768707482994, 'eval_f1': 0.9396204404158489, 'eval_precision': 0.9448251772503703, 'eval_recall': 0.940379301213978, 'eval_runtime': 114.6027, 'eval_samples_per_second': 35.915, 'eval_steps_per_second': 1.126, 'epoch': 2.25}\n" ] }, { @@ -1438,13 +1423,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.1287, 'grad_norm': 4.14560079574585, 'learning_rate': 3.5e-05, 'epoch': 2.33}\n" + "{'loss': 0.3228, 'grad_norm': 26.77207374572754, 'learning_rate': 3.5e-05, 'epoch': 2.33}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "acd5a1fd42334a7a95cc52286e3f24ea", + "model_id": "84ed894e77de406596ea27aecbb8f749", "version_major": 2, "version_minor": 0 }, @@ -1459,7 +1444,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.7347991466522217, 'eval_accuracy': 0.85932944606414, 'eval_f1': 0.853286935591957, 'eval_precision': 0.865349657662883, 'eval_recall': 0.8552408374882046, 'eval_runtime': 51.0727, 'eval_samples_per_second': 80.591, 'eval_steps_per_second': 2.526, 'epoch': 2.33}\n" + "{'eval_loss': 0.20252813398838043, 'eval_accuracy': 0.9397473275024295, 'eval_f1': 0.9371514375373243, 'eval_precision': 0.9415118336037184, 'eval_recall': 0.9374533753415454, 'eval_runtime': 114.9424, 'eval_samples_per_second': 35.809, 'eval_steps_per_second': 1.122, 'epoch': 2.33}\n" ] }, { @@ -1474,13 +1459,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.0537, 'grad_norm': 4.066341876983643, 'learning_rate': 3.45e-05, 'epoch': 2.41}\n" + "{'loss': 0.383, 'grad_norm': 18.35658073425293, 'learning_rate': 3.45e-05, 'epoch': 2.41}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "87f92583951e4326b1a1bfd7c4a47d60", + "model_id": "d19e69a21ad54df4ad3c6ca7a7b8bbe8", "version_major": 2, "version_minor": 0 }, @@ -1495,7 +1480,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.7120196223258972, 'eval_accuracy': 0.8554421768707483, 'eval_f1': 0.8495897159090885, 'eval_precision': 0.8585552651791398, 'eval_recall': 0.8515328760686888, 'eval_runtime': 51.0985, 'eval_samples_per_second': 80.55, 'eval_steps_per_second': 2.525, 'epoch': 2.41}\n" + "{'eval_loss': 0.20318132638931274, 'eval_accuracy': 0.9424198250728864, 'eval_f1': 0.9395786141795599, 'eval_precision': 0.9471288830591912, 'eval_recall': 0.9406682184567607, 'eval_runtime': 114.5097, 'eval_samples_per_second': 35.945, 'eval_steps_per_second': 1.127, 'epoch': 2.41}\n" ] }, { @@ -1510,13 +1495,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.1157, 'grad_norm': 4.041014671325684, 'learning_rate': 3.4000000000000007e-05, 'epoch': 2.49}\n" + "{'loss': 0.4147, 'grad_norm': 27.020387649536133, 'learning_rate': 3.4000000000000007e-05, 'epoch': 2.49}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ca8e221ddd5141d09f8ce738e36fa202", + "model_id": "319bb634831147d29846ecc06033f21d", "version_major": 2, "version_minor": 0 }, @@ -1531,7 +1516,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.6832228302955627, 'eval_accuracy': 0.8622448979591837, 'eval_f1': 0.855151527873783, 'eval_precision': 0.86621670758505, 'eval_recall': 0.8579446769808802, 'eval_runtime': 49.7698, 'eval_samples_per_second': 82.701, 'eval_steps_per_second': 2.592, 'epoch': 2.49}\n" + "{'eval_loss': 0.19750656187534332, 'eval_accuracy': 0.9433916423712342, 'eval_f1': 0.9401031149800597, 'eval_precision': 0.9466297083232625, 'eval_recall': 0.9418430619246869, 'eval_runtime': 114.3341, 'eval_samples_per_second': 36.0, 'eval_steps_per_second': 1.128, 'epoch': 2.49}\n" ] }, { @@ -1546,13 +1531,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.1008, 'grad_norm': 5.150105953216553, 'learning_rate': 3.35e-05, 'epoch': 2.56}\n" + "{'loss': 0.3587, 'grad_norm': 32.080196380615234, 'learning_rate': 3.35e-05, 'epoch': 2.56}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "89c4bee12afb45d9a0f6b9fd1efbee44", + "model_id": "55c80f1158d14b08a5847f17501bfa87", "version_major": 2, "version_minor": 0 }, @@ -1567,7 +1552,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.6704536080360413, 'eval_accuracy': 0.8617589893100097, 'eval_f1': 0.8546440118880293, 'eval_precision': 0.8640243651018866, 'eval_recall': 0.8573904668097079, 'eval_runtime': 49.6951, 'eval_samples_per_second': 82.825, 'eval_steps_per_second': 2.596, 'epoch': 2.56}\n" + "{'eval_loss': 0.20484071969985962, 'eval_accuracy': 0.9429057337220602, 'eval_f1': 0.941181947588637, 'eval_precision': 0.9453028458596549, 'eval_recall': 0.9415019462749606, 'eval_runtime': 114.7942, 'eval_samples_per_second': 35.855, 'eval_steps_per_second': 1.124, 'epoch': 2.56}\n" ] }, { @@ -1582,13 +1567,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.0512, 'grad_norm': 5.102594375610352, 'learning_rate': 3.3e-05, 'epoch': 2.64}\n" + "{'loss': 0.3481, 'grad_norm': 21.583600997924805, 'learning_rate': 3.3e-05, 'epoch': 2.64}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "302b8ae0e7954a28aef9e93ae735a9cc", + "model_id": "fdc0080da2ec46b0a002b729afce272d", "version_major": 2, "version_minor": 0 }, @@ -1603,7 +1588,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.655726432800293, 'eval_accuracy': 0.8629737609329446, 'eval_f1': 0.8563127076721974, 'eval_precision': 0.8635521819478348, 'eval_recall': 0.8592849145660356, 'eval_runtime': 50.4309, 'eval_samples_per_second': 81.617, 'eval_steps_per_second': 2.558, 'epoch': 2.64}\n" + "{'eval_loss': 0.21101877093315125, 'eval_accuracy': 0.9416909620991254, 'eval_f1': 0.9409328461528574, 'eval_precision': 0.9453296950566307, 'eval_recall': 0.9414241081371865, 'eval_runtime': 115.2273, 'eval_samples_per_second': 35.721, 'eval_steps_per_second': 1.12, 'epoch': 2.64}\n" ] }, { @@ -1618,13 +1603,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.0641, 'grad_norm': 3.8377113342285156, 'learning_rate': 3.2500000000000004e-05, 'epoch': 2.72}\n" + "{'loss': 0.4007, 'grad_norm': 30.03070640563965, 'learning_rate': 3.2500000000000004e-05, 'epoch': 2.72}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "14be25d55e1f467aab67e650e9108884", + "model_id": "f7229a61cca842679c7de9fbb575d375", "version_major": 2, "version_minor": 0 }, @@ -1639,7 +1624,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.6490115523338318, 'eval_accuracy': 0.8632167152575316, 'eval_f1': 0.8581037098670572, 'eval_precision': 0.8690863260667425, 'eval_recall': 0.8595770895727156, 'eval_runtime': 53.4616, 'eval_samples_per_second': 76.99, 'eval_steps_per_second': 2.413, 'epoch': 2.72}\n" + "{'eval_loss': 0.19447678327560425, 'eval_accuracy': 0.9448493683187561, 'eval_f1': 0.9415138633931484, 'eval_precision': 0.9469802948132661, 'eval_recall': 0.942949195133524, 'eval_runtime': 115.0467, 'eval_samples_per_second': 35.777, 'eval_steps_per_second': 1.121, 'epoch': 2.72}\n" ] }, { @@ -1654,13 +1639,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.0446, 'grad_norm': 4.310233116149902, 'learning_rate': 3.2000000000000005e-05, 'epoch': 2.8}\n" + "{'loss': 0.3719, 'grad_norm': 19.006587982177734, 'learning_rate': 3.2000000000000005e-05, 'epoch': 2.8}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e01a3df1dcc9451f8a24925ad73faca1", + "model_id": "bf63e2448443476e8e9d135a31429515", "version_major": 2, "version_minor": 0 }, @@ -1675,7 +1660,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.6300677061080933, 'eval_accuracy': 0.8651603498542274, 'eval_f1': 0.8597453620586883, 'eval_precision': 0.8692247694896753, 'eval_recall': 0.8611769340746278, 'eval_runtime': 51.2965, 'eval_samples_per_second': 80.239, 'eval_steps_per_second': 2.515, 'epoch': 2.8}\n" + "{'eval_loss': 0.20250166952610016, 'eval_accuracy': 0.9414480077745384, 'eval_f1': 0.9404145495231127, 'eval_precision': 0.9447405846663819, 'eval_recall': 0.9408034205466518, 'eval_runtime': 115.0173, 'eval_samples_per_second': 35.786, 'eval_steps_per_second': 1.122, 'epoch': 2.8}\n" ] }, { @@ -1690,13 +1675,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.0104, 'grad_norm': 4.522639274597168, 'learning_rate': 3.15e-05, 'epoch': 2.87}\n" + "{'loss': 0.3993, 'grad_norm': 29.12856101989746, 'learning_rate': 3.15e-05, 'epoch': 2.87}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4ddd6e5aef5543f2974a7477e9412b52", + "model_id": "5ceaa5c7d4a040d29c63eb7095828c69", "version_major": 2, "version_minor": 0 }, @@ -1711,7 +1696,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.6286835074424744, 'eval_accuracy': 0.8632167152575316, 'eval_f1': 0.8562195534431664, 'eval_precision': 0.8667545101362343, 'eval_recall': 0.8588131278764602, 'eval_runtime': 51.7032, 'eval_samples_per_second': 79.608, 'eval_steps_per_second': 2.495, 'epoch': 2.87}\n" + "{'eval_loss': 0.2011735588312149, 'eval_accuracy': 0.9448493683187561, 'eval_f1': 0.9419080758511604, 'eval_precision': 0.9485110507596467, 'eval_recall': 0.9429995946279425, 'eval_runtime': 114.9695, 'eval_samples_per_second': 35.801, 'eval_steps_per_second': 1.122, 'epoch': 2.87}\n" ] }, { @@ -1726,13 +1711,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.0544, 'grad_norm': 4.106919765472412, 'learning_rate': 3.1e-05, 'epoch': 2.95}\n" + "{'loss': 0.3745, 'grad_norm': 31.063690185546875, 'learning_rate': 3.1e-05, 'epoch': 2.95}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c0813c760f5f49a29b2a6c81739bc09b", + "model_id": "4fe4672267894541b62e2d0d3478bbaf", "version_major": 2, "version_minor": 0 }, @@ -1747,7 +1732,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.6149628162384033, 'eval_accuracy': 0.8644314868804664, 'eval_f1': 0.857927074632293, 'eval_precision': 0.8657466184998583, 'eval_recall': 0.8601794684156818, 'eval_runtime': 52.1426, 'eval_samples_per_second': 78.937, 'eval_steps_per_second': 2.474, 'epoch': 2.95}\n" + "{'eval_loss': 0.19235630333423615, 'eval_accuracy': 0.9450923226433431, 'eval_f1': 0.9414846823446322, 'eval_precision': 0.9499272592883472, 'eval_recall': 0.9434571156744745, 'eval_runtime': 115.3893, 'eval_samples_per_second': 35.671, 'eval_steps_per_second': 1.118, 'epoch': 2.95}\n" ] }, { @@ -1762,13 +1747,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 1.0074, 'grad_norm': 4.786281108856201, 'learning_rate': 3.05e-05, 'epoch': 3.03}\n" + "{'loss': 0.3638, 'grad_norm': 17.190593719482422, 'learning_rate': 3.05e-05, 'epoch': 3.03}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "42db7ee0a60d4ce8b29cf6af56989820", + "model_id": "9b3eeceea86d492299092c9c1c290262", "version_major": 2, "version_minor": 0 }, @@ -1783,7 +1768,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.6061375737190247, 'eval_accuracy': 0.8683187560738581, 'eval_f1': 0.8617445445380302, 'eval_precision': 0.8711997430081487, 'eval_recall': 0.8640854766935886, 'eval_runtime': 51.9411, 'eval_samples_per_second': 79.244, 'eval_steps_per_second': 2.484, 'epoch': 3.03}\n" + "{'eval_loss': 0.19399064779281616, 'eval_accuracy': 0.9443634596695821, 'eval_f1': 0.9424039415864353, 'eval_precision': 0.9477598282320199, 'eval_recall': 0.9424056698043153, 'eval_runtime': 114.892, 'eval_samples_per_second': 35.825, 'eval_steps_per_second': 1.123, 'epoch': 3.03}\n" ] }, { @@ -1798,13 +1783,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.9329, 'grad_norm': 4.053191184997559, 'learning_rate': 3e-05, 'epoch': 3.11}\n" + "{'loss': 0.3421, 'grad_norm': 34.20246505737305, 'learning_rate': 3e-05, 'epoch': 3.11}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9a0c228606bd429980c487be94e921d1", + "model_id": "b9fbbb413f794c7da2ad5a007fc5f355", "version_major": 2, "version_minor": 0 }, @@ -1819,7 +1804,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.6000697016716003, 'eval_accuracy': 0.8661321671525753, 'eval_f1': 0.8591313660290419, 'eval_precision': 0.8750348135359067, 'eval_recall': 0.861981456704839, 'eval_runtime': 49.9761, 'eval_samples_per_second': 82.359, 'eval_steps_per_second': 2.581, 'epoch': 3.11}\n" + "{'eval_loss': 0.1897164136171341, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9441019525393687, 'eval_precision': 0.9496324245782034, 'eval_recall': 0.9445579251653037, 'eval_runtime': 114.9933, 'eval_samples_per_second': 35.793, 'eval_steps_per_second': 1.122, 'epoch': 3.11}\n" ] }, { @@ -1834,13 +1819,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.9049, 'grad_norm': 3.7903292179107666, 'learning_rate': 2.95e-05, 'epoch': 3.18}\n" + "{'loss': 0.2906, 'grad_norm': 39.98542404174805, 'learning_rate': 2.95e-05, 'epoch': 3.18}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0e14e21215e74e42803f6fd452a85644", + "model_id": "f1f70f5b6d064c17801f3ec017b26c17", "version_major": 2, "version_minor": 0 }, @@ -1855,7 +1840,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5924625992774963, 'eval_accuracy': 0.8685617103984451, 'eval_f1': 0.8617161924024251, 'eval_precision': 0.8731252723397493, 'eval_recall': 0.8646873775472712, 'eval_runtime': 50.4365, 'eval_samples_per_second': 81.608, 'eval_steps_per_second': 2.558, 'epoch': 3.18}\n" + "{'eval_loss': 0.18932002782821655, 'eval_accuracy': 0.9470359572400389, 'eval_f1': 0.9456754034759556, 'eval_precision': 0.9494168814742135, 'eval_recall': 0.9456967112313741, 'eval_runtime': 115.4616, 'eval_samples_per_second': 35.648, 'eval_steps_per_second': 1.117, 'epoch': 3.18}\n" ] }, { @@ -1870,13 +1855,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.9815, 'grad_norm': 4.7535929679870605, 'learning_rate': 2.9e-05, 'epoch': 3.26}\n" + "{'loss': 0.3455, 'grad_norm': 22.710445404052734, 'learning_rate': 2.9e-05, 'epoch': 3.26}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7fafd8a826374a6a9e7e48ca6f3abcb4", + "model_id": "11c74a1f15c641efa382db8b5bf90fb1", "version_major": 2, "version_minor": 0 }, @@ -1891,7 +1876,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5806066393852234, 'eval_accuracy': 0.8685617103984451, 'eval_f1': 0.8622182587119455, 'eval_precision': 0.8717482848256731, 'eval_recall': 0.8644384448185363, 'eval_runtime': 49.3023, 'eval_samples_per_second': 83.485, 'eval_steps_per_second': 2.617, 'epoch': 3.26}\n" + "{'eval_loss': 0.18017922341823578, 'eval_accuracy': 0.9484936831875608, 'eval_f1': 0.94714050801556, 'eval_precision': 0.9499007284588198, 'eval_recall': 0.9474784961404308, 'eval_runtime': 114.5961, 'eval_samples_per_second': 35.917, 'eval_steps_per_second': 1.126, 'epoch': 3.26}\n" ] }, { @@ -1906,13 +1891,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.9507, 'grad_norm': 4.06498384475708, 'learning_rate': 2.8499999999999998e-05, 'epoch': 3.34}\n" + "{'loss': 0.3338, 'grad_norm': 27.539039611816406, 'learning_rate': 2.8499999999999998e-05, 'epoch': 3.34}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "84731053de7b4d578e5faa26ddba8eb4", + "model_id": "1839f3e67b81440bb5fdbbc6327a2cdb", "version_major": 2, "version_minor": 0 }, @@ -1927,7 +1912,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5792554020881653, 'eval_accuracy': 0.8673469387755102, 'eval_f1': 0.8613137189565682, 'eval_precision': 0.8691317661017358, 'eval_recall': 0.863785050860545, 'eval_runtime': 49.2511, 'eval_samples_per_second': 83.572, 'eval_steps_per_second': 2.619, 'epoch': 3.34}\n" + "{'eval_loss': 0.19262713193893433, 'eval_accuracy': 0.9441205053449951, 'eval_f1': 0.9414006658184005, 'eval_precision': 0.9472613482110482, 'eval_recall': 0.9424492875208347, 'eval_runtime': 114.3881, 'eval_samples_per_second': 35.983, 'eval_steps_per_second': 1.128, 'epoch': 3.34}\n" ] }, { @@ -1942,13 +1927,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.9608, 'grad_norm': 4.010519981384277, 'learning_rate': 2.8000000000000003e-05, 'epoch': 3.42}\n" + "{'loss': 0.3307, 'grad_norm': 31.13157844543457, 'learning_rate': 2.8000000000000003e-05, 'epoch': 3.42}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "705413cf7a7d437ab30de4075d934927", + "model_id": "35636ac347cf40e897a69a8833d3d2e9", "version_major": 2, "version_minor": 0 }, @@ -1963,7 +1948,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5720817446708679, 'eval_accuracy': 0.8671039844509232, 'eval_f1': 0.8614434918253173, 'eval_precision': 0.8683467456488219, 'eval_recall': 0.8635546513196746, 'eval_runtime': 49.32, 'eval_samples_per_second': 83.455, 'eval_steps_per_second': 2.616, 'epoch': 3.42}\n" + "{'eval_loss': 0.20202496647834778, 'eval_accuracy': 0.9419339164237124, 'eval_f1': 0.9407488062667215, 'eval_precision': 0.9447014610743715, 'eval_recall': 0.9408760495815661, 'eval_runtime': 114.2398, 'eval_samples_per_second': 36.029, 'eval_steps_per_second': 1.129, 'epoch': 3.42}\n" ] }, { @@ -1978,13 +1963,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.9409, 'grad_norm': 4.6165385246276855, 'learning_rate': 2.7500000000000004e-05, 'epoch': 3.5}\n" + "{'loss': 0.367, 'grad_norm': 35.75934982299805, 'learning_rate': 2.7500000000000004e-05, 'epoch': 3.5}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9048d20d67564db5b43854c4edda7abf", + "model_id": "d326a4dd31164df19d6fb5a91293712c", "version_major": 2, "version_minor": 0 }, @@ -1999,7 +1984,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5688398480415344, 'eval_accuracy': 0.8651603498542274, 'eval_f1': 0.8591215620506901, 'eval_precision': 0.8658442906831004, 'eval_recall': 0.8612273702740664, 'eval_runtime': 49.46, 'eval_samples_per_second': 83.219, 'eval_steps_per_second': 2.608, 'epoch': 3.5}\n" + "{'eval_loss': 0.19336122274398804, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9452244096779967, 'eval_precision': 0.9486806069204384, 'eval_recall': 0.945357240364433, 'eval_runtime': 114.8563, 'eval_samples_per_second': 35.836, 'eval_steps_per_second': 1.123, 'epoch': 3.5}\n" ] }, { @@ -2014,13 +1999,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8856, 'grad_norm': 3.7885537147521973, 'learning_rate': 2.7000000000000002e-05, 'epoch': 3.57}\n" + "{'loss': 0.3248, 'grad_norm': 23.628128051757812, 'learning_rate': 2.7000000000000002e-05, 'epoch': 3.57}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "29145fa83828458f8a088c624cb2999a", + "model_id": "064a046703f74653a93f8141ca4a02ea", "version_major": 2, "version_minor": 0 }, @@ -2035,7 +2020,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5563119649887085, 'eval_accuracy': 0.870019436345967, 'eval_f1': 0.8649784624757803, 'eval_precision': 0.8714328398178769, 'eval_recall': 0.8667391442108526, 'eval_runtime': 49.4538, 'eval_samples_per_second': 83.229, 'eval_steps_per_second': 2.608, 'epoch': 3.57}\n" + "{'eval_loss': 0.2003922015428543, 'eval_accuracy': 0.9419339164237124, 'eval_f1': 0.9392921765894248, 'eval_precision': 0.9442515458934876, 'eval_recall': 0.9401493961700677, 'eval_runtime': 114.5631, 'eval_samples_per_second': 35.928, 'eval_steps_per_second': 1.126, 'epoch': 3.57}\n" ] }, { @@ -2050,13 +2035,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.9099, 'grad_norm': 5.023146629333496, 'learning_rate': 2.6500000000000004e-05, 'epoch': 3.65}\n" + "{'loss': 0.3366, 'grad_norm': 42.56528854370117, 'learning_rate': 2.6500000000000004e-05, 'epoch': 3.65}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "03f92068564b4657b0b886f0a6dd9dc5", + "model_id": "f17d9a4de43a4cca84ccd9f535c043d2", "version_major": 2, "version_minor": 0 }, @@ -2071,7 +2056,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5557062029838562, 'eval_accuracy': 0.8661321671525753, 'eval_f1': 0.8612849221676474, 'eval_precision': 0.8681216404872331, 'eval_recall': 0.8622434834974765, 'eval_runtime': 49.2588, 'eval_samples_per_second': 83.559, 'eval_steps_per_second': 2.619, 'epoch': 3.65}\n" + "{'eval_loss': 0.19236095249652863, 'eval_accuracy': 0.9431486880466472, 'eval_f1': 0.9409643735150478, 'eval_precision': 0.9466555090439602, 'eval_recall': 0.9415082502284738, 'eval_runtime': 114.6786, 'eval_samples_per_second': 35.892, 'eval_steps_per_second': 1.125, 'epoch': 3.65}\n" ] }, { @@ -2086,13 +2071,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.9167, 'grad_norm': 4.845632076263428, 'learning_rate': 2.6000000000000002e-05, 'epoch': 3.73}\n" + "{'loss': 0.3342, 'grad_norm': 19.98875617980957, 'learning_rate': 2.6000000000000002e-05, 'epoch': 3.73}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0a4200b6100c4c70aba319aa94bf38c2", + "model_id": "f5913a02a3344de1a338eaaaf5cfa89a", "version_major": 2, "version_minor": 0 }, @@ -2107,7 +2092,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5526946783065796, 'eval_accuracy': 0.8685617103984451, 'eval_f1': 0.8638929665689471, 'eval_precision': 0.8701328643411136, 'eval_recall': 0.8647950070672704, 'eval_runtime': 48.692, 'eval_samples_per_second': 84.531, 'eval_steps_per_second': 2.649, 'epoch': 3.73}\n" + "{'eval_loss': 0.19377027451992035, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9436307861283311, 'eval_precision': 0.9467901159978599, 'eval_recall': 0.943816355738806, 'eval_runtime': 114.9849, 'eval_samples_per_second': 35.796, 'eval_steps_per_second': 1.122, 'epoch': 3.73}\n" ] }, { @@ -2122,13 +2107,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.9077, 'grad_norm': 4.908811569213867, 'learning_rate': 2.5500000000000003e-05, 'epoch': 3.81}\n" + "{'loss': 0.3386, 'grad_norm': 21.59819221496582, 'learning_rate': 2.5500000000000003e-05, 'epoch': 3.81}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "57a90ed23c7f492fa5b7dcfceb506e83", + "model_id": "015bdb45bda248ffbd21008cef7334cd", "version_major": 2, "version_minor": 0 }, @@ -2143,7 +2128,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5431388020515442, 'eval_accuracy': 0.8705053449951409, 'eval_f1': 0.8669220309230944, 'eval_precision': 0.8721780266203637, 'eval_recall': 0.8673898965351654, 'eval_runtime': 49.1531, 'eval_samples_per_second': 83.738, 'eval_steps_per_second': 2.624, 'epoch': 3.81}\n" + "{'eval_loss': 0.20178444683551788, 'eval_accuracy': 0.9443634596695821, 'eval_f1': 0.9428295179020412, 'eval_precision': 0.9469455644494296, 'eval_recall': 0.9430155349321213, 'eval_runtime': 114.6708, 'eval_samples_per_second': 35.894, 'eval_steps_per_second': 1.125, 'epoch': 3.81}\n" ] }, { @@ -2158,13 +2143,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.9005, 'grad_norm': 5.006350994110107, 'learning_rate': 2.5e-05, 'epoch': 3.88}\n" + "{'loss': 0.3841, 'grad_norm': 21.11665153503418, 'learning_rate': 2.5e-05, 'epoch': 3.88}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "44a7bc37f5864131b9354194a55991ab", + "model_id": "b05151a706644c8baa4ea293a23d6107", "version_major": 2, "version_minor": 0 }, @@ -2179,7 +2164,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5389750003814697, 'eval_accuracy': 0.8731778425655977, 'eval_f1': 0.8697069550253945, 'eval_precision': 0.8748570776000245, 'eval_recall': 0.8700614515732695, 'eval_runtime': 48.8894, 'eval_samples_per_second': 84.19, 'eval_steps_per_second': 2.639, 'epoch': 3.88}\n" + "{'eval_loss': 0.193311870098114, 'eval_accuracy': 0.9433916423712342, 'eval_f1': 0.9414012627871041, 'eval_precision': 0.9458002923284925, 'eval_recall': 0.94184618952753, 'eval_runtime': 114.9776, 'eval_samples_per_second': 35.798, 'eval_steps_per_second': 1.122, 'epoch': 3.88}\n" ] }, { @@ -2194,13 +2179,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8596, 'grad_norm': 4.478883743286133, 'learning_rate': 2.45e-05, 'epoch': 3.96}\n" + "{'loss': 0.3174, 'grad_norm': 18.7007999420166, 'learning_rate': 2.45e-05, 'epoch': 3.96}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "075b394879fe402ea13db7d263b6e26a", + "model_id": "96eb8e73e84a403894a748ad134b7c0d", "version_major": 2, "version_minor": 0 }, @@ -2215,7 +2200,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5375447273254395, 'eval_accuracy': 0.8707482993197279, 'eval_f1': 0.8655144499982467, 'eval_precision': 0.8731854105567156, 'eval_recall': 0.8667916588503376, 'eval_runtime': 48.919, 'eval_samples_per_second': 84.139, 'eval_steps_per_second': 2.637, 'epoch': 3.96}\n" + "{'eval_loss': 0.19022010266780853, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9437864796087925, 'eval_precision': 0.9466327980959414, 'eval_recall': 0.9435855550407289, 'eval_runtime': 115.2406, 'eval_samples_per_second': 35.717, 'eval_steps_per_second': 1.119, 'epoch': 3.96}\n" ] }, { @@ -2230,13 +2215,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8856, 'grad_norm': 5.626810073852539, 'learning_rate': 2.4e-05, 'epoch': 4.04}\n" + "{'loss': 0.2996, 'grad_norm': 17.607086181640625, 'learning_rate': 2.4e-05, 'epoch': 4.04}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "28e32e7cae704936af9cd959cfee5acf", + "model_id": "2d92308dbd924d52834c288a29ed0c9a", "version_major": 2, "version_minor": 0 }, @@ -2251,7 +2236,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5254101753234863, 'eval_accuracy': 0.8705053449951409, 'eval_f1': 0.8651370337427908, 'eval_precision': 0.8740741181870556, 'eval_recall': 0.8663241246770017, 'eval_runtime': 48.9016, 'eval_samples_per_second': 84.169, 'eval_steps_per_second': 2.638, 'epoch': 4.04}\n" + "{'eval_loss': 0.18879003822803497, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9453809424654497, 'eval_precision': 0.9497376727045288, 'eval_recall': 0.9459956280754142, 'eval_runtime': 115.2126, 'eval_samples_per_second': 35.725, 'eval_steps_per_second': 1.12, 'epoch': 4.04}\n" ] }, { @@ -2266,13 +2251,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8869, 'grad_norm': 5.352138996124268, 'learning_rate': 2.35e-05, 'epoch': 4.12}\n" + "{'loss': 0.2879, 'grad_norm': 34.3960075378418, 'learning_rate': 2.35e-05, 'epoch': 4.12}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5e9661c3d81e4149a6e86bff787dee44", + "model_id": "6d170d7308f4406f8d3cacd6f36d0d1a", "version_major": 2, "version_minor": 0 }, @@ -2287,7 +2272,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5237516760826111, 'eval_accuracy': 0.8717201166180758, 'eval_f1': 0.865671971284358, 'eval_precision': 0.8731240454316355, 'eval_recall': 0.8679943344822763, 'eval_runtime': 48.7844, 'eval_samples_per_second': 84.371, 'eval_steps_per_second': 2.644, 'epoch': 4.12}\n" + "{'eval_loss': 0.18851810693740845, 'eval_accuracy': 0.9441205053449951, 'eval_f1': 0.9427902851201815, 'eval_precision': 0.9464331604533499, 'eval_recall': 0.9428457088438701, 'eval_runtime': 115.3928, 'eval_samples_per_second': 35.669, 'eval_steps_per_second': 1.118, 'epoch': 4.12}\n" ] }, { @@ -2302,13 +2287,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8069, 'grad_norm': 4.255963325500488, 'learning_rate': 2.3000000000000003e-05, 'epoch': 4.19}\n" + "{'loss': 0.3035, 'grad_norm': 27.399072647094727, 'learning_rate': 2.3000000000000003e-05, 'epoch': 4.19}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8f31e7e5f8d547b9986e2dc7c0f785e9", + "model_id": "8fb9ff5f50e04639ad7cac188f4e66bd", "version_major": 2, "version_minor": 0 }, @@ -2323,7 +2308,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5188306570053101, 'eval_accuracy': 0.8731778425655977, 'eval_f1': 0.867099882007312, 'eval_precision': 0.87435010386749, 'eval_recall': 0.8695338515827772, 'eval_runtime': 48.8651, 'eval_samples_per_second': 84.232, 'eval_steps_per_second': 2.64, 'epoch': 4.19}\n" + "{'eval_loss': 0.1908504068851471, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9434051369389914, 'eval_precision': 0.9474717636272078, 'eval_recall': 0.9437481173747684, 'eval_runtime': 114.8582, 'eval_samples_per_second': 35.835, 'eval_steps_per_second': 1.123, 'epoch': 4.19}\n" ] }, { @@ -2338,13 +2323,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8474, 'grad_norm': 4.418267250061035, 'learning_rate': 2.25e-05, 'epoch': 4.27}\n" + "{'loss': 0.2574, 'grad_norm': 21.96457290649414, 'learning_rate': 2.25e-05, 'epoch': 4.27}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cbd43bb0dcba4d01abb0735444e46f1b", + "model_id": "6808dd6b16fe4f36b7132c0ac086d184", "version_major": 2, "version_minor": 0 }, @@ -2359,7 +2344,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5188424587249756, 'eval_accuracy': 0.8709912536443148, 'eval_f1': 0.8648806741668069, 'eval_precision': 0.8728845356582566, 'eval_recall': 0.8671342829268437, 'eval_runtime': 50.6662, 'eval_samples_per_second': 81.238, 'eval_steps_per_second': 2.546, 'epoch': 4.27}\n" + "{'eval_loss': 0.18855735659599304, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9427031026358901, 'eval_precision': 0.9475830137825754, 'eval_recall': 0.9438414557692797, 'eval_runtime': 114.8411, 'eval_samples_per_second': 35.841, 'eval_steps_per_second': 1.123, 'epoch': 4.27}\n" ] }, { @@ -2374,13 +2359,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8243, 'grad_norm': 5.182613372802734, 'learning_rate': 2.2000000000000003e-05, 'epoch': 4.35}\n" + "{'loss': 0.3219, 'grad_norm': 41.36893081665039, 'learning_rate': 2.2000000000000003e-05, 'epoch': 4.35}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b8be87f279094e95adcf2a84573bba8f", + "model_id": "afa7acec8c6c4ac19fe96faadb0786d6", "version_major": 2, "version_minor": 0 }, @@ -2395,7 +2380,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.517697811126709, 'eval_accuracy': 0.8726919339164237, 'eval_f1': 0.8683813511248041, 'eval_precision': 0.8756396608060336, 'eval_recall': 0.8695726524990666, 'eval_runtime': 49.672, 'eval_samples_per_second': 82.864, 'eval_steps_per_second': 2.597, 'epoch': 4.35}\n" + "{'eval_loss': 0.18892067670822144, 'eval_accuracy': 0.9433916423712342, 'eval_f1': 0.9411438682134082, 'eval_precision': 0.9461780674309639, 'eval_recall': 0.9417372009351842, 'eval_runtime': 115.4308, 'eval_samples_per_second': 35.658, 'eval_steps_per_second': 1.118, 'epoch': 4.35}\n" ] }, { @@ -2410,13 +2395,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8437, 'grad_norm': 5.045173645019531, 'learning_rate': 2.15e-05, 'epoch': 4.43}\n" + "{'loss': 0.2827, 'grad_norm': 31.3144588470459, 'learning_rate': 2.15e-05, 'epoch': 4.43}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "aca1827d264449d986cd7452ed1e9d39", + "model_id": "4c8c12d4a0cb4c1fbd6140520c6a1bec", "version_major": 2, "version_minor": 0 }, @@ -2431,7 +2416,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5107200741767883, 'eval_accuracy': 0.8726919339164237, 'eval_f1': 0.8681911737274114, 'eval_precision': 0.8742153303246233, 'eval_recall': 0.8693299915121044, 'eval_runtime': 51.4774, 'eval_samples_per_second': 79.957, 'eval_steps_per_second': 2.506, 'epoch': 4.43}\n" + "{'eval_loss': 0.18959985673427582, 'eval_accuracy': 0.9448493683187561, 'eval_f1': 0.9434954931712027, 'eval_precision': 0.9463643348204727, 'eval_recall': 0.9433684998867641, 'eval_runtime': 115.1487, 'eval_samples_per_second': 35.745, 'eval_steps_per_second': 1.12, 'epoch': 4.43}\n" ] }, { @@ -2446,13 +2431,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7761, 'grad_norm': 4.515903949737549, 'learning_rate': 2.1e-05, 'epoch': 4.5}\n" + "{'loss': 0.2869, 'grad_norm': 20.795242309570312, 'learning_rate': 2.1e-05, 'epoch': 4.5}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8dd1633c2196465a9eba56be8c8d844f", + "model_id": "15f67e755fe44958b5d31c3894e377bf", "version_major": 2, "version_minor": 0 }, @@ -2467,7 +2452,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5024524331092834, 'eval_accuracy': 0.8739067055393586, 'eval_f1': 0.8699913214103319, 'eval_precision': 0.8750598594391856, 'eval_recall': 0.8708382312909497, 'eval_runtime': 50.3936, 'eval_samples_per_second': 81.677, 'eval_steps_per_second': 2.56, 'epoch': 4.5}\n" + "{'eval_loss': 0.1945854127407074, 'eval_accuracy': 0.9443634596695821, 'eval_f1': 0.9429882368776638, 'eval_precision': 0.9458759338467805, 'eval_recall': 0.9427150271390136, 'eval_runtime': 115.0128, 'eval_samples_per_second': 35.787, 'eval_steps_per_second': 1.122, 'epoch': 4.5}\n" ] }, { @@ -2482,13 +2467,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.784, 'grad_norm': 4.124168872833252, 'learning_rate': 2.05e-05, 'epoch': 4.58}\n" + "{'loss': 0.3442, 'grad_norm': 22.8905029296875, 'learning_rate': 2.05e-05, 'epoch': 4.58}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "00dc1b02cfeb47459627c3fe3b3ac700", + "model_id": "ffdcf3afa5c841208a80480437fd2227", "version_major": 2, "version_minor": 0 }, @@ -2503,7 +2488,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5016156435012817, 'eval_accuracy': 0.8768221574344023, 'eval_f1': 0.8716643545809573, 'eval_precision': 0.8777660734719462, 'eval_recall': 0.8734492800744772, 'eval_runtime': 50.9735, 'eval_samples_per_second': 80.748, 'eval_steps_per_second': 2.531, 'epoch': 4.58}\n" + "{'eval_loss': 0.18711623549461365, 'eval_accuracy': 0.9458211856171039, 'eval_f1': 0.9443975552551526, 'eval_precision': 0.9477411298392244, 'eval_recall': 0.9444669404930629, 'eval_runtime': 115.0342, 'eval_samples_per_second': 35.781, 'eval_steps_per_second': 1.121, 'epoch': 4.58}\n" ] }, { @@ -2518,13 +2503,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8055, 'grad_norm': 4.525731563568115, 'learning_rate': 2e-05, 'epoch': 4.66}\n" + "{'loss': 0.2739, 'grad_norm': 18.282894134521484, 'learning_rate': 2e-05, 'epoch': 4.66}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3da318349b6f463f94a8591719200d64", + "model_id": "c23489f9c11c4458902349cae28ed860", "version_major": 2, "version_minor": 0 }, @@ -2539,7 +2524,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5018946528434753, 'eval_accuracy': 0.8739067055393586, 'eval_f1': 0.8700873351301581, 'eval_precision': 0.8771679126025378, 'eval_recall': 0.8709610203860829, 'eval_runtime': 51.3104, 'eval_samples_per_second': 80.218, 'eval_steps_per_second': 2.514, 'epoch': 4.66}\n" + "{'eval_loss': 0.1880841702222824, 'eval_accuracy': 0.9441205053449951, 'eval_f1': 0.9415259481526164, 'eval_precision': 0.9469780431226001, 'eval_recall': 0.9421132519003071, 'eval_runtime': 114.6389, 'eval_samples_per_second': 35.904, 'eval_steps_per_second': 1.125, 'epoch': 4.66}\n" ] }, { @@ -2554,13 +2539,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8109, 'grad_norm': 4.918236255645752, 'learning_rate': 1.9500000000000003e-05, 'epoch': 4.74}\n" + "{'loss': 0.3067, 'grad_norm': 17.205751419067383, 'learning_rate': 1.9500000000000003e-05, 'epoch': 4.74}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "539c11496bc546ef90d2844363f0e760", + "model_id": "8a7bbac5d1af4407b8b409c07b47bfbb", "version_major": 2, "version_minor": 0 }, @@ -2575,7 +2560,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.49602368474006653, 'eval_accuracy': 0.8770651117589893, 'eval_f1': 0.8724184770886717, 'eval_precision': 0.8785135286746661, 'eval_recall': 0.8739509959414178, 'eval_runtime': 50.8816, 'eval_samples_per_second': 80.894, 'eval_steps_per_second': 2.535, 'epoch': 4.74}\n" + "{'eval_loss': 0.19253447651863098, 'eval_accuracy': 0.9475218658892128, 'eval_f1': 0.9455549198929877, 'eval_precision': 0.9498783733589569, 'eval_recall': 0.9455746598929079, 'eval_runtime': 114.8075, 'eval_samples_per_second': 35.851, 'eval_steps_per_second': 1.124, 'epoch': 4.74}\n" ] }, { @@ -2590,13 +2575,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8697, 'grad_norm': 5.5949811935424805, 'learning_rate': 1.9e-05, 'epoch': 4.82}\n" + "{'loss': 0.2674, 'grad_norm': 29.793489456176758, 'learning_rate': 1.9e-05, 'epoch': 4.82}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2222ad38bab34d8eafefff4eb8277629", + "model_id": "6b7526e006ad4963ae418b2c8221a88f", "version_major": 2, "version_minor": 0 }, @@ -2611,7 +2596,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.488719642162323, 'eval_accuracy': 0.8792517006802721, 'eval_f1': 0.8749420478853339, 'eval_precision': 0.8815954350698849, 'eval_recall': 0.875699865312381, 'eval_runtime': 51.2212, 'eval_samples_per_second': 80.357, 'eval_steps_per_second': 2.518, 'epoch': 4.82}\n" + "{'eval_loss': 0.19188550114631653, 'eval_accuracy': 0.9429057337220602, 'eval_f1': 0.9405066366498914, 'eval_precision': 0.9458056670586158, 'eval_recall': 0.9407893783001804, 'eval_runtime': 114.8903, 'eval_samples_per_second': 35.825, 'eval_steps_per_second': 1.123, 'epoch': 4.82}\n" ] }, { @@ -2626,13 +2611,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7996, 'grad_norm': 5.343413829803467, 'learning_rate': 1.85e-05, 'epoch': 4.89}\n" + "{'loss': 0.3029, 'grad_norm': 40.88832473754883, 'learning_rate': 1.85e-05, 'epoch': 4.89}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1b60858453f74d798e184bec16f55513", + "model_id": "f78fa40f6e694072be7fef48ca0273fc", "version_major": 2, "version_minor": 0 }, @@ -2647,7 +2632,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4877583384513855, 'eval_accuracy': 0.8773080660835763, 'eval_f1': 0.8719325240901645, 'eval_precision': 0.8781732781341105, 'eval_recall': 0.8733867416979888, 'eval_runtime': 50.5742, 'eval_samples_per_second': 81.385, 'eval_steps_per_second': 2.551, 'epoch': 4.89}\n" + "{'eval_loss': 0.1870385855436325, 'eval_accuracy': 0.9446064139941691, 'eval_f1': 0.9419978871151995, 'eval_precision': 0.9467945730172027, 'eval_recall': 0.9425046516642015, 'eval_runtime': 115.0825, 'eval_samples_per_second': 35.766, 'eval_steps_per_second': 1.121, 'epoch': 4.89}\n" ] }, { @@ -2662,13 +2647,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8002, 'grad_norm': 5.200406551361084, 'learning_rate': 1.8e-05, 'epoch': 4.97}\n" + "{'loss': 0.293, 'grad_norm': 25.430564880371094, 'learning_rate': 1.8e-05, 'epoch': 4.97}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "edc9006d595b4fd4893ad2883bd010b2", + "model_id": "102c265c87cf4a3eb9fe268cd8bade28", "version_major": 2, "version_minor": 0 }, @@ -2683,7 +2668,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.48469260334968567, 'eval_accuracy': 0.8785228377065112, 'eval_f1': 0.8737883601515327, 'eval_precision': 0.880730357224163, 'eval_recall': 0.8751595996352247, 'eval_runtime': 49.1632, 'eval_samples_per_second': 83.721, 'eval_steps_per_second': 2.624, 'epoch': 4.97}\n" + "{'eval_loss': 0.19138064980506897, 'eval_accuracy': 0.9421768707482994, 'eval_f1': 0.9398439773363093, 'eval_precision': 0.9443712235347143, 'eval_recall': 0.9402050138864034, 'eval_runtime': 114.8654, 'eval_samples_per_second': 35.833, 'eval_steps_per_second': 1.123, 'epoch': 4.97}\n" ] }, { @@ -2698,13 +2683,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7404, 'grad_norm': 6.2063446044921875, 'learning_rate': 1.75e-05, 'epoch': 5.05}\n" + "{'loss': 0.3242, 'grad_norm': 16.76936149597168, 'learning_rate': 1.75e-05, 'epoch': 5.05}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5b20bab9a7cd41c7843c1d6e20cfe4d7", + "model_id": "11e15df5be2444268facc7d47a32eacd", "version_major": 2, "version_minor": 0 }, @@ -2719,7 +2704,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.48882460594177246, 'eval_accuracy': 0.8770651117589893, 'eval_f1': 0.8726392692763753, 'eval_precision': 0.8795280651438127, 'eval_recall': 0.8739210392423884, 'eval_runtime': 50.4391, 'eval_samples_per_second': 81.603, 'eval_steps_per_second': 2.558, 'epoch': 5.05}\n" + "{'eval_loss': 0.1905665248632431, 'eval_accuracy': 0.9443634596695821, 'eval_f1': 0.9428469199634004, 'eval_precision': 0.9462565822668318, 'eval_recall': 0.9428831333486722, 'eval_runtime': 114.8538, 'eval_samples_per_second': 35.837, 'eval_steps_per_second': 1.123, 'epoch': 5.05}\n" ] }, { @@ -2734,13 +2719,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7326, 'grad_norm': 3.9257824420928955, 'learning_rate': 1.7000000000000003e-05, 'epoch': 5.13}\n" + "{'loss': 0.3302, 'grad_norm': 30.843332290649414, 'learning_rate': 1.7000000000000003e-05, 'epoch': 5.13}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ffb4331077cb41dfa461c96bf1d51a7a", + "model_id": "35f9aed766a94a6e8f2e69e7fc7e2af6", "version_major": 2, "version_minor": 0 }, @@ -2755,7 +2740,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.48825767636299133, 'eval_accuracy': 0.8746355685131195, 'eval_f1': 0.8701209700636171, 'eval_precision': 0.8772238194071659, 'eval_recall': 0.8717747556188732, 'eval_runtime': 50.0124, 'eval_samples_per_second': 82.3, 'eval_steps_per_second': 2.579, 'epoch': 5.13}\n" + "{'eval_loss': 0.18933725357055664, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9436571818715118, 'eval_precision': 0.9467108586574963, 'eval_recall': 0.9438527768448465, 'eval_runtime': 115.1185, 'eval_samples_per_second': 35.754, 'eval_steps_per_second': 1.121, 'epoch': 5.13}\n" ] }, { @@ -2770,13 +2755,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.797, 'grad_norm': 5.060814380645752, 'learning_rate': 1.65e-05, 'epoch': 5.2}\n" + "{'loss': 0.2754, 'grad_norm': 20.851177215576172, 'learning_rate': 1.65e-05, 'epoch': 5.2}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "021b42916f7949e79856bb533fb060e1", + "model_id": "5590b0148f554375b4c2a33a2e5e7a44", "version_major": 2, "version_minor": 0 }, @@ -2791,7 +2776,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4892158806324005, 'eval_accuracy': 0.8729348882410107, 'eval_f1': 0.8689396976822885, 'eval_precision': 0.8751671494168254, 'eval_recall': 0.8700644770381991, 'eval_runtime': 49.9051, 'eval_samples_per_second': 82.477, 'eval_steps_per_second': 2.585, 'epoch': 5.2}\n" + "{'eval_loss': 0.1859486699104309, 'eval_accuracy': 0.9470359572400389, 'eval_f1': 0.9452360577527517, 'eval_precision': 0.948884107004187, 'eval_recall': 0.9453068178693637, 'eval_runtime': 114.9667, 'eval_samples_per_second': 35.802, 'eval_steps_per_second': 1.122, 'epoch': 5.2}\n" ] }, { @@ -2806,13 +2791,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8084, 'grad_norm': 4.504694938659668, 'learning_rate': 1.6000000000000003e-05, 'epoch': 5.28}\n" + "{'loss': 0.2794, 'grad_norm': 32.60487747192383, 'learning_rate': 1.6000000000000003e-05, 'epoch': 5.28}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b904576fb9684093910c89565d618cca", + "model_id": "3d10eeaf9de5425ab2690e0eee24cef9", "version_major": 2, "version_minor": 0 }, @@ -2827,7 +2812,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.48001018166542053, 'eval_accuracy': 0.8792517006802721, 'eval_f1': 0.8752056640329726, 'eval_precision': 0.8816582892567761, 'eval_recall': 0.876272144996767, 'eval_runtime': 49.7474, 'eval_samples_per_second': 82.738, 'eval_steps_per_second': 2.593, 'epoch': 5.28}\n" + "{'eval_loss': 0.18755145370960236, 'eval_accuracy': 0.9458211856171039, 'eval_f1': 0.9441062093968957, 'eval_precision': 0.9472886051353733, 'eval_recall': 0.9442015299233748, 'eval_runtime': 114.8785, 'eval_samples_per_second': 35.829, 'eval_steps_per_second': 1.123, 'epoch': 5.28}\n" ] }, { @@ -2842,13 +2827,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8025, 'grad_norm': 5.151145935058594, 'learning_rate': 1.55e-05, 'epoch': 5.36}\n" + "{'loss': 0.3015, 'grad_norm': 23.714696884155273, 'learning_rate': 1.55e-05, 'epoch': 5.36}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b3fe671b965b411b8161a26b0af2d5fc", + "model_id": "af49bfff49bb4bd99545d1248cd56a75", "version_major": 2, "version_minor": 0 }, @@ -2863,7 +2848,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.47617706656455994, 'eval_accuracy': 0.8768221574344023, 'eval_f1': 0.8726680937092665, 'eval_precision': 0.8771151901740718, 'eval_recall': 0.8736036311111193, 'eval_runtime': 49.5334, 'eval_samples_per_second': 83.095, 'eval_steps_per_second': 2.604, 'epoch': 5.36}\n" + "{'eval_loss': 0.1870153546333313, 'eval_accuracy': 0.9463070942662779, 'eval_f1': 0.9450064660883716, 'eval_precision': 0.9480599743441597, 'eval_recall': 0.9451215682039763, 'eval_runtime': 115.2137, 'eval_samples_per_second': 35.725, 'eval_steps_per_second': 1.12, 'epoch': 5.36}\n" ] }, { @@ -2878,13 +2863,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7087, 'grad_norm': 5.042810440063477, 'learning_rate': 1.5e-05, 'epoch': 5.44}\n" + "{'loss': 0.2741, 'grad_norm': 26.103408813476562, 'learning_rate': 1.5e-05, 'epoch': 5.44}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ee2db0cf733141e9af7fbf5a73dbc6de", + "model_id": "030c7affe8094971b14709ff33bb87cf", "version_major": 2, "version_minor": 0 }, @@ -2899,7 +2884,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.47620466351509094, 'eval_accuracy': 0.8782798833819242, 'eval_f1': 0.87497004051077, 'eval_precision': 0.8806975888010735, 'eval_recall': 0.8755686029577235, 'eval_runtime': 49.9256, 'eval_samples_per_second': 82.443, 'eval_steps_per_second': 2.584, 'epoch': 5.44}\n" + "{'eval_loss': 0.1891375035047531, 'eval_accuracy': 0.9426627793974732, 'eval_f1': 0.941466297473656, 'eval_precision': 0.9446859437408165, 'eval_recall': 0.9414389371088902, 'eval_runtime': 114.8799, 'eval_samples_per_second': 35.829, 'eval_steps_per_second': 1.123, 'epoch': 5.44}\n" ] }, { @@ -2914,13 +2899,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7502, 'grad_norm': 5.252780914306641, 'learning_rate': 1.45e-05, 'epoch': 5.51}\n" + "{'loss': 0.2856, 'grad_norm': 24.357542037963867, 'learning_rate': 1.45e-05, 'epoch': 5.51}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c72c31a3af824cdfbcac24c266c460c1", + "model_id": "2f76d1e7433642948a4af2a83e28adfd", "version_major": 2, "version_minor": 0 }, @@ -2935,7 +2920,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4753693640232086, 'eval_accuracy': 0.8785228377065112, 'eval_f1': 0.8753647515204712, 'eval_precision': 0.8801227365007281, 'eval_recall': 0.8758968338566632, 'eval_runtime': 49.701, 'eval_samples_per_second': 82.815, 'eval_steps_per_second': 2.596, 'epoch': 5.51}\n" + "{'eval_loss': 0.18975676596164703, 'eval_accuracy': 0.9455782312925171, 'eval_f1': 0.943872222215918, 'eval_precision': 0.946982737270694, 'eval_recall': 0.9438607151950139, 'eval_runtime': 114.6348, 'eval_samples_per_second': 35.905, 'eval_steps_per_second': 1.125, 'epoch': 5.51}\n" ] }, { @@ -2950,13 +2935,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7386, 'grad_norm': 4.32672643661499, 'learning_rate': 1.4000000000000001e-05, 'epoch': 5.59}\n" + "{'loss': 0.2869, 'grad_norm': 17.85304069519043, 'learning_rate': 1.4000000000000001e-05, 'epoch': 5.59}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "105eefa3b45c4adb8800cbae0f67964e", + "model_id": "2a188b9ebe724488a9607667f60ee25f", "version_major": 2, "version_minor": 0 }, @@ -2971,7 +2956,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.47379669547080994, 'eval_accuracy': 0.8792517006802721, 'eval_f1': 0.8754319224053987, 'eval_precision': 0.8806675702100011, 'eval_recall': 0.8760217928047063, 'eval_runtime': 49.655, 'eval_samples_per_second': 82.892, 'eval_steps_per_second': 2.598, 'epoch': 5.59}\n" + "{'eval_loss': 0.1900128573179245, 'eval_accuracy': 0.9463070942662779, 'eval_f1': 0.9449003574138755, 'eval_precision': 0.9485287191510329, 'eval_recall': 0.9447589201115735, 'eval_runtime': 115.1085, 'eval_samples_per_second': 35.758, 'eval_steps_per_second': 1.121, 'epoch': 5.59}\n" ] }, { @@ -2986,13 +2971,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8173, 'grad_norm': 4.857478618621826, 'learning_rate': 1.3500000000000001e-05, 'epoch': 5.67}\n" + "{'loss': 0.2874, 'grad_norm': 23.500152587890625, 'learning_rate': 1.3500000000000001e-05, 'epoch': 5.67}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a465177e36bc466fbf676d5d03142cad", + "model_id": "b49e284dfd834a37a87e8cb66cff2c39", "version_major": 2, "version_minor": 0 }, @@ -3007,7 +2992,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4712308645248413, 'eval_accuracy': 0.8792517006802721, 'eval_f1': 0.8749799985940361, 'eval_precision': 0.8801373033918221, 'eval_recall': 0.8762236995104532, 'eval_runtime': 49.9395, 'eval_samples_per_second': 82.42, 'eval_steps_per_second': 2.583, 'epoch': 5.67}\n" + "{'eval_loss': 0.19259242713451385, 'eval_accuracy': 0.9458211856171039, 'eval_f1': 0.9434055925300381, 'eval_precision': 0.9488706777885639, 'eval_recall': 0.9439478824344302, 'eval_runtime': 114.9474, 'eval_samples_per_second': 35.808, 'eval_steps_per_second': 1.122, 'epoch': 5.67}\n" ] }, { @@ -3022,13 +3007,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8213, 'grad_norm': 4.416097164154053, 'learning_rate': 1.3000000000000001e-05, 'epoch': 5.75}\n" + "{'loss': 0.1988, 'grad_norm': 22.595317840576172, 'learning_rate': 1.3000000000000001e-05, 'epoch': 5.75}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6ebbeb60bb5c4768bef0ccfdeefa2f28", + "model_id": "da6bb4b812cc4e409baf2e4414a71e49", "version_major": 2, "version_minor": 0 }, @@ -3043,7 +3028,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.46962377429008484, 'eval_accuracy': 0.8790087463556852, 'eval_f1': 0.8750143770029749, 'eval_precision': 0.879517876232382, 'eval_recall': 0.8756484742320205, 'eval_runtime': 49.5319, 'eval_samples_per_second': 83.098, 'eval_steps_per_second': 2.604, 'epoch': 5.75}\n" + "{'eval_loss': 0.1882985234260559, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9426854260794502, 'eval_precision': 0.9469339918302618, 'eval_recall': 0.9433031282032139, 'eval_runtime': 115.1264, 'eval_samples_per_second': 35.752, 'eval_steps_per_second': 1.121, 'epoch': 5.75}\n" ] }, { @@ -3058,13 +3043,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7184, 'grad_norm': 4.346248149871826, 'learning_rate': 1.25e-05, 'epoch': 5.83}\n" + "{'loss': 0.2644, 'grad_norm': 28.592859268188477, 'learning_rate': 1.25e-05, 'epoch': 5.83}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0fbde0d0954241939f4c5883beadd41e", + "model_id": "e22e79ff48bb4e49baf81a8940e09b79", "version_major": 2, "version_minor": 0 }, @@ -3079,7 +3064,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.47138118743896484, 'eval_accuracy': 0.880466472303207, 'eval_f1': 0.8758928938078699, 'eval_precision': 0.8826157033462411, 'eval_recall': 0.8768000530071781, 'eval_runtime': 50.2999, 'eval_samples_per_second': 81.829, 'eval_steps_per_second': 2.565, 'epoch': 5.83}\n" + "{'eval_loss': 0.18950645625591278, 'eval_accuracy': 0.9472789115646258, 'eval_f1': 0.9448028291783778, 'eval_precision': 0.9493876702823437, 'eval_recall': 0.9454785004855722, 'eval_runtime': 115.0675, 'eval_samples_per_second': 35.77, 'eval_steps_per_second': 1.121, 'epoch': 5.83}\n" ] }, { @@ -3094,13 +3079,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7168, 'grad_norm': 4.947293281555176, 'learning_rate': 1.2e-05, 'epoch': 5.9}\n" + "{'loss': 0.2641, 'grad_norm': 31.456523895263672, 'learning_rate': 1.2e-05, 'epoch': 5.9}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "86db9cdcfa294a88a2db11269676b551", + "model_id": "6cbe0afe87aa4eabbeeb622e1b70a024", "version_major": 2, "version_minor": 0 }, @@ -3115,7 +3100,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.46817025542259216, 'eval_accuracy': 0.8748785228377065, 'eval_f1': 0.869518306953471, 'eval_precision': 0.87712624552976, 'eval_recall': 0.8714885684526, 'eval_runtime': 50.2079, 'eval_samples_per_second': 81.979, 'eval_steps_per_second': 2.569, 'epoch': 5.9}\n" + "{'eval_loss': 0.1931435912847519, 'eval_accuracy': 0.9438775510204082, 'eval_f1': 0.941433386425573, 'eval_precision': 0.9465851411488619, 'eval_recall': 0.9421179108709055, 'eval_runtime': 114.3837, 'eval_samples_per_second': 35.984, 'eval_steps_per_second': 1.128, 'epoch': 5.9}\n" ] }, { @@ -3130,13 +3115,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7558, 'grad_norm': 4.820581436157227, 'learning_rate': 1.1500000000000002e-05, 'epoch': 5.98}\n" + "{'loss': 0.2391, 'grad_norm': 36.5975227355957, 'learning_rate': 1.1500000000000002e-05, 'epoch': 5.98}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a878ae3d93574fd48891ed2e7ed622cd", + "model_id": "7fae512fcad34cdd86d2cb2f3bbec39d", "version_major": 2, "version_minor": 0 }, @@ -3151,7 +3136,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4673177897930145, 'eval_accuracy': 0.8760932944606414, 'eval_f1': 0.8711123206151186, 'eval_precision': 0.8786942477792753, 'eval_recall': 0.8728619451532288, 'eval_runtime': 50.633, 'eval_samples_per_second': 81.291, 'eval_steps_per_second': 2.548, 'epoch': 5.98}\n" + "{'eval_loss': 0.19245532155036926, 'eval_accuracy': 0.9438775510204082, 'eval_f1': 0.9413665363647609, 'eval_precision': 0.9459785586190361, 'eval_recall': 0.942074546074078, 'eval_runtime': 114.3684, 'eval_samples_per_second': 35.989, 'eval_steps_per_second': 1.128, 'epoch': 5.98}\n" ] }, { @@ -3166,13 +3151,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7169, 'grad_norm': 4.377044677734375, 'learning_rate': 1.1000000000000001e-05, 'epoch': 6.06}\n" + "{'loss': 0.2601, 'grad_norm': 42.9268913269043, 'learning_rate': 1.1000000000000001e-05, 'epoch': 6.06}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e55ea7243183473da6b46f117aaf7246", + "model_id": "473515b76d0a483d8fde41abbcb89d72", "version_major": 2, "version_minor": 0 }, @@ -3187,7 +3172,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.46775296330451965, 'eval_accuracy': 0.8782798833819242, 'eval_f1': 0.8735617162330748, 'eval_precision': 0.8800886974673685, 'eval_recall': 0.8748617977732224, 'eval_runtime': 49.9977, 'eval_samples_per_second': 82.324, 'eval_steps_per_second': 2.58, 'epoch': 6.06}\n" + "{'eval_loss': 0.19217662513256073, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9446123481197052, 'eval_precision': 0.9485415624396621, 'eval_recall': 0.9450066690417543, 'eval_runtime': 114.6574, 'eval_samples_per_second': 35.898, 'eval_steps_per_second': 1.125, 'epoch': 6.06}\n" ] }, { @@ -3202,13 +3187,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7042, 'grad_norm': 4.936257839202881, 'learning_rate': 1.05e-05, 'epoch': 6.14}\n" + "{'loss': 0.2499, 'grad_norm': 32.35667419433594, 'learning_rate': 1.05e-05, 'epoch': 6.14}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ffea5be2164845ae9c580ec5c605fc6f", + "model_id": "b285393b922446b4a80341de180a0314", "version_major": 2, "version_minor": 0 }, @@ -3223,7 +3208,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.46284279227256775, 'eval_accuracy': 0.8758503401360545, 'eval_f1': 0.8709985887620147, 'eval_precision': 0.8772956583762153, 'eval_recall': 0.8723718558867367, 'eval_runtime': 50.6139, 'eval_samples_per_second': 81.322, 'eval_steps_per_second': 2.549, 'epoch': 6.14}\n" + "{'eval_loss': 0.19207020103931427, 'eval_accuracy': 0.9460641399416909, 'eval_f1': 0.9442995122333805, 'eval_precision': 0.9479958854083244, 'eval_recall': 0.9443226322813668, 'eval_runtime': 114.6419, 'eval_samples_per_second': 35.903, 'eval_steps_per_second': 1.125, 'epoch': 6.14}\n" ] }, { @@ -3238,13 +3223,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7332, 'grad_norm': 5.362953186035156, 'learning_rate': 1e-05, 'epoch': 6.21}\n" + "{'loss': 0.264, 'grad_norm': 37.09341049194336, 'learning_rate': 1e-05, 'epoch': 6.21}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f72b07f685d740c39117ae3540b6c9c1", + "model_id": "14dbe06cf5454902837aada02785dc0d", "version_major": 2, "version_minor": 0 }, @@ -3259,7 +3244,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.46724751591682434, 'eval_accuracy': 0.8765792031098154, 'eval_f1': 0.8719893491508406, 'eval_precision': 0.879019819340113, 'eval_recall': 0.8730514618344557, 'eval_runtime': 49.8858, 'eval_samples_per_second': 82.508, 'eval_steps_per_second': 2.586, 'epoch': 6.21}\n" + "{'eval_loss': 0.18772649765014648, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9449748257567703, 'eval_precision': 0.9479318204339404, 'eval_recall': 0.9450885190747927, 'eval_runtime': 115.0287, 'eval_samples_per_second': 35.782, 'eval_steps_per_second': 1.121, 'epoch': 6.21}\n" ] }, { @@ -3274,13 +3259,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7027, 'grad_norm': 4.152902603149414, 'learning_rate': 9.5e-06, 'epoch': 6.29}\n" + "{'loss': 0.2523, 'grad_norm': 23.960466384887695, 'learning_rate': 9.5e-06, 'epoch': 6.29}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f778735db90d40d19e540b904c52349b", + "model_id": "62cedd37587046ac917292774dbc6e50", "version_major": 2, "version_minor": 0 }, @@ -3295,7 +3280,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.46439963579177856, 'eval_accuracy': 0.8785228377065112, 'eval_f1': 0.8735523200546538, 'eval_precision': 0.8804909761784245, 'eval_recall': 0.8749045098067426, 'eval_runtime': 50.3797, 'eval_samples_per_second': 81.7, 'eval_steps_per_second': 2.561, 'epoch': 6.29}\n" + "{'eval_loss': 0.18751277029514313, 'eval_accuracy': 0.9467930029154519, 'eval_f1': 0.9452585905069442, 'eval_precision': 0.9482655680231242, 'eval_recall': 0.9454911591930526, 'eval_runtime': 115.3611, 'eval_samples_per_second': 35.679, 'eval_steps_per_second': 1.118, 'epoch': 6.29}\n" ] }, { @@ -3310,13 +3295,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7283, 'grad_norm': 4.413781642913818, 'learning_rate': 9e-06, 'epoch': 6.37}\n" + "{'loss': 0.2406, 'grad_norm': 24.072086334228516, 'learning_rate': 9e-06, 'epoch': 6.37}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "885761a62bae4e378e54e99f8d4af322", + "model_id": "6ca09ea241ee400098c25a45944ee8c7", "version_major": 2, "version_minor": 0 }, @@ -3331,7 +3316,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.46416357159614563, 'eval_accuracy': 0.8775510204081632, 'eval_f1': 0.8724396776688207, 'eval_precision': 0.8793020021973419, 'eval_recall': 0.8739752415363203, 'eval_runtime': 49.8025, 'eval_samples_per_second': 82.646, 'eval_steps_per_second': 2.59, 'epoch': 6.37}\n" + "{'eval_loss': 0.18802115321159363, 'eval_accuracy': 0.9494655004859086, 'eval_f1': 0.9477070574852993, 'eval_precision': 0.9515929762382033, 'eval_recall': 0.9480766180643243, 'eval_runtime': 114.8757, 'eval_samples_per_second': 35.83, 'eval_steps_per_second': 1.123, 'epoch': 6.37}\n" ] }, { @@ -3346,13 +3331,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7305, 'grad_norm': 4.686371326446533, 'learning_rate': 8.500000000000002e-06, 'epoch': 6.45}\n" + "{'loss': 0.2749, 'grad_norm': 20.991914749145508, 'learning_rate': 8.500000000000002e-06, 'epoch': 6.45}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7f0cf7570dad4348ac88be8089c09046", + "model_id": "ad8958ac9557485c851372c6c8d0ebd0", "version_major": 2, "version_minor": 0 }, @@ -3367,7 +3352,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4613053500652313, 'eval_accuracy': 0.8780369290573372, 'eval_f1': 0.8728666918594699, 'eval_precision': 0.8784721423973952, 'eval_recall': 0.8741626802482955, 'eval_runtime': 49.8874, 'eval_samples_per_second': 82.506, 'eval_steps_per_second': 2.586, 'epoch': 6.45}\n" + "{'eval_loss': 0.1885104477405548, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9447709867884129, 'eval_precision': 0.9482607163831029, 'eval_recall': 0.9450556842845805, 'eval_runtime': 114.6381, 'eval_samples_per_second': 35.904, 'eval_steps_per_second': 1.125, 'epoch': 6.45}\n" ] }, { @@ -3382,13 +3367,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7186, 'grad_norm': 4.24678897857666, 'learning_rate': 8.000000000000001e-06, 'epoch': 6.52}\n" + "{'loss': 0.2702, 'grad_norm': 72.34149932861328, 'learning_rate': 8.000000000000001e-06, 'epoch': 6.52}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a5c3c2b3d6e646d282ac54f8b9e4fe4d", + "model_id": "d93fbc6ed558463a9022ddaf375de746", "version_major": 2, "version_minor": 0 }, @@ -3403,7 +3388,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4606040418148041, 'eval_accuracy': 0.8768221574344023, 'eval_f1': 0.8723055815441728, 'eval_precision': 0.8782669006595472, 'eval_recall': 0.873391847929954, 'eval_runtime': 52.8696, 'eval_samples_per_second': 77.852, 'eval_steps_per_second': 2.44, 'epoch': 6.52}\n" + "{'eval_loss': 0.1884743869304657, 'eval_accuracy': 0.9467930029154519, 'eval_f1': 0.9450964819057274, 'eval_precision': 0.9482242985135603, 'eval_recall': 0.9454769319797258, 'eval_runtime': 115.2162, 'eval_samples_per_second': 35.724, 'eval_steps_per_second': 1.12, 'epoch': 6.52}\n" ] }, { @@ -3418,13 +3403,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.759, 'grad_norm': 5.842624664306641, 'learning_rate': 7.5e-06, 'epoch': 6.6}\n" + "{'loss': 0.2482, 'grad_norm': 17.251073837280273, 'learning_rate': 7.5e-06, 'epoch': 6.6}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c0c4f8ea9f624383a818bb5602b4d0bc", + "model_id": "8532dfe9533e41029ac885345c4a832a", "version_major": 2, "version_minor": 0 }, @@ -3439,7 +3424,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4591958522796631, 'eval_accuracy': 0.8765792031098154, 'eval_f1': 0.8718567630200298, 'eval_precision': 0.8768576798958346, 'eval_recall': 0.8730267406203741, 'eval_runtime': 49.5505, 'eval_samples_per_second': 83.067, 'eval_steps_per_second': 2.603, 'epoch': 6.6}\n" + "{'eval_loss': 0.18626774847507477, 'eval_accuracy': 0.9475218658892128, 'eval_f1': 0.9460705692945173, 'eval_precision': 0.949297813063323, 'eval_recall': 0.9463759071528689, 'eval_runtime': 114.6516, 'eval_samples_per_second': 35.9, 'eval_steps_per_second': 1.125, 'epoch': 6.6}\n" ] }, { @@ -3454,13 +3439,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.6865, 'grad_norm': 4.285186767578125, 'learning_rate': 7.000000000000001e-06, 'epoch': 6.68}\n" + "{'loss': 0.2403, 'grad_norm': 19.474308013916016, 'learning_rate': 7.000000000000001e-06, 'epoch': 6.68}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7dec167c14864bbcb91ceeb79b58c94d", + "model_id": "d196ec3c0cff43ffad4455a931f2a459", "version_major": 2, "version_minor": 0 }, @@ -3475,7 +3460,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4580075442790985, 'eval_accuracy': 0.8770651117589893, 'eval_f1': 0.8726847483122707, 'eval_precision': 0.8781790052824746, 'eval_recall': 0.8737403779009486, 'eval_runtime': 49.3962, 'eval_samples_per_second': 83.326, 'eval_steps_per_second': 2.612, 'epoch': 6.68}\n" + "{'eval_loss': 0.1897239238023758, 'eval_accuracy': 0.9470359572400389, 'eval_f1': 0.9450683372184614, 'eval_precision': 0.9496556504600578, 'eval_recall': 0.9453010975584315, 'eval_runtime': 114.474, 'eval_samples_per_second': 35.956, 'eval_steps_per_second': 1.127, 'epoch': 6.68}\n" ] }, { @@ -3490,13 +3475,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.689, 'grad_norm': 3.939384698867798, 'learning_rate': 6.5000000000000004e-06, 'epoch': 6.76}\n" + "{'loss': 0.2509, 'grad_norm': 33.18558883666992, 'learning_rate': 6.5000000000000004e-06, 'epoch': 6.76}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "aba8ac01860e4de09a13462adef73ab8", + "model_id": "cc444a03a6ae4c50a2cbc50871a95713", "version_major": 2, "version_minor": 0 }, @@ -3511,7 +3496,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4574354290962219, 'eval_accuracy': 0.8775510204081632, 'eval_f1': 0.8734721931770111, 'eval_precision': 0.8788074205888925, 'eval_recall': 0.8744626910929137, 'eval_runtime': 49.8033, 'eval_samples_per_second': 82.645, 'eval_steps_per_second': 2.59, 'epoch': 6.76}\n" + "{'eval_loss': 0.19059084355831146, 'eval_accuracy': 0.9482507288629738, 'eval_f1': 0.9461830780670489, 'eval_precision': 0.9507805867016828, 'eval_recall': 0.9464756298884152, 'eval_runtime': 114.5318, 'eval_samples_per_second': 35.938, 'eval_steps_per_second': 1.126, 'epoch': 6.76}\n" ] }, { @@ -3526,13 +3511,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.6851, 'grad_norm': 4.196829795837402, 'learning_rate': 6e-06, 'epoch': 6.83}\n" + "{'loss': 0.2689, 'grad_norm': 18.298492431640625, 'learning_rate': 6e-06, 'epoch': 6.83}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9bb8fd642b714211bd16964a70836aa9", + "model_id": "e6fe40b5a91241748953659950e8773c", "version_major": 2, "version_minor": 0 }, @@ -3547,7 +3532,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.45608749985694885, 'eval_accuracy': 0.88022351797862, 'eval_f1': 0.8764027773305088, 'eval_precision': 0.8815479408871224, 'eval_recall': 0.8772828367171955, 'eval_runtime': 49.6734, 'eval_samples_per_second': 82.861, 'eval_steps_per_second': 2.597, 'epoch': 6.83}\n" + "{'eval_loss': 0.18674452602863312, 'eval_accuracy': 0.9484936831875608, 'eval_f1': 0.9459116119918208, 'eval_precision': 0.9506477204257867, 'eval_recall': 0.9466156759052429, 'eval_runtime': 114.3375, 'eval_samples_per_second': 35.999, 'eval_steps_per_second': 1.128, 'epoch': 6.83}\n" ] }, { @@ -3562,13 +3547,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7158, 'grad_norm': 5.825681686401367, 'learning_rate': 5.500000000000001e-06, 'epoch': 6.91}\n" + "{'loss': 0.2159, 'grad_norm': 18.049999237060547, 'learning_rate': 5.500000000000001e-06, 'epoch': 6.91}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "25101b0eb09947ef83dfbc56d1e646f8", + "model_id": "b1a8cdae65044fffb9f902bd092f0e0d", "version_major": 2, "version_minor": 0 }, @@ -3583,7 +3568,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4546578824520111, 'eval_accuracy': 0.8794946550048591, 'eval_f1': 0.8758979357138711, 'eval_precision': 0.8807529307033017, 'eval_recall': 0.8765534710340543, 'eval_runtime': 49.5559, 'eval_samples_per_second': 83.058, 'eval_steps_per_second': 2.603, 'epoch': 6.91}\n" + "{'eval_loss': 0.18661876022815704, 'eval_accuracy': 0.9484936831875608, 'eval_f1': 0.946438620230781, 'eval_precision': 0.9503522868270984, 'eval_recall': 0.9467846203446506, 'eval_runtime': 115.019, 'eval_samples_per_second': 35.785, 'eval_steps_per_second': 1.122, 'epoch': 6.91}\n" ] }, { @@ -3598,13 +3583,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.6938, 'grad_norm': 4.582016468048096, 'learning_rate': 5e-06, 'epoch': 6.99}\n" + "{'loss': 0.2488, 'grad_norm': 35.76055908203125, 'learning_rate': 5e-06, 'epoch': 6.99}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "459a0e5daf224f5da4436b0c54aec93d", + "model_id": "66ad88cb226546bcad6024431f7d8f7b", "version_major": 2, "version_minor": 0 }, @@ -3619,7 +3604,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.453294575214386, 'eval_accuracy': 0.879980563654033, 'eval_f1': 0.8758781646283256, 'eval_precision': 0.8809514799562479, 'eval_recall': 0.8767653206254079, 'eval_runtime': 49.256, 'eval_samples_per_second': 83.563, 'eval_steps_per_second': 2.619, 'epoch': 6.99}\n" + "{'eval_loss': 0.1865723431110382, 'eval_accuracy': 0.9460641399416909, 'eval_f1': 0.9434604111462372, 'eval_precision': 0.9481526440603574, 'eval_recall': 0.9443130519259704, 'eval_runtime': 114.4672, 'eval_samples_per_second': 35.958, 'eval_steps_per_second': 1.127, 'epoch': 6.99}\n" ] }, { @@ -3634,13 +3619,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.6596, 'grad_norm': 4.835186958312988, 'learning_rate': 4.5e-06, 'epoch': 7.07}\n" + "{'loss': 0.2366, 'grad_norm': 42.35475158691406, 'learning_rate': 4.5e-06, 'epoch': 7.07}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d58d9f293dd74711884ce4a51980a757", + "model_id": "52d045d117f149f6a24668674b7e0131", "version_major": 2, "version_minor": 0 }, @@ -3655,7 +3640,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4539879262447357, 'eval_accuracy': 0.879980563654033, 'eval_f1': 0.8759039042788321, 'eval_precision': 0.8807516245618193, 'eval_recall': 0.8768043957798509, 'eval_runtime': 48.7144, 'eval_samples_per_second': 84.493, 'eval_steps_per_second': 2.648, 'epoch': 7.07}\n" + "{'eval_loss': 0.1870640069246292, 'eval_accuracy': 0.9448493683187561, 'eval_f1': 0.9422096792950079, 'eval_precision': 0.9463818135151401, 'eval_recall': 0.9429973410538078, 'eval_runtime': 114.4117, 'eval_samples_per_second': 35.975, 'eval_steps_per_second': 1.128, 'epoch': 7.07}\n" ] }, { @@ -3670,13 +3655,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7519, 'grad_norm': 5.059776782989502, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.15}\n" + "{'loss': 0.2602, 'grad_norm': 16.04122543334961, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.15}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0c8f47edc2414d9ea0dc3c96fbab821d", + "model_id": "b5a56bb089da4da8bfb981af20222114", "version_major": 2, "version_minor": 0 }, @@ -3691,7 +3676,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.45295435190200806, 'eval_accuracy': 0.879980563654033, 'eval_f1': 0.8757567495587376, 'eval_precision': 0.8808919420348952, 'eval_recall': 0.8769035301436731, 'eval_runtime': 49.0589, 'eval_samples_per_second': 83.899, 'eval_steps_per_second': 2.629, 'epoch': 7.15}\n" + "{'eval_loss': 0.18540120124816895, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9440799651911433, 'eval_precision': 0.9482822745243968, 'eval_recall': 0.9447404023533209, 'eval_runtime': 115.0761, 'eval_samples_per_second': 35.768, 'eval_steps_per_second': 1.121, 'epoch': 7.15}\n" ] }, { @@ -3706,13 +3691,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.6836, 'grad_norm': 3.6825928688049316, 'learning_rate': 3.5000000000000004e-06, 'epoch': 7.22}\n" + "{'loss': 0.2236, 'grad_norm': 19.42647361755371, 'learning_rate': 3.5000000000000004e-06, 'epoch': 7.22}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "18fbcb19ffa24da9b08f8aaf03735c9d", + "model_id": "d29b80a00382432aaf2e72492858a805", "version_major": 2, "version_minor": 0 }, @@ -3727,7 +3712,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.45185598731040955, 'eval_accuracy': 0.8792517006802721, 'eval_f1': 0.875335738676512, 'eval_precision': 0.8806141104029087, 'eval_recall': 0.8761768131247331, 'eval_runtime': 49.3043, 'eval_samples_per_second': 83.481, 'eval_steps_per_second': 2.616, 'epoch': 7.22}\n" + "{'eval_loss': 0.18591512739658356, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9429143190138815, 'eval_precision': 0.9467375634808105, 'eval_recall': 0.9436323198742707, 'eval_runtime': 122.2756, 'eval_samples_per_second': 33.662, 'eval_steps_per_second': 1.055, 'epoch': 7.22}\n" ] }, { @@ -3742,13 +3727,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7407, 'grad_norm': 5.052607536315918, 'learning_rate': 3e-06, 'epoch': 7.3}\n" + "{'loss': 0.2463, 'grad_norm': 64.06370544433594, 'learning_rate': 3e-06, 'epoch': 7.3}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3e401cc679c7416889831edecc5408d2", + "model_id": "fc610185a9434b61be261f707eabb28e", "version_major": 2, "version_minor": 0 }, @@ -3763,7 +3748,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.452021062374115, 'eval_accuracy': 0.8787657920310982, 'eval_f1': 0.8751404192266866, 'eval_precision': 0.8807042407923154, 'eval_recall': 0.8756976782889806, 'eval_runtime': 49.038, 'eval_samples_per_second': 83.935, 'eval_steps_per_second': 2.631, 'epoch': 7.3}\n" + "{'eval_loss': 0.18627004325389862, 'eval_accuracy': 0.9470359572400389, 'eval_f1': 0.9449708759201191, 'eval_precision': 0.9487505429064071, 'eval_recall': 0.9453900648174994, 'eval_runtime': 116.2336, 'eval_samples_per_second': 35.411, 'eval_steps_per_second': 1.11, 'epoch': 7.3}\n" ] }, { @@ -3778,13 +3763,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.6823, 'grad_norm': 4.002208232879639, 'learning_rate': 2.5e-06, 'epoch': 7.38}\n" + "{'loss': 0.2355, 'grad_norm': 24.91596031188965, 'learning_rate': 2.5e-06, 'epoch': 7.38}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "77e18d913a0a4926ac73e6adae42b9b5", + "model_id": "e67be50ba3e24c79b49c839932a36567", "version_major": 2, "version_minor": 0 }, @@ -3799,7 +3784,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4522157609462738, 'eval_accuracy': 0.8785228377065112, 'eval_f1': 0.8750055154287301, 'eval_precision': 0.8801977637402548, 'eval_recall': 0.8753405721032443, 'eval_runtime': 49.3417, 'eval_samples_per_second': 83.418, 'eval_steps_per_second': 2.614, 'epoch': 7.38}\n" + "{'eval_loss': 0.18624159693717957, 'eval_accuracy': 0.9460641399416909, 'eval_f1': 0.9437366562287047, 'eval_precision': 0.94760386001111, 'eval_recall': 0.9442699931329116, 'eval_runtime': 116.0377, 'eval_samples_per_second': 35.471, 'eval_steps_per_second': 1.112, 'epoch': 7.38}\n" ] }, { @@ -3814,13 +3799,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.7029, 'grad_norm': 5.051811695098877, 'learning_rate': 2.0000000000000003e-06, 'epoch': 7.46}\n" + "{'loss': 0.263, 'grad_norm': 15.299092292785645, 'learning_rate': 2.0000000000000003e-06, 'epoch': 7.46}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1f3cc66be08a4f0fb4d176c6259a3340", + "model_id": "1d861da2f3ef401d9bd907c67ee088b5", "version_major": 2, "version_minor": 0 }, @@ -3835,7 +3820,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.45244845747947693, 'eval_accuracy': 0.8785228377065112, 'eval_f1': 0.8745807697520653, 'eval_precision': 0.8801529782523756, 'eval_recall': 0.8753060117121474, 'eval_runtime': 49.2834, 'eval_samples_per_second': 83.517, 'eval_steps_per_second': 2.618, 'epoch': 7.46}\n" + "{'eval_loss': 0.18598560988903046, 'eval_accuracy': 0.9472789115646258, 'eval_f1': 0.9452704296383371, 'eval_precision': 0.9491189751700734, 'eval_recall': 0.9455716980636488, 'eval_runtime': 114.861, 'eval_samples_per_second': 35.835, 'eval_steps_per_second': 1.123, 'epoch': 7.46}\n" ] }, { @@ -3850,13 +3835,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.6536, 'grad_norm': 4.637792587280273, 'learning_rate': 1.5e-06, 'epoch': 7.53}\n" + "{'loss': 0.2384, 'grad_norm': 23.91373634338379, 'learning_rate': 1.5e-06, 'epoch': 7.53}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cde51ca08f0b486da946c5b8d6ee04a9", + "model_id": "04838d265e0447a18065ed4b655fd3aa", "version_major": 2, "version_minor": 0 }, @@ -3871,7 +3856,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.45153847336769104, 'eval_accuracy': 0.8794946550048591, 'eval_f1': 0.8756437646345548, 'eval_precision': 0.8811909128354769, 'eval_recall': 0.8762632947964467, 'eval_runtime': 49.0871, 'eval_samples_per_second': 83.851, 'eval_steps_per_second': 2.628, 'epoch': 7.53}\n" + "{'eval_loss': 0.18595248460769653, 'eval_accuracy': 0.9472789115646258, 'eval_f1': 0.9452692090442015, 'eval_precision': 0.949199907194322, 'eval_recall': 0.9455716980636488, 'eval_runtime': 115.1001, 'eval_samples_per_second': 35.76, 'eval_steps_per_second': 1.121, 'epoch': 7.53}\n" ] }, { @@ -3886,13 +3871,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.6837, 'grad_norm': 4.7587785720825195, 'learning_rate': 1.0000000000000002e-06, 'epoch': 7.61}\n" + "{'loss': 0.2229, 'grad_norm': 23.1019229888916, 'learning_rate': 1.0000000000000002e-06, 'epoch': 7.61}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "99c04e9c45c64e78934b5a709f4b979d", + "model_id": "d29cc0d1fd4643a7b526c6eb2de940cb", "version_major": 2, "version_minor": 0 }, @@ -3907,7 +3892,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4512999355792999, 'eval_accuracy': 0.879980563654033, 'eval_f1': 0.8761225709982359, 'eval_precision': 0.8814970016458431, 'eval_recall': 0.8767682113216306, 'eval_runtime': 49.9882, 'eval_samples_per_second': 82.339, 'eval_steps_per_second': 2.581, 'epoch': 7.61}\n" + "{'eval_loss': 0.18562324345111847, 'eval_accuracy': 0.9477648202137998, 'eval_f1': 0.9458057740133103, 'eval_precision': 0.9496601468457383, 'eval_recall': 0.9461182930457277, 'eval_runtime': 114.5456, 'eval_samples_per_second': 35.933, 'eval_steps_per_second': 1.126, 'epoch': 7.61}\n" ] }, { @@ -3922,13 +3907,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.6604, 'grad_norm': 4.972753524780273, 'learning_rate': 5.000000000000001e-07, 'epoch': 7.69}\n" + "{'loss': 0.2277, 'grad_norm': 18.580551147460938, 'learning_rate': 5.000000000000001e-07, 'epoch': 7.69}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8f8042eba9204e6e87e8ff67163005d2", + "model_id": "9f66bccaeb3c44c0b87008df697c0c19", "version_major": 2, "version_minor": 0 }, @@ -3943,7 +3928,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4512011408805847, 'eval_accuracy': 0.8797376093294461, 'eval_f1': 0.875907796412304, 'eval_precision': 0.8812059046279704, 'eval_recall': 0.876557313613651, 'eval_runtime': 49.7753, 'eval_samples_per_second': 82.692, 'eval_steps_per_second': 2.592, 'epoch': 7.69}\n" + "{'eval_loss': 0.1854560524225235, 'eval_accuracy': 0.9480077745383868, 'eval_f1': 0.9459908448725385, 'eval_precision': 0.949860554097917, 'eval_recall': 0.9463076869851217, 'eval_runtime': 114.7098, 'eval_samples_per_second': 35.882, 'eval_steps_per_second': 1.125, 'epoch': 7.69}\n" ] }, { @@ -3958,13 +3943,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.683, 'grad_norm': 4.704882621765137, 'learning_rate': 0.0, 'epoch': 7.77}\n" + "{'loss': 0.2485, 'grad_norm': 21.011415481567383, 'learning_rate': 0.0, 'epoch': 7.77}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3b4f26d3499f43f58514f2f11ebcef23", + "model_id": "04dac833b7b146859c65a939279778d1", "version_major": 2, "version_minor": 0 }, @@ -3979,28 +3964,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4510863125324249, 'eval_accuracy': 0.8797376093294461, 'eval_f1': 0.8759381135610711, 'eval_precision': 0.88124155438923, 'eval_recall': 0.876557313613651, 'eval_runtime': 49.6247, 'eval_samples_per_second': 82.943, 'eval_steps_per_second': 2.6, 'epoch': 7.77}\n", - "{'train_runtime': 10633.03, 'train_samples_per_second': 12.038, 'train_steps_per_second': 0.094, 'train_loss': 1.2550339183807373, 'epoch': 7.77}\n" + "{'eval_loss': 0.18543945252895355, 'eval_accuracy': 0.9480077745383868, 'eval_f1': 0.9459908448725385, 'eval_precision': 0.949860554097917, 'eval_recall': 0.9463076869851217, 'eval_runtime': 114.7322, 'eval_samples_per_second': 35.875, 'eval_steps_per_second': 1.124, 'epoch': 7.77}\n", + "{'train_runtime': 28927.1207, 'train_samples_per_second': 4.425, 'train_steps_per_second': 0.035, 'train_loss': 0.5650921467542648, 'epoch': 7.77}\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bb41fb4998654ba782171e0c039e7f00", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "training_args.bin: 0%| | 0.00/5.11k [00:00 1\u001b[0m \u001b[43mmlflow\u001b[49m\u001b[38;5;241m.\u001b[39mend_run()\n", + "\u001b[0;31mNameError\u001b[0m: name 'mlflow' is not defined" ] } ], "source": [ - "metrics = {metric: evaluate.load(metric) for metric in METRICS}\n", - "for lr in [5e-3, 5e-4, 5e-5]:\n", - " for batch in [32, 64, 128]:\n", - " for model_name in [\"google/vit-base-patch16-224\", \"microsoft/swinv2-base-patch4-window16-256\", \"google/siglip-base-patch16-224\", \"facebook/dinov2-base\"]:\n", - "\n", - " image_processor = AutoImageProcessor.from_pretrained(model_name)\n", - " model = AutoModelForImageClassification.from_pretrained(\n", - " model_name,\n", - " num_labels=len(label2int),\n", - " id2label=int2label,\n", - " label2id=label2int,\n", - " ignore_mismatched_sizes=True,\n", - " )\n", - "\n", - " # Then, in your transformations:\n", - " def train_transform(examples, num_ops=10, magnitude=9, num_magnitude_bins=31):\n", - "\n", - " transformation = v2.Compose(\n", - " [\n", - " v2.RandAugment(\n", - " num_ops=num_ops,\n", - " magnitude=magnitude,\n", - " num_magnitude_bins=num_magnitude_bins,\n", - " )\n", - " ]\n", - " )\n", - " # Ensure each image has three dimensions (in this case, ensure it's RGB)\n", - " examples[\"pixel_values\"] = [\n", - " image.convert(\"RGB\") for image in examples[\"pixel_values\"]\n", - " ]\n", - " # Apply transformations\n", - " examples[\"pixel_values\"] = [\n", - " image_processor(transformation(image), return_tensors=\"pt\")[\n", - " \"pixel_values\"\n", - " ].squeeze()\n", - " for image in examples[\"pixel_values\"]\n", - " ]\n", - " return examples\n", - "\n", - "\n", - " def test_transform(examples):\n", - " # Ensure each image is RGB\n", - " examples[\"pixel_values\"] = [\n", - " image.convert(\"RGB\") for image in examples[\"pixel_values\"]\n", - " ]\n", - " # Apply processing\n", - " examples[\"pixel_values\"] = [\n", - " image_processor(image, return_tensors=\"pt\")[\"pixel_values\"].squeeze()\n", - " for image in examples[\"pixel_values\"]\n", - " ]\n", - " return examples\n", - "\n", - "\n", - " def compute_metrics(eval_pred):\n", - " predictions, labels = eval_pred\n", - " # predictions = np.argmax(logits, axis=-1)\n", - " results = {}\n", - " for key, val in metrics.items():\n", - " if \"accuracy\" == key:\n", - " result = next(\n", - " iter(val.compute(predictions=predictions, references=labels).items())\n", - " )\n", - " if \"accuracy\" != key:\n", - " result = next(\n", - " iter(\n", - " val.compute(\n", - " predictions=predictions, references=labels, average=\"macro\"\n", - " ).items()\n", - " )\n", - " )\n", - " results[result[0]] = result[1]\n", - " return results\n", - "\n", - "\n", - " def collate_fn(examples):\n", - " pixel_values = torch.stack([example[\"pixel_values\"] for example in examples])\n", - " labels = torch.tensor([example[\"label\"] for example in examples])\n", - " return {\"pixel_values\": pixel_values, \"labels\": labels}\n", - "\n", - "\n", - " def preprocess_logits_for_metrics(logits, labels):\n", - " \"\"\"\n", - " Original Trainer may have a memory leak.\n", - " This is a workaround to avoid storing too many tensors that are not needed.\n", - " \"\"\"\n", - " pred_ids = torch.argmax(logits, dim=-1)\n", - " return pred_ids\n", - "\n", - " ds[\"train\"].set_transform(train_transform)\n", - " ds[\"test\"].set_transform(test_transform)\n", - "\n", - " training_args = TrainingArguments(**CONFIG[\"training_args\"])\n", - " training_args.per_device_train_batch_size = batch\n", - " training_args.per_device_eval_batch_size = batch\n", - " training_args.hub_model_id = f\"amaye15/{model_name.replace('/','-')}-batch{batch}-lr{lr}-standford-dogs\"\n", - "\n", - " mlflow.start_run(run_name=f\"{model_name.replace('/','-')}-batch{batch}-lr{lr}\")\n", - "\n", - " trainer = Trainer(\n", - " model=model,\n", - " args=training_args,\n", - " train_dataset=ds[\"train\"],\n", - " eval_dataset=ds[\"test\"],\n", - " tokenizer=image_processor,\n", - " data_collator=collate_fn,\n", - " compute_metrics=compute_metrics,\n", - " # callbacks=[early_stopping_callback],\n", - " preprocess_logits_for_metrics=preprocess_logits_for_metrics,\n", - " )\n", - "\n", - " # Train the model\n", - " trainer.train()\n", - "\n", - " trainer.push_to_hub()\n", - "\n", - " mlflow.end_run()" + "mlflow.end_run()" ] }, {