MHGanainy/gpt2-xl-lora-ecthr-tf-idf-kmeans-imbalanced-uniform-cluster-8-id-2

Browse files

Files changed (5) hide show

README.md +2 -0
all_results.json +13 -0
eval_results.json +8 -0
train_results.json +8 -0
trainer_state.json +476 -0

README.md CHANGED Viewed

@@ -15,6 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
 # gpt2-xl-lora-ecthr-tf-idf-kmeans-imbalanced-uniform-cluster-8-id-2
 This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
 ## Model description

 # gpt2-xl-lora-ecthr-tf-idf-kmeans-imbalanced-uniform-cluster-8-id-2
 This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.9218
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 1.9218220710754395,
+    "eval_runtime": 62.1945,
+    "eval_samples_per_second": 22.269,
+    "eval_steps_per_second": 2.798,
+    "perplexity": 6.833398069901086,
+    "total_flos": 1.1351431053312e+17,
+    "train_loss": 2.079003376893422,
+    "train_runtime": 1659.922,
+    "train_samples_per_second": 7.509,
+    "train_steps_per_second": 3.754
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 1.9218220710754395,
+    "eval_runtime": 62.1945,
+    "eval_samples_per_second": 22.269,
+    "eval_steps_per_second": 2.798,
+    "perplexity": 6.833398069901086
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "total_flos": 1.1351431053312e+17,
+    "train_loss": 2.079003376893422,
+    "train_runtime": 1659.922,
+    "train_samples_per_second": 7.509,
+    "train_steps_per_second": 3.754
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,476 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 6232,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.016046213093709884,
+      "grad_norm": 0.17895939946174622,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 2.4769,
+      "step": 100
+    },
+    {
+      "epoch": 0.03209242618741977,
+      "grad_norm": 0.15942341089248657,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 2.4803,
+      "step": 200
+    },
+    {
+      "epoch": 0.04813863928112965,
+      "grad_norm": 0.24720104038715363,
+      "learning_rate": 6e-06,
+      "loss": 2.438,
+      "step": 300
+    },
+    {
+      "epoch": 0.06418485237483953,
+      "grad_norm": 0.39716702699661255,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 2.4056,
+      "step": 400
+    },
+    {
+      "epoch": 0.08023106546854943,
+      "grad_norm": 0.3956620395183563,
+      "learning_rate": 1e-05,
+      "loss": 2.3633,
+      "step": 500
+    },
+    {
+      "epoch": 0.0962772785622593,
+      "grad_norm": 0.5021713972091675,
+      "learning_rate": 1.2e-05,
+      "loss": 2.3186,
+      "step": 600
+    },
+    {
+      "epoch": 0.1123234916559692,
+      "grad_norm": 0.567215085029602,
+      "learning_rate": 1.4e-05,
+      "loss": 2.2991,
+      "step": 700
+    },
+    {
+      "epoch": 0.12836970474967907,
+      "grad_norm": 0.6055766344070435,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 2.2618,
+      "step": 800
+    },
+    {
+      "epoch": 0.14441591784338895,
+      "grad_norm": 0.5610828995704651,
+      "learning_rate": 1.8e-05,
+      "loss": 2.2398,
+      "step": 900
+    },
+    {
+      "epoch": 0.16046213093709885,
+      "grad_norm": 0.6880731582641602,
+      "learning_rate": 2e-05,
+      "loss": 2.2216,
+      "step": 1000
+    },
+    {
+      "epoch": 0.17650834403080873,
+      "grad_norm": 0.6766607761383057,
+      "learning_rate": 1.9981977966686475e-05,
+      "loss": 2.1763,
+      "step": 1100
+    },
+    {
+      "epoch": 0.1925545571245186,
+      "grad_norm": 0.8101206421852112,
+      "learning_rate": 1.992797682548284e-05,
+      "loss": 2.1818,
+      "step": 1200
+    },
+    {
+      "epoch": 0.2086007702182285,
+      "grad_norm": 0.8453436493873596,
+      "learning_rate": 1.983819121846225e-05,
+      "loss": 2.1666,
+      "step": 1300
+    },
+    {
+      "epoch": 0.2246469833119384,
+      "grad_norm": 0.8145064115524292,
+      "learning_rate": 1.9712944769464864e-05,
+      "loss": 2.1279,
+      "step": 1400
+    },
+    {
+      "epoch": 0.24069319640564826,
+      "grad_norm": 0.8634337782859802,
+      "learning_rate": 1.9552688917625927e-05,
+      "loss": 2.1279,
+      "step": 1500
+    },
+    {
+      "epoch": 0.25673940949935814,
+      "grad_norm": 1.1356147527694702,
+      "learning_rate": 1.9358001290205542e-05,
+      "loss": 2.1238,
+      "step": 1600
+    },
+    {
+      "epoch": 0.27278562259306804,
+      "grad_norm": 0.7424245476722717,
+      "learning_rate": 1.9129583620585137e-05,
+      "loss": 2.1072,
+      "step": 1700
+    },
+    {
+      "epoch": 0.2888318356867779,
+      "grad_norm": 1.1842221021652222,
+      "learning_rate": 1.886825921893497e-05,
+      "loss": 2.0933,
+      "step": 1800
+    },
+    {
+      "epoch": 0.3048780487804878,
+      "grad_norm": 0.9710640907287598,
+      "learning_rate": 1.8574970004669464e-05,
+      "loss": 2.0651,
+      "step": 1900
+    },
+    {
+      "epoch": 0.3209242618741977,
+      "grad_norm": 1.1665947437286377,
+      "learning_rate": 1.8250773111386633e-05,
+      "loss": 2.0682,
+      "step": 2000
+    },
+    {
+      "epoch": 0.33697047496790755,
+      "grad_norm": 1.1069215536117554,
+      "learning_rate": 1.7896837076528647e-05,
+      "loss": 2.089,
+      "step": 2100
+    },
+    {
+      "epoch": 0.35301668806161746,
+      "grad_norm": 1.1600021123886108,
+      "learning_rate": 1.751443762949772e-05,
+      "loss": 2.0485,
+      "step": 2200
+    },
+    {
+      "epoch": 0.36906290115532736,
+      "grad_norm": 1.4247888326644897,
+      "learning_rate": 1.7104953093408548e-05,
+      "loss": 2.0807,
+      "step": 2300
+    },
+    {
+      "epoch": 0.3851091142490372,
+      "grad_norm": 1.3733174800872803,
+      "learning_rate": 1.666985941705128e-05,
+      "loss": 2.0503,
+      "step": 2400
+    },
+    {
+      "epoch": 0.4011553273427471,
+      "grad_norm": 1.2357269525527954,
+      "learning_rate": 1.6210724854971885e-05,
+      "loss": 2.059,
+      "step": 2500
+    },
+    {
+      "epoch": 0.417201540436457,
+      "grad_norm": 1.0586000680923462,
+      "learning_rate": 1.5729204314845002e-05,
+      "loss": 2.0246,
+      "step": 2600
+    },
+    {
+      "epoch": 0.43324775353016687,
+      "grad_norm": 1.1663570404052734,
+      "learning_rate": 1.5227033392513684e-05,
+      "loss": 2.0421,
+      "step": 2700
+    },
+    {
+      "epoch": 0.4492939666238768,
+      "grad_norm": 1.1528408527374268,
+      "learning_rate": 1.4706022116196208e-05,
+      "loss": 2.0494,
+      "step": 2800
+    },
+    {
+      "epoch": 0.4653401797175867,
+      "grad_norm": 1.0500417947769165,
+      "learning_rate": 1.4168048422408272e-05,
+      "loss": 2.008,
+      "step": 2900
+    },
+    {
+      "epoch": 0.4813863928112965,
+      "grad_norm": 1.2136881351470947,
+      "learning_rate": 1.3615051387116131e-05,
+      "loss": 2.026,
+      "step": 3000
+    },
+    {
+      "epoch": 0.49743260590500643,
+      "grad_norm": 1.1245627403259277,
+      "learning_rate": 1.3049024236518244e-05,
+      "loss": 1.9962,
+      "step": 3100
+    },
+    {
+      "epoch": 0.5134788189987163,
+      "grad_norm": 1.0942673683166504,
+      "learning_rate": 1.24720071626475e-05,
+      "loss": 2.0102,
+      "step": 3200
+    },
+    {
+      "epoch": 0.5295250320924262,
+      "grad_norm": 1.087973952293396,
+      "learning_rate": 1.1886079969689454e-05,
+      "loss": 1.9989,
+      "step": 3300
+    },
+    {
+      "epoch": 0.5455712451861361,
+      "grad_norm": 1.1616392135620117,
+      "learning_rate": 1.1293354577522264e-05,
+      "loss": 2.0078,
+      "step": 3400
+    },
+    {
+      "epoch": 0.561617458279846,
+      "grad_norm": 1.0014444589614868,
+      "learning_rate": 1.0695967409498614e-05,
+      "loss": 1.9948,
+      "step": 3500
+    },
+    {
+      "epoch": 0.5776636713735558,
+      "grad_norm": 1.045386552810669,
+      "learning_rate": 1.0096071691907137e-05,
+      "loss": 2.0031,
+      "step": 3600
+    },
+    {
+      "epoch": 0.5937098844672657,
+      "grad_norm": 1.7991660833358765,
+      "learning_rate": 9.495829692869255e-06,
+      "loss": 2.0029,
+      "step": 3700
+    },
+    {
+      "epoch": 0.6097560975609756,
+      "grad_norm": 1.1647707223892212,
+      "learning_rate": 8.897404928645529e-06,
+      "loss": 1.9819,
+      "step": 3800
+    },
+    {
+      "epoch": 0.6258023106546855,
+      "grad_norm": 1.0043553113937378,
+      "learning_rate": 8.302954365443264e-06,
+      "loss": 2.0047,
+      "step": 3900
+    },
+    {
+      "epoch": 0.6418485237483954,
+      "grad_norm": 1.0303337574005127,
+      "learning_rate": 7.71462064483311e-06,
+      "loss": 1.986,
+      "step": 4000
+    },
+    {
+      "epoch": 0.6578947368421053,
+      "grad_norm": 1.3229223489761353,
+      "learning_rate": 7.13452436079753e-06,
+      "loss": 2.0151,
+      "step": 4100
+    },
+    {
+      "epoch": 0.6739409499358151,
+      "grad_norm": 1.4595571756362915,
+      "learning_rate": 6.564756416247712e-06,
+      "loss": 1.9585,
+      "step": 4200
+    },
+    {
+      "epoch": 0.689987163029525,
+      "grad_norm": 1.1201053857803345,
+      "learning_rate": 6.007370486559185e-06,
+      "loss": 1.9846,
+      "step": 4300
+    },
+    {
+      "epoch": 0.7060333761232349,
+      "grad_norm": 1.0485244989395142,
+      "learning_rate": 5.46437561729062e-06,
+      "loss": 1.9853,
+      "step": 4400
+    },
+    {
+      "epoch": 0.7220795892169448,
+      "grad_norm": 1.0388679504394531,
+      "learning_rate": 4.937728982766622e-06,
+      "loss": 2.0268,
+      "step": 4500
+    },
+    {
+      "epoch": 0.7381258023106547,
+      "grad_norm": 1.3947858810424805,
+      "learning_rate": 4.429328831625565e-06,
+      "loss": 1.9645,
+      "step": 4600
+    },
+    {
+      "epoch": 0.7541720154043645,
+      "grad_norm": 1.3646990060806274,
+      "learning_rate": 3.941007644759535e-06,
+      "loss": 1.9704,
+      "step": 4700
+    },
+    {
+      "epoch": 0.7702182284980744,
+      "grad_norm": 1.2408106327056885,
+      "learning_rate": 3.474525530308016e-06,
+      "loss": 2.0122,
+      "step": 4800
+    },
+    {
+      "epoch": 0.7862644415917843,
+      "grad_norm": 1.3304516077041626,
+      "learning_rate": 3.0315638795123726e-06,
+      "loss": 1.9989,
+      "step": 4900
+    },
+    {
+      "epoch": 0.8023106546854942,
+      "grad_norm": 1.200531005859375,
+      "learning_rate": 2.6137193062980506e-06,
+      "loss": 2.0135,
+      "step": 5000
+    },
+    {
+      "epoch": 0.8183568677792041,
+      "grad_norm": 1.3504362106323242,
+      "learning_rate": 2.2224978924287243e-06,
+      "loss": 2.0073,
+      "step": 5100
+    },
+    {
+      "epoch": 0.834403080872914,
+      "grad_norm": 1.4387364387512207,
+      "learning_rate": 1.8593097589751318e-06,
+      "loss": 1.9344,
+      "step": 5200
+    },
+    {
+      "epoch": 0.8504492939666238,
+      "grad_norm": 1.4003574848175049,
+      "learning_rate": 1.5254639836653117e-06,
+      "loss": 2.0001,
+      "step": 5300
+    },
+    {
+      "epoch": 0.8664955070603337,
+      "grad_norm": 1.2966829538345337,
+      "learning_rate": 1.222163882436107e-06,
+      "loss": 1.9852,
+      "step": 5400
+    },
+    {
+      "epoch": 0.8825417201540436,
+      "grad_norm": 1.3298251628875732,
+      "learning_rate": 9.505026721931898e-07,
+      "loss": 1.9768,
+      "step": 5500
+    },
+    {
+      "epoch": 0.8985879332477535,
+      "grad_norm": 1.1031887531280518,
+      "learning_rate": 7.114595304127536e-07,
+      "loss": 1.9921,
+      "step": 5600
+    },
+    {
+      "epoch": 0.9146341463414634,
+      "grad_norm": 1.3485647439956665,
+      "learning_rate": 5.077834417277494e-07,
+      "loss": 1.9674,
+      "step": 5700
+    },
+    {
+      "epoch": 0.9306803594351734,
+      "grad_norm": 1.056166172027588,
+      "learning_rate": 3.3609511784804427e-07,
+      "loss": 2.0047,
+      "step": 5800
+    },
+    {
+      "epoch": 0.9467265725288831,
+      "grad_norm": 1.2811501026153564,
+      "learning_rate": 1.9923943711331996e-07,
+      "loss": 1.9755,
+      "step": 5900
+    },
+    {
+      "epoch": 0.962772785622593,
+      "grad_norm": 1.4452365636825562,
+      "learning_rate": 9.770968305104822e-08,
+      "loss": 1.9793,
+      "step": 6000
+    },
+    {
+      "epoch": 0.978818998716303,
+      "grad_norm": 1.8246029615402222,
+      "learning_rate": 3.18718101832316e-08,
+      "loss": 2.0008,
+      "step": 6100
+    },
+    {
+      "epoch": 0.9948652118100129,
+      "grad_norm": 1.3338468074798584,
+      "learning_rate": 1.963124977494291e-09,
+      "loss": 1.9654,
+      "step": 6200
+    },
+    {
+      "epoch": 1.0,
+      "step": 6232,
+      "total_flos": 1.1351431053312e+17,
+      "train_loss": 2.079003376893422,
+      "train_runtime": 1659.922,
+      "train_samples_per_second": 7.509,
+      "train_steps_per_second": 3.754
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 6232,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.1351431053312e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}