MHGanainy/best-performing-clustering-7

Browse files

Files changed (5) hide show

README.md +2 -0
all_results.json +13 -0
eval_results.json +8 -0
train_results.json +8 -0
trainer_state.json +336 -0

README.md CHANGED Viewed

@@ -15,6 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
 # best-performing-clustering-7
 This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
 ## Model description

 # best-performing-clustering-7
 This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.8230
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+ "epoch": 1.0,
+ "eval_loss": 1.8229724168777466,
+ "eval_runtime": 65.6293,
+ "eval_samples_per_second": 10.895,
+ "eval_steps_per_second": 1.371,
+ "perplexity": 6.190231078134954,
+ "total_flos": 7.717588795392e+16,
+ "train_loss": 1.9823122004278713,
+ "train_runtime": 2216.0595,
+ "train_samples_per_second": 3.824,
+ "train_steps_per_second": 1.912
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+ "epoch": 1.0,
+ "eval_loss": 1.8229724168777466,
+ "eval_runtime": 65.6293,
+ "eval_samples_per_second": 10.895,
+ "eval_steps_per_second": 1.371,
+ "perplexity": 6.190231078134954
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+ "epoch": 1.0,
+ "total_flos": 7.717588795392e+16,
+ "train_loss": 1.9823122004278713,
+ "train_runtime": 2216.0595,
+ "train_samples_per_second": 3.824,
+ "train_steps_per_second": 1.912
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,336 @@

+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.0,
+ "eval_steps": 500,
+ "global_step": 4237,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.023601604909133822,
+ "grad_norm": 0.10703875869512558,
+ "learning_rate": 6.666666666666667e-06,
+ "loss": 2.4126,
+ "step": 100
+ },
+ {
+ "epoch": 0.047203209818267644,
+ "grad_norm": 0.20992980897426605,
+ "learning_rate": 1.3333333333333333e-05,
+ "loss": 2.3493,
+ "step": 200
+ },
+ {
+ "epoch": 0.07080481472740147,
+ "grad_norm": 0.38506367802619934,
+ "learning_rate": 2e-05,
+ "loss": 2.2884,
+ "step": 300
+ },
+ {
+ "epoch": 0.09440641963653529,
+ "grad_norm": 0.5256543159484863,
+ "learning_rate": 1.9968179392958305e-05,
+ "loss": 2.2043,
+ "step": 400
+ },
+ {
+ "epoch": 0.11800802454566911,
+ "grad_norm": 0.5416532158851624,
+ "learning_rate": 1.987292008203972e-05,
+ "loss": 2.1605,
+ "step": 500
+ },
+ {
+ "epoch": 0.14160962945480293,
+ "grad_norm": 0.6629260778427124,
+ "learning_rate": 1.9714828309064202e-05,
+ "loss": 2.138,
+ "step": 600
+ },
+ {
+ "epoch": 0.16521123436393675,
+ "grad_norm": 0.7280109524726868,
+ "learning_rate": 1.9494910189268627e-05,
+ "loss": 2.091,
+ "step": 700
+ },
+ {
+ "epoch": 0.18881283927307058,
+ "grad_norm": 0.8726409673690796,
+ "learning_rate": 1.921456530826727e-05,
+ "loss": 2.0719,
+ "step": 800
+ },
+ {
+ "epoch": 0.2124144441822044,
+ "grad_norm": 0.786993145942688,
+ "learning_rate": 1.8875577814919035e-05,
+ "loss": 2.0387,
+ "step": 900
+ },
+ {
+ "epoch": 0.23601604909133822,
+ "grad_norm": 0.7426193952560425,
+ "learning_rate": 1.848010506678749e-05,
+ "loss": 2.0183,
+ "step": 1000
+ },
+ {
+ "epoch": 0.25961765400047204,
+ "grad_norm": 0.9175330996513367,
+ "learning_rate": 1.803066390045544e-05,
+ "loss": 2.045,
+ "step": 1100
+ },
+ {
+ "epoch": 0.28321925890960586,
+ "grad_norm": 0.795050859451294,
+ "learning_rate": 1.753011461407132e-05,
+ "loss": 2.0278,
+ "step": 1200
+ },
+ {
+ "epoch": 0.3068208638187397,
+ "grad_norm": 0.9610442519187927,
+ "learning_rate": 1.6981642764064544e-05,
+ "loss": 2.0395,
+ "step": 1300
+ },
+ {
+ "epoch": 0.3304224687278735,
+ "grad_norm": 0.863545298576355,
+ "learning_rate": 1.6388738891877607e-05,
+ "loss": 2.0042,
+ "step": 1400
+ },
+ {
+ "epoch": 0.35402407363700733,
+ "grad_norm": 1.0420805215835571,
+ "learning_rate": 1.5755176309736586e-05,
+ "loss": 1.9713,
+ "step": 1500
+ },
+ {
+ "epoch": 0.37762567854614115,
+ "grad_norm": 1.0546514987945557,
+ "learning_rate": 1.5084987086834003e-05,
+ "loss": 1.968,
+ "step": 1600
+ },
+ {
+ "epoch": 0.401227283455275,
+ "grad_norm": 0.8405762910842896,
+ "learning_rate": 1.4382436388750968e-05,
+ "loss": 1.9442,
+ "step": 1700
+ },
+ {
+ "epoch": 0.4248288883644088,
+ "grad_norm": 1.089908480644226,
+ "learning_rate": 1.36519953334256e-05,
+ "loss": 1.908,
+ "step": 1800
+ },
+ {
+ "epoch": 0.4484304932735426,
+ "grad_norm": 1.351189136505127,
+ "learning_rate": 1.2898312536415628e-05,
+ "loss": 1.9473,
+ "step": 1900
+ },
+ {
+ "epoch": 0.47203209818267644,
+ "grad_norm": 0.981865406036377,
+ "learning_rate": 1.2126184526544591e-05,
+ "loss": 1.9577,
+ "step": 2000
+ },
+ {
+ "epoch": 0.49563370309181026,
+ "grad_norm": 0.9919114708900452,
+ "learning_rate": 1.1340525220210092e-05,
+ "loss": 1.9136,
+ "step": 2100
+ },
+ {
+ "epoch": 0.5192353080009441,
+ "grad_norm": 1.1692713499069214,
+ "learning_rate": 1.0546334648623235e-05,
+ "loss": 1.9127,
+ "step": 2200
+ },
+ {
+ "epoch": 0.5428369129100779,
+ "grad_norm": 1.028548240661621,
+ "learning_rate": 9.748667137002961e-06,
+ "loss": 1.8947,
+ "step": 2300
+ },
+ {
+ "epoch": 0.5664385178192117,
+ "grad_norm": 1.3835196495056152,
+ "learning_rate": 8.9525991382367e-06,
+ "loss": 1.9242,
+ "step": 2400
+ },
+ {
+ "epoch": 0.5900401227283455,
+ "grad_norm": 1.0017223358154297,
+ "learning_rate": 8.163196925717906e-06,
+ "loss": 1.9241,
+ "step": 2500
+ },
+ {
+ "epoch": 0.6136417276374794,
+ "grad_norm": 1.3177546262741089,
+ "learning_rate": 7.385484350967048e-06,
+ "loss": 1.8967,
+ "step": 2600
+ },
+ {
+ "epoch": 0.6372433325466131,
+ "grad_norm": 1.2489763498306274,
+ "learning_rate": 6.6244108712306435e-06,
+ "loss": 1.9141,
+ "step": 2700
+ },
+ {
+ "epoch": 0.660844937455747,
+ "grad_norm": 1.4832775592803955,
+ "learning_rate": 5.884820050534392e-06,
+ "loss": 1.8921,
+ "step": 2800
+ },
+ {
+ "epoch": 0.6844465423648808,
+ "grad_norm": 1.0875434875488281,
+ "learning_rate": 5.171418734653707e-06,
+ "loss": 1.8483,
+ "step": 2900
+ },
+ {
+ "epoch": 0.7080481472740147,
+ "grad_norm": 1.1204584836959839,
+ "learning_rate": 4.488747096175717e-06,
+ "loss": 1.8557,
+ "step": 3000
+ },
+ {
+ "epoch": 0.7316497521831484,
+ "grad_norm": 0.9455496072769165,
+ "learning_rate": 3.841149740289725e-06,
+ "loss": 1.8695,
+ "step": 3100
+ },
+ {
+ "epoch": 0.7552513570922823,
+ "grad_norm": 0.9588963389396667,
+ "learning_rate": 3.2327480551923107e-06,
+ "loss": 1.9,
+ "step": 3200
+ },
+ {
+ "epoch": 0.7788529620014161,
+ "grad_norm": 0.9655967354774475,
+ "learning_rate": 2.6674139830724722e-06,
+ "loss": 1.9159,
+ "step": 3300
+ },
+ {
+ "epoch": 0.80245456691055,
+ "grad_norm": 1.3765445947647095,
+ "learning_rate": 2.1487453786014513e-06,
+ "loss": 1.8497,
+ "step": 3400
+ },
+ {
+ "epoch": 0.8260561718196837,
+ "grad_norm": 1.257179617881775,
+ "learning_rate": 1.6800431117487958e-06,
+ "loss": 1.8931,
+ "step": 3500
+ },
+ {
+ "epoch": 0.8496577767288176,
+ "grad_norm": 1.1668704748153687,
+ "learning_rate": 1.2642900606451148e-06,
+ "loss": 1.8853,
+ "step": 3600
+ },
+ {
+ "epoch": 0.8732593816379514,
+ "grad_norm": 1.0066955089569092,
+ "learning_rate": 9.04132128183528e-07,
+ "loss": 1.8997,
+ "step": 3700
+ },
+ {
+ "epoch": 0.8968609865470852,
+ "grad_norm": 1.234889030456543,
+ "learning_rate": 6.018614031723913e-07,
+ "loss": 1.8899,
+ "step": 3800
+ },
+ {
+ "epoch": 0.920462591456219,
+ "grad_norm": 1.4220229387283325,
+ "learning_rate": 3.594015732038625e-07,
+ "loss": 1.91,
+ "step": 3900
+ },
+ {
+ "epoch": 0.9440641963653529,
+ "grad_norm": 1.0287963151931763,
+ "learning_rate": 1.7829568207250902e-07,
+ "loss": 1.8958,
+ "step": 4000
+ },
+ {
+ "epoch": 0.9676658012744866,
+ "grad_norm": 0.9667606949806213,
+ "learning_rate": 5.969630965725448e-08,
+ "loss": 1.9538,
+ "step": 4100
+ },
+ {
+ "epoch": 0.9912674061836205,
+ "grad_norm": 1.3044230937957764,
+ "learning_rate": 4.3582367631034295e-09,
+ "loss": 1.8733,
+ "step": 4200
+ },
+ {
+ "epoch": 1.0,
+ "step": 4237,
+ "total_flos": 7.717588795392e+16,
+ "train_loss": 1.9823122004278713,
+ "train_runtime": 2216.0595,
+ "train_samples_per_second": 3.824,
+ "train_steps_per_second": 1.912
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 4237,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 1,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 7.717588795392e+16,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}