azliza794 committed on Oct 2, 2024

Commit

406a9ed

verified ·

1 Parent(s): 6ea7748

Training in progress, epoch 5

Browse files

Files changed (46) hide show

model.safetensors +1 -1
run-6/checkpoint-1070/config.json +25 -0
run-6/checkpoint-1070/model.safetensors +3 -0
run-6/checkpoint-1070/optimizer.pt +3 -0
run-6/checkpoint-1070/rng_state.pth +3 -0
run-6/checkpoint-1070/scheduler.pt +3 -0
run-6/checkpoint-1070/special_tokens_map.json +7 -0
run-6/checkpoint-1070/tokenizer.json +0 -0
run-6/checkpoint-1070/tokenizer_config.json +55 -0
run-6/checkpoint-1070/trainer_state.json +70 -0
run-6/checkpoint-1070/training_args.bin +3 -0
run-6/checkpoint-1070/vocab.txt +0 -0
run-6/checkpoint-1605/config.json +25 -0
run-6/checkpoint-1605/model.safetensors +3 -0
run-6/checkpoint-1605/optimizer.pt +3 -0
run-6/checkpoint-1605/rng_state.pth +3 -0
run-6/checkpoint-1605/scheduler.pt +3 -0
run-6/checkpoint-1605/special_tokens_map.json +7 -0
run-6/checkpoint-1605/tokenizer.json +0 -0
run-6/checkpoint-1605/tokenizer_config.json +55 -0
run-6/checkpoint-1605/trainer_state.json +86 -0
run-6/checkpoint-1605/training_args.bin +3 -0
run-6/checkpoint-1605/vocab.txt +0 -0
run-6/checkpoint-2140/config.json +25 -0
run-6/checkpoint-2140/model.safetensors +3 -0
run-6/checkpoint-2140/optimizer.pt +3 -0
run-6/checkpoint-2140/rng_state.pth +3 -0
run-6/checkpoint-2140/scheduler.pt +3 -0
run-6/checkpoint-2140/special_tokens_map.json +7 -0
run-6/checkpoint-2140/tokenizer.json +0 -0
run-6/checkpoint-2140/tokenizer_config.json +55 -0
run-6/checkpoint-2140/trainer_state.json +102 -0
run-6/checkpoint-2140/training_args.bin +3 -0
run-6/checkpoint-2140/vocab.txt +0 -0
run-6/checkpoint-2675/config.json +25 -0
run-6/checkpoint-2675/model.safetensors +3 -0
run-6/checkpoint-2675/optimizer.pt +3 -0
run-6/checkpoint-2675/rng_state.pth +3 -0
run-6/checkpoint-2675/scheduler.pt +3 -0
run-6/checkpoint-2675/special_tokens_map.json +7 -0
run-6/checkpoint-2675/tokenizer.json +0 -0
run-6/checkpoint-2675/tokenizer_config.json +55 -0
run-6/checkpoint-2675/trainer_state.json +118 -0
run-6/checkpoint-2675/training_args.bin +3 -0
run-6/checkpoint-2675/vocab.txt +0 -0
runs/Oct02_05-18-55_0fd1839bafe2/events.out.tfevents.1727848503.0fd1839bafe2.228.8 +2 -2

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0cdd0af875626d3496bb082790ac5c76fd691e06315cb4505cebe6e2ecb65d7
+oid sha256:554ad4273df72d1b50153faa8df78d3999bf3edd62772fbab80fef4d28b54a32
 size 267832560

run-6/checkpoint-1070/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.1",
+  "vocab_size": 30522
+}

run-6/checkpoint-1070/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fc616171333e2ada2b809f891efc9cfaa66bd67f1bc9b0a29135318b6a0f73aa
+size 267832560

run-6/checkpoint-1070/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ccfb36ae4f0cc72d67d89125bb139dbc7ff77826a80d013520b8fcd708dc482
+size 535727290

run-6/checkpoint-1070/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8933c7a8628eb86ba73fde23c3294603d45d0ccc6afbf52bebe8d2594bc9a6ec
+size 14180

run-6/checkpoint-1070/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91f6a238ba8dc2174fe25880c5c7c36b1fc6f9f76a7b6b66ab73a9750a1bbf52
+size 1064

run-6/checkpoint-1070/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-1070/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-1070/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-1070/trainer_state.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "best_metric": 0.5324104093270398,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-6/checkpoint-1070",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1070,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.9345794392523364,
+      "grad_norm": 4.85833215713501,
+      "learning_rate": 3.693802167824332e-05,
+      "loss": 0.5213,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.48894789814949036,
+      "eval_matthews_correlation": 0.41310412373921845,
+      "eval_runtime": 0.7397,
+      "eval_samples_per_second": 1410.045,
+      "eval_steps_per_second": 89.226,
+      "step": 535
+    },
+    {
+      "epoch": 1.8691588785046729,
+      "grad_norm": 12.77253532409668,
+      "learning_rate": 2.844652244186554e-05,
+      "loss": 0.3193,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5021252036094666,
+      "eval_matthews_correlation": 0.5324104093270398,
+      "eval_runtime": 0.835,
+      "eval_samples_per_second": 1249.081,
+      "eval_steps_per_second": 79.041,
+      "step": 1070
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2675,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 90595540587468.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.542952091462109e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 16,
+    "seed": 10
+  }
+}

run-6/checkpoint-1070/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3725dffbe4e652fc6d238c39cd8917882b7d3f1059d60294bb324af43692dbc
+size 5304

run-6/checkpoint-1070/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-1605/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.1",
+  "vocab_size": 30522
+}

run-6/checkpoint-1605/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb4555dea7090c31444db87cc8d628d083aacb6e56a85b64df6af3cc0e5a185e
+size 267832560

run-6/checkpoint-1605/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:430451db3354c52baafac97fb0c4393bdbf004e3ccc08d44aac06295ed2e173f
+size 535727290

run-6/checkpoint-1605/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d01d7b53976e4bed3d4a32a8bb0728877e6a95928a05274d754f7a184877684
+size 14180

run-6/checkpoint-1605/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b99256f69a986cf16c6806bfb211f7061569f0122cef450927165c3791b933a2
+size 1064

run-6/checkpoint-1605/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-1605/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-1605/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-1605/trainer_state.json ADDED Viewed

	@@ -0,0 +1,86 @@

+{
+  "best_metric": 0.5324104093270398,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-6/checkpoint-1070",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1605,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.9345794392523364,
+      "grad_norm": 4.85833215713501,
+      "learning_rate": 3.693802167824332e-05,
+      "loss": 0.5213,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.48894789814949036,
+      "eval_matthews_correlation": 0.41310412373921845,
+      "eval_runtime": 0.7397,
+      "eval_samples_per_second": 1410.045,
+      "eval_steps_per_second": 89.226,
+      "step": 535
+    },
+    {
+      "epoch": 1.8691588785046729,
+      "grad_norm": 12.77253532409668,
+      "learning_rate": 2.844652244186554e-05,
+      "loss": 0.3193,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5021252036094666,
+      "eval_matthews_correlation": 0.5324104093270398,
+      "eval_runtime": 0.835,
+      "eval_samples_per_second": 1249.081,
+      "eval_steps_per_second": 79.041,
+      "step": 1070
+    },
+    {
+      "epoch": 2.803738317757009,
+      "grad_norm": 0.39278796315193176,
+      "learning_rate": 1.9955023205487768e-05,
+      "loss": 0.219,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.808618426322937,
+      "eval_matthews_correlation": 0.5035428557399136,
+      "eval_runtime": 0.8341,
+      "eval_samples_per_second": 1250.464,
+      "eval_steps_per_second": 79.128,
+      "step": 1605
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2675,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 133840454290116.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.542952091462109e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 16,
+    "seed": 10
+  }
+}

run-6/checkpoint-1605/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3725dffbe4e652fc6d238c39cd8917882b7d3f1059d60294bb324af43692dbc
+size 5304

run-6/checkpoint-1605/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-2140/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.1",
+  "vocab_size": 30522
+}

run-6/checkpoint-2140/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd627bd07826dfab70aef5000f5f15540717e74d59ef2449010a250297af65bc
+size 267832560

run-6/checkpoint-2140/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c40fcbf70d9f5dd736931840744e294bdf6c214ce030eebf347fb9d689f738c1
+size 535727290

run-6/checkpoint-2140/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:100272c808987747e369f215dfc43490204cfa59dd210ebb989e228c9fe4a856
+size 14180

run-6/checkpoint-2140/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:041e954fc294d1a62f234ed87ceeb3a776bca33f8f22bd6cdb2f3ad276686366
+size 1064

run-6/checkpoint-2140/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-2140/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-2140/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-2140/trainer_state.json ADDED Viewed

	@@ -0,0 +1,102 @@

+{
+  "best_metric": 0.5353626178321323,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-6/checkpoint-2140",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 2140,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.9345794392523364,
+      "grad_norm": 4.85833215713501,
+      "learning_rate": 3.693802167824332e-05,
+      "loss": 0.5213,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.48894789814949036,
+      "eval_matthews_correlation": 0.41310412373921845,
+      "eval_runtime": 0.7397,
+      "eval_samples_per_second": 1410.045,
+      "eval_steps_per_second": 89.226,
+      "step": 535
+    },
+    {
+      "epoch": 1.8691588785046729,
+      "grad_norm": 12.77253532409668,
+      "learning_rate": 2.844652244186554e-05,
+      "loss": 0.3193,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5021252036094666,
+      "eval_matthews_correlation": 0.5324104093270398,
+      "eval_runtime": 0.835,
+      "eval_samples_per_second": 1249.081,
+      "eval_steps_per_second": 79.041,
+      "step": 1070
+    },
+    {
+      "epoch": 2.803738317757009,
+      "grad_norm": 0.39278796315193176,
+      "learning_rate": 1.9955023205487768e-05,
+      "loss": 0.219,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.808618426322937,
+      "eval_matthews_correlation": 0.5035428557399136,
+      "eval_runtime": 0.8341,
+      "eval_samples_per_second": 1250.464,
+      "eval_steps_per_second": 79.128,
+      "step": 1605
+    },
+    {
+      "epoch": 3.7383177570093458,
+      "grad_norm": 38.36227035522461,
+      "learning_rate": 1.1463523969109994e-05,
+      "loss": 0.1254,
+      "step": 2000
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.8212687969207764,
+      "eval_matthews_correlation": 0.5353626178321323,
+      "eval_runtime": 0.808,
+      "eval_samples_per_second": 1290.873,
+      "eval_steps_per_second": 81.685,
+      "step": 2140
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2675,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 176447609911344.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.542952091462109e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 16,
+    "seed": 10
+  }
+}

run-6/checkpoint-2140/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3725dffbe4e652fc6d238c39cd8917882b7d3f1059d60294bb324af43692dbc
+size 5304

run-6/checkpoint-2140/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-2675/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.1",
+  "vocab_size": 30522
+}

run-6/checkpoint-2675/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:554ad4273df72d1b50153faa8df78d3999bf3edd62772fbab80fef4d28b54a32
+size 267832560

run-6/checkpoint-2675/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:195161d46b1a09f11a6db1368aeb47efeae3c98cce8ebc297903f68322db564f
+size 535727290

run-6/checkpoint-2675/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f619e6e9bfa1fbd13c8f4f80abd766bb910adb196ae28ddc5a9cec493f3b9a8
+size 14180

run-6/checkpoint-2675/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3719f5408f5fab5c513b4b9a28fce1b4ceb0f1af0382af76849cf156b1c9e032
+size 1064

run-6/checkpoint-2675/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-2675/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-6/checkpoint-2675/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-6/checkpoint-2675/trainer_state.json ADDED Viewed

	@@ -0,0 +1,118 @@

+{
+  "best_metric": 0.5353626178321323,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-6/checkpoint-2140",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 2675,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.9345794392523364,
+      "grad_norm": 4.85833215713501,
+      "learning_rate": 3.693802167824332e-05,
+      "loss": 0.5213,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.48894789814949036,
+      "eval_matthews_correlation": 0.41310412373921845,
+      "eval_runtime": 0.7397,
+      "eval_samples_per_second": 1410.045,
+      "eval_steps_per_second": 89.226,
+      "step": 535
+    },
+    {
+      "epoch": 1.8691588785046729,
+      "grad_norm": 12.77253532409668,
+      "learning_rate": 2.844652244186554e-05,
+      "loss": 0.3193,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5021252036094666,
+      "eval_matthews_correlation": 0.5324104093270398,
+      "eval_runtime": 0.835,
+      "eval_samples_per_second": 1249.081,
+      "eval_steps_per_second": 79.041,
+      "step": 1070
+    },
+    {
+      "epoch": 2.803738317757009,
+      "grad_norm": 0.39278796315193176,
+      "learning_rate": 1.9955023205487768e-05,
+      "loss": 0.219,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.808618426322937,
+      "eval_matthews_correlation": 0.5035428557399136,
+      "eval_runtime": 0.8341,
+      "eval_samples_per_second": 1250.464,
+      "eval_steps_per_second": 79.128,
+      "step": 1605
+    },
+    {
+      "epoch": 3.7383177570093458,
+      "grad_norm": 38.36227035522461,
+      "learning_rate": 1.1463523969109994e-05,
+      "loss": 0.1254,
+      "step": 2000
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.8212687969207764,
+      "eval_matthews_correlation": 0.5353626178321323,
+      "eval_runtime": 0.808,
+      "eval_samples_per_second": 1290.873,
+      "eval_steps_per_second": 81.685,
+      "step": 2140
+    },
+    {
+      "epoch": 4.672897196261682,
+      "grad_norm": 0.10120061784982681,
+      "learning_rate": 2.9720247327322206e-06,
+      "loss": 0.0886,
+      "step": 2500
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.9762260317802429,
+      "eval_matthews_correlation": 0.5203121591830998,
+      "eval_runtime": 0.895,
+      "eval_samples_per_second": 1165.369,
+      "eval_steps_per_second": 73.743,
+      "step": 2675
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2675,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 219123327760392.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.542952091462109e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 16,
+    "seed": 10
+  }
+}

run-6/checkpoint-2675/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3725dffbe4e652fc6d238c39cd8917882b7d3f1059d60294bb324af43692dbc
+size 5304

run-6/checkpoint-2675/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

runs/Oct02_05-18-55_0fd1839bafe2/events.out.tfevents.1727848503.0fd1839bafe2.228.8 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e47dc43881ae8006d966a14db29ad8b27d2e30c97f32225aa7c4f9eb586792a7
-size 7110
+oid sha256:19097ba8be6e5024c5169b7a4e32f1608caf943688108cef0f074f132ab2df17
+size 8010