gc394 committed
Commit dad47e4 · verified · 1 Parent(s): 68bfdfc

End of training

Files changed (7)
  1. README.md +14 -12
  2. config.json +2 -3
  3. model.safetensors +2 -2
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +2 -2
  6. training_args.bin +2 -2
  7. vocab.txt +0 -0
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
- base_model: distilbert-base-cased
+ base_model: distilbert-base-uncased
 tags:
 - generated_from_trainer
 model-index:
@@ -13,9 +13,9 @@ should probably proofread and complete it, then remove this comment. -->

 # da_distilbert

- This model is a fine-tuned version of [distilbert-base-cased](https://huggingface.co/distilbert-base-cased) on the None dataset.
+ This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on the None dataset.
 It achieves the following results on the evaluation set:
- - Loss: 1.6089
+ - Loss: 1.7151

 ## Model description

@@ -40,20 +40,22 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
- - num_epochs: 8
+ - num_epochs: 10

 ### Training results

 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
- | No log | 1.0 | 130 | 1.9587 |
- | No log | 2.0 | 260 | 1.8275 |
- | No log | 3.0 | 390 | 1.7576 |
- | 1.9627 | 4.0 | 520 | 1.7045 |
- | 1.9627 | 5.0 | 650 | 1.6049 |
- | 1.9627 | 6.0 | 780 | 1.6452 |
- | 1.9627 | 7.0 | 910 | 1.5920 |
- | 1.6873 | 8.0 | 1040 | 1.6354 |
+ | No log | 1.0 | 130 | 2.1034 |
+ | No log | 2.0 | 260 | 1.9547 |
+ | No log | 3.0 | 390 | 1.8459 |
+ | 2.1418 | 4.0 | 520 | 1.8499 |
+ | 2.1418 | 5.0 | 650 | 1.7874 |
+ | 2.1418 | 6.0 | 780 | 1.7771 |
+ | 2.1418 | 7.0 | 910 | 1.7605 |
+ | 1.8295 | 8.0 | 1040 | 1.7202 |
+ | 1.8295 | 9.0 | 1170 | 1.6926 |
+ | 1.8295 | 10.0 | 1300 | 1.7350 |


 ### Framework versions
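
The updated card documents a masked-language-model fine-tune of distilbert-base-uncased. A minimal sketch of loading the resulting checkpoint for fill-mask inference follows; the hub id `gc394/da_distilbert` is an assumption pieced together from the committer name and the card title, not something stated in this diff.

```python
from transformers import pipeline

# Hub id is an assumption (committer "gc394" + card title "da_distilbert");
# replace it with the actual repo path if it differs.
fill_mask = pipeline("fill-mask", model="gc394/da_distilbert")

# config.json lists DistilBertForMaskedLM, so the model scores [MASK] fillers.
for candidate in fill_mask("The weather in Copenhagen is [MASK] today."):
    print(f"{candidate['token_str']!r}: {candidate['score']:.3f}")
```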
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-   "_name_or_path": "distilbert-base-cased",
+   "_name_or_path": "distilbert-base-uncased",
   "activation": "gelu",
   "architectures": [
     "DistilBertForMaskedLM"
@@ -13,7 +13,6 @@
   "model_type": "distilbert",
   "n_heads": 12,
   "n_layers": 6,
-   "output_past": true,
   "pad_token_id": 0,
   "qa_dropout": 0.1,
   "seq_classif_dropout": 0.2,
@@ -21,5 +20,5 @@
   "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.40.1",
-   "vocab_size": 28996
+   "vocab_size": 30522
 }
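
The vocabulary size now matches the uncased base checkpoint rather than the cased one. As a quick sanity-check sketch, the new value can be compared against the upstream distilbert-base-uncased config:

```python
from transformers import AutoConfig

# Sketch: the fine-tuned config should now agree with the uncased base model.
base = AutoConfig.from_pretrained("distilbert-base-uncased")
print(base.vocab_size)  # 30522, the value written to config.json above
```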
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:fb8f9b788a7dd1858ff3099aceadd76027c6663bc8abf9382b1bed09ac274180
- size 263260784
+ oid sha256:eff1a41cfde9865d4c2f05171292cd2bcdecb2c0f7d4ed7a7b71b44f6447802c
+ size 267954768
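
The file grows by 4,693,984 bytes, which is roughly what the larger uncased vocabulary adds in float32: 1,526 extra word-embedding rows plus the matching vocab-projector bias entries. A back-of-the-envelope check, assuming the shapes implied by config.json:

```python
# Assumes float32 parameters and the tied embedding/projection weights
# declared in config.json ("tie_weights_": true).
hidden_size = 768                     # DistilBERT hidden dimension
extra_tokens = 30522 - 28996          # uncased vocab minus cased vocab
embedding_bytes = extra_tokens * hidden_size * 4  # extra word-embedding rows
bias_bytes = extra_tokens * 4                     # extra vocab_projector bias entries
print(embedding_bytes + bias_bytes)   # 4693976
print(267954768 - 263260784)          # 4693984, the observed file-size delta
```

The few remaining bytes are plausibly accounted for by differences in the safetensors headers of the two files.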
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -43,9 +43,9 @@
   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
-   "do_lower_case": false,
+   "do_lower_case": true,
   "mask_token": "[MASK]",
-   "model_max_length": 1000000000000000019884624838656,
+   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:51a709fd88a40cd7d65704303344c951e970172f3a14ecb222a9b9c05ed6ebba
- size 4984
+ oid sha256:b52f3b68a04ebd189bd9cf0bfeec1c67d27cf6c66420b49ba2bc9ab91a56e5b0
+ size 4920
vocab.txt CHANGED
The diff for this file is too large to render. See raw diff