Iiro committed on
Commit
9d828c4
1 Parent(s): cafe970

Iiro/bert_reviews

Browse files
Files changed (4) hide show
  1. README.md +48 -11
  2. config.json +19 -4
  3. pytorch_model.bin +2 -2
  4. training_args.bin +1 -1
README.md CHANGED
@@ -1,12 +1,28 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
5
  - amazon_reviews_multi
 
 
6
  model-index:
7
  - name: bert_reviews
8
- results: []
9
- pipeline_tag: text-classification
 
 
 
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -14,15 +30,10 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # bert_reviews
16
 
17
- This model was trained from scratch on the amazon_reviews_multi dataset.
18
  It achieves the following results on the evaluation set:
19
- - eval_loss: 0.8196
20
- - eval_accuracy: 0.6544
21
- - eval_runtime: 39.7329
22
- - eval_samples_per_second: 125.84
23
- - eval_steps_per_second: 15.73
24
- - epoch: 0.52
25
- - step: 13000
26
 
27
  ## Model description
28
 
@@ -49,9 +60,35 @@ The following hyperparameters were used during training:
49
  - lr_scheduler_type: linear
50
  - training_steps: 20000
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  ### Framework versions
53
 
54
  - Transformers 4.34.1
55
  - Pytorch 2.1.0+cu118
56
  - Datasets 2.14.6
57
- - Tokenizers 0.14.1
 
1
  ---
2
+ license: apache-2.0
3
+ base_model: distilbert-base-uncased
4
  tags:
5
  - generated_from_trainer
6
  datasets:
7
  - amazon_reviews_multi
8
+ metrics:
9
+ - accuracy
10
  model-index:
11
  - name: bert_reviews
12
+ results:
13
+ - task:
14
+ name: Text Classification
15
+ type: text-classification
16
+ dataset:
17
+ name: amazon_reviews_multi
18
+ type: amazon_reviews_multi
19
+ config: en
20
+ split: test
21
+ args: en
22
+ metrics:
23
+ - name: Accuracy
24
+ type: accuracy
25
+ value: 0.6062
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # bert_reviews
32
 
33
+ This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on the amazon_reviews_multi dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.9204
36
+ - Accuracy: 0.6062
 
 
 
 
 
37
 
38
  ## Model description
39
 
 
60
  - lr_scheduler_type: linear
61
  - training_steps: 20000
62
 
63
+ ### Training results
64
+
65
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
66
+ |:-------------:|:-----:|:-----:|:---------------:|:--------:|
67
+ | 0.8812 | 0.04 | 1000 | 0.9970 | 0.5738 |
68
+ | 0.8495 | 0.08 | 2000 | 1.0120 | 0.569 |
69
+ | 0.8067 | 0.12 | 3000 | 1.0442 | 0.5766 |
70
+ | 0.7934 | 0.16 | 4000 | 1.0629 | 0.5772 |
71
+ | 0.7845 | 0.2 | 5000 | 1.0236 | 0.5876 |
72
+ | 0.9033 | 0.24 | 6000 | 0.9822 | 0.5774 |
73
+ | 0.8993 | 0.28 | 7000 | 0.9693 | 0.5816 |
74
+ | 0.9012 | 0.32 | 8000 | 1.0075 | 0.5738 |
75
+ | 0.873 | 0.36 | 9000 | 0.9663 | 0.5886 |
76
+ | 0.9376 | 0.4 | 10000 | 0.9447 | 0.5816 |
77
+ | 0.9398 | 0.44 | 11000 | 0.9509 | 0.5802 |
78
+ | 0.9402 | 0.48 | 12000 | 0.9561 | 0.5916 |
79
+ | 0.9247 | 0.52 | 13000 | 0.9303 | 0.6008 |
80
+ | 0.9247 | 0.56 | 14000 | 0.9241 | 0.5998 |
81
+ | 0.9192 | 0.6 | 15000 | 0.9276 | 0.6104 |
82
+ | 0.907 | 0.64 | 16000 | 0.9251 | 0.603 |
83
+ | 0.9177 | 0.68 | 17000 | 0.9198 | 0.6056 |
84
+ | 0.9129 | 0.72 | 18000 | 0.9167 | 0.6078 |
85
+ | 0.8948 | 0.76 | 19000 | 0.9213 | 0.604 |
86
+ | 0.906 | 0.8 | 20000 | 0.9204 | 0.6062 |
87
+
88
+
89
  ### Framework versions
90
 
91
  - Transformers 4.34.1
92
  - Pytorch 2.1.0+cu118
93
  - Datasets 2.14.6
94
+ - Tokenizers 0.14.1
config.json CHANGED
@@ -1,9 +1,13 @@
1
  {
 
 
2
  "architectures": [
3
- "Bert"
4
  ],
5
- "hidden_bert": 768,
6
- "hidden_ffnn": 24,
 
 
7
  "id2label": {
8
  "0": "LABEL_0",
9
  "1": "LABEL_1",
@@ -11,6 +15,7 @@
11
  "3": "LABEL_3",
12
  "4": "LABEL_4"
13
  },
 
14
  "label2id": {
15
  "LABEL_0": 0,
16
  "LABEL_1": 1,
@@ -18,7 +23,17 @@
18
  "LABEL_3": 3,
19
  "LABEL_4": 4
20
  },
 
 
 
 
 
 
 
 
 
 
21
  "torch_dtype": "float32",
22
  "transformers_version": "4.34.1",
23
- "vocab_size": 28996
24
  }
 
1
  {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
  "architectures": [
5
+ "DistilBertForSequenceClassification"
6
  ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
  "id2label": {
12
  "0": "LABEL_0",
13
  "1": "LABEL_1",
 
15
  "3": "LABEL_3",
16
  "4": "LABEL_4"
17
  },
18
+ "initializer_range": 0.02,
19
  "label2id": {
20
  "LABEL_0": 0,
21
  "LABEL_1": 1,
 
23
  "LABEL_3": 3,
24
  "LABEL_4": 4
25
  },
26
+ "max_position_embeddings": 512,
27
+ "model_type": "distilbert",
28
+ "n_heads": 12,
29
+ "n_layers": 6,
30
+ "pad_token_id": 0,
31
+ "problem_type": "single_label_classification",
32
+ "qa_dropout": 0.1,
33
+ "seq_classif_dropout": 0.2,
34
+ "sinusoidal_pos_embds": false,
35
+ "tie_weights_": true,
36
  "torch_dtype": "float32",
37
  "transformers_version": "4.34.1",
38
+ "vocab_size": 30522
39
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:459282ffb2884070a380ef30845b262bf77813534ff708275ca9e6853bd97625
3
- size 433384758
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86ee65e834160aace6c12aa0d503cd6099f9b07664b12a565dbb52d046636268
3
+ size 267865194
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c277da8f4bedc9a2e1636c9c4bcac05169afdd2c0e9f7cb4183d1486ada8cfb0
3
  size 4472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9a7349f61988d6e6a3c339b566c8b29407e9359421ec483dcd69ca1f45a41f5
3
  size 4472