text_mapping
- README.md +22 -22
- config.json +28 -21
- model.safetensors +2 -2
- special_tokens_map.json +5 -18
- tokenizer.json +0 -0
- tokenizer_config.json +19 -47
- training_args.bin +2 -2
- vocab.txt +0 -0
README.md
CHANGED
@@ -1,7 +1,7 @@
 ---
 library_name: transformers
-license:
-base_model:
+license: apache-2.0
+base_model: distilbert/distilbert-base-cased
 tags:
 - generated_from_trainer
 metrics:
@@ -19,13 +19,13 @@ should probably proofread and complete it, then remove this comment. -->
 
 # my_awesome_wnut_model
 
-This model is a fine-tuned version of [
+This model is a fine-tuned version of [distilbert/distilbert-base-cased](https://huggingface.co/distilbert/distilbert-base-cased) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
+- Loss: 0.0832
 - Precision: 0.0
 - Recall: 0.0
 - F1: 0.0
-- Accuracy: 0.
+- Accuracy: 0.9821
 
 ## Model description
 
@@ -44,33 +44,33 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate:
+- learning_rate: 0.0002
 - train_batch_size: 16
 - eval_batch_size: 16
 - seed: 42
-- optimizer:
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 10
 
 ### Training results
 
-| Training Loss | Epoch | Step
-|
-|
-|
-|
-|
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
+| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1  | Accuracy |
+|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:---:|:--------:|
+| No log        | 1.0   | 118  | 0.0767          | 0.0       | 0.0    | 0.0 | 0.9725   |
+| No log        | 2.0   | 236  | 0.0554          | 0.0       | 0.0    | 0.0 | 0.9799   |
+| No log        | 3.0   | 354  | 0.0695          | 0.0       | 0.0    | 0.0 | 0.9799   |
+| No log        | 4.0   | 472  | 0.0762          | 0.0       | 0.0    | 0.0 | 0.9795   |
+| 0.0497        | 5.0   | 590  | 0.0888          | 0.0       | 0.0    | 0.0 | 0.9804   |
+| 0.0497        | 6.0   | 708  | 0.0820          | 0.0       | 0.0    | 0.0 | 0.9812   |
+| 0.0497        | 7.0   | 826  | 0.0877          | 0.0       | 0.0    | 0.0 | 0.9814   |
+| 0.0497        | 8.0   | 944  | 0.0864          | 0.0       | 0.0    | 0.0 | 0.9815   |
+| 0.003         | 9.0   | 1062 | 0.0876          | 0.0       | 0.0    | 0.0 | 0.9823   |
+| 0.003         | 10.0  | 1180 | 0.0832          | 0.0       | 0.0    | 0.0 | 0.9821   |
 
 
 ### Framework versions
 
-- Transformers 4.
+- Transformers 4.44.2
 - Pytorch 2.5.1
-- Datasets
-- Tokenizers 0.
+- Datasets 2.19.0
+- Tokenizers 0.19.1
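Taken together, the card now describes a token-classification fine-tune of distilbert/distilbert-base-cased. Below is a minimal sketch, not the author's actual script, of a `Trainer` run wired to these exact hyperparameters; since the card leaves the dataset unrecorded ("the None dataset"), a two-sentence toy dataset stands in for it, and the three labels mirror the LABEL_0 to LABEL_2 mapping from config.json in this same commit.

```python
# Hedged reconstruction of a training setup matching the card above.
# The toy dataset is a placeholder; the real training data is not recorded.
from datasets import Dataset
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    DataCollatorForTokenClassification,
    Trainer,
    TrainingArguments,
)

base = "distilbert/distilbert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(base)
model = AutoModelForTokenClassification.from_pretrained(base, num_labels=3)

toy = Dataset.from_dict({
    "tokens": [["Alice", "visited", "Paris"], ["Bob", "works", "remotely"]],
    "ner_tags": [[1, 0, 2], [1, 0, 0]],
})

def tokenize_and_align(batch):
    # Tokenize pre-split words and copy each word's tag to its first subword;
    # special tokens and continuation pieces get -100 so the loss ignores them.
    enc = tokenizer(batch["tokens"], is_split_into_words=True, truncation=True)
    enc["labels"] = []
    for i, tags in enumerate(batch["ner_tags"]):
        prev = None
        labels = []
        for wid in enc.word_ids(batch_index=i):
            labels.append(-100 if wid is None or wid == prev else tags[wid])
            prev = wid
        enc["labels"].append(labels)
    return enc

toy = toy.map(tokenize_and_align, batched=True, remove_columns=toy.column_names)

args = TrainingArguments(
    output_dir="my_awesome_wnut_model",
    learning_rate=2e-4,              # learning_rate: 0.0002
    per_device_train_batch_size=16,  # train_batch_size: 16
    per_device_eval_batch_size=16,   # eval_batch_size: 16
    num_train_epochs=10,             # num_epochs: 10
    seed=42,                         # seed: 42
    lr_scheduler_type="linear",      # Adam defaults match the optimizer line
    eval_strategy="epoch",
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=toy,
    eval_dataset=toy,
    data_collator=DataCollatorForTokenClassification(tokenizer=tokenizer),
    tokenizer=tokenizer,
)
trainer.train()
```

One reading of the results table: precision, recall, and F1 stay at 0.0 while accuracy climbs to about 0.98, which on heavily imbalanced token data usually means the model tags every token with the majority class, so the accuracy figure alone overstates what was learned.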
config.json
CHANGED
@@ -1,28 +1,35 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "distilbert/distilbert-base-cased",
+  "activation": "gelu",
   "architectures": [
-    "
+    "DistilBertForTokenClassification"
   ],
-  "
-  "
-  "
-  "
-  "
-
-
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
   "initializer_range": 0.02,
-  "
-
-
-
-
-  "
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
   "output_past": true,
-  "pad_token_id":
-  "
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.
-  "
-  "use_cache": true,
-  "vocab_size": 32005
+  "transformers_version": "4.44.2",
+  "vocab_size": 28996
 }
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f47facfb339c6465ee025b15245c249ea6e96b6de40abddd120f09695af1dc24
+size 260785212
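model.safetensors lives in Git LFS, so the diff above touches only the pointer file: a SHA-256 digest plus a byte count (roughly 260 MB, plausible for a float32 DistilBERT-base checkpoint). A sketch of verifying a downloaded copy against the new pointer:

```python
# Check a downloaded model.safetensors against the LFS pointer above.
import hashlib
from pathlib import Path

path = Path("model.safetensors")
assert path.stat().st_size == 260785212, "size mismatch"
digest = hashlib.sha256(path.read_bytes()).hexdigest()
assert digest == "f47facfb339c6465ee025b15245c249ea6e96b6de40abddd120f09695af1dc24"
print("pointer and file agree")
```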
special_tokens_map.json
CHANGED
@@ -1,20 +1,7 @@
 {
-  "
-
-
-
-  ]
-  "bos_token": "<s>",
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "mask_token": {
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "unk_token": "<unk>"
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
 }
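The map drops the SentencePiece-era entries (bos/eos and the structured <mask> definition, none of which DistilBERT uses) in favor of the five WordPiece tokens. A quick sketch, assuming the base tokenizer resolves these to the usual cased-BERT ids, cross-checked against the added_tokens_decoder entries in tokenizer_config.json below:

```python
# Each special token should map to the id registered for it in
# tokenizer_config.json (0/100/101/102/103 in the diff below).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("distilbert/distilbert-base-cased")
for token, expected in [("[PAD]", 0), ("[UNK]", 100), ("[CLS]", 101),
                        ("[SEP]", 102), ("[MASK]", 103)]:
    assert tok.convert_tokens_to_ids(token) == expected, token
```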
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
CHANGED
@@ -2,63 +2,39 @@
   "add_prefix_space": true,
   "added_tokens_decoder": {
     "0": {
-      "content": "
+      "content": "[PAD]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "
-      "content": "
+    "100": {
+      "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "
-      "content": "
+    "101": {
+      "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "
-      "content": "
+    "102": {
+      "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "
-      "content": "
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "6": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "32004": {
-      "content": "<mask>",
-      "lstrip": true,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "32005": {
-      "content": "<unk>NOTUSED",
+    "103": {
+      "content": "[MASK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -66,19 +42,15 @@
       "special": true
     }
   },
-  "
-
-
-
-  ],
-  "bos_token": "<s>",
-  "clean_up_tokenization_spaces": false,
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "mask_token": "<mask>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "mask_token": "[MASK]",
   "model_max_length": 512,
-  "pad_token": "
-  "sep_token": "
-  "
-  "
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
 }
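With this config the tokenizer becomes a cased DistilBertTokenizer: sequences get wrapped in [CLS]/[SEP] instead of <s>/</s>, and capitalization survives (do_lower_case: false). A minimal round-trip sketch:

```python
# Encode a short string and inspect the tokens the new config produces.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("distilbert/distilbert-base-cased")
ids = tok("Hello Paris")["input_ids"]
print(tok.convert_ids_to_tokens(ids))
# expected shape: ['[CLS]', 'Hello', 'Paris', '[SEP]'], case preserved
```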
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c43497cc82970d13d7ba5492c2e1f1013a8ed4b26030c75dd08816764f5f51b7
+size 5176
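Unlike the weights file, training_args.bin is a pickled TrainingArguments object, hence the 5,176-byte size. A sketch of inspecting it to cross-check the card's hyperparameters; weights_only=False is needed because it is an arbitrary pickled object (only unpickle files you trust), and loading may require a Transformers version close to the 4.44.2 that wrote it:

```python
# Inspect the pickled TrainingArguments; values should match the model card.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate)                # 0.0002
print(args.per_device_train_batch_size)  # 16
print(args.num_train_epochs)             # 10
print(args.seed)                         # 42
```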
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff