End of training

Browse files

Files changed (6) hide show

README.md +12 -68
model.safetensors +1 -1
runs/Mar15_13-26-29_af6d53f073f1/events.out.tfevents.1710509191.af6d53f073f1.1790.0 +3 -0
tmp-checkpoint-1059/config.json +147 -0
tmp-checkpoint-1059/model.safetensors +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -3,11 +3,6 @@ license: apache-2.0
 base_model: ai-forever/ruBert-base
 tags:
 - generated_from_trainer
-metrics:
-- precision
-- recall
-- f1
-- accuracy
 model-index:
 - name: ruBert-base-finetuned-pos
   results: []
@@ -18,13 +13,18 @@ should probably proofread and complete it, then remove this comment. -->
 # ruBert-base-finetuned-pos
-This model is a fine-tuned version of [ai-forever/ruBert-base](https://huggingface.co/ai-forever/ruBert-base) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4473
-- Precision: 0.5945
-- Recall: 0.4954
-- F1: 0.5405
-- Accuracy: 0.6219
 ## Model description
@@ -49,63 +49,7 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 50
-### Training results
-| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
-|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
-| No log        | 1.0   | 2    | 3.2246          | 0.0300    | 0.0160 | 0.0209 | 0.0062   |
-| No log        | 2.0   | 4    | 2.6688          | 0.0       | 0.0    | 0.0    | 0.0021   |
-| No log        | 3.0   | 6    | 2.1227          | 0.0       | 0.0    | 0.0    | 0.0021   |
-| No log        | 4.0   | 8    | 1.6906          | 0.0       | 0.0    | 0.0    | 0.0      |
-| No log        | 5.0   | 10   | 1.4171          | 0.0       | 0.0    | 0.0    | 0.0      |
-| No log        | 6.0   | 12   | 1.2636          | 0.0       | 0.0    | 0.0    | 0.0      |
-| No log        | 7.0   | 14   | 1.1762          | 0.0       | 0.0    | 0.0    | 0.0      |
-| No log        | 8.0   | 16   | 1.1150          | 0.0       | 0.0    | 0.0    | 0.0      |
-| No log        | 9.0   | 18   | 1.0601          | 0.0       | 0.0    | 0.0    | 0.0      |
-| No log        | 10.0  | 20   | 1.0094          | 0.0       | 0.0    | 0.0    | 0.0      |
-| No log        | 11.0  | 22   | 0.9662          | 0.0       | 0.0    | 0.0    | 0.0021   |
-| No log        | 12.0  | 24   | 0.9311          | 0.0       | 0.0    | 0.0    | 0.0124   |
-| No log        | 13.0  | 26   | 0.9011          | 0.0       | 0.0    | 0.0    | 0.0847   |
-| No log        | 14.0  | 28   | 0.8737          | 0.0       | 0.0    | 0.0    | 0.1921   |
-| No log        | 15.0  | 30   | 0.8476          | 0.0       | 0.0    | 0.0    | 0.2231   |
-| No log        | 16.0  | 32   | 0.8230          | 0.0       | 0.0    | 0.0    | 0.2335   |
-| No log        | 17.0  | 34   | 0.7996          | 0.5       | 0.0160 | 0.0310 | 0.25     |
-| No log        | 18.0  | 36   | 0.7772          | 0.5       | 0.0342 | 0.0641 | 0.2665   |
-| No log        | 19.0  | 38   | 0.7550          | 0.4630    | 0.0571 | 0.1016 | 0.2913   |
-| No log        | 20.0  | 40   | 0.7323          | 0.4706    | 0.0731 | 0.1265 | 0.3079   |
-| No log        | 21.0  | 42   | 0.7100          | 0.4333    | 0.0890 | 0.1477 | 0.3244   |
-| No log        | 22.0  | 44   | 0.6888          | 0.4122    | 0.1233 | 0.1898 | 0.3595   |
-| No log        | 23.0  | 46   | 0.6686          | 0.3778    | 0.1553 | 0.2201 | 0.3967   |
-| No log        | 24.0  | 48   | 0.6490          | 0.3972    | 0.1941 | 0.2607 | 0.4236   |
-| No log        | 25.0  | 50   | 0.6304          | 0.4149    | 0.2283 | 0.2946 | 0.4483   |
-| No log        | 26.0  | 52   | 0.6130          | 0.4504    | 0.2694 | 0.3371 | 0.4773   |
-| No log        | 27.0  | 54   | 0.5967          | 0.4593    | 0.2831 | 0.3503 | 0.4855   |
-| No log        | 28.0  | 56   | 0.5815          | 0.4657    | 0.2945 | 0.3608 | 0.4938   |
-| No log        | 29.0  | 58   | 0.5675          | 0.4842    | 0.3151 | 0.3817 | 0.5041   |
-| No log        | 30.0  | 60   | 0.5545          | 0.4916    | 0.3356 | 0.3989 | 0.5165   |
-| No log        | 31.0  | 62   | 0.5423          | 0.4967    | 0.3447 | 0.4070 | 0.5269   |
-| No log        | 32.0  | 64   | 0.5311          | 0.5016    | 0.3539 | 0.4150 | 0.5372   |
-| No log        | 33.0  | 66   | 0.5209          | 0.5016    | 0.3539 | 0.4150 | 0.5372   |
-| No log        | 34.0  | 68   | 0.5118          | 0.5063    | 0.3653 | 0.4244 | 0.5455   |
-| No log        | 35.0  | 70   | 0.5035          | 0.5140    | 0.3767 | 0.4348 | 0.5537   |
-| No log        | 36.0  | 72   | 0.4960          | 0.5105    | 0.3881 | 0.4410 | 0.5599   |
-| No log        | 37.0  | 74   | 0.4891          | 0.5208    | 0.3995 | 0.4522 | 0.5682   |
-| No log        | 38.0  | 76   | 0.4827          | 0.5249    | 0.4087 | 0.4596 | 0.5723   |
-| No log        | 39.0  | 78   | 0.4770          | 0.5407    | 0.4247 | 0.4757 | 0.5806   |
-| No log        | 40.0  | 80   | 0.4719          | 0.5473    | 0.4361 | 0.4854 | 0.5888   |
-| No log        | 41.0  | 82   | 0.4673          | 0.5568    | 0.4475 | 0.4962 | 0.5971   |
-| No log        | 42.0  | 84   | 0.4632          | 0.5581    | 0.4498 | 0.4981 | 0.5992   |
-| No log        | 43.0  | 86   | 0.4597          | 0.5682    | 0.4658 | 0.5119 | 0.6074   |
-| No log        | 44.0  | 88   | 0.4565          | 0.5754    | 0.4703 | 0.5176 | 0.6136   |
-| No log        | 45.0  | 90   | 0.4538          | 0.5766    | 0.4726 | 0.5194 | 0.6136   |
-| No log        | 46.0  | 92   | 0.4515          | 0.5810    | 0.4749 | 0.5226 | 0.6157   |
-| No log        | 47.0  | 94   | 0.4497          | 0.5845    | 0.4817 | 0.5282 | 0.6178   |
-| No log        | 48.0  | 96   | 0.4484          | 0.5918    | 0.4932 | 0.5380 | 0.6198   |
-| No log        | 49.0  | 98   | 0.4477          | 0.5918    | 0.4932 | 0.5380 | 0.6198   |
-| No log        | 50.0  | 100  | 0.4473          | 0.5945    | 0.4954 | 0.5405 | 0.6219   |
 ### Framework versions

 base_model: ai-forever/ruBert-base
 tags:
 - generated_from_trainer
 model-index:
 - name: ruBert-base-finetuned-pos
   results: []
 # ruBert-base-finetuned-pos
+This model is a fine-tuned version of [ai-forever/ruBert-base](https://huggingface.co/ai-forever/ruBert-base) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- eval_loss: 0.0021
+- eval_precision: 0.9995
+- eval_recall: 0.9994
+- eval_f1: 0.9995
+- eval_accuracy: 0.9996
+- eval_runtime: 39.8442
+- eval_samples_per_second: 161.705
+- eval_steps_per_second: 2.535
+- epoch: 3.0
+- step: 1059
 ## Model description
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 10
 ### Framework versions

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68d1e78a5fe838ede415e521343a41ae1544521e8c80c650a34a5d95301a8a67
 size 711062560

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f27c9d39236051dca997c77239ecc8d101cbc3e696637d6fa8421f36f9cf45c
 size 711062560

runs/Mar15_13-26-29_af6d53f073f1/events.out.tfevents.1710509191.af6d53f073f1.1790.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64fb6c2264113ccbcaa09169458b72aeaecaeee627807ed73bbcffa1a2a2b521
+size 9302

tmp-checkpoint-1059/config.json ADDED Viewed

	@@ -0,0 +1,147 @@

+{
+  "_name_or_path": "ai-forever/ruBert-base",
+  "architectures": [
+    "BertForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "O",
+    "1": "Not",
+    "2": "Abbr",
+    "3": "Adj",
+    "4": "Adv",
+    "5": "Adv/action_des\u0441",
+    "6": "Adv/action_time",
+    "7": "Adv/measure",
+    "8": "Adv/place",
+    "9": "Adv/emph",
+    "10": "Advt",
+    "11": "Advt1",
+    "12": "Aux",
+    "13": "Bracket",
+    "14": "Colon",
+    "15": "Comma",
+    "16": "Conj.comp",
+    "17": "Coord",
+    "18": "Dash",
+    "19": "Dot",
+    "20": "Ellipsis",
+    "21": "Fw",
+    "22": "Interj.",
+    "23": "Interrog.word",
+    "24": "Introduct",
+    "25": "Mark/excl",
+    "26": "Mark/quest",
+    "27": "Measure",
+    "28": "N",
+    "29": "Num",
+    "30": "Num/ordinal",
+    "31": "Part/dem",
+    "32": "Part/excl",
+    "33": "Part/limit",
+    "34": "Part/imp",
+    "35": "Part/intens",
+    "36": "Part/neg",
+    "37": "Part/quest",
+    "38": "Part/soft",
+    "39": "Prep",
+    "40": "Pron",
+    "41": "Pron/attr",
+    "42": "Pron/dem",
+    "43": "Pron/neg",
+    "44": "Pron/pos",
+    "45": "Pron/rel",
+    "46": "Particip/adj",
+    "47": "Particip/v",
+    "48": "Punct",
+    "49": "Quote",
+    "50": "Semicolon",
+    "51": "State",
+    "52": "Sub",
+    "53": "Vf",
+    "54": "Vinf",
+    "55": "Vm"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "Abbr": 2,
+    "Adj": 3,
+    "Adv": 4,
+    "Adv/action_des\u0441": 5,
+    "Adv/action_time": 6,
+    "Adv/emph": 9,
+    "Adv/measure": 7,
+    "Adv/place": 8,
+    "Advt": 10,
+    "Advt1": 11,
+    "Aux": 12,
+    "Bracket": 13,
+    "Colon": 14,
+    "Comma": 15,
+    "Conj.comp": 16,
+    "Coord": 17,
+    "Dash": 18,
+    "Dot": 19,
+    "Ellipsis": 20,
+    "Fw": 21,
+    "Interj.": 22,
+    "Interrog.word": 23,
+    "Introduct": 24,
+    "Mark/excl": 25,
+    "Mark/quest": 26,
+    "Measure": 27,
+    "N": 28,
+    "Not": 1,
+    "Num": 29,
+    "Num/ordinal": 30,
+    "O": 0,
+    "Part/dem": 31,
+    "Part/excl": 32,
+    "Part/imp": 34,
+    "Part/intens": 35,
+    "Part/limit": 33,
+    "Part/neg": 36,
+    "Part/quest": 37,
+    "Part/soft": 38,
+    "Particip/adj": 46,
+    "Particip/v": 47,
+    "Prep": 39,
+    "Pron": 40,
+    "Pron/attr": 41,
+    "Pron/dem": 42,
+    "Pron/neg": 43,
+    "Pron/pos": 44,
+    "Pron/rel": 45,
+    "Punct": 48,
+    "Quote": 49,
+    "Semicolon": 50,
+    "State": 51,
+    "Sub": 52,
+    "Vf": 53,
+    "Vinf": 54,
+    "Vm": 55
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 120138
+}

tmp-checkpoint-1059/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f27c9d39236051dca997c77239ecc8d101cbc3e696637d6fa8421f36f9cf45c
+size 711062560

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75bb1fb4593a8f50556e93e5c07f2008ebe25cd6a1f4b2e165c4bd2389c06877
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:91e133dc65cfa7de7b6309e9d4ffa61691d54a4222c584940217dddb4b3f6a40
 size 4920