End of training

Browse files

Files changed (7) hide show

README.md +51 -51
config.json +3 -2
model.safetensors +2 -2
runs/Jan19_11-17-46_eab14bd28cfb/events.out.tfevents.1705663074.eab14bd28cfb.651.7 +3 -0
tokenizer_config.json +1 -1
training_args.bin +1 -1
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: distilbert-base-uncased
 tags:
 - generated_from_trainer
 metrics:
@@ -18,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 # DIALOGUE_one
-This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1947
 - Precision: 0.9762
 - Recall: 0.9737
 - F1: 0.9736
@@ -55,54 +55,54 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
-| 1.1919        | 0.62  | 30   | 0.8161          | 1.0       | 1.0    | 1.0    | 1.0      |
-| 0.6182        | 1.25  | 60   | 0.2981          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.2564        | 1.88  | 90   | 0.1427          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0833        | 2.5   | 120  | 0.0918          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0436        | 3.12  | 150  | 0.1185          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0215        | 3.75  | 180  | 0.1243          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0109        | 4.38  | 210  | 0.1179          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0075        | 5.0   | 240  | 0.1240          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0062        | 5.62  | 270  | 0.1362          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0049        | 6.25  | 300  | 0.1385          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0042        | 6.88  | 330  | 0.1572          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0037        | 7.5   | 360  | 0.1569          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0031        | 8.12  | 390  | 0.1501          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0029        | 8.75  | 420  | 0.1563          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0024        | 9.38  | 450  | 0.1617          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0023        | 10.0  | 480  | 0.1625          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0021        | 10.62 | 510  | 0.1658          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.002         | 11.25 | 540  | 0.1699          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0017        | 11.88 | 570  | 0.1727          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0017        | 12.5  | 600  | 0.1731          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0015        | 13.12 | 630  | 0.1756          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0015        | 13.75 | 660  | 0.1764          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0014        | 14.38 | 690  | 0.1797          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0013        | 15.0  | 720  | 0.1817          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0012        | 15.62 | 750  | 0.1822          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0011        | 16.25 | 780  | 0.1833          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0011        | 16.88 | 810  | 0.1843          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.001         | 17.5  | 840  | 0.1857          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.001         | 18.12 | 870  | 0.1872          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0009        | 18.75 | 900  | 0.1884          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0009        | 19.38 | 930  | 0.1879          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0009        | 20.0  | 960  | 0.1882          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0008        | 20.62 | 990  | 0.1888          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0008        | 21.25 | 1020 | 0.1895          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0008        | 21.88 | 1050 | 0.1902          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0007        | 22.5  | 1080 | 0.1904          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0008        | 23.12 | 1110 | 0.1911          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0007        | 23.75 | 1140 | 0.1919          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0007        | 24.38 | 1170 | 0.1923          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0007        | 25.0  | 1200 | 0.1928          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0007        | 25.62 | 1230 | 0.1933          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0007        | 26.25 | 1260 | 0.1938          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0007        | 26.88 | 1290 | 0.1939          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0007        | 27.5  | 1320 | 0.1943          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0006        | 28.12 | 1350 | 0.1945          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0007        | 28.75 | 1380 | 0.1946          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0007        | 29.38 | 1410 | 0.1947          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
-| 0.0007        | 30.0  | 1440 | 0.1947          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
 ### Framework versions

 ---
 license: apache-2.0
+base_model: distilbert-base-cased
 tags:
 - generated_from_trainer
 metrics:
 # DIALOGUE_one
+This model is a fine-tuned version of [distilbert-base-cased](https://huggingface.co/distilbert-base-cased) on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1862
 - Precision: 0.9762
 - Recall: 0.9737
 - F1: 0.9736
 | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
+| 1.1763        | 0.62  | 30   | 0.7339          | 0.9083    | 0.8553 | 0.8420 | 0.8553   |
+| 0.5684        | 1.25  | 60   | 0.2496          | 0.9524    | 0.9474 | 0.9472 | 0.9474   |
+| 0.2445        | 1.88  | 90   | 0.1581          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0728        | 2.5   | 120  | 0.0472          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.038         | 3.12  | 150  | 0.1179          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.012         | 3.75  | 180  | 0.0859          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0065        | 4.38  | 210  | 0.1251          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0046        | 5.0   | 240  | 0.1168          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0034        | 5.62  | 270  | 0.1213          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0028        | 6.25  | 300  | 0.1257          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0025        | 6.88  | 330  | 0.1355          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0022        | 7.5   | 360  | 0.1392          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0019        | 8.12  | 390  | 0.1435          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0016        | 8.75  | 420  | 0.1442          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0014        | 9.38  | 450  | 0.1474          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0013        | 10.0  | 480  | 0.1490          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0012        | 10.62 | 510  | 0.1514          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0011        | 11.25 | 540  | 0.1534          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0011        | 11.88 | 570  | 0.1549          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.001         | 12.5  | 600  | 0.1599          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0009        | 13.12 | 630  | 0.1642          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0009        | 13.75 | 660  | 0.1657          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0008        | 14.38 | 690  | 0.1659          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0008        | 15.0  | 720  | 0.1681          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0007        | 15.62 | 750  | 0.1689          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0007        | 16.25 | 780  | 0.1707          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0006        | 16.88 | 810  | 0.1722          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0006        | 17.5  | 840  | 0.1720          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0006        | 18.12 | 870  | 0.1749          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0006        | 18.75 | 900  | 0.1765          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0005        | 19.38 | 930  | 0.1774          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0005        | 20.0  | 960  | 0.1776          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0005        | 20.62 | 990  | 0.1778          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0005        | 21.25 | 1020 | 0.1794          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0005        | 21.88 | 1050 | 0.1804          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 22.5  | 1080 | 0.1810          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0005        | 23.12 | 1110 | 0.1819          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 23.75 | 1140 | 0.1825          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 24.38 | 1170 | 0.1830          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 25.0  | 1200 | 0.1836          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 25.62 | 1230 | 0.1841          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 26.25 | 1260 | 0.1845          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 26.88 | 1290 | 0.1848          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 27.5  | 1320 | 0.1856          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 28.12 | 1350 | 0.1858          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 28.75 | 1380 | 0.1861          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 29.38 | 1410 | 0.1862          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
+| 0.0004        | 30.0  | 1440 | 0.1862          | 0.9762    | 0.9737 | 0.9736 | 0.9737   |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "distilbert-base-uncased",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"
@@ -25,6 +25,7 @@
   "model_type": "distilbert",
   "n_heads": 12,
   "n_layers": 6,
   "pad_token_id": 0,
   "problem_type": "single_label_classification",
   "qa_dropout": 0.1,
@@ -33,5 +34,5 @@
   "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.36.2",
-  "vocab_size": 30522
 }

 {
+  "_name_or_path": "distilbert-base-cased",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"
   "model_type": "distilbert",
   "n_heads": 12,
   "n_layers": 6,
+  "output_past": true,
   "pad_token_id": 0,
   "problem_type": "single_label_classification",
   "qa_dropout": 0.1,
   "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.36.2",
+  "vocab_size": 28996
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96bdef24d48f0c6f87c2a920e6ef34f13bfbb966f125647b30aa530e52b28c6c
-size 267838720

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6553f38536959c3669985b4cd855c25f1af78594021109a39c0e832cb059210
+size 263150840

runs/Jan19_11-17-46_eab14bd28cfb/events.out.tfevents.1705663074.eab14bd28cfb.651.7 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fff0f34ac4a18bcfa2bd2b4bc08f631978b50456f1a14f03fb825cef7e346b1c
+size 34967

tokenizer_config.json CHANGED Viewed

@@ -44,7 +44,7 @@
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_basic_tokenize": true,
-  "do_lower_case": true,
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "never_split": null,

   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_basic_tokenize": true,
+  "do_lower_case": false,
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "never_split": null,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5b3123904567851b446ebdffb8f17633061b523edb42620c9d8a852269af133
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:acc62f5a95f15dd638e7c3c75d3dfc7e5279ea8c3aa1d9b0619c35dce2266f30
 size 4664

vocab.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff