PeppoCola committed
Commit df49dc8
1 Parent(s): 3ca9469

update model and tokenizer
config.json CHANGED
@@ -23,6 +23,7 @@
     "LABEL_2": 2
   },
   "layer_norm_eps": 1e-05,
+  "max_length": 128,
   "max_position_embeddings": 514,
   "model_type": "roberta",
   "num_attention_heads": 12,
@@ -31,7 +32,7 @@
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
-  "transformers_version": "4.16.2",
+  "transformers_version": "4.27.1",
   "type_vocab_size": 1,
   "use_cache": true,
   "vocab_size": 50265
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30a6eb25f7563869988c8f35106ecb996228d6fc3a0ebfc40bba1603854af66a
+size 498666677
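The new `pytorch_model.bin` is a Git LFS pointer; the actual ~499 MB float32 weights are fetched by LFS. A hedged sketch of loading the checkpoint, given the config's `roberta` architecture and three-way single-label head (`LABEL_0`–`LABEL_2`); the repo id is again a placeholder:

```python
# Sketch, not canonical usage: load the updated 3-label RoBERTa classifier.
# "PeppoCola/<model>" is a hypothetical placeholder for the real repo id.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model = AutoModelForSequenceClassification.from_pretrained("PeppoCola/<model>")
tokenizer = AutoTokenizer.from_pretrained("PeppoCola/<model>")

inputs = tokenizer("Example input text", return_tensors="pt", truncation=True)
with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[logits.argmax(-1).item()])  # LABEL_0 / LABEL_1 / LABEL_2
```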
special_tokens_map.json CHANGED
@@ -1 +1,15 @@
-{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}
tokenizer.json CHANGED
@@ -6,59 +6,69 @@
     "strategy": "LongestFirst",
     "stride": 0
   },
-  "padding": null,
+  "padding": {
+    "strategy": {
+      "Fixed": 128
+    },
+    "direction": "Right",
+    "pad_to_multiple_of": null,
+    "pad_id": 1,
+    "pad_type_id": 0,
+    "pad_token": "<pad>"
+  },
   "added_tokens": [
     {
       "id": 0,
-      "special": true,
       "content": "<s>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
+      "normalized": false,
+      "special": true
     },
     {
       "id": 1,
-      "special": true,
       "content": "<pad>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
+      "normalized": false,
+      "special": true
     },
     {
       "id": 2,
-      "special": true,
       "content": "</s>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
+      "normalized": false,
+      "special": true
     },
     {
       "id": 3,
-      "special": true,
       "content": "<unk>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
+      "normalized": false,
+      "special": true
    },
     {
       "id": 50264,
-      "special": true,
       "content": "<mask>",
       "single_word": false,
       "lstrip": true,
       "rstrip": false,
-      "normalized": false
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
   "pre_tokenizer": {
     "type": "ByteLevel",
     "add_prefix_space": false,
-    "trim_offsets": true
+    "trim_offsets": true,
+    "use_regex": true
   },
   "post_processor": {
     "type": "RobertaProcessing",
@@ -76,7 +86,8 @@
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,
-    "trim_offsets": true
+    "trim_offsets": true,
+    "use_regex": true
   },
   "model": {
     "type": "BPE",
tokenizer_config.json CHANGED
@@ -1 +1,15 @@
-{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "roberta-base", "tokenizer_class": "RobertaTokenizer"}
+{
+  "add_prefix_space": false,
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 128,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "special_tokens_map_file": null,
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}
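On the `transformers` side, the matching change is `model_max_length` dropping from 512 to 128 (the `name_or_path` entry is also dropped), so truncation and `padding="max_length"` now target 128 by default. A small sketch with the same hypothetical repo id:

```python
# Sketch: model_max_length now caps sequences at 128 instead of 512.
# "PeppoCola/<model>" is a hypothetical placeholder for the real repo id.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("PeppoCola/<model>")
print(tokenizer.model_max_length)  # 128

enc = tokenizer("short input", padding="max_length", truncation=True)
print(len(enc["input_ids"]))       # 128
```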
training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfde9d2264282d3eb39110a80986e198835368bd2728758efb9a426f6861585a
+size 3567