Sara Price committed on
Commit
1658fe5
1 Parent(s): 0bd7a5c

Training in progress, epoch 1

Browse files
added_tokens.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</headline>": 32008,
3
+ "</scratchpad>": 32006,
4
+ "<</SYS>>": 32004,
5
+ "<<SYS>>": 32003,
6
+ "<headline>": 32007,
7
+ "<pad>": 32000,
8
+ "<scratchpad>": 32005,
9
+ "[/INST]": 32002,
10
+ "[INST]": 32001
11
+ }
special_tokens_map.json CHANGED
@@ -1,4 +1,62 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
@@ -14,7 +72,7 @@
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<unk>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "[INST]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": true
9
+ },
10
+ {
11
+ "content": "[/INST]",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": true
16
+ },
17
+ {
18
+ "content": "<<SYS>>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": true
23
+ },
24
+ {
25
+ "content": "<</SYS>>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": true
30
+ },
31
+ {
32
+ "content": "<scratchpad>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": true
37
+ },
38
+ {
39
+ "content": "</scratchpad>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": true
44
+ },
45
+ {
46
+ "content": "<headline>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": true
51
+ },
52
+ {
53
+ "content": "</headline>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": true
58
+ }
59
+ ],
60
  "bos_token": {
61
  "content": "<s>",
62
  "lstrip": false,
 
72
  "single_word": false
73
  },
74
  "pad_token": {
75
+ "content": "<pad>",
76
  "lstrip": false,
77
  "normalized": false,
78
  "rstrip": false,
tokenizer.json CHANGED
@@ -29,6 +29,87 @@
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
@@ -134,7 +215,6 @@
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
137
- "ignore_merges": false,
138
  "vocab": {
139
  "<unk>": 0,
140
  "<s>": 1,
 
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
32
+ },
33
+ {
34
+ "id": 32000,
35
+ "content": "<pad>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 32001,
44
+ "content": "[INST]",
45
+ "single_word": true,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ {
52
+ "id": 32002,
53
+ "content": "[/INST]",
54
+ "single_word": true,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 32003,
62
+ "content": "<<SYS>>",
63
+ "single_word": true,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": false,
67
+ "special": true
68
+ },
69
+ {
70
+ "id": 32004,
71
+ "content": "<</SYS>>",
72
+ "single_word": true,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": false,
76
+ "special": true
77
+ },
78
+ {
79
+ "id": 32005,
80
+ "content": "<scratchpad>",
81
+ "single_word": true,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
+ },
87
+ {
88
+ "id": 32006,
89
+ "content": "</scratchpad>",
90
+ "single_word": true,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
+ },
96
+ {
97
+ "id": 32007,
98
+ "content": "<headline>",
99
+ "single_word": true,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
+ {
106
+ "id": 32008,
107
+ "content": "</headline>",
108
+ "single_word": true,
109
+ "lstrip": false,
110
+ "rstrip": false,
111
+ "normalized": false,
112
+ "special": true
113
  }
114
  ],
115
  "normalizer": {
 
215
  "end_of_word_suffix": null,
216
  "fuse_unk": true,
217
  "byte_fallback": true,
 
218
  "vocab": {
219
  "<unk>": 0,
220
  "<s>": 1,
tokenizer_config.json CHANGED
@@ -26,14 +26,97 @@
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
  },
 
 
 
 
 
 
 
 
 
 
31
  "bos_token": "<s>",
32
  "clean_up_tokenization_spaces": false,
33
  "eos_token": "</s>",
34
  "legacy": false,
35
  "model_max_length": 1000000000000000019884624838656,
36
- "pad_token": "<unk>",
 
37
  "sp_model_kwargs": {},
38
  "spaces_between_special_tokens": false,
39
  "tokenizer_class": "LlamaTokenizer",
 
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
29
+ },
30
+ "32000": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "32001": {
39
+ "content": "[INST]",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": true,
44
+ "special": true
45
+ },
46
+ "32002": {
47
+ "content": "[/INST]",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": true,
52
+ "special": true
53
+ },
54
+ "32003": {
55
+ "content": "<<SYS>>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": true,
60
+ "special": true
61
+ },
62
+ "32004": {
63
+ "content": "<</SYS>>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": false,
67
+ "single_word": true,
68
+ "special": true
69
+ },
70
+ "32005": {
71
+ "content": "<scratchpad>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": true,
76
+ "special": true
77
+ },
78
+ "32006": {
79
+ "content": "</scratchpad>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": false,
83
+ "single_word": true,
84
+ "special": true
85
+ },
86
+ "32007": {
87
+ "content": "<headline>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": false,
91
+ "single_word": true,
92
+ "special": true
93
+ },
94
+ "32008": {
95
+ "content": "</headline>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": true,
100
+ "special": true
101
  }
102
  },
103
+ "additional_special_tokens": [
104
+ "[INST]",
105
+ "[/INST]",
106
+ "<<SYS>>",
107
+ "<</SYS>>",
108
+ "<scratchpad>",
109
+ "</scratchpad>",
110
+ "<headline>",
111
+ "</headline>"
112
+ ],
113
  "bos_token": "<s>",
114
  "clean_up_tokenization_spaces": false,
115
  "eos_token": "</s>",
116
  "legacy": false,
117
  "model_max_length": 1000000000000000019884624838656,
118
+ "pad_token": "<pad>",
119
+ "padding_side": "left",
120
  "sp_model_kwargs": {},
121
  "spaces_between_special_tokens": false,
122
  "tokenizer_class": "LlamaTokenizer",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e42a94e750e88e0f9c4818312e700d3f390694982183b060e26daa9a806bc4b
3
- size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30b5b30165b9cbbf9b81d2842f43b19051081a1f93691886d2806a8de8d472a
3
+ size 5112