nrshoudi committed on
Commit
8bcc5ab
1 Parent(s): cc245c2

Upload tokenizer

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. added_tokens.json +2 -2
  3. tokenizer_config.json +4 -4
  4. vocab.json +123 -116
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  license: apache-2.0
3
- base_model: facebook/hubert-large-ll60k
4
  tags:
5
  - generated_from_trainer
 
6
  metrics:
7
  - wer
8
  model-index:
 
1
  ---
2
  license: apache-2.0
 
3
  tags:
4
  - generated_from_trainer
5
+ base_model: facebook/hubert-large-ll60k
6
  metrics:
7
  - wer
8
  model-index:
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 119,
3
- "<s>": 118
4
  }
 
1
  {
2
+ "</s>": 126,
3
+ "<s>": 125
4
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "116": {
4
  "content": "[UNK]",
5
  "lstrip": false,
6
  "normalized": true,
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": true
10
  },
11
- "117": {
12
  "content": "[PAD]",
13
  "lstrip": false,
14
  "normalized": true,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": true
18
  },
19
- "118": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": true,
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "119": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": true,
 
1
  {
2
  "added_tokens_decoder": {
3
+ "123": {
4
  "content": "[UNK]",
5
  "lstrip": false,
6
  "normalized": true,
 
8
  "single_word": false,
9
  "special": true
10
  },
11
+ "124": {
12
  "content": "[PAD]",
13
  "lstrip": false,
14
  "normalized": true,
 
16
  "single_word": false,
17
  "special": true
18
  },
19
+ "125": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": true,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "126": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": true,
vocab.json CHANGED
@@ -1,120 +1,127 @@
1
  {
2
  " ": 0,
3
  "<DEL>": 1,
4
- "<unk>": 2,
5
- "AA": 3,
6
- "AA0": 4,
7
- "AA1": 5,
8
- "AA2": 6,
9
- "AE": 7,
10
- "AE*": 8,
11
- "AE0": 9,
12
- "AE1": 10,
13
- "AE2": 11,
14
- "AH": 12,
15
- "AH*": 13,
16
- "AH0": 14,
17
- "AH1": 15,
18
- "AH2": 16,
19
- "AO": 17,
20
- "AO*": 18,
21
- "AO0": 19,
22
- "AO1": 20,
23
- "AO2": 21,
24
- "AR": 22,
25
- "AW": 23,
26
- "AW0": 24,
27
- "AW1": 25,
28
- "AW2": 26,
29
- "AY": 27,
30
- "AY*": 28,
31
- "AY0": 29,
32
- "AY1": 30,
33
- "AY2": 31,
34
- "B": 32,
35
- "B*": 33,
36
- "CH": 34,
37
- "CH*": 35,
38
- "D": 36,
39
- "D*": 37,
40
- "DH": 38,
41
- "DR": 39,
42
- "DZ": 40,
43
- "EH": 41,
44
- "EH*": 42,
45
- "EH0": 43,
46
- "EH1": 44,
47
- "EH2": 45,
48
- "ER": 46,
49
- "ER*": 47,
50
- "ER0": 48,
51
- "ER1": 49,
52
- "EY": 50,
53
- "EY*": 51,
54
- "EY0": 52,
55
- "EY1": 53,
56
- "EY2": 54,
57
- "F": 55,
58
- "F*": 56,
59
- "G": 57,
60
- "G*": 58,
61
- "HH": 59,
62
- "IH": 60,
63
- "IH*": 61,
64
- "IH0": 62,
65
- "IH1": 63,
66
- "IH2": 64,
67
- "IR": 65,
68
- "IY": 66,
69
- "IY*": 67,
70
- "IY0": 68,
71
- "IY1": 69,
72
- "JH": 70,
73
- "JH*": 71,
74
- "K": 72,
75
- "K*": 73,
76
- "L": 74,
77
- "L*": 75,
78
- "M": 76,
79
- "M*": 77,
80
- "N": 78,
81
- "N*": 79,
82
- "NG": 80,
83
- "NG*": 81,
84
- "OW": 82,
85
- "OW*": 83,
86
- "OW0": 84,
87
- "OW1": 85,
88
- "OY0": 86,
89
- "OY1": 87,
90
- "P": 88,
91
- "P*": 89,
92
- "R": 90,
93
- "R*": 91,
94
- "S": 92,
95
- "S*": 93,
96
- "SH": 94,
97
- "T": 95,
98
- "T*": 96,
99
- "TH": 97,
100
- "TH*": 98,
101
- "TR": 99,
102
- "TS": 100,
103
- "UH": 101,
104
- "UH*": 102,
105
- "UH0": 103,
106
- "UH1": 104,
107
- "UW": 105,
108
- "UW0": 106,
109
- "UW1": 107,
110
- "UW2": 108,
111
- "V": 109,
112
- "V*": 110,
113
- "W": 111,
114
- "W*": 112,
115
- "Y": 113,
116
- "Z": 114,
117
- "ZH": 115,
118
- "[PAD]": 117,
119
- "[UNK]": 116
 
 
 
 
 
 
 
120
  }
 
1
  {
2
  " ": 0,
3
  "<DEL>": 1,
4
+ "<DEL>0": 2,
5
+ "<DEL>1": 3,
6
+ "<unk>": 4,
7
+ "<unk>0": 5,
8
+ "AA": 6,
9
+ "AA0": 7,
10
+ "AA1": 8,
11
+ "AA2": 9,
12
+ "AE": 10,
13
+ "AE*": 11,
14
+ "AE0": 12,
15
+ "AE1": 13,
16
+ "AE2": 14,
17
+ "AH": 15,
18
+ "AH*": 16,
19
+ "AH0": 17,
20
+ "AH1": 18,
21
+ "AH2": 19,
22
+ "AO": 20,
23
+ "AO*": 21,
24
+ "AO0": 22,
25
+ "AO1": 23,
26
+ "AO2": 24,
27
+ "AR": 25,
28
+ "AW": 26,
29
+ "AW0": 27,
30
+ "AW1": 28,
31
+ "AW2": 29,
32
+ "AY": 30,
33
+ "AY*": 31,
34
+ "AY0": 32,
35
+ "AY1": 33,
36
+ "AY2": 34,
37
+ "B": 35,
38
+ "B*": 36,
39
+ "CH": 37,
40
+ "CH*": 38,
41
+ "D": 39,
42
+ "D*": 40,
43
+ "DH": 41,
44
+ "DR": 42,
45
+ "DZ": 43,
46
+ "EH": 44,
47
+ "EH*": 45,
48
+ "EH0": 46,
49
+ "EH1": 47,
50
+ "EH2": 48,
51
+ "ER": 49,
52
+ "ER*": 50,
53
+ "ER0": 51,
54
+ "ER1": 52,
55
+ "EY": 53,
56
+ "EY*": 54,
57
+ "EY0": 55,
58
+ "EY1": 56,
59
+ "EY2": 57,
60
+ "F": 58,
61
+ "F*": 59,
62
+ "G": 60,
63
+ "G*": 61,
64
+ "HH": 62,
65
+ "I<DEL>0": 63,
66
+ "IH": 64,
67
+ "IH*": 65,
68
+ "IH0": 66,
69
+ "IH1": 67,
70
+ "IH2": 68,
71
+ "IR": 69,
72
+ "IR0": 70,
73
+ "IY": 71,
74
+ "IY*": 72,
75
+ "IY0": 73,
76
+ "IY1": 74,
77
+ "JH": 75,
78
+ "JH*": 76,
79
+ "K": 77,
80
+ "K*": 78,
81
+ "L": 79,
82
+ "L*": 80,
83
+ "M": 81,
84
+ "M*": 82,
85
+ "N": 83,
86
+ "N*": 84,
87
+ "NG": 85,
88
+ "NG*": 86,
89
+ "NK": 87,
90
+ "OW": 88,
91
+ "OW*": 89,
92
+ "OW0": 90,
93
+ "OW1": 91,
94
+ "OY0": 92,
95
+ "OY1": 93,
96
+ "P": 94,
97
+ "P*": 95,
98
+ "R": 96,
99
+ "R*": 97,
100
+ "S": 98,
101
+ "S*": 99,
102
+ "SH": 100,
103
+ "T": 101,
104
+ "T*": 102,
105
+ "TH": 103,
106
+ "TH*": 104,
107
+ "THH": 105,
108
+ "TR": 106,
109
+ "TS": 107,
110
+ "UH": 108,
111
+ "UH*": 109,
112
+ "UH0": 110,
113
+ "UH1": 111,
114
+ "UW": 112,
115
+ "UW0": 113,
116
+ "UW1": 114,
117
+ "UW2": 115,
118
+ "V": 116,
119
+ "V*": 117,
120
+ "W": 118,
121
+ "W*": 119,
122
+ "Y": 120,
123
+ "Z": 121,
124
+ "ZH": 122,
125
+ "[PAD]": 124,
126
+ "[UNK]": 123
127
  }