jonatasgrosman
committed on
Commit
•
2c5152a
1
Parent(s):
0c578dd
add LM
Browse files
- alphabet.json +1 -0
- config.json +0 -1
- language_model/attrs.json +1 -0
- language_model/lm.binary +3 -0
- language_model/unigrams.txt +3 -0
- preprocessor_config.json +2 -1
- vocab.json +1 -1
alphabet.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"labels": ["", "<s>", "</s>", "⁇", " ", "'", "-", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "à", "á", "â", "ä", "ç", "è", "é", "ê", "ë", "í", "î", "ï", "ñ", "ó", "ô", "ö", "ù", "ú", "û", "ü", "ć", "č", "ō", "œ", "š", "ș"], "is_bpe": false}
|
config.json
CHANGED
@@ -44,7 +44,6 @@
|
|
44 |
"feat_extract_norm": "layer",
|
45 |
"feat_proj_dropout": 0.05,
|
46 |
"final_dropout": 0.0,
|
47 |
-
"gradient_checkpointing": true,
|
48 |
"hidden_act": "gelu",
|
49 |
"hidden_dropout": 0.05,
|
50 |
"hidden_size": 1024,
|
|
|
44 |
"feat_extract_norm": "layer",
|
45 |
"feat_proj_dropout": 0.05,
|
46 |
"final_dropout": 0.0,
|
|
|
47 |
"hidden_act": "gelu",
|
48 |
"hidden_dropout": 0.05,
|
49 |
"hidden_size": 1024,
|
language_model/attrs.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
|
language_model/lm.binary
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bf65ae758f1233037f33763a7d1490e048d9a8e9af7e61d56d010228b3a8eaf
|
3 |
+
size 1152710843
|
language_model/unigrams.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d57265b246c2e4e3fb59061b91c691fe8f2796069cc9277f5f37e7ee485b1cc
|
3 |
+
size 9812573
|
preprocessor_config.json
CHANGED
@@ -5,5 +5,6 @@
|
|
5 |
"padding_side": "right",
|
6 |
"padding_value": 0.0,
|
7 |
"return_attention_mask": true,
|
8 |
-
"sampling_rate": 16000
|
|
|
9 |
}
|
|
|
5 |
"padding_side": "right",
|
6 |
"padding_value": 0.0,
|
7 |
"return_attention_mask": true,
|
8 |
+
"sampling_rate": 16000,
|
9 |
+
"processor_class": "Wav2Vec2ProcessorWithLM"
|
10 |
}
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "'": 5, "-": 6, "
|
|
|
1 |
+
{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "'": 5, "-": 6, "a": 7, "b": 8, "c": 9, "d": 10, "e": 11, "f": 12, "g": 13, "h": 14, "i": 15, "j": 16, "k": 17, "l": 18, "m": 19, "n": 20, "o": 21, "p": 22, "q": 23, "r": 24, "s": 25, "t": 26, "u": 27, "v": 28, "w": 29, "x": 30, "y": 31, "z": 32, "à": 33, "á": 34, "â": 35, "ä": 36, "ç": 37, "è": 38, "é": 39, "ê": 40, "ë": 41, "í": 42, "î": 43, "ï": 44, "ñ": 45, "ó": 46, "ô": 47, "ö": 48, "ù": 49, "ú": 50, "û": 51, "ü": 52, "ć": 53, "č": 54, "ō": 55, "œ": 56, "š": 57, "ș": 58}
|