add tokenizer
Browse files
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
checkpoint-*/
|
runs/Jun02_17-58-25_7d45805ab643/1654192820.2816024/events.out.tfevents.1654192820.7d45805ab643.80.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:186fe3eaa4b60c6c8b7d2fb4996cdc5c4c7a47f7b41ee4e7aa3502aa1f800164
|
3 |
+
size 4597
|
runs/Jun02_17-58-25_7d45805ab643/events.out.tfevents.1654192820.7d45805ab643.80.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:693b49f6da379a039316b057c3cbbc2a653f19de32a4f51a136093c4a1ab78c6
|
3 |
+
size 4136
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "ء": 5, "آ": 6, "ئ": 7, "ا": 8, "ب": 9, "ت": 10, "ث": 11, "ج": 12, "ح": 13, "خ": 14, "د": 15, "ذ": 16, "ر": 17, "ز": 18, "س": 19, "ش": 20, "ص": 21, "ض": 22, "ط": 23, "ظ": 24, "ع": 25, "غ": 26, "ف": 27, "ق": 28, "ل": 29, "م": 30, "ن": 31, "ه": 32, "و": 33, "پ": 34, "چ": 35, "ژ": 36, "ک": 37, "گ": 38, "ی": 39}
|
|
|
1 |
+
{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "ء": 5, "آ": 6, "ئ": 7, "ا": 8, "ب": 9, "ت": 10, "ث": 11, "ج": 12, "ح": 13, "خ": 14, "د": 15, "ذ": 16, "ر": 17, "ز": 18, "س": 19, "ش": 20, "ص": 21, "ض": 22, "ط": 23, "ظ": 24, "ع": 25, "غ": 26, "ف": 27, "ق": 28, "ل": 29, "م": 30, "ن": 31, "ه": 32, "و": 33, "پ": 34, "چ": 35, "ژ": 36, "ک": 37, "گ": 38, "ی": 39, "[UNK]": 40, "[PAD]": 41}
|