Bajiyo committed
Commit a053dcb
1 Parent(s): 8134751

Upload tokenizer

Files changed (4)
  1. README.md +7 -7
  2. added_tokens.json +2 -2
  3. tokenizer_config.json +4 -5
  4. vocab.json +97 -78
README.md CHANGED
@@ -1,19 +1,19 @@
 ---
-library_name: transformers
-license: mit
 base_model: facebook/w2v-bert-2.0
-tags:
-- generated_from_trainer
 datasets:
 - common_voice_17_0
+library_name: transformers
+license: mit
 metrics:
 - wer
+tags:
+- generated_from_trainer
 model-index:
 - name: w2v-bert-2_6_datasets
   results:
   - task:
-      name: Automatic Speech Recognition
       type: automatic-speech-recognition
+      name: Automatic Speech Recognition
     dataset:
       name: common_voice_17_0
       type: common_voice_17_0
@@ -21,9 +21,9 @@ model-index:
       split: validation
       args: ml
     metrics:
-    - name: Wer
-      type: wer
+    - type: wer
       value: 0.43922053819981444
+      name: Wer
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
 {
-  "</s>": 80,
-  "<s>": 79
+  "</s>": 99,
+  "<s>": 98
 }
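The bumped IDs follow from the enlarged vocabulary: `<s>` and `</s>` are appended after the base vocab, so their IDs equal the vocabulary size and the size plus one. A minimal consistency check, assuming the vocab.json and added_tokens.json from this commit are saved in the working directory:

```python
import json

# Assumes vocab.json and added_tokens.json from this commit
# are in the current directory.
with open("vocab.json", encoding="utf-8") as f:
    vocab = json.load(f)   # 98 entries after this commit (IDs 0-97)
with open("added_tokens.json", encoding="utf-8") as f:
    added = json.load(f)

# Special tokens sit after the base vocabulary, which is why
# <s>/</s> moved from 79/80 to 98/99 when the vocab grew.
assert added["<s>"] == len(vocab)        # 98
assert added["</s>"] == len(vocab) + 1   # 99
```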
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "added_tokens_decoder": {
-    "77": {
+    "96": {
       "content": "[UNK]",
       "lstrip": true,
       "normalized": false,
@@ -8,7 +8,7 @@
       "single_word": false,
       "special": false
     },
-    "78": {
+    "97": {
       "content": "[PAD]",
       "lstrip": true,
       "normalized": false,
@@ -16,7 +16,7 @@
       "single_word": false,
       "special": false
     },
-    "79": {
+    "98": {
       "content": "<s>",
       "lstrip": false,
       "normalized": false,
@@ -24,7 +24,7 @@
       "single_word": false,
       "special": true
     },
-    "80": {
+    "99": {
       "content": "</s>",
       "lstrip": false,
       "normalized": false,
@@ -39,7 +39,6 @@
   "eos_token": "</s>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
-  "processor_class": "Wav2Vec2BertProcessor",
   "replace_word_delimiter_char": " ",
   "target_lang": null,
   "tokenizer_class": "Wav2Vec2CTCTokenizer",
vocab.json CHANGED
@@ -1,81 +1,100 @@
 {
-  "[PAD]": 78,
-  "[UNK]": 77,
-  "_": 1,
+  "$": 1,
+  "&": 2,
+  "+": 3,
+  "/": 4,
+  "0": 5,
+  "1": 6,
+  "2": 7,
+  "3": 8,
+  "4": 9,
+  "5": 10,
+  "6": 11,
+  "7": 12,
+  "8": 13,
+  "9": 14,
+  "[": 15,
+  "[PAD]": 97,
+  "[UNK]": 96,
+  "]": 16,
+  "_": 17,
   "|": 0,
-  [Malayalam character entries for IDs 2-76; only "ി": 54 and "ൿ": 73 rendered in this view]
+  "~": 18,
+  "°": 19,
+  [Malayalam character entries for IDs 20-92; only "ി": 72 and "ൿ": 91 rendered in this view]
+  "‍": 93,  (zero-width joiner)
+  "–": 94,
+  "’": 95
 }
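For completeness, the uploaded files are enough to rebuild the CTC tokenizer locally. A minimal sketch, assuming vocab.json from this commit is in the working directory; the token settings mirror tokenizer_config.json above:

```python
from transformers import Wav2Vec2CTCTokenizer

# Assumes vocab.json from this commit is in the current directory.
tokenizer = Wav2Vec2CTCTokenizer(
    "vocab.json",
    unk_token="[UNK]",
    pad_token="[PAD]",
    word_delimiter_token="|",  # "|" maps to ID 0 and stands in for spaces
)

# Digits were added to the vocabulary in this commit ("1": 6, "2": 7, "3": 8).
ids = tokenizer("123").input_ids
print(ids, tokenizer.decode(ids))
```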