jonatasgrosman
/

wav2vec2-large-xlsr-53-portuguese

Automatic Speech Recognition

hf-asr-leaderboard

mozilla-foundation/common_voice_6_0

robust-speech-event

xlsr-fine-tuning-week

Inference Endpoints

Model card | Files and versions | Community

jonatasgrosman committed on Apr 1, 2021

Commit

f1f3121

•

1 Parent(s): 70a29f6

model improvement

Files changed (2) hide show

README.md +17 -6
pytorch_model.bin +1 -1

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ datasets:
 - common_voice
 metrics:
 - wer
 tags:
 - audio
 - automatic-speech-recognition
@@ -23,7 +24,11 @@ model-index:
     metrics:
        - name: Test WER
          type: wer
-         value: 13.48
 ---
 # Wav2Vec2-Large-XLSR-53-portuguese
@@ -86,12 +91,13 @@ LANG_ID = "pt"
 MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-portuguese"
 DEVICE = "cuda"
-CHARS_TO_IGNORE = [",", "?", ".", "!", "-", ";", ":", '""', "%", "'", '"', "�", "ʿ", "·", "჻", "¿", "¡", "~", "՞",
                    "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
-                   "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ"]
 test_dataset = load_dataset("common_voice", LANG_ID, split="test")
-wer = load_metric("wer")
 chars_to_ignore_regex = f"[{re.escape(''.join(CHARS_TO_IGNORE))}]"
@@ -123,7 +129,12 @@ def evaluate(batch):
 result = test_dataset.map(evaluate, batched=True, batch_size=32)
-print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
 ```
-**Test Result**: 13.48%

 - common_voice
 metrics:
 - wer
+- cer
 tags:
 - audio
 - automatic-speech-recognition
     metrics:
        - name: Test WER
          type: wer
+         value: 12.18
+    metrics:
+       - name: Test CER
+         type: cer
+         value: 11.01
 ---
 # Wav2Vec2-Large-XLSR-53-portuguese
 MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-portuguese"
 DEVICE = "cuda"
+CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", "-", ";", ":", '""', "%", "'", '"', "�", "ʿ", "·", "჻", "~", "՞",
                    "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
+                   "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
 test_dataset = load_dataset("common_voice", LANG_ID, split="test")
+wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
+cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py
 chars_to_ignore_regex = f"[{re.escape(''.join(CHARS_TO_IGNORE))}]"
 result = test_dataset.map(evaluate, batched=True, batch_size=32)
+print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=8000)))
+print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=8000)))
 ```
+**Test Result**:
+WER: 12.18%
+CER: 11.01%

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e26bfd51de967cc8e0b5eb55916e2d36d3b3a5fb41699b150cb25a15cd801374
 size 1262126551

 version https://git-lfs.github.com/spec/v1
+oid sha256:8751d9a798871897adeede1d2187f8c4ae79ccb1ea294173471232c5dd9287b0
 size 1262126551