fix some typos
Browse files
README.md
CHANGED
@@ -41,7 +41,7 @@ Please install:
|
|
41 |
|
42 |
We evaluated the model against different Arabic-STT Wav2Vec models.
|
43 |
|
44 |
-
| | Model | [using transliteration](https://pypi.org/project/lang-trans/) | WER |
|
45 |
|---:|:--------------------------------------|:---------------------|---------:|---------:|
|
46 |
| 1 | bakrianoo/sinai-voice-ar-stt | True | 0.238001 |Common Voice 6|
|
47 |
| 2 | elgeish/wav2vec2-large-xlsr-53-arabic | True | 0.266527 |Common Voice 6 + Arabic Speech Corpus|
|
@@ -80,8 +80,8 @@ resamplers = { # all three sampling rates exist in test split
|
|
80 |
transformation = jiwer.Compose([
|
81 |
# normalize some diacritics, remove punctuation, and replace Persian letters with Arabic ones
|
82 |
jiwer.SubstituteRegexes({
|
83 |
-
r'[auiFNKo
|
84 |
-
r"[
|
85 |
# default transformation below
|
86 |
jiwer.RemoveMultipleSpaces(),
|
87 |
jiwer.Strip(),
|
@@ -274,8 +274,8 @@ test_split = test_split.map(predict, batched=True, batch_size=16, remove_columns
|
|
274 |
transformation = jiwer.Compose([
|
275 |
# normalize some diacritics, remove punctuation, and replace Persian letters with Arabic ones
|
276 |
jiwer.SubstituteRegexes({
|
277 |
-
r'[auiFNKo
|
278 |
-
r"[
|
279 |
# default transformation below
|
280 |
jiwer.RemoveMultipleSpaces(),
|
281 |
jiwer.Strip(),
|
|
|
41 |
|
42 |
We evaluated the model against different Arabic-STT Wav2Vec models.
|
43 |
|
44 |
+
| | Model | [using transliteration](https://pypi.org/project/lang-trans/) | WER | Training Datasets |
|
45 |
|---:|:--------------------------------------|:---------------------|---------:|---------:|
|
46 |
| 1 | bakrianoo/sinai-voice-ar-stt | True | 0.238001 |Common Voice 6|
|
47 |
| 2 | elgeish/wav2vec2-large-xlsr-53-arabic | True | 0.266527 |Common Voice 6 + Arabic Speech Corpus|
|
|
|
80 |
transformation = jiwer.Compose([
|
81 |
# normalize some diacritics, remove punctuation, and replace Persian letters with Arabic ones
|
82 |
jiwer.SubstituteRegexes({
|
83 |
+
r'[auiFNKo\~_،؟»\?;:\-,\.؛«!"]': "", "\u06D6": "",
|
84 |
+
r"[\\\\\\\\\\\\\\\\|\\\\\\\\\\\\\\\\{]": "A", "p": "h", "ک": "k", "ی": "y"}),
|
85 |
# default transformation below
|
86 |
jiwer.RemoveMultipleSpaces(),
|
87 |
jiwer.Strip(),
|
|
|
274 |
transformation = jiwer.Compose([
|
275 |
# normalize some diacritics, remove punctuation, and replace Persian letters with Arabic ones
|
276 |
jiwer.SubstituteRegexes({
|
277 |
+
r'[auiFNKo\~_،؟»\?;:\-,\.؛«!"]': "", "\u06D6": "",
|
278 |
+
r"[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\|\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\{]": "A", "p": "h", "ک": "k", "ی": "y"}),
|
279 |
# default transformation below
|
280 |
jiwer.RemoveMultipleSpaces(),
|
281 |
jiwer.Strip(),
|