Update WER score
Browse files
README.md
CHANGED
@@ -23,7 +23,7 @@ model-index:
|
|
23 |
metrics:
|
24 |
- name: Test WER
|
25 |
type: wer
|
26 |
-
value:
|
27 |
---
|
28 |
|
29 |
# Sinai Voice Arabic Speech Recognition Model
|
@@ -137,6 +137,7 @@ def predict(batch):
|
|
137 |
batch["predicted"] = processor.batch_decode(predicted)
|
138 |
return batch
|
139 |
test_split = test_split.map(predict, batched=True, batch_size=16, remove_columns=["speech"])
|
|
|
140 |
transformation = jiwer.Compose([
|
141 |
# normalize some diacritics, remove punctuation, and replace Persian letters with Arabic ones
|
142 |
jiwer.SubstituteRegexes({
|
@@ -148,12 +149,24 @@ transformation = jiwer.Compose([
|
|
148 |
jiwer.SentencesToListOfWords(),
|
149 |
jiwer.RemoveEmptyStrings(),
|
150 |
])
|
|
|
151 |
metrics = jiwer.compute_measures(
|
152 |
truth=[buckwalter.trans(s) for s in test_split["sentence"]], # Buckwalter transliteration
|
153 |
-
hypothesis=test_split["predicted"],
|
154 |
truth_transform=transformation,
|
155 |
hypothesis_transform=transformation,
|
156 |
)
|
157 |
print(f"WER: {metrics['wer']:.2%}")
|
158 |
```
|
159 |
-
**Test Result**:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
metrics:
|
24 |
- name: Test WER
|
25 |
type: wer
|
26 |
+
value: 23.70
|
27 |
---
|
28 |
|
29 |
# Sinai Voice Arabic Speech Recognition Model
|
|
|
137 |
batch["predicted"] = processor.batch_decode(predicted)
|
138 |
return batch
|
139 |
test_split = test_split.map(predict, batched=True, batch_size=16, remove_columns=["speech"])
|
140 |
+
|
141 |
transformation = jiwer.Compose([
|
142 |
# normalize some diacritics, remove punctuation, and replace Persian letters with Arabic ones
|
143 |
jiwer.SubstituteRegexes({
|
|
|
149 |
jiwer.SentencesToListOfWords(),
|
150 |
jiwer.RemoveEmptyStrings(),
|
151 |
])
|
152 |
+
|
153 |
metrics = jiwer.compute_measures(
|
154 |
truth=[buckwalter.trans(s) for s in test_split["sentence"]], # Buckwalter transliteration
|
155 |
+
hypothesis=[buckwalter.trans(s) for s in test_split["predicted"]],
|
156 |
truth_transform=transformation,
|
157 |
hypothesis_transform=transformation,
|
158 |
)
|
159 |
print(f"WER: {metrics['wer']:.2%}")
|
160 |
```
|
161 |
+
**Test Result**: 23.70%
|
162 |
+
|
163 |
+
|
164 |
+
## Other Arabic Voice Recognition Models
|
165 |
+
|
166 |
+
الكلمات لا تكفي لشكر أولئك الذين يؤمنون أن هنالك أمل, و يسعون من أجله
|
167 |
+
|
168 |
+
- [elgeish/wav2vec2-large-xlsr-53-arabic](https://huggingface.co/elgeish/wav2vec2-large-xlsr-53-arabic)
|
169 |
+
- [othrif/wav2vec2-large-xlsr-arabic](https://huggingface.co/othrif/wav2vec2-large-xlsr-arabic)
|
170 |
+
- [anas/wav2vec2-large-xlsr-arabic](https://huggingface.co/anas/wav2vec2-large-xlsr-arabic)
|
171 |
+
|
172 |
+
|