ivangtorre
committed on
Commit
•
93f4739
1
Parent(s):
db7615b
Update README.md
Browse files
README.md
CHANGED
@@ -77,7 +77,7 @@ from datasets import load_dataset
|
|
77 |
import soundfile as sf
|
78 |
|
79 |
americasnlp = load_dataset("ivangtorre/second_americas_nlp_2022", "bribri", split="dev")
|
80 |
-
|
81 |
|
82 |
model = Wav2Vec2ForCTC.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-bribri")
|
83 |
processor = Wav2Vec2Processor.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-bribri")
|
@@ -92,7 +92,7 @@ def map_to_pred(batch):
|
|
92 |
batch["transcription"] = processor.batch_decode(predicted_ids)
|
93 |
return batch
|
94 |
|
95 |
-
result =
|
96 |
|
97 |
print("CER:", cer(result["source_processed"], result["transcription"]))
|
98 |
```
|
|
|
77 |
import soundfile as sf
|
78 |
|
79 |
americasnlp = load_dataset("ivangtorre/second_americas_nlp_2022", "bribri", split="dev")
|
80 |
+
guarani = americasnlp.filter(lambda language: language['subset']=='bribri')
|
81 |
|
82 |
model = Wav2Vec2ForCTC.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-bribri")
|
83 |
processor = Wav2Vec2Processor.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-bribri")
|
|
|
92 |
batch["transcription"] = processor.batch_decode(predicted_ids)
|
93 |
return batch
|
94 |
|
95 |
+
result = guarani.map(map_to_pred, batched=True, batch_size=1)
|
96 |
|
97 |
print("CER:", cer(result["source_processed"], result["transcription"]))
|
98 |
```
|