ivangtorre
committed on
Commit
•
5016634
1
Parent(s):
51a557e
Update README.md
Browse files
README.md
CHANGED
@@ -73,9 +73,11 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
73 |
import torch
|
74 |
from jiwer import cer
|
75 |
import torch.nn.functional as F
|
|
|
|
|
76 |
|
77 |
-
|
78 |
-
|
79 |
|
80 |
model = Wav2Vec2ForCTC.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-quechua")
|
81 |
processor = Wav2Vec2Processor.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-quechua")
|
@@ -90,7 +92,7 @@ def map_to_pred(batch):
|
|
90 |
batch["transcription"] = processor.batch_decode(predicted_ids)
|
91 |
return batch
|
92 |
|
93 |
-
result =
|
94 |
|
95 |
print("CER:", cer(result["source_processed"], result["transcription"]))
|
96 |
```
|
|
|
73 |
import torch
|
74 |
from jiwer import cer
|
75 |
import torch.nn.functional as F
|
76 |
+
from datasets import load_dataset
|
77 |
+
import soundfile as sf
|
78 |
|
79 |
+
americasnlp = load_dataset("ivangtorre/second_americas_nlp_2022", "quechua", split="dev")
|
80 |
+
quechua = americasnlp.filter(lambda language: language['subset']=='quechua')
|
81 |
|
82 |
model = Wav2Vec2ForCTC.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-quechua")
|
83 |
processor = Wav2Vec2Processor.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-quechua")
|
|
|
92 |
batch["transcription"] = processor.batch_decode(predicted_ids)
|
93 |
return batch
|
94 |
|
95 |
+
result = quechua.map(map_to_pred, batched=True, batch_size=1)
|
96 |
|
97 |
print("CER:", cer(result["source_processed"], result["transcription"]))
|
98 |
```
|