Spaces (Sleeping)
cointegrated committed
Commit: 75f7f85
1 Parent(s): edff202
add real code
Browse files:
- blaser_2_0_qe.py +20 -8
- requirements.txt +2 -1
blaser_2_0_qe.py
CHANGED
@@ -15,6 +15,10 @@
 
 import evaluate
 import datasets
+import torch
+
+from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline
+from sonar.models.blaser.loader import load_blaser_model
 
 
 # TODO: Add BibTeX citation
@@ -71,8 +75,8 @@ class BLASER20QE(evaluate.Metric):
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
             features=datasets.Features({
-                'predictions': datasets.Value('int64'),
-                'references': datasets.Value('int64'),
+                'predictions': datasets.Value('string'),
+                'references': datasets.Value('string'),
             }),
             # Homepage of the module for documentation
             homepage="http://module.homepage",
@@ -83,13 +87,21 @@ class BLASER20QE(evaluate.Metric):
 
     def _download_and_prepare(self, dl_manager):
         """Optional: download external resources useful to compute the scores"""
-        # TODO: Download external resources if needed
-        pass
+
+        self.text_embedder = TextToEmbeddingModelPipeline(encoder="text_sonar_basic_encoder", tokenizer="text_sonar_basic_encoder")
+        self.blaser_qe = load_blaser_model("blaser_2_0_qe").eval()
+        #self.blaser_ref = load_blaser_model("blaser_2_0_ref").eval()
 
-    def _compute(self, predictions, references):
+    def _compute(self, sources, translations):
         """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
+        # TODO: adjust the languages
+        src_embs = self.text_embedder.predict(sources, source_lang="eng_Latn")
+        mt_embs = self.text_embedder.predict(translations, source_lang="eng_Latn")
+
+        with torch.inference_mode():
+            #ref_score = self.blaser_ref(src=src_embs, ref=ref_embs, mt=mt_embs).mean().item()
+            qe_score = self.blaser_qe(src=src_embs, mt=mt_embs).mean().item()
+
         return {
-            "accuracy": accuracy,
+            "BLASER 2.0-QE": qe_score,
         }
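For orientation, the scoring path that the new _compute implements can also be run as a standalone script using the same SONAR calls added in this commit. The sketch below is illustrative only: the encoder and model names come from the diff, while the example sentences and the fra_Latn source-language code are stand-ins (the committed code still hard-codes eng_Latn for both sides, as its TODO notes).

# Minimal standalone sketch of the scoring path added above (not the module itself).
# Model and encoder names are taken from the diff; the sentences and the
# fra_Latn language code are illustrative only.
import torch
from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline
from sonar.models.blaser.loader import load_blaser_model

text_embedder = TextToEmbeddingModelPipeline(
    encoder="text_sonar_basic_encoder",
    tokenizer="text_sonar_basic_encoder",
)
blaser_qe = load_blaser_model("blaser_2_0_qe").eval()

sources = ["Le chat s'assit sur le tapis."]         # source sentences
translations = ["The cat sat down on the carpet."]  # machine translations to score

# Embed both sides with SONAR.
src_embs = text_embedder.predict(sources, source_lang="fra_Latn")
mt_embs = text_embedder.predict(translations, source_lang="eng_Latn")

with torch.inference_mode():
    qe_score = blaser_qe(src=src_embs, mt=mt_embs).mean().item()

print({"BLASER 2.0-QE": qe_score})

Because blaser_2_0_qe is the reference-free variant, only source and translation embeddings are passed; the commented-out blaser_2_0_ref lines in the diff show where reference embeddings would additionally go.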
requirements.txt
CHANGED
@@ -1 +1,2 @@
-git+https://github.com/huggingface/evaluate@main
+git+https://github.com/huggingface/evaluate@main
+sonar-space
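The added sonar-space entry is the PyPI distribution that ships the sonar package imported by the module above, while the first line keeps evaluate installed from its main branch. A minimal sanity check, assuming both requirements were installed with pip:

# Sanity check that the two requirements cover the module's imports.
# Assumes: pip install "git+https://github.com/huggingface/evaluate@main" sonar-space
import evaluate  # from the git requirement
from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline  # from sonar-space
from sonar.models.blaser.loader import load_blaser_model  # from sonar-space

print("evaluate", evaluate.__version__)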