cointegrated committed on
Commit
75f7f85
1 Parent(s): edff202

add real code

Browse files
Files changed (2) hide show
  1. blaser_2_0_qe.py +20 -8
  2. requirements.txt +2 -1
blaser_2_0_qe.py CHANGED
@@ -15,6 +15,10 @@
15
 
16
  import evaluate
17
  import datasets
 
 
 
 
18
 
19
 
20
  # TODO: Add BibTeX citation
@@ -71,8 +75,8 @@ class BLASER20QE(evaluate.Metric):
71
  inputs_description=_KWARGS_DESCRIPTION,
72
  # This defines the format of each prediction and reference
73
  features=datasets.Features({
74
- 'predictions': datasets.Value('int64'),
75
- 'references': datasets.Value('int64'),
76
  }),
77
  # Homepage of the module for documentation
78
  homepage="http://module.homepage",
@@ -83,13 +87,21 @@ class BLASER20QE(evaluate.Metric):
83
 
84
  def _download_and_prepare(self, dl_manager):
85
  """Optional: download external resources useful to compute the scores"""
86
- # TODO: Download external resources if needed
87
- pass
 
 
88
 
89
- def _compute(self, predictions, references):
90
  """Returns the scores"""
91
- # TODO: Compute the different scores of the module
92
- accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
 
 
 
 
 
 
93
  return {
94
- "accuracy": accuracy,
95
  }
 
15
 
16
  import evaluate
17
  import datasets
18
+ import torch
19
+
20
+ from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline
21
+ from sonar.models.blaser.loader import load_blaser_model
22
 
23
 
24
  # TODO: Add BibTeX citation
 
75
  inputs_description=_KWARGS_DESCRIPTION,
76
  # This defines the format of each prediction and reference
77
  features=datasets.Features({
78
+ 'predictions': datasets.Value('str'),
79
+ 'references': datasets.Value('str'),
80
  }),
81
  # Homepage of the module for documentation
82
  homepage="http://module.homepage",
 
87
 
88
  def _download_and_prepare(self, dl_manager):
89
  """Optional: download external resources useful to compute the scores"""
90
+
91
+ self.text_embedder = TextToEmbeddingModelPipeline(encoder="text_sonar_basic_encoder", tokenizer="text_sonar_basic_encoder")
92
+ self.blaser_qe = load_blaser_model("blaser_2_0_qe").eval()
93
+ #self.blaser_ref = load_blaser_model("blaser_2_0_ref").eval()
94
 
95
+ def _compute(self, sources, translations):
96
  """Returns the scores"""
97
+ # TODO: adjust the languages
98
+ src_embs = text_embedder.predict(sources, source_lang="eng_Latn")
99
+ mt_embs = text_embedder.predict(translations, source_lang="eng_Latn")
100
+
101
+ with torch.inference_mode():
102
+ #ref_score = blaser_ref(src=src_embs, ref=ref_embs, mt=mt_embs).mean().item()
103
+ qe_score = blaser_qe(src=src_embs, mt=mt_embs).mean().item()
104
+
105
  return {
106
+ "BLASER 2.0-QE": qe_score,
107
  }
requirements.txt CHANGED
@@ -1 +1,2 @@
1
- git+https://github.com/huggingface/evaluate@main
 
 
1
+ git+https://github.com/huggingface/evaluate@main
2
+ sonar-space