seonil committed on
Commit
dbb453d
1 Parent(s): d8c7f26
Files changed (1) hide show
  1. harim_plus.py +20 -10
harim_plus.py CHANGED
@@ -7,16 +7,26 @@ from harim_scorer import Harimplus_Scorer
7
 
8
  logger = evaluate.logging.get_logger(__name__)
9
 
10
- CODEBASE_URL=''
11
- PAPER_URL='TBA'
12
 
13
  _CITATION = """\
14
- @inproceedings{harimplus,
15
- title={HaRiM+: Evaluating Summary Quality with Hallucination Risk},
16
- author={Seonil Son, Junsoo Park, Jeong-in Hwang, Hyungjong Noh, Yeonsoo Lee},
17
- booktitle={AACL},
18
- year={2022},
19
- url={TBA}
 
 
 
 
 
 
 
 
 
 
20
  }
21
  """
22
 
@@ -56,7 +66,7 @@ Examples:
56
  >>> scorer = evaluate.load("NCSOFT/harim_plus") #, pretrained_name='PRETRAINEDNAME', tokenizer=TOKENIZER # optional
57
  >>> results = scorer.compute(predictions=summaries, references=articles) # use_aggregator=True # optional
58
  >>> print([round(v, 2) for v in results["harim+"]])
59
- [0.4, 0.4]
60
  """
61
 
62
 
@@ -94,7 +104,7 @@ class Harimplus(evaluate.Metric):
94
 
95
  def _download_and_prepare(self, dl_manager):
96
  pretrained_name = self.myconfig['pretrained_name']
97
- is_custom_tokenzer = self.myconfig['tokenizer'] is not None
98
  logger.warning(
99
  "Loading HaRiM+ score"
100
  f"\tpretrained_name = {pretrained_name}"
 
7
 
8
  logger = evaluate.logging.get_logger(__name__)
9
 
10
+ CODEBASE_URL='https://huggingface.co/spaces/NCSOFT/harim_plus'
11
+ PAPER_URL='https://arxiv.org/abs/2211.12118'
12
 
13
  _CITATION = """\
14
+ @inproceedings{son-etal-2022-harim,
15
+ title = "{H}a{R}i{M}$^+$: Evaluating Summary Quality with Hallucination Risk",
16
+ author = "Son, Seonil (Simon) and
17
+ Park, Junsoo and
18
+ Hwang, Jeong-in and
19
+ Lee, Junghwa and
20
+ Noh, Hyungjong and
21
+ Lee, Yeonsoo",
22
+ booktitle = "Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing",
23
+ month = nov,
24
+ year = "2022",
25
+ address = "Online only",
26
+ publisher = "Association for Computational Linguistics",
27
+ url = "https://aclanthology.org/2022.aacl-main.66",
28
+ pages = "895--924",
29
+ abstract = "One of the challenges of developing a summarization model arises from the difficulty in measuring the factual inconsistency of the generated text. In this study, we reinterpret the decoder overconfidence-regularizing objective suggested in (Miao et al., 2021) as a hallucination risk measurement to better estimate the quality of generated summaries. We propose a reference-free metric, HaRiM+, which only requires an off-the-shelf summarization model to compute the hallucination risk based on token likelihoods. Deploying it requires no additional training of models or ad-hoc modules, which usually need alignment to human judgments. For summary-quality estimation, HaRiM+ records state-of-the-art correlation to human judgment on three summary-quality annotation sets: FRANK, QAGS, and SummEval. We hope that our work, which merits the use of summarization models, facilitates the progress of both automated evaluation and generation of summary.",
30
  }
31
  """
32
 
 
66
  >>> scorer = evaluate.load("NCSOFT/harim_plus") #, pretrained_name='PRETRAINEDNAME', tokenizer=TOKENIZER # optional
67
  >>> results = scorer.compute(predictions=summaries, references=articles) # use_aggregator=True # optional
68
  >>> print([round(v, 2) for v in results["harim+"]])
69
+ [float, float]
70
  """
71
 
72
 
 
104
 
105
  def _download_and_prepare(self, dl_manager):
106
  pretrained_name = self.myconfig['pretrained_name']
107
+ is_custom_tokenizer = self.myconfig['tokenizer'] is not None
108
  logger.warning(
109
  "Loading HaRiM+ score"
110
  f"\tpretrained_name = {pretrained_name}"