Spaces:
Running
Running
seonil
committed on
Commit
•
dbb453d
1
Parent(s):
d8c7f26
typo
Browse files
- harim_plus.py +20 -10
harim_plus.py
CHANGED
@@ -7,16 +7,26 @@ from harim_scorer import Harimplus_Scorer
|
|
7 |
|
8 |
logger = evaluate.logging.get_logger(__name__)
|
9 |
|
10 |
-
CODEBASE_URL=''
|
11 |
-
PAPER_URL='
|
12 |
|
13 |
_CITATION = """\
|
14 |
-
@inproceedings{
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
}
|
21 |
"""
|
22 |
|
@@ -56,7 +66,7 @@ Examples:
|
|
56 |
>>> scorer = evaluate.load("NCSOFT/harim_plus") #, pretrained_name='PRETRAINEDNAME', tokenizer=TOKENIZER # optional
|
57 |
>>> results = scorer.compute(predictions=summaries, references=articles) # use_aggregator=True # optional
|
58 |
>>> print([round(v, 2) for v in results["harim+"]])
|
59 |
-
[
|
60 |
"""
|
61 |
|
62 |
|
@@ -94,7 +104,7 @@ class Harimplus(evaluate.Metric):
|
|
94 |
|
95 |
def _download_and_prepare(self, dl_manager):
|
96 |
pretrained_name = self.myconfig['pretrained_name']
|
97 |
-
|
98 |
logger.warning(
|
99 |
"Loading HaRiM+ score"
|
100 |
f"\tpretrained_name = {pretrained_name}"
|
|
|
7 |
|
8 |
logger = evaluate.logging.get_logger(__name__)
|
9 |
|
10 |
+
CODEBASE_URL='https://huggingface.co/spaces/NCSOFT/harim_plus'
|
11 |
+
PAPER_URL='https://arxiv.org/abs/2211.12118'
|
12 |
|
13 |
_CITATION = """\
|
14 |
+
@inproceedings{son-etal-2022-harim,
|
15 |
+
title = "{H}a{R}i{M}$^+$: Evaluating Summary Quality with Hallucination Risk",
|
16 |
+
author = "Son, Seonil (Simon) and
|
17 |
+
Park, Junsoo and
|
18 |
+
Hwang, Jeong-in and
|
19 |
+
Lee, Junghwa and
|
20 |
+
Noh, Hyungjong and
|
21 |
+
Lee, Yeonsoo",
|
22 |
+
booktitle = "Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing",
|
23 |
+
month = nov,
|
24 |
+
year = "2022",
|
25 |
+
address = "Online only",
|
26 |
+
publisher = "Association for Computational Linguistics",
|
27 |
+
url = "https://aclanthology.org/2022.aacl-main.66",
|
28 |
+
pages = "895--924",
|
29 |
+
abstract = "One of the challenges of developing a summarization model arises from the difficulty in measuring the factual inconsistency of the generated text. In this study, we reinterpret the decoder overconfidence-regularizing objective suggested in (Miao et al., 2021) as a hallucination risk measurement to better estimate the quality of generated summaries. We propose a reference-free metric, HaRiM+, which only requires an off-the-shelf summarization model to compute the hallucination risk based on token likelihoods. Deploying it requires no additional training of models or ad-hoc modules, which usually need alignment to human judgments. For summary-quality estimation, HaRiM+ records state-of-the-art correlation to human judgment on three summary-quality annotation sets: FRANK, QAGS, and SummEval. We hope that our work, which merits the use of summarization models, facilitates the progress of both automated evaluation and generation of summary.",
|
30 |
}
|
31 |
"""
|
32 |
|
|
|
66 |
>>> scorer = evaluate.load("NCSOFT/harim_plus") #, pretrained_name='PRETRAINEDNAME', tokenizer=TOKENIZER # optional
|
67 |
>>> results = scorer.compute(predictions=summaries, references=articles) # use_aggregator=True # optional
|
68 |
>>> print([round(v, 2) for v in results["harim+"]])
|
69 |
+
[float, float]
|
70 |
"""
|
71 |
|
72 |
|
|
|
104 |
|
105 |
def _download_and_prepare(self, dl_manager):
|
106 |
pretrained_name = self.myconfig['pretrained_name']
|
107 |
+
is_custom_tokenizer = self.myconfig['tokenizer'] is not None
|
108 |
logger.warning(
|
109 |
"Loading HaRiM+ score"
|
110 |
f"\tpretrained_name = {pretrained_name}"
|