# File size: 2,140 Bytes
# dfdc6c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import sys
import numpy as np

# Dataset names. DATASETS1 holds the same names as DATASETS and is not
# referenced anywhere else in the visible part of this file.
DATASETS1 = [
    "qmsum",
    "qasper",
    "quality",
    "musique",
    "hotpotqa",
    "multifieldqa_en",
]

# Column order of the per-dataset scores in every printed row.
DATASETS = [
    "qmsum",
    "qasper",
    "quality",
    "musique",
    "hotpotqa",
    "multifieldqa_en",
]

# Parser state for the system currently being read from the log:
# `outrow` is the row prefix (the system label), `data2res` maps a dataset
# name to its parsed score.
outrow = ""
data2res = {}

def average(data2res):
    """Return the arithmetic mean of the values stored in ``data2res``.

    Args:
        data2res: mapping whose values are numeric scores.

    Returns:
        The sum of all values divided by the number of entries, as a float.

    Raises:
        AssertionError: if ``data2res`` has no entries.
    """
    total = 0.0
    # Plain left-to-right accumulation, starting from a float zero.
    for score in data2res.values():
        total += score
    count = float(len(data2res))

    assert count > 0.0
    return total / count

def collect(value_list, outrow, data2res):
    """Print one space-separated result row for a single system.

    The row consists of ``outrow`` (the prefix built by the caller), the mean
    of ``value_list``, the mean of the values in ``data2res``, and then one
    field per entry of ``DATASETS`` in that order. A dataset missing from
    ``data2res`` is printed as ``NA``. Both means are rounded to 4 decimals.

    Args:
        value_list: every score collected for the system.
        outrow: text that starts the row.
        data2res: mapping from dataset name to its score.
    """
    # Mean over all collected scores.
    overall_mean = round(np.mean(value_list), 4)
    # Mean over the per-dataset mapping (one entry per dataset name).
    per_dataset_mean = round(average(data2res), 4)

    fields = [str(overall_mean), str(per_dataset_mean)]
    fields.extend(str(data2res.get(name, "NA")) for name in DATASETS)

    # Every field is followed by a single space; surrounding whitespace of
    # the finished row is stripped before printing.
    print((outrow + ' '.join(fields) + ' ').strip())

# Header row. The two "avg6" columns are filled by collect(): first the mean
# of every parsed score, then the mean over the dataset -> score mapping.
print('system avg6 avg6 ' + ' '.join(DATASETS))

# Log lines are read from stdin. Only lines containing 'final display' are
# used; all other lines are ignored.
value_list = list()
for aline in sys.stdin:
    aline = aline.strip()
    if 'final display' not in aline:
        continue

    if '-baseline-' in aline or '-ret-' in aline:
        # A new system header: flush the previous system's row, if any.
        if len(outrow) > 0 and len(data2res) > 0:
            collect(value_list, outrow, data2res)

        # Reset the per-system state.
        data2res = dict()
        value_list = list()

        # The row prefix is the header line without hyphens and without the
        # 'final display' marker.
        aline2 = aline.replace('-', '')
        aline2 = aline2.replace('final display', '')
        outrow = aline2 + ' '
        continue

    # Score line: the third space-separated column is the dataset name and
    # the remaining columns are its score(s).
    cols = aline.split(' ')
    adata = cols[2]
    scores = cols[3:]
    if len(scores) == 3:
        # Three scores (R1/R2/RL per the original comment) are collapsed into
        # a single geometric mean, rounded to 4 decimals.
        scores = [float(item) for item in scores]
        geo_mean = (scores[0] * scores[1] * scores[2]) ** (1.0 / 3.0)
        ares = round(geo_mean, 4)
    else:
        # A single score parses directly; any other column count raises
        # ValueError here, exactly as before.
        ares = float('/'.join(scores))

    data2res[adata] = ares
    value_list.append(ares)

# Flush the last system. Skip it when no scores were parsed (empty input or
# a trailing header without score lines); collect() would otherwise fail
# inside average() on the empty mapping.
if data2res:
    collect(value_list, outrow, data2res)