Spaces:
Running
Running
Zhuosheng Zhang
commited on
Commit
•
90f65fb
1
Parent(s):
634ce4e
update
Browse files- app.py +201 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import AlbertTokenizer, AlbertForSequenceClassification, AlbertForQuestionAnswering
|
3 |
+
import collections
|
4 |
+
import math
|
5 |
+
import gradio as gr
|
6 |
+
|
7 |
+
# Local checkpoint directories for the two fine-tuned ALBERT models.
cls_modelPath = "./cls_model"
mrc_modelPath = "./model4"

# A single tokenizer, loaded from the reader checkpoint, builds the inputs
# that are fed to both models.
tokenizer = AlbertTokenizer.from_pretrained(mrc_modelPath)

# Sequence classifier; switched to evaluation mode right after loading.
cls_model = AlbertForSequenceClassification.from_pretrained(cls_modelPath).eval()

# Span-extraction (question answering) model, also in evaluation mode.
mrc_model = AlbertForQuestionAnswering.from_pretrained(mrc_modelPath).eval()
|
15 |
+
|
16 |
+
def _get_best_indexes(logits, n_best_size):
|
17 |
+
"""Get the n-best logits from a list."""
|
18 |
+
index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True)
|
19 |
+
|
20 |
+
best_indexes = []
|
21 |
+
for i in range(len(index_and_score)):
|
22 |
+
if i >= n_best_size:
|
23 |
+
break
|
24 |
+
best_indexes.append(index_and_score[i][0])
|
25 |
+
return best_indexes
|
26 |
+
|
27 |
+
def _compute_softmax(scores):
|
28 |
+
"""Compute softmax probability over raw logits."""
|
29 |
+
if not scores:
|
30 |
+
return []
|
31 |
+
|
32 |
+
max_score = None
|
33 |
+
for score in scores:
|
34 |
+
if max_score is None or score > max_score:
|
35 |
+
max_score = score
|
36 |
+
|
37 |
+
exp_scores = []
|
38 |
+
total_sum = 0.0
|
39 |
+
for score in scores:
|
40 |
+
x = math.exp(score - max_score)
|
41 |
+
exp_scores.append(x)
|
42 |
+
total_sum += x
|
43 |
+
|
44 |
+
probs = []
|
45 |
+
for score in exp_scores:
|
46 |
+
probs.append(score / total_sum)
|
47 |
+
return probs
|
48 |
+
|
49 |
+
def get_qa_nbest(start_logits, end_logits, seq_len, n_best_size=20,
                 max_answer_length=30, input_ids=None):
    """Rank candidate answer spans and score the "no answer" option.

    Args:
        start_logits: Per-position start scores of one example (indexable).
        end_logits: Per-position end scores of one example (indexable).
        seq_len: Spans whose start or end index is ``>= seq_len`` are
            discarded.
        n_best_size: Number of top start/end positions that are combined,
            and the maximum number of spans kept from the ranked list.
        max_answer_length: Longest accepted span, in tokens.
        input_ids: Token ids of the same example as a 1-D sequence. The
            slice for each span is passed to ``tokenizer.decode`` to obtain
            its text. Required as soon as a span starting after index 0
            has to be decoded.

    Returns:
        A tuple ``(nbest_json, score_diff)``. ``nbest_json`` is a list of
        ordered dicts with the keys ``text``, ``probability``,
        ``start_logit`` and ``end_logit``. ``score_diff`` is the null score
        (sum of the two logits at index 0) minus the summed logits of the
        best entry that has non-empty text.

    Raises:
        ValueError: If a span needs decoding but ``input_ids`` is ``None``.
    """
    _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
        "PrelimPrediction",
        ["start_index", "end_index", "start_logit", "end_logit"])
    _NbestPrediction = collections.namedtuple(  # pylint: disable=invalid-name
        "NbestPrediction", ["text", "start_logit", "end_logit"])

    # Index 0 is the "no answer" position: its logits define the null score
    # and are reused if a null entry has to be appended below.
    null_start_logit = start_logits[0]
    null_end_logit = end_logits[0]
    score_null = null_start_logit + null_end_logit

    start_indexes = _get_best_indexes(start_logits, n_best_size)
    end_indexes = _get_best_indexes(end_logits, n_best_size)

    prelim_predictions = []
    for start_index in start_indexes:
        if start_index >= seq_len:
            continue
        for end_index in end_indexes:
            if end_index < start_index or end_index >= seq_len:
                continue
            if end_index - start_index + 1 > max_answer_length:
                continue
            prelim_predictions.append(
                _PrelimPrediction(
                    start_index=start_index,
                    end_index=end_index,
                    start_logit=start_logits[start_index],
                    end_logit=end_logits[end_index]))

    # A span is scored by its start logit PLUS its end logit.
    prelim_predictions.sort(
        key=lambda pred: pred.start_logit + pred.end_logit, reverse=True)

    seen_predictions = set()
    nbest = []
    for pred in prelim_predictions:
        if len(nbest) >= n_best_size:
            break

        if pred.start_index > 0:  # this is a non-null prediction
            if input_ids is None:
                raise ValueError(
                    "input_ids is required to decode answer spans")
            answer_tokens = input_ids[pred.start_index:pred.end_index + 1]
            final_text = tokenizer.decode(answer_tokens)
            if final_text in seen_predictions:
                continue
        else:
            final_text = ""
        seen_predictions.add(final_text)

        nbest.append(
            _NbestPrediction(
                text=final_text,
                start_logit=pred.start_logit,
                end_logit=pred.end_logit))

    if "" not in seen_predictions:
        nbest.append(
            _NbestPrediction(
                text="",
                start_logit=null_start_logit,
                end_logit=null_end_logit))

    # In rare edge cases every kept entry is a null prediction. Create a
    # nonce prediction so a non-null entry always exists for score_diff.
    if not any(entry.text for entry in nbest):
        nbest.insert(
            0, _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))

    total_scores = [entry.start_logit + entry.end_logit for entry in nbest]
    best_non_null_entry = next(entry for entry in nbest if entry.text)

    probs = _compute_softmax(total_scores)
    nbest_json = [
        collections.OrderedDict([
            ("text", entry.text),
            ("probability", prob),
            ("start_logit", entry.start_logit),
            ("end_logit", entry.end_logit),
        ])
        for entry, prob in zip(nbest, probs)
    ]

    score_diff = (score_null - best_non_null_entry.start_logit
                  - best_non_null_entry.end_logit)

    return nbest_json, score_diff


def inference(question, context):
    """Answer ``question`` from ``context`` or report it as unanswerable.

    The pair is tokenized once and fed to both models. The classifier's
    logit difference and the reader's null-vs-best-span score difference
    are averaged into ``na_score``; above the threshold the question is
    declared unanswerable, otherwise the best non-empty span text is
    returned.

    Args:
        question: The question string.
        context: The passage to search for the answer.

    Returns:
        The answer text, or a fixed "not answerable" message.
    """
    inputs = tokenizer(
        question,
        context,
        add_special_tokens=True,
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    # Count real tokens through the attention mask. Searching for the first
    # pad id fails when the sequence fills all 512 positions.
    seq_len = int(inputs.attention_mask[0].sum())

    with torch.no_grad():
        cls_outputs = cls_model(**inputs)
        qa_outputs = mrc_model(**inputs)

    cls_logits = cls_outputs.logits[0]
    cls_divide = (cls_logits[1] - cls_logits[0]).item()

    # Plain Python floats keep the span ranking free of tensor overhead.
    nbest, score_diff = get_qa_nbest(
        qa_outputs.start_logits[0].tolist(),
        qa_outputs.end_logits[0].tolist(),
        seq_len=seq_len,
        input_ids=inputs.input_ids[0],
    )

    # Decision threshold for na_score: above it means "no answer".
    thresh = -1.246073067188263

    print(cls_divide, score_diff)

    na_score = (0.5 * cls_divide + 0.5 * score_diff) * 0.5
    if na_score > thresh:
        return "The question is not answerable according to the context."

    # The question was judged answerable, so skip any empty (null) entry;
    # get_qa_nbest always includes at least one entry with text.
    return next(entry["text"] for entry in nbest if entry["text"])
|
185 |
+
|
186 |
+
# Passages shared by the example rows below.
_NORMAN_CONTEXT = "The Norman dynasty had a major political, cultural and military impact on medieval Europe and even the Near East. The Normans were famed for their martial spirit and eventually for their Christian piety, becoming exponents of the Catholic orthodoxy into which they assimilated. They adopted the Gallo-Romance language of the Frankish land they settled, their dialect becoming known as Norman, Normaund or Norman French, an important literary language. The Duchy of Normandy, which they formed by treaty with the French crown, was a great fief of medieval France, and under Richard I of Normandy was forged into a cohesive and formidable principality in feudal tenure. The Normans are noted both for their culture, such as their unique Romanesque architecture and musical traditions, and for their significant military accomplishments and innovations. Norman adventurers founded the Kingdom of Sicily under Roger II after conquering southern Italy on the Saracens and Byzantines, and an expedition on behalf of their duke, William the Conqueror, led to the Norman conquest of England at the Battle of Hastings in 1066. Norman cultural and military influence spread from these new European centres to the Crusader states of the Near East, where their prince Bohemond I founded the Principality of Antioch in the Levant, to Scotland and Wales in Great Britain, to Ireland, and to the coasts of north Africa and the Canary Islands."
_STEAM_CONTEXT = "Steam engines are external combustion engines, where the working fluid is separate from the combustion products. Non-combustion heat sources such as solar power, nuclear power or geothermal energy may be used. The ideal thermodynamic cycle used to analyze this process is called the Rankine cycle. In the cycle, water is heated and transforms into steam within a boiler operating at a high pressure. When expanded through pistons or turbines, mechanical work is done. The reduced-pressure steam is then condensed and pumped back into the boiler."


def _answer_from_ui(context, question):
    """Adapt the UI field order (context, question) to ``inference``.

    The interface lists the context box first, while ``inference`` takes
    the question first; passing ``inference`` directly would swap the two.
    """
    return inference(question, context)


demo = gr.Interface(
    fn=_answer_from_ui,
    inputs=[gr.inputs.Textbox(label="Context"),
            gr.inputs.Textbox(label="Question")],
    outputs=gr.outputs.Textbox(label="Machine Reading Comprehension"),
    # Each row is [context, question, expected answer]. Rows must be
    # separated by commas; without one, the next list is parsed as a
    # subscript of the previous list.
    examples=[
        [_NORMAN_CONTEXT, "Who was the duke in the battle of Hastings?", "William the Conqueror"],
        [_NORMAN_CONTEXT, "What type of major impact did the Norman dynasty have on modern Europe?", "<No Answer>"],
        [_STEAM_CONTEXT, "What types of engines are steam engines?", "external combustion engines"],
        [_STEAM_CONTEXT, "Along with geothermal and nuclear, what is a notable non-combustion heat source?", "<No Answer>"],
    ],
    title="Retrospective Reader for Machine Reading Comprehension",
    description=("The model achieved the best performance at the SQuAD2.0 leaderboard. See more details at: https://github.com/cooelf/AwesomeMRC")
)

demo.launch(debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
sentencepiece
|
3 |
+
torch
|