robinzixuan
committed on
Update modeling_bert.py
Browse files- modeling_bert.py +2 -1
modeling_bert.py
CHANGED
@@ -384,7 +384,8 @@ class BertSelfAttention(nn.Module):
|
|
384 |
attention_scores = attention_scores + attention_mask
|
385 |
|
386 |
# Normalize the attention scores to probabilities.
|
387 |
-
attention_probs = nn.functional.softmax(attention_scores, dim=-1)
|
|
|
388 |
|
389 |
# This is actually dropping out entire tokens to attend to, which might
|
390 |
# seem a bit unusual, but is taken from the original Transformer paper.
|
|
|
384 |
attention_scores = attention_scores + attention_mask
|
385 |
|
386 |
# Normalize the attention scores to probabilities.
|
387 |
+
#attention_probs = nn.functional.softmax(attention_scores, dim=-1)
|
388 |
+
attention_probs = softmax_1(attention_scores, dim=-1)
|
389 |
|
390 |
# This is actually dropping out entire tokens to attend to, which might
|
391 |
# seem a bit unusual, but is taken from the original Transformer paper.
|