robinzixuan
committed on
Update modeling_bert.py
Browse files- modeling_bert.py +1 -1
modeling_bert.py
CHANGED
@@ -387,7 +387,7 @@ class BertSelfAttention(nn.Module):
|
|
387 |
# Normalize the attention scores to probabilities.
|
388 |
#attention_probs = nn.functional.softmax(attention_scores, dim=-1)
|
389 |
attention_probs = softmax_1(attention_scores, dim=-1)
|
390 |
-
|
391 |
|
392 |
# This is actually dropping out entire tokens to attend to, which might
|
393 |
# seem a bit unusual, but is taken from the original Transformer paper.
|
|
|
387 |
# Normalize the attention scores to probabilities.
|
388 |
#attention_probs = nn.functional.softmax(attention_scores, dim=-1)
|
389 |
attention_probs = softmax_1(attention_scores, dim=-1)
|
390 |
+
|
391 |
|
392 |
# This is actually dropping out entire tokens to attend to, which might
|
393 |
# seem a bit unusual, but is taken from the original Transformer paper.
|