byroneverson committed
Commit ee3a450 (1 parent: 2e01e95)

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,6 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-4b804bbc-d751-433c-bca5-57a5d098854a.png filter=lfs diff=lfs merge=lfs -text
-logo.png filter=lfs diff=lfs merge=lfs -text
-e1b62557-ffd2-4d4e-90d1-69dfd300c72e.png filter=lfs diff=lfs merge=lfs -text

configuration_internlm2.py CHANGED
@@ -177,4 +177,4 @@ class InternLM2Config(PretrainedConfig):
             raise ValueError(
                 f"`rope_scaling`'s factor field must be a number >= 1, got {rope_scaling_factor} "
                 f"of type {type(rope_scaling_factor)}"
-            )
+            )
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52c0c370f77d5ef7f392d6e534d4952482175cbbf8ec558751f6ad7ca2027ee1
+oid sha256:21211c8cd36a2384e5a61b8ee697f7af2fd5cb5dbd82b98f2ba45e3811bf387a
 size 1949337704
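Each .safetensors shard is stored through Git LFS, so the diff only shows its pointer file: the oid is the sha256 of the shard's content and size is its byte length. In this commit every shard's hash changes while its size stays the same, which is consistent with the weights being modified in place. A minimal sketch for verifying a downloaded shard against the new pointer (the local filename is an assumption):

    import hashlib

    # Stream the file so the ~2 GB shard is never held in memory at once.
    h = hashlib.sha256()
    with open("model-00001-of-00008.safetensors", "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    # Should match the oid on the "+" line above.
    print(h.hexdigest() == "21211c8cd36a2384e5a61b8ee697f7af2fd5cb5dbd82b98f2ba45e3811bf387a")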
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e1de0f83c7769f1d3127bfd661b2370c59c6d04785596596c4f5afa862953b9
+oid sha256:1733085ee9a676914af5540727b679c8737253d93ba4696d3a58f95bf49aeebf
 size 1946242696
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ea0e42a05753401b3b899a2d32f6c3c3cde9717732c914d5cf1e5e7958b088b
+oid sha256:e02974992365928151707ffe6ad52479f198ff67207b3c5c61c50a5aa554ce98
 size 1979780440
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f546d9119dc3010c1d45abb77248f7f13d01d8205e1b015983dc6ea1bf881cb9
+oid sha256:b669fe17d08ced4a718e28d943edacf76b8d92c0d4838d0dd0b10bbcba5c6b47
 size 1946242728
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01f6c7059abb9001ec5a7bec6e55a026eabc52a5a49f6a24a776f345bf9d682a
+oid sha256:2163417fadce89473af5bd418c7b63198e88d609ae535fa957ea201f6b12a924
 size 1979780456
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1dbeb4bc3ec15af734816631af7cb8460992ac81e46a5696ed490dcba119df38
+oid sha256:8ea17ca20b1402de4a1e3fdf991ff50f42ea1206e72fb4ead80f6ab5f4455804
 size 1946242728
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13b904b4babcf4bc96ff1622afd05ee3fb3ae449fb971bb516d672a24c09d97b
+oid sha256:5e41f333e461d790d0413a1bef2b1f7f9881bc2e7cd2a516ecbc679d8e07e371
 size 1979780456
model-00008-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:035116e069e5485bd2db6c12362f8f520b5828f2098fe8ee927773610e74df8f
+oid sha256:ffb634c4772fae37b3993801e3117a89b75dbb5c53f4213b45a9a3bc461c3da2
 size 1748035640
model.safetensors.index.json CHANGED
@@ -231,4 +231,4 @@
     "model.tok_embeddings.weight": "model-00001-of-00008.safetensors",
     "output.weight": "model-00008-of-00008.safetensors"
   }
-}
+}
modeling_internlm2.py CHANGED
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""PyTorch InternLM2.5 model."""
+"""PyTorch InternLM2 model."""
 import math
 import queue
 import threading
@@ -59,6 +59,10 @@ try:
 except:
     pass

+try:
+    support_bf16_triu = torch.__version__ >= "2.1.0"
+except Exception:
+    support_bf16_triu = False

 logger = logging.get_logger(__name__)

@@ -1093,7 +1097,11 @@ class InternLM2Model(InternLM2PreTrainedModel):
         else:
             causal_mask = torch.full((sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device)
             if sequence_length != 1:
-                causal_mask = torch.triu(causal_mask, diagonal=1)
+                if support_bf16_triu or dtype == torch.float32:
+                    causal_mask = torch.triu(causal_mask, diagonal=1)
+                else:
+                    triu_mask = torch.triu(torch.ones(causal_mask.size(), device=device), diagonal=1).bool()
+                    causal_mask.masked_fill_(~triu_mask, 0)
             causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
             causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1)
             if attention_mask is not None:
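The new support_bf16_triu guard works around torch.triu lacking bfloat16 support on some backends before PyTorch 2.1: when triu cannot be applied directly, the lower triangle (including the diagonal) is zeroed with masked_fill_ instead, which yields the same strictly-upper-triangular mask. A minimal standalone sketch of the two paths (the function name and shapes are illustrative, not from the commit; the string comparison of torch.__version__ mirrors the diff):

    import torch

    def build_causal_mask(sequence_length, target_length, dtype, device="cpu"):
        # Fill with the most negative representable value, as in the diff.
        min_dtype = torch.finfo(dtype).min
        causal_mask = torch.full((sequence_length, target_length), min_dtype, dtype=dtype, device=device)
        if torch.__version__ >= "2.1.0" or dtype == torch.float32:
            # triu handles this dtype directly: keep only entries above the diagonal.
            causal_mask = torch.triu(causal_mask, diagonal=1)
        else:
            # Fallback: build a boolean strict-upper-triangle mask in float32,
            # then zero everything outside it in the bf16 tensor in place.
            triu_mask = torch.triu(torch.ones(causal_mask.size(), device=device), diagonal=1).bool()
            causal_mask.masked_fill_(~triu_mask, 0)
        return causal_mask

    print(build_causal_mask(4, 4, torch.bfloat16))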
refusal_direction.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28661ab6e9ee1cc96f9ab55b161f3c14a610747cd983c64683c00d1cea3c7026
+size 9422
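refusal_direction.pt is a new Git LFS object of only 9,422 bytes; given the filename, it is presumably the serialized refusal-direction tensor computed for this model (the size would fit a single hidden-state-sized vector in 16-bit precision plus serialization overhead, though that is an inference, not something the pointer file states). A minimal sketch for inspecting it, assuming it deserializes to a plain tensor:

    import torch

    # Assumption: the file holds one torch tensor; only the filename,
    # oid, and 9422-byte size are confirmed by the commit.
    direction = torch.load("refusal_direction.pt", map_location="cpu")
    print(type(direction), getattr(direction, "shape", None), getattr(direction, "dtype", None))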