byroneverson committed
Commit ee3a450 (1 parent: 2e01e95)

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,6 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-4b804bbc-d751-433c-bca5-57a5d098854a.png filter=lfs diff=lfs merge=lfs -text
-logo.png filter=lfs diff=lfs merge=lfs -text
-e1b62557-ffd2-4d4e-90d1-69dfd300c72e.png filter=lfs diff=lfs merge=lfs -text

configuration_internlm2.py CHANGED
@@ -177,4 +177,4 @@ class InternLM2Config(PretrainedConfig):
             raise ValueError(
                 f"`rope_scaling`'s factor field must be a number >= 1, got {rope_scaling_factor} "
                 f"of type {type(rope_scaling_factor)}"
-            )
+            )
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52c0c370f77d5ef7f392d6e534d4952482175cbbf8ec558751f6ad7ca2027ee1
+oid sha256:21211c8cd36a2384e5a61b8ee697f7af2fd5cb5dbd82b98f2ba45e3811bf387a
 size 1949337704
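Each .safetensors shard is stored through Git LFS, so the diff only shows its pointer file: the oid is the sha256 of the shard's content and size is its byte length. In this commit every shard's hash changes while its size stays the same, which is consistent with the weights being modified in place. A minimal sketch for verifying a downloaded shard against the new pointer (the local filename is an assumption):

    import hashlib

    # Stream the file so the ~2 GB shard is never held in memory at once.
    h = hashlib.sha256()
    with open("model-00001-of-00008.safetensors", "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    # Should match the oid on the "+" line above.
    print(h.hexdigest() == "21211c8cd36a2384e5a61b8ee697f7af2fd5cb5dbd82b98f2ba45e3811bf387a")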
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e1de0f83c7769f1d3127bfd661b2370c59c6d04785596596c4f5afa862953b9
+oid sha256:1733085ee9a676914af5540727b679c8737253d93ba4696d3a58f95bf49aeebf
 size 1946242696
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ea0e42a05753401b3b899a2d32f6c3c3cde9717732c914d5cf1e5e7958b088b
+oid sha256:e02974992365928151707ffe6ad52479f198ff67207b3c5c61c50a5aa554ce98
 size 1979780440
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f546d9119dc3010c1d45abb77248f7f13d01d8205e1b015983dc6ea1bf881cb9
+oid sha256:b669fe17d08ced4a718e28d943edacf76b8d92c0d4838d0dd0b10bbcba5c6b47
 size 1946242728
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01f6c7059abb9001ec5a7bec6e55a026eabc52a5a49f6a24a776f345bf9d682a
+oid sha256:2163417fadce89473af5bd418c7b63198e88d609ae535fa957ea201f6b12a924
 size 1979780456
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1dbeb4bc3ec15af734816631af7cb8460992ac81e46a5696ed490dcba119df38
+oid sha256:8ea17ca20b1402de4a1e3fdf991ff50f42ea1206e72fb4ead80f6ab5f4455804
 size 1946242728
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13b904b4babcf4bc96ff1622afd05ee3fb3ae449fb971bb516d672a24c09d97b
+oid sha256:5e41f333e461d790d0413a1bef2b1f7f9881bc2e7cd2a516ecbc679d8e07e371
 size 1979780456
model-00008-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:035116e069e5485bd2db6c12362f8f520b5828f2098fe8ee927773610e74df8f
+oid sha256:ffb634c4772fae37b3993801e3117a89b75dbb5c53f4213b45a9a3bc461c3da2
 size 1748035640
model.safetensors.index.json CHANGED
@@ -231,4 +231,4 @@
     "model.tok_embeddings.weight": "model-00001-of-00008.safetensors",
     "output.weight": "model-00008-of-00008.safetensors"
   }
-}
+}
modeling_internlm2.py CHANGED
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""PyTorch InternLM2.5 model."""
+"""PyTorch InternLM2 model."""
 import math
 import queue
 import threading
@@ -59,6 +59,10 @@ try:
 except:
     pass

+try:
+    support_bf16_triu = torch.__version__ >= "2.1.0"
+except Exception:
+    support_bf16_triu = False

 logger = logging.get_logger(__name__)

@@ -1093,7 +1097,11 @@ class InternLM2Model(InternLM2PreTrainedModel):
         else:
             causal_mask = torch.full((sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device)
             if sequence_length != 1:
-                causal_mask = torch.triu(causal_mask, diagonal=1)
+                if support_bf16_triu or dtype == torch.float32:
+                    causal_mask = torch.triu(causal_mask, diagonal=1)
+                else:
+                    triu_mask = torch.triu(torch.ones(causal_mask.size(), device=device), diagonal=1).bool()
+                    causal_mask.masked_fill_(~triu_mask, 0)
             causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
             causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1)
             if attention_mask is not None:
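The new support_bf16_triu guard works around torch.triu lacking bfloat16 support on some backends before PyTorch 2.1: when triu cannot be applied directly, the lower triangle (including the diagonal) is zeroed with masked_fill_ instead, which yields the same strictly-upper-triangular mask. A minimal standalone sketch of the two paths (the function name and shapes are illustrative, not from the commit; the string comparison of torch.__version__ mirrors the diff):

    import torch

    def build_causal_mask(sequence_length, target_length, dtype, device="cpu"):
        # Fill with the most negative representable value, as in the diff.
        min_dtype = torch.finfo(dtype).min
        causal_mask = torch.full((sequence_length, target_length), min_dtype, dtype=dtype, device=device)
        if torch.__version__ >= "2.1.0" or dtype == torch.float32:
            # triu handles this dtype directly: keep only entries above the diagonal.
            causal_mask = torch.triu(causal_mask, diagonal=1)
        else:
            # Fallback: build a boolean strict-upper-triangle mask in float32,
            # then zero everything outside it in the bf16 tensor in place.
            triu_mask = torch.triu(torch.ones(causal_mask.size(), device=device), diagonal=1).bool()
            causal_mask.masked_fill_(~triu_mask, 0)
        return causal_mask

    print(build_causal_mask(4, 4, torch.bfloat16))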
refusal_direction.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28661ab6e9ee1cc96f9ab55b161f3c14a610747cd983c64683c00d1cea3c7026
+size 9422
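refusal_direction.pt is a new Git LFS object of only 9,422 bytes; given the filename, it is presumably the serialized refusal-direction tensor computed for this model (the size would fit a single hidden-state-sized vector in 16-bit precision plus serialization overhead, though that is an inference, not something the pointer file states). A minimal sketch for inspecting it, assuming it deserializes to a plain tensor:

    import torch

    # Assumption: the file holds one torch tensor; only the filename,
    # oid, and 9422-byte size are confirmed by the commit.
    direction = torch.load("refusal_direction.pt", map_location="cpu")
    print(type(direction), getattr(direction, "shape", None), getattr(direction, "dtype", None))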