byroneverson
committed on
Commit
•
ee3a450
1
Parent(s):
2e01e95
Upload folder using huggingface_hub
Browse files
- .gitattributes +0 -3
- configuration_internlm2.py +1 -1
- model-00001-of-00008.safetensors +1 -1
- model-00002-of-00008.safetensors +1 -1
- model-00003-of-00008.safetensors +1 -1
- model-00004-of-00008.safetensors +1 -1
- model-00005-of-00008.safetensors +1 -1
- model-00006-of-00008.safetensors +1 -1
- model-00007-of-00008.safetensors +1 -1
- model-00008-of-00008.safetensors +1 -1
- model.safetensors.index.json +1 -1
- modeling_internlm2.py +10 -2
- refusal_direction.pt +3 -0
.gitattributes
CHANGED
@@ -33,6 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
-
4b804bbc-d751-433c-bca5-57a5d098854a.png filter=lfs diff=lfs merge=lfs -text
|
37 |
-
logo.png filter=lfs diff=lfs merge=lfs -text
|
38 |
-
e1b62557-ffd2-4d4e-90d1-69dfd300c72e.png filter=lfs diff=lfs merge=lfs -text
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
configuration_internlm2.py
CHANGED
@@ -177,4 +177,4 @@ class InternLM2Config(PretrainedConfig):
|
|
177 |
raise ValueError(
|
178 |
f"`rope_scaling`'s factor field must be a number >= 1, got {rope_scaling_factor} "
|
179 |
f"of type {type(rope_scaling_factor)}"
|
180 |
-
)
|
|
|
177 |
raise ValueError(
|
178 |
f"`rope_scaling`'s factor field must be a number >= 1, got {rope_scaling_factor} "
|
179 |
f"of type {type(rope_scaling_factor)}"
|
180 |
+
)
|
model-00001-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1949337704
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21211c8cd36a2384e5a61b8ee697f7af2fd5cb5dbd82b98f2ba45e3811bf387a
|
3 |
size 1949337704
|
model-00002-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1946242696
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1733085ee9a676914af5540727b679c8737253d93ba4696d3a58f95bf49aeebf
|
3 |
size 1946242696
|
model-00003-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1979780440
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e02974992365928151707ffe6ad52479f198ff67207b3c5c61c50a5aa554ce98
|
3 |
size 1979780440
|
model-00004-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1946242728
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b669fe17d08ced4a718e28d943edacf76b8d92c0d4838d0dd0b10bbcba5c6b47
|
3 |
size 1946242728
|
model-00005-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1979780456
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2163417fadce89473af5bd418c7b63198e88d609ae535fa957ea201f6b12a924
|
3 |
size 1979780456
|
model-00006-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1946242728
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ea17ca20b1402de4a1e3fdf991ff50f42ea1206e72fb4ead80f6ab5f4455804
|
3 |
size 1946242728
|
model-00007-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1979780456
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e41f333e461d790d0413a1bef2b1f7f9881bc2e7cd2a516ecbc679d8e07e371
|
3 |
size 1979780456
|
model-00008-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1748035640
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffb634c4772fae37b3993801e3117a89b75dbb5c53f4213b45a9a3bc461c3da2
|
3 |
size 1748035640
|
model.safetensors.index.json
CHANGED
@@ -231,4 +231,4 @@
|
|
231 |
"model.tok_embeddings.weight": "model-00001-of-00008.safetensors",
|
232 |
"output.weight": "model-00008-of-00008.safetensors"
|
233 |
}
|
234 |
-
}
|
|
|
231 |
"model.tok_embeddings.weight": "model-00001-of-00008.safetensors",
|
232 |
"output.weight": "model-00008-of-00008.safetensors"
|
233 |
}
|
234 |
+
}
|
modeling_internlm2.py
CHANGED
@@ -13,7 +13,7 @@
|
|
13 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14 |
# See the License for the specific language governing permissions and
|
15 |
# limitations under the License.
|
16 |
-
"""PyTorch InternLM2
|
17 |
import math
|
18 |
import queue
|
19 |
import threading
|
@@ -59,6 +59,10 @@ try:
|
|
59 |
except:
|
60 |
pass
|
61 |
|
|
|
|
|
|
|
|
|
62 |
|
63 |
logger = logging.get_logger(__name__)
|
64 |
|
@@ -1093,7 +1097,11 @@ class InternLM2Model(InternLM2PreTrainedModel):
|
|
1093 |
else:
|
1094 |
causal_mask = torch.full((sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device)
|
1095 |
if sequence_length != 1:
|
1096 |
-
|
|
|
|
|
|
|
|
|
1097 |
causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
|
1098 |
causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1)
|
1099 |
if attention_mask is not None:
|
|
|
13 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14 |
# See the License for the specific language governing permissions and
|
15 |
# limitations under the License.
|
16 |
+
"""PyTorch InternLM2 model."""
|
17 |
import math
|
18 |
import queue
|
19 |
import threading
|
|
|
59 |
except:
|
60 |
pass
|
61 |
|
62 |
+
try:
|
63 |
+
support_bf16_triu = torch.__version__ >= "2.1.0"
|
64 |
+
except Exception:
|
65 |
+
support_bf16_triu = False
|
66 |
|
67 |
logger = logging.get_logger(__name__)
|
68 |
|
|
|
1097 |
else:
|
1098 |
causal_mask = torch.full((sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device)
|
1099 |
if sequence_length != 1:
|
1100 |
+
if support_bf16_triu or dtype == torch.float32:
|
1101 |
+
causal_mask = torch.triu(causal_mask, diagonal=1)
|
1102 |
+
else:
|
1103 |
+
triu_mask = torch.triu(torch.ones(causal_mask.size(), device=device), diagonal=1).bool()
|
1104 |
+
causal_mask.masked_fill_(~triu_mask, 0)
|
1105 |
causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
|
1106 |
causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1)
|
1107 |
if attention_mask is not None:
|
refusal_direction.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28661ab6e9ee1cc96f9ab55b161f3c14a610747cd983c64683c00d1cea3c7026
|
3 |
+
size 9422
|