softwareweaver
committed on
Commit
•
81db0c3
1
Parent(s):
476529f
Upload folder using huggingface_hub
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +38 -0
- config.json +27 -0
- mergekit_config.yml +9 -0
- model-00001-of-00081.safetensors +3 -0
- model-00002-of-00081.safetensors +3 -0
- model-00003-of-00081.safetensors +3 -0
- model-00004-of-00081.safetensors +3 -0
- model-00005-of-00081.safetensors +3 -0
- model-00006-of-00081.safetensors +3 -0
- model-00007-of-00081.safetensors +3 -0
- model-00008-of-00081.safetensors +3 -0
- model-00009-of-00081.safetensors +3 -0
- model-00010-of-00081.safetensors +3 -0
- model-00011-of-00081.safetensors +3 -0
- model-00012-of-00081.safetensors +3 -0
- model-00013-of-00081.safetensors +3 -0
- model-00014-of-00081.safetensors +3 -0
- model-00015-of-00081.safetensors +3 -0
- model-00016-of-00081.safetensors +3 -0
- model-00017-of-00081.safetensors +3 -0
- model-00018-of-00081.safetensors +3 -0
- model-00019-of-00081.safetensors +3 -0
- model-00020-of-00081.safetensors +3 -0
- model-00021-of-00081.safetensors +3 -0
- model-00022-of-00081.safetensors +3 -0
- model-00023-of-00081.safetensors +3 -0
- model-00024-of-00081.safetensors +3 -0
- model-00025-of-00081.safetensors +3 -0
- model-00026-of-00081.safetensors +3 -0
- model-00027-of-00081.safetensors +3 -0
- model-00028-of-00081.safetensors +3 -0
- model-00029-of-00081.safetensors +3 -0
- model-00030-of-00081.safetensors +3 -0
- model-00031-of-00081.safetensors +3 -0
- model-00032-of-00081.safetensors +3 -0
- model-00033-of-00081.safetensors +3 -0
- model-00034-of-00081.safetensors +3 -0
- model-00035-of-00081.safetensors +3 -0
- model-00036-of-00081.safetensors +3 -0
- model-00037-of-00081.safetensors +3 -0
- model-00038-of-00081.safetensors +3 -0
- model-00039-of-00081.safetensors +3 -0
- model-00040-of-00081.safetensors +3 -0
- model-00041-of-00081.safetensors +3 -0
- model-00042-of-00081.safetensors +3 -0
- model-00043-of-00081.safetensors +3 -0
- model-00044-of-00081.safetensors +3 -0
- model-00045-of-00081.safetensors +3 -0
- model-00046-of-00081.safetensors +3 -0
- model-00047-of-00081.safetensors +3 -0
README.md
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model:
|
3 |
+
- TheDrummer/Behemoth-123B-v1
|
4 |
+
library_name: transformers
|
5 |
+
tags:
|
6 |
+
- mergekit
|
7 |
+
- merge
|
8 |
+
|
9 |
+
---
|
10 |
+
# Behemoth-Xtra-Large
|
11 |
+
|
12 |
+
This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
|
13 |
+
|
14 |
+
## Merge Details
|
15 |
+
### Merge Method
|
16 |
+
|
17 |
+
This model was merged using the passthrough merge method.
|
18 |
+
|
19 |
+
### Models Merged
|
20 |
+
|
21 |
+
The following models were included in the merge:
|
22 |
+
* [TheDrummer/Behemoth-123B-v1](https://huggingface.co/TheDrummer/Behemoth-123B-v1)
|
23 |
+
|
24 |
+
### Configuration
|
25 |
+
|
26 |
+
The following YAML configuration was used to produce this model:
|
27 |
+
|
28 |
+
```yaml
|
29 |
+
dtype: bfloat16
|
30 |
+
merge_method: passthrough
|
31 |
+
slices:
|
32 |
+
- sources:
|
33 |
+
- layer_range: [0, 70]
|
34 |
+
model: TheDrummer/Behemoth-123B-v1
|
35 |
+
- sources:
|
36 |
+
- layer_range: [18, 88]
|
37 |
+
model: TheDrummer/Behemoth-123B-v1
|
38 |
+
```
|
config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "softwareweaver/Behemoth-XL-195B-v1",
|
3 |
+
"architectures": [
|
4 |
+
"MistralForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"head_dim": 128,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 12288,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 28672,
|
14 |
+
"max_position_embeddings": 131072,
|
15 |
+
"model_type": "mistral",
|
16 |
+
"num_attention_heads": 96,
|
17 |
+
"num_hidden_layers": 140,
|
18 |
+
"num_key_value_heads": 8,
|
19 |
+
"rms_norm_eps": 1e-05,
|
20 |
+
"rope_theta": 1000000.0,
|
21 |
+
"sliding_window": null,
|
22 |
+
"tie_word_embeddings": false,
|
23 |
+
"torch_dtype": "bfloat16",
|
24 |
+
"transformers_version": "4.44.2",
|
25 |
+
"use_cache": true,
|
26 |
+
"vocab_size": 32768
|
27 |
+
}
|
mergekit_config.yml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dtype: bfloat16
|
2 |
+
merge_method: passthrough
|
3 |
+
slices:
|
4 |
+
- sources:
|
5 |
+
- layer_range: [0, 70]
|
6 |
+
model: TheDrummer/Behemoth-123B-v1
|
7 |
+
- sources:
|
8 |
+
- layer_range: [18, 88]
|
9 |
+
model: TheDrummer/Behemoth-123B-v1
|
model-00001-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b3ca45903c0c8cdc2d38012b0574606bbb86b225a43e303cf969e8478869b08
|
3 |
+
size 4378928504
|
model-00002-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e325faa7f9547ea52d31f23942f683824d67b88cc12baa6711ab14d43df90842
|
3 |
+
size 4907411088
|
model-00003-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8fbbdc567e12bfb9167a23a4f2eb94ae23f42f8f00fa4e384138a84282f63f31
|
3 |
+
size 4806747904
|
model-00004-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a145902b8f9547e7d48c0ecc050fcb01551b88a52d2bb222aedf65e2929a3ba1
|
3 |
+
size 4831938544
|
model-00005-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f33874a55ebb5409e87e71085f2d4e04b54fd980336d01bfb9f435e99a28be10
|
3 |
+
size 4831938552
|
model-00006-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef7cf29534435aaab168220510466a1928f1768797bb84af7be4766c46015ad7
|
3 |
+
size 4907411096
|
model-00007-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0d2ca79b079e0a6e57b621a544c48779273f17b9ac9cf9c264173c837f98dc0
|
3 |
+
size 4907435776
|
model-00008-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5d841dd04684bf192973da792d6a2363ec3c39959ebd3323287f2f695d19de9
|
3 |
+
size 4781557264
|
model-00009-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:443a7f49dd58d56be9d813da072f45b159557aa0b9b46b4597c8b8f79134032e
|
3 |
+
size 4781656640
|
model-00010-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9c5fdfbf7a2e8aaa882861effd7e0cf4a0bab3ac74015bd1bb656dfb06fa649
|
3 |
+
size 4882220464
|
model-00011-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e95e0d1b4b6cf304dab23db33232bb6f39e88dea87a60eb81ac105fd93c89aeb
|
3 |
+
size 4932601720
|
model-00012-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71bdfa31cb8320e1864cf58ea149c9a56728ec13911e51063806686313f78ac7
|
3 |
+
size 4781557264
|
model-00013-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c15ff41487dded4623360130b2f7cd8c997310262e991b9137c91351119f2c9
|
3 |
+
size 4831938552
|
model-00014-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ce7dba47635c77726bb5f3d2a07970ed3253899bc27dd5541028a177bfa8222
|
3 |
+
size 4831938544
|
model-00015-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b4c3d04a4cda3362bd7689a216feeba9b0bdeead8f2e3021dae6ef25bd62adf
|
3 |
+
size 4831938552
|
model-00016-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b34d496f78d1cb56b9864e9dca9cd477c848d270d5a99df2abaf1ba0477ccc7
|
3 |
+
size 4831938552
|
model-00017-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f301d2c92008aea4cc85f011ca86573032c900682e8772d822cb6ff475f1a292
|
3 |
+
size 4831938552
|
model-00018-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07cfc38de79de9f5ccca2ad25958587f2caaa3ba5383409ea925d109ccd961af
|
3 |
+
size 4882220464
|
model-00019-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1627b44d8b4d61caa922e616bbb730bd91791591785cccd92870e3fbf3078d9a
|
3 |
+
size 4932601720
|
model-00020-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6fbd309143fb9d433d34e840881bf4c6e16f5c9bc5383074bf4d276f4901b6e
|
3 |
+
size 4781557264
|
model-00021-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34ae7d60460fd1aa5cbda769cef9cf7694d11cdd9077c44b23fd10408381cc0b
|
3 |
+
size 4781656640
|
model-00022-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aab219887fd08e409503bab8209d2c7430f91e5a7c11e19c55e8ef00e0030c03
|
3 |
+
size 4882220464
|
model-00023-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7afed163b0891dbd7522ec44a2979c893b02e749bfbf766ba4a4eea4c243328b
|
3 |
+
size 4932601720
|
model-00024-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c82c29c2ffd41eda783c206ff5dafede2296ae1c4fb21d1df0f3adca408e3ece
|
3 |
+
size 4781557264
|
model-00025-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23425737e50360cf5ac13db09f5e5835b157a274c91b63e3b0a2bf65da2abbc7
|
3 |
+
size 4831938552
|
model-00026-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bac0f59eabb99fe222544dcfdd2c3cbe0f94d4c96cea9fdce09940c16493c18
|
3 |
+
size 4831938544
|
model-00027-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc003e19269a8593ee1d2df6b12e2b53084ab85cb41bfa3661ce2cae9c6bbcb8
|
3 |
+
size 4831938552
|
model-00028-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbfeead16cff7fe99caacac86b9af11c3a6b12b8693116b86d4361834636b445
|
3 |
+
size 4831938552
|
model-00029-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caefa032ecd6078694c5e56775bf6684784d3978241e0af80376bc8699b1afe9
|
3 |
+
size 4831938552
|
model-00030-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3982236fe30a62ff7ee7da32eb81a2ef8cef60f3544b3bcc8a33e2e942ee3799
|
3 |
+
size 4882220464
|
model-00031-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a234d791d25e5a8bf9154bca35755ad92a0b7985d6ceb9b815c4266181134c5a
|
3 |
+
size 4932601720
|
model-00032-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fa6299427f3718c5a5aa7ac2b3b81b58049cdca31c59c070e1f317649bcb86c
|
3 |
+
size 4781557264
|
model-00033-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bf4f397e8b7cf9503cb9165a757d5435dca308cce226cff57c5a619e71a80ab
|
3 |
+
size 4781656640
|
model-00034-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:970512407aedecb3152d9307856e106c1b1ff7c574c750a554091f9278a00fe1
|
3 |
+
size 4882220464
|
model-00035-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3220aa78e6d74f0747f813b7e8a7a0a4c522beb723c293f2fae633e9072b84b
|
3 |
+
size 4932601720
|
model-00036-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb848a0f8a12c6f282782a9f63e73461eba2d3319b182d90058db0f4d5cf77c2
|
3 |
+
size 4781557264
|
model-00037-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f172f73858c7d012107d76a76422ed07d80a52b9141011ff116f901f048493fe
|
3 |
+
size 4831938552
|
model-00038-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7771b47cd09474e2eee3d8e8ce1c980c41a872bf4e21636a5fd143c279320a7
|
3 |
+
size 4831938544
|
model-00039-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4aa031ee0e3d0518c5190b9bfd0aa772c4209769bb3cfc67cd2ca98676476ede
|
3 |
+
size 4831938552
|
model-00040-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15de36c68ff41c9a0c285629f528085767274f1137ba2ccda342d5a916dad90
|
3 |
+
size 4831938552
|
model-00041-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f2cfefeb8207baf5af43b7ac745b6881bbcf31047174a7efe79bcfd30dcd14f
|
3 |
+
size 4831938552
|
model-00042-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:708054b53572d642291215b57f3c6395b4721360b838e4ac9171780096a1a94d
|
3 |
+
size 4882220464
|
model-00043-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2921cdc7385e7880a590af8c0039b2b838dc64b46f5fd5b2f09746baccb65815
|
3 |
+
size 4932601720
|
model-00044-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:267f621545ff71e848a754135c19a0a98595dd015d79313220179f53013d9214
|
3 |
+
size 4781557272
|
model-00045-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e942f90bf1cb33aecba10f88239ea3ab43c7923ec2577aff0a7dabbf32c6531c
|
3 |
+
size 4781656632
|
model-00046-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5388de04f579b7755fa4c07109610e14cb6a3066237efe8683198ca7f9a7ec52
|
3 |
+
size 4882220464
|
model-00047-of-00081.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e8110cca10ad9f6df5669e7af3496976a4cce621044c14c7805b7e44b00d3bd
|
3 |
+
size 4932601728
|