Upload LlamaForCausalLM
- config.json +1 -1
- model.safetensors +1 -1
- recipe.yaml +1 -1
config.json
CHANGED
@@ -51,7 +51,7 @@
     "quantization_status": "compressed",
     "sparsity_config": {
       "format": "dense",
-      "global_sparsity": 0.
+      "global_sparsity": 0.45048483360247604,
       "ignore": [
         "lm_head"
       ],
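For context, the sparsity_config and quantization_status entries are what the compressed-tensors integration in transformers reads back when the checkpoint is loaded. A minimal loading sketch, assuming the compressed-tensors package is installed alongside transformers and using a hypothetical repo id in place of this one:

# Loading sketch (assumptions: the repo id below is hypothetical, and the
# compressed-tensors package must be installed for transformers to interpret
# the sparsity_config / quantization_status fields in config.json).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-org/llama-2of4-sparse"  # hypothetical repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
)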
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3774433910fd03838b3bea93174ae7156782dfc32afb7df2a4138ac7d86b121b
 size 4721963376
recipe.yaml
CHANGED
@@ -1,6 +1,6 @@
 sparsity_stage:
   sparsity_modifiers:
-    SparseGPTModifier: {sparsity: 0.
+    SparseGPTModifier: {sparsity: 0.9, mask_structure: '2:4', sequential_update: false}
   run_type: &id001 !!python/object/apply:llmcompressor.recipe.stage.StageRunType [oneshot]
 finetuning_stage:
   finetuning_modifiers:
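For reference, the updated SparseGPTModifier line describes a one-shot 2:4 pruning pass. A minimal sketch of running just that sparsity stage in Python, assuming llmcompressor's oneshot entrypoint and its open_platypus example dataset; the model path, output directory, and calibration settings are illustrative, and the recipe's finetuning_stage is omitted:

# Sketch of the sparsity_stage only; paths, dataset, and calibration sizes
# are illustrative assumptions, not taken from this commit.
from llmcompressor import oneshot
from llmcompressor.modifiers.obcq import SparseGPTModifier

recipe = SparseGPTModifier(
    sparsity=0.9,             # matches recipe.yaml
    mask_structure="2:4",     # semi-structured 2:4 mask
    sequential_update=False,
)

oneshot(
    model="path/to/base-llama",   # assumed base checkpoint
    dataset="open_platypus",      # example calibration dataset
    recipe=recipe,
    max_seq_length=2048,
    num_calibration_samples=512,
    output_dir="llama-2of4-sparse",
)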