sharpenb committed · verified
Commit 1d452ff · 1 Parent(s): 54e0249

Upload folder using huggingface_hub (#4)


- f64dc01db5d029dd0e0af6a2d648a932c94d5d60899dd7258509436741a21972 (461d0a8bfc3d98af8b552abd1cdfb061ca009ba7)
- 125cfa1d90b1e1cfc8321e65140f6217c0025144a9af712e12d4a048b1137bc1 (e0e15ad40e0bc95c359426f9961a9d150b1aa5e3)

Files changed (3):
  1. config.json (+21 −1)
  2. qmodel.pt (+2 −2)
  3. smash_config.json (+1 −1)
config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_attn_implementation_autoset": true,
-  "_name_or_path": "PowerInfer/SmallThinker-3B-Preview",
+  "_name_or_path": "/tmp/models/tmpdm8gg0sl/tmp0_8ifq2x",
   "architectures": [
     "Qwen2ForCausalLM"
   ],
@@ -17,6 +17,26 @@
   "num_attention_heads": 16,
   "num_hidden_layers": 36,
   "num_key_value_heads": 2,
+  "quantization_config": {
+    "quant_config": {
+      "offload_meta": false,
+      "scale_quant_params": null,
+      "weight_quant_params": {
+        "axis": 1,
+        "channel_wise": true,
+        "group_size": 64,
+        "nbits": 8,
+        "optimize": true,
+        "round_zero": false,
+        "view_as_float": false
+      },
+      "zero_quant_params": null
+    },
+    "quant_method": "hqq",
+    "skip_modules": [
+      "lm_head"
+    ]
+  },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 1000000.0,
qmodel.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9baca159124a5a857abcb0465d0a7ee2f56deb08118880d54a3cc247372a20c
-size 3571055856
+oid sha256:eb8040ddec59d65a1bfbade0823d9351c892a4755ad27fa3ee7f7c11060db435
+size 4367082836
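`qmodel.pt` is tracked via Git LFS, so the diff above only touches the pointer file: the checkpoint grew from about 3.33 GiB to about 4.07 GiB. A `qmodel.pt` file is the layout the hqq library writes when saving a quantized model; assuming that is the intended loading path here, a sketch of loading it looks roughly as follows. The repo id is a placeholder and the exact signature may differ across hqq versions, so check the model card for the author's loading code.

```python
# Sketch: loading an HQQ-quantized checkpoint (qmodel.pt) via the hqq
# library's Hugging Face engine. Repo id is a placeholder.
from hqq.engine.hf import HQQModelForCausalLM

model = HQQModelForCausalLM.from_quantized(
    "<namespace>/<repo-name>",  # placeholder repo id
    device="cuda",              # matches "device": "cuda" in smash_config.json
)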
smash_config.json CHANGED
@@ -11,7 +11,7 @@
   "quant_hqq_weight_bits": 8,
   "max_batch_size": 1,
   "device": "cuda",
-  "cache_dir": "/tmp/models/tmpeqq5s829",
+  "cache_dir": "/tmp/models/tmpdm8gg0sl",
   "task": "",
   "save_load_fn": "hqq",
   "save_load_fn_args": {},