dacorvo HF staff committed on
Commit
f4f3dcd
·
verified ·
1 Parent(s): 0701bb9

Add DeepSeek distilled models

Browse files
Files changed (1) hide show
  1. inference-cache-config/qwen2.5.json +60 -0
inference-cache-config/qwen2.5.json CHANGED
@@ -30,6 +30,38 @@
30
  "num_cores": 8,
31
  "auto_cast_type": "bf16"
32
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  ],
34
  "Qwen/Qwen2.5-0.5B": [
35
  {
@@ -59,6 +91,20 @@
59
  "auto_cast_type": "bf16"
60
  }
61
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  "Qwen/Qwen2.5-14B": [
63
  {
64
  "batch_size": 1,
@@ -72,5 +118,19 @@
72
  "num_cores": 8,
73
  "auto_cast_type": "bf16"
74
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  ]
76
  }
 
30
  "num_cores": 8,
31
  "auto_cast_type": "bf16"
32
  }
33
+ ],
34
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": [
35
+ {
36
+ "batch_size": 1,
37
+ "sequence_length": 4096,
38
+ "num_cores": 2,
39
+ "auto_cast_type": "bf16"
40
+ },
41
+ {
42
+ "batch_size": 4,
43
+ "sequence_length": 4096,
44
+ "num_cores": 2,
45
+ "auto_cast_type": "bf16"
46
+ },
47
+ {
48
+ "batch_size": 8,
49
+ "sequence_length": 4096,
50
+ "num_cores": 2,
51
+ "auto_cast_type": "bf16"
52
+ },
53
+ {
54
+ "batch_size": 1,
55
+ "sequence_length": 4096,
56
+ "num_cores": 8,
57
+ "auto_cast_type": "bf16"
58
+ },
59
+ {
60
+ "batch_size": 32,
61
+ "sequence_length": 4096,
62
+ "num_cores": 8,
63
+ "auto_cast_type": "bf16"
64
+ }
65
  ],
66
  "Qwen/Qwen2.5-0.5B": [
67
  {
 
91
  "auto_cast_type": "bf16"
92
  }
93
  ],
94
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": [
95
+ {
96
+ "batch_size": 1,
97
+ "sequence_length": 4096,
98
+ "num_cores": 2,
99
+ "auto_cast_type": "bf16"
100
+ },
101
+ {
102
+ "batch_size": 4,
103
+ "sequence_length": 4096,
104
+ "num_cores": 2,
105
+ "auto_cast_type": "bf16"
106
+ }
107
+ ],
108
  "Qwen/Qwen2.5-14B": [
109
  {
110
  "batch_size": 1,
 
118
  "num_cores": 8,
119
  "auto_cast_type": "bf16"
120
  }
121
+ ],
122
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": [
123
+ {
124
+ "batch_size": 1,
125
+ "sequence_length": 4096,
126
+ "num_cores": 8,
127
+ "auto_cast_type": "bf16"
128
+ },
129
+ {
130
+ "batch_size": 16,
131
+ "sequence_length": 4096,
132
+ "num_cores": 8,
133
+ "auto_cast_type": "bf16"
134
+ }
135
  ]
136
  }