Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -29,21 +29,13 @@ models = [
|
|
29 |
"bits": "3 bits"
|
30 |
},
|
31 |
{
|
32 |
-
"name": "VPTQ-community/
|
33 |
-
"bits": "
|
34 |
-
},
|
35 |
-
{
|
36 |
-
"name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-65536-woft",
|
37 |
-
"bits": "4 bits"
|
38 |
},
|
39 |
{
|
40 |
"name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-256-woft",
|
41 |
"bits": "3 bits"
|
42 |
},
|
43 |
-
{
|
44 |
-
"name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft",
|
45 |
-
"bits": "2 bits"
|
46 |
-
},
|
47 |
]
|
48 |
|
49 |
# Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
|
@@ -177,7 +169,7 @@ download_thread.start()
|
|
177 |
|
178 |
loaded_models = {}
|
179 |
|
180 |
-
@spaces.GPU
|
181 |
def respond(
|
182 |
message,
|
183 |
history: list[tuple[str, str]],
|
|
|
29 |
"bits": "3 bits"
|
30 |
},
|
31 |
{
|
32 |
+
"name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft",
|
33 |
+
"bits": "2 bits"
|
|
|
|
|
|
|
|
|
34 |
},
|
35 |
{
|
36 |
"name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-256-woft",
|
37 |
"bits": "3 bits"
|
38 |
},
|
|
|
|
|
|
|
|
|
39 |
]
|
40 |
|
41 |
# Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
|
|
|
169 |
|
170 |
loaded_models = {}
|
171 |
|
172 |
+
@spaces.GPU
|
173 |
def respond(
|
174 |
message,
|
175 |
history: list[tuple[str, str]],
|