Spaces:

OpenSourceRonin
/

VPTQ_demo

Running on Zero

OpenSourceRonin committed on Oct 16, 2024

Commit

0ec2418

verified ·

1 Parent(s): 9da61be

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -29,21 +29,13 @@ models = [
         "bits": "3 bits"
     },
     {
-        "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k65536-65536-woft",
-        "bits": "4 bits"
-    },
-    {
-        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-65536-woft",
-        "bits": "4 bits"
     },
     {
         "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-256-woft",
         "bits": "3 bits"
     },
-    {
-        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft",
-        "bits": "2 bits"
-    },
 ]
 # Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
@@ -177,7 +169,7 @@ download_thread.start()
 loaded_models = {}
-@spaces.GPU(duration=120)
 def respond(
     message,
     history: list[tuple[str, str]],

         "bits": "3 bits"
     },
     {
+        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft",
+        "bits": "2 bits"
     },
     {
         "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-256-woft",
         "bits": "3 bits"
     },
 ]
 # Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
 loaded_models = {}
+@spaces.GPU
 def respond(
     message,
     history: list[tuple[str, str]],