vmuchinov committed on
Commit
76a6d8e
1 Parent(s): a75c7a4

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +4 -6
  2. requirements.txt +1 -1
app.py CHANGED
@@ -12,18 +12,16 @@ DEFAULT_MAX_NEW_TOKENS = 1024
12
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
13
  ACCESS_TOKEN = os.getenv("HF_TOKEN", "")
14
 
15
- model_id = "Qwen/Qwen2.5-Coder-32B-Instruct-GGUF"
16
- filename = "qwen2.5-coder-32b-instruct-q4_k_m.gguf"
17
  model = AutoModelForCausalLM.from_pretrained(
18
  model_id,
19
- gguf_file=filename,
20
  torch_dtype=torch.float16,
21
  device_map="auto",
22
  trust_remote_code=True,
23
  token=ACCESS_TOKEN)
24
  tokenizer = AutoTokenizer.from_pretrained(
25
  model_id,
26
- gguf_file=filename,
27
  trust_remote_code=True,
28
  token=ACCESS_TOKEN)
29
  tokenizer.use_default_system_prompt = False
@@ -55,7 +53,7 @@ def generate(
55
  ]
56
  '''
57
 
58
- streamer = TextIteratorStreamer(tokenizer, timeout=300.0, skip_prompt=True, skip_special_tokens=True)
59
  generate_kwargs = dict(
60
  {"input_ids": input_ids},
61
  streamer=streamer,
@@ -106,7 +104,7 @@ chat_interface = gr.Interface(
106
  value=1.0,
107
  ),
108
  ],
109
- title="Model testing - Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int4",
110
  description="Provide system settings and a prompt to interact with the model.",
111
  )
112
 
 
12
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
13
  ACCESS_TOKEN = os.getenv("HF_TOKEN", "")
14
 
15
+ model_id = "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8"
16
+
17
  model = AutoModelForCausalLM.from_pretrained(
18
  model_id,
 
19
  torch_dtype=torch.float16,
20
  device_map="auto",
21
  trust_remote_code=True,
22
  token=ACCESS_TOKEN)
23
  tokenizer = AutoTokenizer.from_pretrained(
24
  model_id,
 
25
  trust_remote_code=True,
26
  token=ACCESS_TOKEN)
27
  tokenizer.use_default_system_prompt = False
 
53
  ]
54
  '''
55
 
56
+ streamer = TextIteratorStreamer(tokenizer, timeout=600.0, skip_prompt=True, skip_special_tokens=True)
57
  generate_kwargs = dict(
58
  {"input_ids": input_ids},
59
  streamer=streamer,
 
104
  value=1.0,
105
  ),
106
  ],
107
+ title="Model testing - Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8",
108
  description="Provide system settings and a prompt to interact with the model.",
109
  )
110
 
requirements.txt CHANGED
@@ -88,7 +88,7 @@ mpmath==1.3.0
88
  # via sympy
89
  networkx==3.3
90
  # via torch
91
- numpy<2.0.0
92
  # via
93
  # accelerate
94
  # bitsandbytes
 
88
  # via sympy
89
  networkx==3.3
90
  # via torch
91
+ numpy==2.1.1
92
  # via
93
  # accelerate
94
  # bitsandbytes