vmuchinov committed on
Commit
76a6d8e
1 Parent(s): a75c7a4

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +4 -6
  2. requirements.txt +1 -1
app.py CHANGED
@@ -12,18 +12,16 @@ DEFAULT_MAX_NEW_TOKENS = 1024
12
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
13
  ACCESS_TOKEN = os.getenv("HF_TOKEN", "")
14
 
15
- model_id = "Qwen/Qwen2.5-Coder-32B-Instruct-GGUF"
16
- filename = "qwen2.5-coder-32b-instruct-q4_k_m.gguf"
17
  model = AutoModelForCausalLM.from_pretrained(
18
  model_id,
19
- gguf_file=filename,
20
  torch_dtype=torch.float16,
21
  device_map="auto",
22
  trust_remote_code=True,
23
  token=ACCESS_TOKEN)
24
  tokenizer = AutoTokenizer.from_pretrained(
25
  model_id,
26
- gguf_file=filename,
27
  trust_remote_code=True,
28
  token=ACCESS_TOKEN)
29
  tokenizer.use_default_system_prompt = False
@@ -55,7 +53,7 @@ def generate(
55
  ]
56
  '''
57
 
58
- streamer = TextIteratorStreamer(tokenizer, timeout=300.0, skip_prompt=True, skip_special_tokens=True)
59
  generate_kwargs = dict(
60
  {"input_ids": input_ids},
61
  streamer=streamer,
@@ -106,7 +104,7 @@ chat_interface = gr.Interface(
106
  value=1.0,
107
  ),
108
  ],
109
- title="Model testing - Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int4",
110
  description="Provide system settings and a prompt to interact with the model.",
111
  )
112
 
 
12
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
13
  ACCESS_TOKEN = os.getenv("HF_TOKEN", "")
14
 
15
+ model_id = "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8"
16
+
17
  model = AutoModelForCausalLM.from_pretrained(
18
  model_id,
 
19
  torch_dtype=torch.float16,
20
  device_map="auto",
21
  trust_remote_code=True,
22
  token=ACCESS_TOKEN)
23
  tokenizer = AutoTokenizer.from_pretrained(
24
  model_id,
 
25
  trust_remote_code=True,
26
  token=ACCESS_TOKEN)
27
  tokenizer.use_default_system_prompt = False
 
53
  ]
54
  '''
55
 
56
+ streamer = TextIteratorStreamer(tokenizer, timeout=600.0, skip_prompt=True, skip_special_tokens=True)
57
  generate_kwargs = dict(
58
  {"input_ids": input_ids},
59
  streamer=streamer,
 
104
  value=1.0,
105
  ),
106
  ],
107
+ title="Model testing - Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8",
108
  description="Provide system settings and a prompt to interact with the model.",
109
  )
110
 
requirements.txt CHANGED
@@ -88,7 +88,7 @@ mpmath==1.3.0
88
  # via sympy
89
  networkx==3.3
90
  # via torch
91
- numpy<2.0.0
92
  # via
93
  # accelerate
94
  # bitsandbytes
 
88
  # via sympy
89
  networkx==3.3
90
  # via torch
91
+ numpy==2.1.1
92
  # via
93
  # accelerate
94
  # bitsandbytes