AI-B committed on
Commit
667f1e5
·
verified ·
1 Parent(s): 5760858

🔧🚀 Updated app.py: BitNet Model Repo Swap

Browse files

Swapping out the dead "microsoft/bitnet-b1.58-2B-4T" identifier for the live "1bitLLM/bitnet_b1_58-large" repo, which actually carries the needed configuration_bitnet.py (and its tokenizer/model code).

Files changed (1) hide show
  1. app.py +3 -8
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
4
 
5
  # Singleton for model and tokenizer
6
  _model = None
@@ -9,18 +9,13 @@ _tokenizer = None
9
  def load_model():
10
  global _model, _tokenizer
11
  if _model is None or _tokenizer is None:
12
- model_id = "microsoft/bitnet-b1.58-2B-4T"
13
  _tokenizer = AutoTokenizer.from_pretrained(
14
  model_id,
15
  trust_remote_code=True
16
  )
17
- config = AutoConfig.from_pretrained(
18
- model_id,
19
- trust_remote_code=True
20
- )
21
  _model = AutoModelForCausalLM.from_pretrained(
22
  model_id,
23
- config=config,
24
  torch_dtype=torch.bfloat16,
25
  trust_remote_code=True
26
  )
@@ -157,4 +152,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
157
  if __name__ == "__main__":
158
  # Preload model to avoid threading issues
159
  load_model()
160
- demo.launch(ssr_mode=False, share=True)
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
  # Singleton for model and tokenizer
6
  _model = None
 
9
  def load_model():
10
  global _model, _tokenizer
11
  if _model is None or _tokenizer is None:
12
+ model_id = "1bitLLM/bitnet_b1_58-large" # patched from microsoft/bitnet-b1.58-2B-4T
13
  _tokenizer = AutoTokenizer.from_pretrained(
14
  model_id,
15
  trust_remote_code=True
16
  )
 
 
 
 
17
  _model = AutoModelForCausalLM.from_pretrained(
18
  model_id,
 
19
  torch_dtype=torch.bfloat16,
20
  trust_remote_code=True
21
  )
 
152
  if __name__ == "__main__":
153
  # Preload model to avoid threading issues
154
  load_model()
155
+ demo.launch(ssr_mode=False, share=True)