arunb74 committed on
Commit
7186535
1 Parent(s): 221d82e

Update README.md

Browse files

Edited the content

Files changed (1) hide show
  1. README.md +0 -46
README.md CHANGED
@@ -35,52 +35,6 @@ model = "arunb74/Luxeai-anu-1-bit-70M"
35
  tokenizer = AutoTokenizer.from_pretrained(model)
36
  model = AutoModelForCausalLM.from_pretrained(model)
37
 
38
- def activation_norm_quant(x):
39
- x = RMSNorm(x)
40
- scale = 127.0 / x.abs().max(dim=-1, keepdim=True).values.clamp_(min=1e-5)
41
- y = (x * scale).round().clamp_(-128, 127)
42
- y = y / scale
43
- return y, scale
44
-
45
- def weight_quant(w):
46
- scale = 1.0 / w.abs().mean().clamp_(min=1e-5)
47
- u = (w * scale).round().clamp_(-1, 1)
48
- u = u / scale
49
- return u
50
-
51
- class BitNetInference(nn.Linear):
52
- def forward(self, x):
53
- w = self.weight # a weight tensor with shape [d, k]
54
- x = x.to(w.device)
55
- RMSNorm = LlamaRMSNorm(x.shape[-1]).to(w.device)
56
- x_norm = RMSNorm(x)
57
- # A trick for implementing Straight-Through-Estimator (STE) using detach()
58
- x_quant = x_norm + (activation_norm_quant(x_norm) - x_norm).detach()
59
- w_quant = w + (weight_quant(w) - w).detach()
60
- y = F.linear(x_quant, w_quant)
61
- return y
62
-
63
-
64
- def convert_to_bitnet(model, copy_weights):
65
- for name, module in model.named_modules():
66
- # Replace linear layers with BitNet
67
- if isinstance(module, LlamaSdpaAttention) or isinstance(module, LlamaMLP):
68
- for child_name, child_module in module.named_children():
69
- if isinstance(child_module, nn.Linear):
70
- bitlinear = BitNetInference(child_module.in_features, child_module.out_features, child_module.bias is not None).to(device="cuda:0")
71
- if copy_weights:
72
- bitlinear.weight = child_module.weight
73
- if child_module.bias is not None:
74
- bitlinear.bias = child_module.bias
75
- setattr(module, child_name, bitlinear)
76
- # Remove redundant input_layernorms
77
- elif isinstance(module, LlamaDecoderLayer):
78
- for child_name, child_module in module.named_children():
79
- if isinstance(child_module, LlamaRMSNorm) and child_name == "input_layernorm":
80
- setattr(module, child_name, nn.Identity().to(device="cuda:0"))
81
-
82
-
83
- convert_to_bitnet(model, copy_weights=True)
84
 
85
  # Create a text generation pipeline
86
  pipe = pipeline(
 
35
  tokenizer = AutoTokenizer.from_pretrained(model)
36
  model = AutoModelForCausalLM.from_pretrained(model)
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  # Create a text generation pipeline
40
  pipe = pipeline(