Prasanna Dhungana committed on
Commit
661fe9a
1 Parent(s): bed7540

updated quantization config

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -1,15 +1,16 @@
1
  import gradio as gr
2
  import torch
3
  from peft import PeftModel, PeftConfig
4
- from transformers import AutoTokenizer, AutoModelForCausalLM
5
 
6
 
7
  #Loading model
 
 
8
  model_path = "parsanna17/finetune_starcoder2_with_R_data"
9
  checkpoint = "bigcode/starcoder2-3b"
10
- device = "cuda" if torch.cuda.is_available() else "cpu"
11
  config = PeftConfig.from_pretrained(model_path)
12
- model = AutoModelForCausalLM.from_pretrained(checkpoint , device_map=device, torch_dtype=torch.bfloat16)
13
  model = PeftModel.from_pretrained(model, model_path).to(device)
14
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
15
 
 
1
  import gradio as gr
2
  import torch
3
  from peft import PeftModel, PeftConfig
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
5
 
6
 
7
  #Loading model
8
+ quantization_config = BitsAndBytesConfig(load_in_4bit=True)
9
+
10
  model_path = "parsanna17/finetune_starcoder2_with_R_data"
11
  checkpoint = "bigcode/starcoder2-3b"
 
12
  config = PeftConfig.from_pretrained(model_path)
13
+ model = AutoModelForCausalLM.from_pretrained(checkpoint, quantization_config=quantization_config)
14
  model = PeftModel.from_pretrained(model, model_path).to(device)
15
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
16