hey1800 committed on
Commit
a290be8
·
verified ·
1 Parent(s): a211c4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -1,10 +1,14 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, GPTJForCausalLM
3
  import torch
4
 
5
  model_name = "rycont/kakaobrain__kogpt-6b-8bit"
 
 
 
 
6
  tokenizer = AutoTokenizer.from_pretrained(model_name)
7
- model = GPTJForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
8
 
9
  def generate_response(prompt):
10
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, GPTJForCausalLM, BitsAndBytesConfig
3
  import torch
4
 
5
  model_name = "rycont/kakaobrain__kogpt-6b-8bit"
6
+
7
+ # BitsAndBytesConfig 설정
8
+ bnb_config = BitsAndBytesConfig(load_in_8bit=True)
9
+
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
+ model = GPTJForCausalLM.from_pretrained(model_name, device_map="auto", quantization_config=bnb_config)
12
 
13
  def generate_response(prompt):
14
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")