hey1800 committed on
Commit
4e931e5
·
verified ·
1 Parent(s): 1e99b77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -8
app.py CHANGED
@@ -1,17 +1,13 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, GPTJForCausalLM, BitsAndBytesConfig
3
- import torch
4
 
5
  model_name = "rycont/kakaobrain__kogpt-6b-8bit"
6
 
7
- # BitsAndBytesConfig 설정
8
- bnb_config = BitsAndBytesConfig(load_in_8bit=True)
9
-
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
- model = GPTJForCausalLM.from_pretrained(model_name, device_map="auto", quantization_config=bnb_config)
12
 
13
  def generate_response(prompt):
14
- inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
15
  outputs = model.generate(inputs['input_ids'], max_new_tokens=50)
16
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
17
  return response
@@ -20,7 +16,7 @@ iface = gr.Interface(
20
  fn=generate_response,
21
  inputs="text",
22
  outputs="text",
23
- title="KoGPT-6B 8bit Chatbot",
24
  description="Enter a prompt and the model will generate a response."
25
  )
26
 
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, GPTJForCausalLM
 
3
 
4
  model_name = "rycont/kakaobrain__kogpt-6b-8bit"
5
 
 
 
 
6
  tokenizer = AutoTokenizer.from_pretrained(model_name)
7
+ model = GPTJForCausalLM.from_pretrained(model_name)
8
 
9
def generate_response(prompt, max_new_tokens=50):
    """Generate a model continuation for a user prompt.

    Args:
        prompt: Input text typed by the user.
        max_new_tokens: Maximum number of tokens to generate beyond the
            prompt (default 50, matching the original behavior).

    Returns:
        The decoded output string. Note this includes the prompt itself,
        since the full generated sequence is decoded.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    # Unpack the whole encoding (input_ids AND attention_mask) instead of
    # passing only input_ids: omitting the attention mask makes generate()
    # guess it from pad tokens and emits a warning in transformers.
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
 
16
  fn=generate_response,
17
  inputs="text",
18
  outputs="text",
19
+ title="KoGPT-6B Chatbot",
20
  description="Enter a prompt and the model will generate a response."
21
  )
22