Spaces:

CreitinGameplays
/

bloom-3b-conversational-gradio

Sleeping

CreitinGameplays committed on Apr 12

Commit

42753e5

•

1 Parent(s): e36f792

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,13 +1,22 @@
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 # Define the BLOOM model name
 model_name = "CreitinGameplays/bloom-3b-conversational"
 # Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
 def generate_text(user_prompt):
   """Generates text using the BLOOM model from Hugging Face Transformers and removes the user prompt."""

 import gradio as gr
 import torch
+import bitsandbytes as bnb
 from transformers import AutoTokenizer, AutoModelForCausalLM
+# BNB config
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
 # Define the BLOOM model name
 model_name = "CreitinGameplays/bloom-3b-conversational"
 # Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
 def generate_text(user_prompt):
   """Generates text using the BLOOM model from Hugging Face Transformers and removes the user prompt."""