Spaces:

DR-Rakshitha
/

wizardlm_api

App Files Files Community

DR-Rakshitha committed on Oct 1, 2023

Commit

0c8c4f5

•

1 Parent(s): 7217acf

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -18

app.py CHANGED Viewed

@@ -1,26 +1,118 @@
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-# # Specify the directory containing the model and tokenizer
-# model_name = "gpt4all"  # Make sure this matches the actual model directory
-# model_path = f"./"  # Path to the model directory
-# # Initialize the GPT-4 model and tokenizer
-# model = AutoModelForCausalLM.from_pretrained(model_path)
-# tokenizer = AutoTokenizer.from_pretrained(model_path)
-from gpt4all import GPT4All
-model = GPT4All("wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin")
-# output = model.generate("How to go to the hospital?")
-# print(output)
-def generate_text(input_text):
-    # input_ids = tokenizer(input_text, return_tensors="pt").input_ids
-    # generated_ids = model.generate(input_ids)
-    # generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-    output = model.generate(input_text)
-    return output
 text_generation_interface = gr.Interface(
     fn=generate_text,

+# import gradio as gr
+# from transformers import AutoModelForCausalLM, AutoTokenizer
+# from gpt4all import GPT4All
+# model = GPT4All("wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin")
+#----------------------------------------------------------------------------------------------------------------------------
+!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
+import os
+import torch
+from datasets import load_dataset
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    HfArgumentParser,
+    TrainingArguments,
+    pipeline,
+    logging,
+)
+from peft import LoraConfig, PeftModel
+from trl import SFTTrainer
+ # -----------------------------------------------------------------------------------------------------------------------------------------------------------------
+# LoRA attention dimension
+lora_r = 64
+# Alpha parameter for LoRA scaling
+lora_alpha = 16
+# Dropout probability for LoRA layers
+lora_dropout = 0.1
+################################################################################
+# bitsandbytes parameters
+################################################################################
+# Activate 4-bit precision base model loading
+use_4bit = True
+# Compute dtype for 4-bit base models
+bnb_4bit_compute_dtype = "float16"
+# Quantization type (fp4 or nf4)
+bnb_4bit_quant_type = "nf4"
+# Activate nested quantization for 4-bit base models (double quantization)
+use_nested_quant = False
+# Load the entire model on GPU 0
+device_map = {"": 0}
+#----------------------------------------------------------------------------------------------------------------------------------------------------------------------
+model_name = "DR-DRR/Model_001"
+model_basename = "pytorch_model-00001-of-00002.bin" # the model is in bin format
+#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# Load tokenizer and model with QLoRA configuration
+compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=use_4bit,
+    bnb_4bit_quant_type=bnb_4bit_quant_type,
+    bnb_4bit_compute_dtype=compute_dtype,
+    bnb_4bit_use_double_quant=use_nested_quant,
+)
+# Check GPU compatibility with bfloat16
+if compute_dtype == torch.float16 and use_4bit:  # only runs when the configured compute dtype is float16 and 4-bit loading is enabled
+    major, _ = torch.cuda.get_device_capability()  # keep the first element of the returned pair, discard the second; NOTE(review): presumably needs a visible CUDA device - confirm
+    if major >= 8:  # NOTE(review): presumably "compute capability 8.x or newer" - confirm against the torch docs
+        print("=" * 80)
+        print("Your GPU supports bfloat16: accelerate training with bf16=True")  # informational only: nothing is reconfigured here, and the message talks about training
+        print("=" * 80)
+# Load base model
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    quantization_config=bnb_config,
+    device_map=device_map
+)
+model.config.use_cache = False
+model.config.pretraining_tp = 1
+# Load LLaMA tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+tokenizer.pad_token = tokenizer.eos_token
+tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training
+# Load LoRA configuration
+peft_config = LoraConfig(
+    lora_alpha=lora_alpha,
+    lora_dropout=lora_dropout,
+    r=lora_r,
+    bias="none",
+    task_type="CAUSAL_LM",
+)
+#---------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# Ignore warnings
+logging.set_verbosity(logging.CRITICAL)
+# Run text generation pipeline with our next model
+# prompt = "What is a large language model?"
+# pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
+# result = pipe(f"<s>[INST] {prompt} [/INST]")
+# print(result[0]['generated_text'])
+def generate_text(prompt):  # passed as `fn` to gr.Interface below; takes the prompt and returns the pipeline result for it
+    # output = model.generate(input_text)
+    pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)  # a new pipeline is built from the module-level model/tokenizer on every call; NOTE(review): could presumably be built once at module level - confirm
+    result = pipe(f"<s>[INST] {prompt} [/INST]")  # wraps the prompt in the "<s>[INST] ... [/INST]" template before generation
+    return result  # returns the pipeline result object as-is; the commented-out example above reads result[0]['generated_text'] instead
 text_generation_interface = gr.Interface(
     fn=generate_text,