Update README.md
README.md (changed)
@@ -51,6 +51,9 @@ from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
 import torch
 from peft import PeftModel, PeftConfig
 from transformers import AutoModelForCausalLM, pipeline
+import logging
+# Suppress transformers warnings
+logging.getLogger("transformers").setLevel(logging.CRITICAL)  # avoids a spurious warning that appears during inference
 
 # Check if CUDA is available
 if torch.cuda.is_available():
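Note that the CRITICAL-level filter added above silences every message from the transformers logger, not only the inference-time warning mentioned in the comment. A lighter-touch sketch, if preferred, using the library's own verbosity helper; the choice of error-level verbosity here is an assumption, not part of the commit:

from transformers.utils import logging as hf_logging

# Keep errors visible while hiding info- and warning-level chatter from transformers
hf_logging.set_verbosity_error()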
@@ -70,6 +73,10 @@ base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
 model = PeftModel.from_pretrained(base_model, "smartinez1/Llama-3.1-8B-FINLLM")
 # Load the tokenizer associated with the base model
 tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")
+# Define the unique padding token for fine-tuning
+custom_pad_token = "<|finetune_right_pad_id|>"
+tokenizer.add_special_tokens({'pad_token': custom_pad_token})
+pad_token_id = tokenizer.pad_token_id
 
 # Set up the text generation pipeline with the PEFT model, specifying the device
 generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
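The hunk captures pad_token_id but ends before it is used. A minimal usage sketch follows; the prompt text and generation settings are placeholders for illustration and are not part of the commit:

# Hypothetical prompt and generation settings, not taken from the README
prompt = "Summarize the key risks mentioned in the latest 10-K filing:"
outputs = generator(
    prompt,
    max_new_tokens=128,          # assumed cap on generated tokens
    do_sample=False,             # deterministic decoding for the example
    pad_token_id=pad_token_id,   # reuse the custom pad token defined above
)
print(outputs[0]["generated_text"])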