This is a quantized PEFT adapter trained on the UltraChat 200k dataset for the TinyLlama-1.1B intermediate-step-1431k-3T model.
adapter_name = 'iqbalamo93/TinyLlama-1.1B-intermediate-1431k-3T-adapters-ultrachat'
The base model was quantized with BitsAndBytes using the following configuration:
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                 # Use 4-bit precision model loading
    bnb_4bit_quant_type="nf4",         # Quantization type
    bnb_4bit_compute_dtype="float16",  # Compute data type
    bnb_4bit_use_double_quant=True,    # Apply nested quantization
)
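As a quick sanity check, the base model can be loaded with this config and its memory footprint inspected; a minimal sketch (the base model ID is taken from the description above):

from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
    quantization_config=bnb_config,
    device_map="auto",
)
# Rough size of the 4-bit model in memory
print(f"Memory footprint: {base_model.get_memory_footprint() / 1024**2:.0f} MB")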
Usage: the adapter can be loaded in a few different ways. Option 1: load it directly with AutoPeftModelForCausalLM and merge it into the base model:
from peft import PeftModel, AutoPeftModelForCausalLM
from transformers import pipeline, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
adapter_name = 'iqbalamo93/TinyLlama-1.1B-intermediate-1431k-3T-adapters-ultrachat'

# Load the adapter together with its base model
model = AutoPeftModelForCausalLM.from_pretrained(
    adapter_name,
    device_map="auto"
)
# Fold the adapter weights into the base model
model = model.merge_and_unload()

prompt = """<|user|>
Tell me something about Large Language Models.</s>
<|assistant|>
"""

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
print(pipe(prompt)[0]["generated_text"])
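Because merge_and_unload folds the adapter weights into the base model, the merged model can be saved as a standalone checkpoint and reloaded later without peft; a minimal sketch (the output directory is just an example):

# Hypothetical output directory for the merged checkpoint
save_dir = "./tinyllama-ultrachat-merged"

model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)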
Option 2: load the adapter directly with AutoModelForCausalLM via the PEFT integration in transformers (the base model is resolved from the adapter's configuration):

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    adapter_name,
    device_map="auto"
)
prompt = """<|user|>
Tell me something about Large Language Models.</s>
<|assistant|>
"""
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
print(pipe(prompt)[0]["generated_text"])
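Instead of hand-writing the prompt markers, the chat template bundled with the TinyLlama chat tokenizer can build the same prompt; a sketch, assuming the tokenizer ships a chat template:

messages = [
    {"role": "user", "content": "Tell me something about Large Language Models."}
]

# Produces the "<|user|> ... </s> <|assistant|>" prompt from the messages
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(pipe(prompt)[0]["generated_text"])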
Option 3: quantize the base model explicitly and attach the adapter with PeftModel:

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                 # Use 4-bit precision model loading
    bnb_4bit_quant_type="nf4",         # Quantization type
    bnb_4bit_compute_dtype="float16",  # Compute dtype
    bnb_4bit_use_double_quant=True,    # Apply nested quantization
)
model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
adapter_name = 'iqbalamo93/TinyLlama-1.1B-intermediate-1431k-3T-adapters-ultrachat'

# Load the quantized base model, then attach the trained adapter
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)
model = PeftModel.from_pretrained(model, adapter_name)
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
prompt = """<|user|>
Tell me something about Large Language Models.</s>
<|assistant|>
"""
import torch

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=256,      # Cap the length of the generated response
        do_sample=True,          # Required for temperature/top_p/top_k to take effect
        temperature=0.7,         # Controls randomness: lower = more deterministic
        top_p=0.9,               # Nucleus sampling
        top_k=50,                # Top-K sampling
        num_return_sequences=1,
    )

for i, output in enumerate(outputs):
    generated_text = tokenizer.decode(output, skip_special_tokens=True)
    print(f"--- Generated Sequence {i + 1} ---")
    print(generated_text)
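For interactive use, tokens can also be streamed to stdout as they are generated; a minimal sketch using transformers' TextStreamer (generation settings here are illustrative, not tuned):

from transformers import TextStreamer

streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

with torch.no_grad():
    model.generate(
        **inputs,           # Reuse the tokenized prompt from above
        streamer=streamer,  # Print decoded tokens as they are produced
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
    )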