It runs on a Colab T4.
from transformers import pipeline
import os
# Set environment variable for PyTorch memory management
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
messages = [
{"role": "user", "content": "Who are you?"},
]
pipe = pipeline("text-generation", model="ISTA-DASLab/Meta-Llama-3.1-70B-Instruct-AQLM-PV-2Bit-1x16", device_map="auto", do_sample=True)
# Experiment with different values for max_new_tokens
max_new_tokens = 10 # Start with a small value and gradually increase if needed
# Call the pipeline with max_new_tokens
output = pipe(messages, max_new_tokens=max_new_tokens)
print(output)
Loading checkpoint shards: 100% 5/5 [01:03<00:00, 7.56s/it]
WARNING:root:Some parameters are on the meta device device because they were offloaded to the cpu.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
/usr/local/lib/python3.10/dist-packages/torch/utils/cpp_extension.py:1965: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation.
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].
warnings.warn(
/usr/local/lib/python3.10/dist-packages/aqlm/inference_kernels/cuda_kernel.py:20: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.
  @torch.library.impl_abstract("aqlm::code1x16_matmat")
(The same FutureWarning repeats for the other aqlm kernels: code1x16_matmat_dequant, code1x16_matmat_dequant_transposed, code2x8_matmat, code2x8_matmat_dequant, and code2x8_matmat_dequant_transposed.)
[{'generated_text': [{'role': 'user', 'content': 'Who are you?'}, {'role': 'assistant', 'content': "I'm an AI assistant, which means I'm"}]}]
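If you only want the model's reply rather than the whole chat, you can index into the structure shown above (a minimal sketch based on that output format):

assistant_reply = output[0]["generated_text"][-1]["content"]  # last message is the assistant's
print(assistant_reply)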
!pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu121
!pip install aqlm[gpu]
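Before loading the 70B AQLM checkpoint it's worth confirming the runtime actually has a GPU and how much memory is free (a T4 has roughly 15 GB). A quick check, assuming a CUDA-enabled runtime:

import torch

print(torch.cuda.is_available())         # should be True on a T4 runtime
print(torch.cuda.get_device_name(0))     # e.g. "Tesla T4"
free, total = torch.cuda.mem_get_info()  # bytes of free / total memory on the device
print(f"{free / 1e9:.1f} GB free of {total / 1e9:.1f} GB")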
requirements.txt
safetensors==0.4.3
datasets==2.19.0
sentencepiece==0.2.0
numpy>=1.26.4
transformers==4.40.1
accelerate==0.29.3
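If you save the pins above as requirements.txt in the Colab working directory (the filename and location here are just an assumption), they can be installed in one go:

!pip install -r requirements.txt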
messages = [
{"role": "user", "content": "Write ten sentences ending with the word beauty."},
]
max_new_tokens = 50
output = pipe(messages, max_new_tokens=max_new_tokens)
print(output)
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
[{'generated_text': [{'role': 'user', 'content': 'Write ten sentences ending with the word beauty.'}, {'role': 'assistant', 'content': "Here are ten sentences ending with the word beauty:\n\n1. The serene landscape was a sight to behold, filled with natural beauty.\n2. Her smile could light up a room and radiate pure beauty.\n3. The intricate patterns on the butterfly's"}]}]
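The answer gets cut off because max_new_tokens=50. A sketch of a longer run that also passes pad_token_id explicitly to silence the open-end generation notice (256 tokens is just a guess at what leaves room for ten full sentences):

output = pipe(
    messages,
    max_new_tokens=256,                        # assumption: enough room for the full answer
    pad_token_id=pipe.tokenizer.eos_token_id,  # avoids the "Setting pad_token_id" notice
)
print(output[0]["generated_text"][-1]["content"])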