It runs on a Colab T4

#5
by rakmik - opened

from transformers import pipeline
import os

# Let the CUDA caching allocator grow segments instead of failing on
# fragmentation-induced OOM — helpful when squeezing a 2-bit 70B model
# onto a 16 GB T4.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Chat-style input in the format expected by text-generation pipelines.
messages = [
    {"role": "user", "content": "Who are you?"},
]

# device_map="auto" lets accelerate place/offload layers across GPU and CPU
# automatically; do_sample=True enables stochastic decoding.
pipe = pipeline(
    "text-generation",
    model="ISTA-DASLab/Meta-Llama-3.1-70B-Instruct-AQLM-PV-2Bit-1x16",
    device_map="auto",
    do_sample=True,
)

# Experiment with different values for max_new_tokens.
max_new_tokens = 10  # Start with a small value and gradually increase if needed.

# Call the pipeline with max_new_tokens.
output = pipe(messages, max_new_tokens=max_new_tokens)

print(output)

Loading checkpoint shards: 100%
 5/5 [01:03<00:00,  7.56s/it]
WARNING:root:Some parameters are on the meta device device because they were offloaded to the cpu.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Setting pad_token_id to eos_token_id:128001 for open-end generation.
/usr/local/lib/python3.10/dist-packages/torch/utils/cpp_extension.py:1965: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation.
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].
warnings.warn(
/usr/local/lib/python3.10/dist-packages/aqlm/inference_kernels/cuda_kernel.py:20: FutureWarning: torch.library.impl_abstract was renamed to torch.library.register_fake. Please use that instead; we will remove torch.library.impl_abstract in a future version of PyTorch.
@torch .library.impl_abstract("aqlm::code1x16_matmat")
/usr/local/lib/python3.10/dist-packages/aqlm/inference_kernels/cuda_kernel.py:33: FutureWarning: torch.library.impl_abstract was renamed to torch.library.register_fake. Please use that instead; we will remove torch.library.impl_abstract in a future version of PyTorch.
@torch .library.impl_abstract("aqlm::code1x16_matmat_dequant")
/usr/local/lib/python3.10/dist-packages/aqlm/inference_kernels/cuda_kernel.py:48: FutureWarning: torch.library.impl_abstract was renamed to torch.library.register_fake. Please use that instead; we will remove torch.library.impl_abstract in a future version of PyTorch.
@torch .library.impl_abstract("aqlm::code1x16_matmat_dequant_transposed")
/usr/local/lib/python3.10/dist-packages/aqlm/inference_kernels/cuda_kernel.py:62: FutureWarning: torch.library.impl_abstract was renamed to torch.library.register_fake. Please use that instead; we will remove torch.library.impl_abstract in a future version of PyTorch.
@torch .library.impl_abstract("aqlm::code2x8_matmat")
/usr/local/lib/python3.10/dist-packages/aqlm/inference_kernels/cuda_kernel.py:75: FutureWarning: torch.library.impl_abstract was renamed to torch.library.register_fake. Please use that instead; we will remove torch.library.impl_abstract in a future version of PyTorch.
@torch .library.impl_abstract("aqlm::code2x8_matmat_dequant")
/usr/local/lib/python3.10/dist-packages/aqlm/inference_kernels/cuda_kernel.py:88: FutureWarning: torch.library.impl_abstract was renamed to torch.library.register_fake. Please use that instead; we will remove torch.library.impl_abstract in a future version of PyTorch.
@torch .library.impl_abstract("aqlm::code2x8_matmat_dequant_transposed")

[{'generated_text': [{'role': 'user', 'content': 'Who are you?'}, {'role': 'assistant', 'content': "I'm an AI assistant, which means I'm"}]}]

[{'generated_text': [{'role': 'user', 'content': 'Who are you?'}, {'role': 'assistant', 'content': "I'm an AI assistant, which means I'm"}]}]

!pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu121

!pip install aqlm[gpu]

requirements.txt

safetensors==0.4.3
datasets==2.19.0
sentencepiece==0.2.0
numpy>=1.26.4
transformers==4.40.1
accelerate==0.29.3

Untitled.png

# Follow-up prompt, reusing the `pipe` object created above.
messages = [
    {"role": "user", "content": "Write ten sentences ending with the word beauty."},
]

max_new_tokens = 50  # larger token budget for a multi-sentence answer
output = pipe(messages, max_new_tokens=max_new_tokens)
print(output)

Setting pad_token_id to eos_token_id:128001 for open-end generation.
[{'generated_text': [{'role': 'user', 'content': 'Write ten sentences ending with the word beauty.'}, {'role': 'assistant', 'content': "Here are ten sentences ending with the word beauty:\n\n1. The serene landscape was a sight to behold, filled with natural beauty.\n2. Her smile could light up a room and radiate pure beauty.\n3. The intricate patterns on the butterfly's"}]}

Sign up or log in to comment