ꡐ윑용으둜 ν•™μŠ΅ν•œ κ°„λ‹¨ν•œ instruction fine-tuning λͺ¨λΈ (updated 2023/08/06)

import torch
from transformers import AutoModelForCausalLM
from transformers import PreTrainedTokenizerFast

# Minimal inference example: load the tokenizer and model, build an
# instruction prompt, generate a continuation, and print the decoded text.

# Hub repository id shared by the tokenizer and the model weights.
MODEL_ID = "hyunjae/skt-kogpt2-kullm-v2"

# Use the GPU when one is available, otherwise fall back to the CPU so the
# example still runs on machines without CUDA (a hard-coded 'cuda' raises there).
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Special tokens are passed explicitly; note that BOS and EOS are both '</s>'.
tokenizer = PreTrainedTokenizerFast.from_pretrained(
    MODEL_ID,
    bos_token='</s>',
    eos_token='</s>',
    unk_token='<unk>',
    pad_token='<pad>',
    mask_token='<mask>',
    padding_side="right",
    model_max_length=512,
)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to(DEVICE)

# Prompt template: a system instruction, the user's turn, and an open
# response header for the model to continue. `{instruction}` is the only slot.
PROMPT = "### system:μ‚¬μš©μžμ˜ μ§ˆλ¬Έμ— λ§žλŠ” μ μ ˆν•œ 응닡을 μƒμ„±ν•˜μ„Έμš”.\n### μ‚¬μš©μž:{instruction}\n### 응닡:"
text = PROMPT.format_map({'instruction': "μ•ˆλ…•? λ„ˆκ°€ ν•  수 μžˆλŠ”κ²Œ 뭐야?"})

# Encode to a tensor and place it on the same device as the model.
input_ids = tokenizer.encode(text, return_tensors='pt').to(model.device)

# Sampling is enabled together with 4 beams, so the output can differ
# between runs. At most 128 new tokens are produced after the prompt.
gen_ids = model.generate(
    input_ids,
    repetition_penalty=2.0,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
    bos_token_id=tokenizer.bos_token_id,
    num_beams=4,
    no_repeat_ngram_size=4,
    max_new_tokens=128,
    do_sample=True,
    top_k=50,
)

# Decode the first returned sequence. Special tokens are not stripped, and
# for a causal LM the returned ids include the prompt as well as the answer.
generated = tokenizer.decode(gen_ids[0])
print(generated)
Downloads last month
717
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Dataset used to train hyunjae/skt-kogpt2-kullm-v2