--- library_name: transformers datasets: - Suraponn/thai_instruction_sft language: - th base_model: meta-llama/Meta-Llama-3.1-8B --- import json import torch from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig model_id = "Suraponn/llama_3.1_8B_Thai_instruct" tokenizer = AutoTokenizer.from_pretrained( model_id, ) model = AutoModelForCausalLM.from_pretrained( model_id, device_map="cuda:0", torch_dtype=torch.float16, ) config_setting = AutoConfig.from_pretrained( model_id, add_special_tokens=True, ) if tokenizer.chat_template is None: tokenizer.chat_template = tokenizer.default_chat_template if not "system" in tokenizer.chat_template and "system" in tokenizer.default_chat_template: tokenizer.chat_template = tokenizer.default_chat_template s_split = "เขียนบทความเกี่ยวกับการออกกำลังกายให้ถูกต้อง" chat = [ { "role": "system", "content": "You are a helpfull assistant. Please respond in Thai." }, { "role": "user", "content": s_split, }, ] tokenizer.use_default_system_prompt = False extract_input = tokenizer.apply_chat_template(chat, tokenize=False , add_generation_prompt=True) #extract_input = extract_input.split(s_split)[0] print("------------\n" + extract_input + "\n------------") inputs = tokenizer( extract_input, return_tensors="pt", add_special_tokens = False, ) #print(inputs) terminators = [ tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>"), ] #print(terminators) inputs = inputs.to(model.device) with torch.no_grad(): tokens = model.generate( **inputs, max_new_tokens=2048, do_sample=True, eos_token_id=terminators, temperature=0.7, #top_p=1, ) output = tokenizer.decode(tokens[0]) print(output)