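# Inference script: merge a LoRA adapter into a Llama-3 8B base model and
# generate Minecraft action-sequence (AS) predictions for the val/test CSV splits.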
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from datasets import load_dataset
from peft import PeftModel

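# Load the base Llama-3 8B checkpoint in fp16 and place it automatically across available devices.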
device_map = "auto"
model = AutoModelForCausalLM.from_pretrained(
    "/path/to/meta-llama3-8b",
    # low_cpu_mem_usage=True,  # optionally enable to reduce host RAM usage while loading
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)

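# Attach the fine-tuned LoRA adapter and merge its weights into the base model for plain inference.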
model = PeftModel.from_pretrained(model, "/path/to/llama3-8b-adapter", device_map=device_map)
model = model.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained("/path/to/meta-llama3-8b", trust_remote_code=True)
# The Llama-3 tokenizer ships without a pad token, so set an explicit pad token id.
tokenizer.pad_token_id = 18610

# Greedy decoding (do_sample=False); max_length caps prompt plus generation at 4096 tokens.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=4096, do_sample=False)
print("Padding side:", tokenizer.padding_side)

# Load the validation and test splits from CSV.
val_dataset = load_dataset("csv", data_files={"val": "/path/to/actseq-val-new.csv"})["val"]
test_dataset = load_dataset("csv", data_files={"test": "/path/to/actseq-test-new.csv"})["test"]


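# Build prompts that ask the model to predict the action sequence (AS) for each dialogue excerpt.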
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['dial_with_actions'])):
        text = f"Predict the action sequence (AS) for the Minecraft excerpt:\n {example['dial_with_actions'][i]}\n ### AS:"
        output_texts.append(text)
    return output_texts


val_texts = formatting_prompts_func(val_dataset)
test_texts = formatting_prompts_func(test_dataset)

print("Val Length:", len(val_texts))
print("Test Length:", len(test_texts))

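# Run generation over both splits, echoing each prompt and writing the raw generated text to disk.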
with open("/path/to/val-output-file", "w") as f:
    for text in val_texts:
        print(text)
        print(pipe(text)[0]["generated_text"], file=f)

with open("/path/to/test-output-file", "w") as f:
    for text in test_texts:
        print(text)
        print(pipe(text)[0]["generated_text"], file=f)