Upload 2 files
Browse files- run_speed_tests.sh +7 -0
- speed_test.py +145 -0
run_speed_tests.sh
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Run the Donut decoding speed benchmark across the model configurations
# we compare: the stock base model, the base model with Japanese tokens
# banned at decode time, and the ASCII-only variant.

run_test() {
    python speed_test.py "$@"
}

run_test --model_path "naver-clova-ix/donut-base"

run_test --model_path "naver-clova-ix/donut-base" --ja_bad_words

run_test --model_path "donut-base-ascii"
|
speed_test.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
|
3 |
+
import torch
|
4 |
+
from datasets import load_dataset
|
5 |
+
from transformers import AutoProcessor, VisionEncoderDecoderModel
|
6 |
+
|
7 |
+
|
8 |
+
def speedometer(
    model: torch.nn.Module,
    pixel_values: torch.Tensor,
    decoder_input_ids: torch.Tensor,
    processor: AutoProcessor,
    bad_words_ids: list,
    warmup_iters: int = 100,
    timing_iters: int = 100,
    num_tokens: int = 10,
) -> float:
    """Measure the average wall-clock time of ``model.generate``.

    Runs ``warmup_iters`` untimed generation calls, then times
    ``timing_iters`` identical calls with CUDA events.

    Args:
        model: Vision encoder-decoder model to benchmark; must live on a
            CUDA device (CUDA events are used for timing).
        pixel_values: Preprocessed image batch fed to the encoder.
        decoder_input_ids: Prompt token ids for the decoder.
        processor: Processor supplying the tokenizer's pad/eos token ids.
        bad_words_ids: Token-id sequences generation is forbidden to emit.
        warmup_iters: Number of untimed warmup generations.
        timing_iters: Number of timed generations.
        num_tokens: Exact output length (min_length == max_length pins it).

    Returns:
        Mean generation latency per call, in milliseconds.
    """

    def _generate() -> None:
        # Greedy decoding (num_beams=1) of exactly `num_tokens` tokens.
        # Shared by warmup and timed loops so both exercise the same path
        # (the original duplicated this call block verbatim).
        model.generate(
            pixel_values.to(model.device),
            decoder_input_ids=decoder_input_ids.to(model.device),
            early_stopping=True,
            pad_token_id=processor.tokenizer.pad_token_id,
            eos_token_id=processor.tokenizer.eos_token_id,
            use_cache=True,
            num_beams=1,
            bad_words_ids=bad_words_ids,
            return_dict_in_generate=True,
            min_length=num_tokens,
            max_length=num_tokens,
        )

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    # Warmup runs: let lazy CUDA init / kernel autotuning settle before timing.
    torch.cuda.synchronize()
    for _ in range(warmup_iters):
        _generate()

    # Timed runs, bracketed by CUDA events so queued device work is included;
    # the final synchronize makes elapsed_time() valid.
    start.record()
    for _ in range(timing_iters):
        _generate()
    end.record()
    torch.cuda.synchronize()

    mean = start.elapsed_time(end) / timing_iters
    print(f"Mean time: {mean} ms")

    return mean
65 |
+
|
66 |
+
|
67 |
+
def get_ja_list_of_lists(processor):
    """Return ``bad_words_ids`` covering every Japanese token in the vocab.

    Scans the tokenizer vocabulary and collects the ids of tokens whose
    characters (after stripping the SentencePiece word-boundary marker "▁")
    are all in Japanese/CJK Unicode ranges, in the list-of-lists shape
    expected by ``generate(bad_words_ids=...)``.
    """

    def is_japanese(s: str) -> bool:
        """True iff *s* is non-empty and every char is in a Japanese/CJK range.

        Ranges by GPT-4: https://chat.openai.com/share/a795b15c-8534-40b9-9699-c8c1319f5f25
        """
        # Fix: the original returned True for "" (vacuous all-pass), so the
        # bare "▁" token — empty after lstrip — was wrongly banned as Japanese.
        if not s:
            return False
        for char in s:
            code_point = ord(char)
            if not (
                0x3040 <= code_point <= 0x309F        # Hiragana
                or 0x30A0 <= code_point <= 0x30FF     # Katakana
                or 0x4E00 <= code_point <= 0x9FFF     # CJK Unified Ideographs
                or 0x3400 <= code_point <= 0x4DBF     # CJK Extension A
                or 0x20000 <= code_point <= 0x2A6DF   # CJK Extension B
                or 0x31F0 <= code_point <= 0x31FF     # Katakana Phonetic Ext.
                or 0xFF00 <= code_point <= 0xFFEF     # Half/Fullwidth Forms
                or 0x3000 <= code_point <= 0x303F     # CJK Symbols & Punct.
                or 0x3200 <= code_point <= 0x32FF     # Enclosed CJK Letters
            ):
                return False
        return True

    return [
        [token_id]
        for token, token_id in processor.tokenizer.vocab.items()
        if is_japanese(token.lstrip("▁"))
    ]
|
95 |
+
|
96 |
+
|
97 |
+
def main():
    """CLI entry point: benchmark decoding speed for one Donut model."""
    parser = argparse.ArgumentParser(
        description="Benchmark decoding speed of a Donut-style vision encoder-decoder model."
    )
    parser.add_argument(
        "--model_path",
        help="Hugging Face hub id or local path of the model to benchmark",
        required=True,
    )
    parser.add_argument(
        "--ja_bad_words",
        help="Additionally ban all Japanese vocabulary tokens via bad_words_ids",
        action="store_true",
        default=False,
    )
    args = parser.parse_args()

    print("Running speed test on model: ", args.model_path, "with ja_bad_words: ", args.ja_bad_words)

    processor = AutoProcessor.from_pretrained(args.model_path)
    model = VisionEncoderDecoderModel.from_pretrained(args.model_path)

    # Prefer the first CUDA device; fall back to CPU. (speedometer uses CUDA
    # events, so the CPU fallback will still fail there without a GPU.)
    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # One sample document image serves as the fixed benchmark input.
    dataset = load_dataset("hf-internal-testing/example-documents", split="test")
    image = dataset[1]["image"]

    task_prompt = "<s_synthdog>"
    decoder_input_ids = processor.tokenizer(
        task_prompt, add_special_tokens=False, return_tensors="pt"
    ).input_ids

    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Always ban the unknown token; optionally ban every Japanese token too.
    bad_words_ids = [[processor.tokenizer.unk_token_id]]
    if args.ja_bad_words:
        bad_words_ids += get_ja_list_of_lists(processor)

    print("Length of bad_words_ids: ", len(bad_words_ids))

    speedometer(
        model,
        pixel_values,
        decoder_input_ids,
        processor,
        bad_words_ids=bad_words_ids,
        warmup_iters=100,
        timing_iters=100,
        num_tokens=10,
    )
|
141 |
+
|
142 |
+
|
143 |
+
# Script entry point: only run the benchmark when executed directly,
# not when imported as a module.
if __name__ == "__main__":

    main()
|