nbroad HF staff committed on
Commit
cbde782
1 Parent(s): f9e6f44

Upload 2 files

Browse files
Files changed (2) hide show
  1. run_speed_tests.sh +7 -0
  2. speed_test.py +145 -0
run_speed_tests.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Benchmark Donut generation speed under three configurations
# (see speed_test.py for the actual measurement loop).

# Baseline: stock Donut checkpoint, default bad_words_ids (<unk> only).
python speed_test.py --model_path "naver-clova-ix/donut-base"

# Same checkpoint, but additionally ban every Japanese vocab token.
python speed_test.py --model_path "naver-clova-ix/donut-base" --ja_bad_words

# Local checkpoint with a reduced (ASCII-only) vocabulary — presumably a
# local directory next to this script; verify the path before running.
python speed_test.py --model_path "donut-base-ascii"
speed_test.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ import torch
4
+ from datasets import load_dataset
5
+ from transformers import AutoProcessor, VisionEncoderDecoderModel
6
+
7
+
8
def speedometer(
    model: torch.nn.Module,
    pixel_values: torch.Tensor,
    decoder_input_ids: torch.Tensor,
    processor: AutoProcessor,
    bad_words_ids: list,
    warmup_iters: int = 100,
    timing_iters: int = 100,
    num_tokens: int = 10,
) -> float:
    """Measure the average per-call generation latency of `model`.

    Performs ``warmup_iters`` untimed ``generate`` calls, then times
    ``timing_iters`` identical calls with CUDA events and prints and
    returns the mean latency in milliseconds.

    Args:
        model: vision-encoder-decoder model already placed on its device.
        pixel_values: preprocessed image tensor (moved to ``model.device``
            inside each call).
        decoder_input_ids: task-prompt token ids used to seed decoding.
        processor: processor providing pad/eos token ids.
        bad_words_ids: list of token-id lists banned during generation.
        warmup_iters: untimed iterations before measurement.
        timing_iters: timed iterations averaged into the result.
        num_tokens: min_length == max_length, so every run generates
            exactly this many tokens (constant work per iteration).

    Returns:
        Mean generation time per call, in milliseconds.

    Note: relies on ``torch.cuda`` events/synchronize, so a CUDA device
    is required.
    """

    def _generate():
        # Greedy decoding (num_beams=1) pinned to exactly `num_tokens`
        # tokens; identical call for warmup and timed phases.
        return model.generate(
            pixel_values.to(model.device),
            decoder_input_ids=decoder_input_ids.to(model.device),
            early_stopping=True,
            pad_token_id=processor.tokenizer.pad_token_id,
            eos_token_id=processor.tokenizer.eos_token_id,
            use_cache=True,
            num_beams=1,
            bad_words_ids=bad_words_ids,
            return_dict_in_generate=True,
            min_length=num_tokens,
            max_length=num_tokens,
        )

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    # Warmup runs (not timed): lets kernels/caches/allocator settle.
    torch.cuda.synchronize()
    for _ in range(warmup_iters):
        _generate()

    # Timed runs, bracketed by CUDA events.
    start.record()
    for _ in range(timing_iters):
        _generate()
    end.record()
    torch.cuda.synchronize()

    mean = start.elapsed_time(end) / timing_iters
    print(f"Mean time: {mean} ms")

    return mean
65
+
66
+
67
def get_ja_list_of_lists(processor):
    """Collect bad_words_ids entries for every Japanese token in the vocab.

    A vocabulary token counts as Japanese when, after stripping the leading
    SentencePiece "▁" marker(s), every remaining character falls in one of
    the Japanese Unicode blocks below. Each matching token id is wrapped in
    its own single-element list — the format ``generate(bad_words_ids=...)``
    expects.
    """
    # Unicode ranges treated as Japanese (hiragana, katakana + extensions,
    # CJK ideographs + Ext-A/B, CJK symbols/punctuation, enclosed CJK,
    # half/full-width forms). Ranges originally produced with GPT-4:
    # https://chat.openai.com/share/a795b15c-8534-40b9-9699-c8c1319f5f25
    ja_ranges = (
        (0x3000, 0x303F),
        (0x3040, 0x309F),
        (0x30A0, 0x30FF),
        (0x31F0, 0x31FF),
        (0x3200, 0x32FF),
        (0x3400, 0x4DBF),
        (0x4E00, 0x9FFF),
        (0xFF00, 0xFFEF),
        (0x20000, 0x2A6DF),
    )

    def is_japanese(text: str) -> bool:
        # Bug fix: the original returned True for the empty string, so the
        # bare "▁" token (empty after lstrip) was wrongly banned.
        if not text:
            return False
        return all(
            any(lo <= ord(ch) <= hi for lo, hi in ja_ranges) for ch in text
        )

    # `token_id` rather than `id` — avoid shadowing the builtin.
    return [
        [token_id]
        for token, token_id in processor.tokenizer.vocab.items()
        if is_japanese(token.lstrip("▁"))
    ]
95
+
96
+
97
def main():
    """Run the Donut generation-speed benchmark for one model checkpoint.

    Parses --model_path / --ja_bad_words, loads the model and an example
    document image, builds the bad_words_ids list, and calls speedometer().
    """
    parser = argparse.ArgumentParser(
        description="Measure Donut generation latency for a model checkpoint."
    )
    parser.add_argument(
        "--model_path",
        help="HF Hub id or local path of the VisionEncoderDecoder checkpoint",
        required=True,
    )
    parser.add_argument(
        "--ja_bad_words",
        help="Additionally ban every Japanese vocab token via bad_words_ids",
        action="store_true",
        default=False,
    )
    args = parser.parse_args()

    print("Running speed test on model: ", args.model_path, "with ja_bad_words: ", args.ja_bad_words)

    processor = AutoProcessor.from_pretrained(args.model_path)
    model = VisionEncoderDecoderModel.from_pretrained(args.model_path)

    # speedometer() uses CUDA events, so a GPU is effectively required;
    # the CPU fallback only keeps the script loadable for debugging.
    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    dataset = load_dataset("hf-internal-testing/example-documents", split="test")
    image = dataset[1]["image"]

    # Donut conditions generation on a task prompt token.
    task_prompt = "<s_synthdog>"
    decoder_input_ids = processor.tokenizer(
        task_prompt, add_special_tokens=False, return_tensors="pt"
    ).input_ids

    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Always ban <unk>; optionally ban all Japanese tokens as well.
    bad_words_ids = [[processor.tokenizer.unk_token_id]]
    if args.ja_bad_words:
        bad_words_ids += get_ja_list_of_lists(processor)

    print("Length of bad_words_ids: ", len(bad_words_ids))

    # Return value (mean ms) is already printed by speedometer; the
    # original bound it to an unused `results` variable.
    speedometer(
        model,
        pixel_values,
        decoder_input_ids,
        processor,
        bad_words_ids=bad_words_ids,
        warmup_iters=100,
        timing_iters=100,
        num_tokens=10,
    )


if __name__ == "__main__":
    main()