gyrojeff committed on
Commit
ecee822
1 Parent(s): 716ee53

feat: add generation statistics calc script

Browse files
Files changed (1) hide show
  1. font_ds_stat.py +62 -0
font_ds_stat.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import traceback
3
+ import pickle
4
+ import os
5
+ import concurrent.futures
6
+ from tqdm import tqdm
7
+ from font_dataset.font import load_fonts
8
+ from font_dataset.layout import generate_font_image
9
+ from font_dataset.text import CorpusGeneratorManager
10
+ from font_dataset.background import background_image_generator
11
+
12
+
13
# Multiplier applied to the per-split counts to obtain the CJK variants below.
cjk_ratio = 3

# Base image counts for the train / validation / test splits.
train_cnt, val_cnt, test_cnt = 100, 10, 30

# CJK counterparts: each base count scaled by cjk_ratio and truncated to int.
train_cnt_cjk, val_cnt_cjk, test_cnt_cjk = (
    int(base * cjk_ratio) for base in (train_cnt, val_cnt, test_cnt)
)

# Dataset root directory; created if it does not already exist.
dataset_path = "./dataset/font_img"
os.makedirs(dataset_path, exist_ok=True)
25
+
26
fonts = load_fonts()

# Weighted font count: a font whose language is "CJK" contributes cjk_ratio
# units, every other font contributes exactly one.
cnt = sum(cjk_ratio if font.language == "CJK" else 1 for font in fonts)
36
+
37
+
38
# Expected image total per split: the per-font count for that split multiplied
# by the weighted font count.
for heading, per_font_count in (
    ("Total training images:", train_cnt),
    ("Total validation images:", val_cnt),
    ("Total testing images:", test_cnt),
):
    print(heading, per_font_count * cnt)
41
+
42
def _count_split_entries(split_name):
    """Return how many directory entries exist under dataset_path/split_name.

    A split directory that does not exist counts as zero entries.
    """
    split_dir = os.path.join(dataset_path, split_name)
    return len(os.listdir(split_dir)) if os.path.exists(split_dir) else 0


# Number of entries currently present on disk for each split.
num_file_train = _count_split_entries("train")
num_file_val = _count_split_entries("val")
num_file_test = _count_split_entries("test")
56
+
57
# Entries found on disk across all three splits.
generated_total = num_file_train + num_file_val + num_file_test

# Expected number of files overall. The factor of 2 presumably reflects two
# files per sample (e.g. an image plus a label) -- TODO confirm against the
# generation script.
target_total = (train_cnt + val_cnt + test_cnt) * cnt * 2

print("Total files generated:", generated_total)
print("Total files target:", target_total)

if target_total:
    print(f"{generated_total / target_total * 100:.2f}% completed")
else:
    # cnt is 0 when no fonts were loaded, which makes the target 0; report
    # that explicitly instead of crashing with ZeroDivisionError.
    print("Completion percentage unavailable: target is 0 files")