gyrojeff committed on
Commit
0e2310c
1 Parent(s): 911fce3

feat: more robust generation script

Browse files
font_dataset/font.py CHANGED
@@ -1,5 +1,7 @@
1
  import yaml
2
  import os
 
 
3
 
4
 
5
  from .utils import get_files
@@ -37,7 +39,7 @@ def load_fonts(config_path="configs/font.yml"):
37
  if rule is not None and not rule(file):
38
  print("skip: " + file)
39
  continue
40
- font_list.append(DSFont(file, spec["language"]))
41
 
42
  font_list.sort(key=lambda x: x.path)
43
 
@@ -51,3 +53,18 @@ def load_fonts(config_path="configs/font.yml"):
51
  return False
52
 
53
  return font_list, exclusion_rule
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import yaml
2
  import os
3
+ from typing import Dict
4
+ import pickle
5
 
6
 
7
  from .utils import get_files
 
39
  if rule is not None and not rule(file):
40
  print("skip: " + file)
41
  continue
42
+ font_list.append(DSFont(str(file).replace("\\", "/"), spec["language"]))
43
 
44
  font_list.sort(key=lambda x: x.path)
45
 
 
53
  return False
54
 
55
  return font_list, exclusion_rule
56
+
57
+
58
def load_font_with_exclusion(
    config_path="configs/font.yml", cache_path="font_list_cache.bin"
) -> Dict:
    """Return a mapping from font path to its index in the filtered font list.

    If ``cache_path`` already exists, the mapping is unpickled from it and
    returned without touching the font configuration. Otherwise the fonts are
    loaded via ``load_fonts(config_path)``, every font for which the returned
    exclusion rule is truthy is dropped, the remainder is sorted by ``path``,
    and the resulting ``{path: index}`` mapping is pickled to ``cache_path``.

    Args:
        config_path: Path of the font configuration passed to ``load_fonts``.
        cache_path: Path of the pickle file used to cache the mapping.

    Returns:
        Dict mapping each remaining font's ``path`` to its position in the
        sorted, filtered list.
    """
    if os.path.exists(cache_path):
        # NOTE(review): unpickling can execute arbitrary code; cache_path must
        # only ever point at a file produced by this function.
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    font_list, exclusion_rule = load_fonts(config_path)
    font_list = [font for font in font_list if not exclusion_rule(font)]
    font_list.sort(key=lambda x: x.path)
    print("font count: " + str(len(font_list)))
    ret = {font.path: index for index, font in enumerate(font_list)}
    # Write to the same path that is checked above, so that a caller-supplied
    # cache_path is actually populated and reused on the next call.
    with open(cache_path, "wb") as f:
        pickle.dump(ret, f)
    return ret
font_dataset/layout.py CHANGED
@@ -1,6 +1,6 @@
1
  from typing import Tuple
2
 
3
- __all__ = ["generate_font_image"]
4
 
5
 
6
  epislon = 1e-6
@@ -237,6 +237,11 @@ def RGB2RGBA(color):
237
  return color + (255,)
238
 
239
 
 
 
 
 
 
240
  def generate_font_image(
241
  img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
242
  ) -> Tuple[Image.Image, FontLabel]:
@@ -368,7 +373,7 @@ def generate_font_image(
368
  text_size = int(render_calculation_size * render_height / render_calculation_height)
369
 
370
  if text_size < text_size_min:
371
- raise ValueError("text size is too small")
372
 
373
  render_width_no_rotation = int(
374
  render_calculation_width_no_rotation / render_calculation_height * render_height
 
1
  from typing import Tuple
2
 
3
+ __all__ = ["generate_font_image", "TextSizeTooSmallException"]
4
 
5
 
6
  epislon = 1e-6
 
237
  return color + (255,)
238
 
239
 
240
class TextSizeTooSmallException(Exception):
    """Raised by ``generate_font_image`` when the computed text size is below
    the configured minimum (``text_size < text_size_min``).

    The message parameter has a default so that ``TextSizeTooSmallException()``
    keeps working, while the exception can also be rebuilt from its ``args``
    (which is how pickling/copying reconstruct exception instances).
    """

    def __init__(self, message: str = "Text Size Too Small"):
        super().__init__(message)
243
+
244
+
245
  def generate_font_image(
246
  img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
247
  ) -> Tuple[Image.Image, FontLabel]:
 
373
  text_size = int(render_calculation_size * render_height / render_calculation_height)
374
 
375
  if text_size < text_size_min:
376
+ raise TextSizeTooSmallException()
377
 
378
  render_width_no_rotation = int(
379
  render_calculation_width_no_rotation / render_calculation_height * render_height
font_ds_generate_script.py CHANGED
@@ -5,8 +5,8 @@ import os
5
  import concurrent.futures
6
  from tqdm import tqdm
7
  import time
8
- from font_dataset.font import load_fonts
9
- from font_dataset.layout import generate_font_image
10
  from font_dataset.text import CorpusGeneratorManager, UnqualifiedFontException
11
  from font_dataset.background import background_image_generator
12
 
@@ -39,9 +39,27 @@ corpus_manager = CorpusGeneratorManager()
39
  images = background_image_generator()
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def generate_dataset(dataset_type: str, cnt: int):
43
- dataset_bath_dir = os.path.join(dataset_path, dataset_type)
44
- os.makedirs(dataset_bath_dir, exist_ok=True)
45
 
46
  def _generate_single(args):
47
  i, j, font = args
@@ -61,8 +79,8 @@ def generate_dataset(dataset_type: str, cnt: int):
61
  image_file_name = f"font_{i}_img_{j}.jpg"
62
  label_file_name = f"font_{i}_img_{j}.bin"
63
 
64
- image_file_path = os.path.join(dataset_bath_dir, image_file_name)
65
- label_file_path = os.path.join(dataset_bath_dir, label_file_name)
66
 
67
  # detect cache
68
  if os.path.exists(image_file_path) and os.path.exists(label_file_path):
@@ -79,14 +97,16 @@ def generate_dataset(dataset_type: str, cnt: int):
79
  pickle.dump(label, open(label_file_path, "wb"))
80
  return
81
  except UnqualifiedFontException as e:
82
- print(f"SKIPPING Unqualified font: {e.font.path}")
83
- runtime_exclusion_list.append(e.font.path)
84
- with open(unqualified_log_file_name, "a+") as f:
85
- f.write(f"{e.font.path}\n")
86
  return
87
- except Exception as _:
88
  traceback.print_exc()
89
  continue
 
 
 
 
90
 
91
  work_list = []
92
 
 
5
  import concurrent.futures
6
  from tqdm import tqdm
7
  import time
8
+ from font_dataset.font import load_fonts, DSFont
9
+ from font_dataset.layout import generate_font_image, TextSizeTooSmallException
10
  from font_dataset.text import CorpusGeneratorManager, UnqualifiedFontException
11
  from font_dataset.background import background_image_generator
12
 
 
39
  images = background_image_generator()
40
 
41
 
42
def add_exclusion(font: DSFont, reason: str, dataset_base_dir: str, i: int, j: int):
    """Record a font as excluded and delete the files already generated for it.

    The font path is printed, appended to ``runtime_exclusion_list`` and
    appended (with the reason) to the file named by
    ``unqualified_log_file_name``. Then the image/label pairs
    ``font_{i}_img_{k}.jpg`` / ``font_{i}_img_{k}.bin`` for ``k`` in
    ``0..j`` are removed from ``dataset_base_dir`` if present.

    Args:
        font: The font being excluded; only ``font.path`` is used.
        reason: Human-readable reason, written to stdout and the log file.
        dataset_base_dir: Directory holding the generated image/label files.
        i: Font index used in the ``font_{i}_img_{k}`` file names.
        j: Index of the image being generated when the exclusion was
            triggered; files for image indices ``0..j`` are removed.
    """
    print(f"Excluded font: {font.path}, reason: {reason}")
    runtime_exclusion_list.append(font.path)
    with open(unqualified_log_file_name, "a+") as f:
        f.write(f"{font.path} # {reason}\n")

    # Iterate over the image index while keeping the font index fixed. The
    # loop variable must not reuse the name `i`, otherwise files belonging to
    # other fonts are deleted and this font's earlier images are left behind.
    for image_index in range(j + 1):
        stem = f"font_{i}_img_{image_index}"
        for extension in (".jpg", ".bin"):
            # Remove-and-ignore-missing instead of exists-then-remove: another
            # worker may create or delete the file between check and removal.
            try:
                os.remove(os.path.join(dataset_base_dir, stem + extension))
            except FileNotFoundError:
                pass
58
+
59
+
60
  def generate_dataset(dataset_type: str, cnt: int):
61
+ dataset_base_dir = os.path.join(dataset_path, dataset_type)
62
+ os.makedirs(dataset_base_dir, exist_ok=True)
63
 
64
  def _generate_single(args):
65
  i, j, font = args
 
79
  image_file_name = f"font_{i}_img_{j}.jpg"
80
  label_file_name = f"font_{i}_img_{j}.bin"
81
 
82
+ image_file_path = os.path.join(dataset_base_dir, image_file_name)
83
+ label_file_path = os.path.join(dataset_base_dir, label_file_name)
84
 
85
  # detect cache
86
  if os.path.exists(image_file_path) and os.path.exists(label_file_path):
 
97
  pickle.dump(label, open(label_file_path, "wb"))
98
  return
99
  except UnqualifiedFontException as e:
100
+ traceback.print_exc()
101
+ add_exclusion(font, "unqualified font", dataset_base_dir, i, j)
 
 
102
  return
103
+ except TextSizeTooSmallException as e:
104
  traceback.print_exc()
105
  continue
106
+ except Exception as e:
107
+ traceback.print_exc()
108
+ add_exclusion(font, f"other: {repr(e)}", dataset_base_dir, i, j)
109
+ return
110
 
111
  work_list = []
112