gyrojeff committed on
Commit
a9b34cd
·
1 Parent(s): 9752f7b

feat: add exclusion rules

Browse files
configs/font.yml CHANGED
@@ -11,6 +11,9 @@
11
  # - Others
12
  dataset:
13
  path: ./dataset/fonts
 
 
 
14
  specs:
15
  - path:
16
  - ./Adobe/CJK
 
11
  # - Others
12
  dataset:
13
  path: ./dataset/fonts
14
+ exclusion:
15
+ - ./Founder Type(方正)/韩文/方正朝文中圆.TTF
16
+ - ./Founder Type(方正)/简繁/ttf/方正宋体S-超大字符集(SIP).TTF
17
  specs:
18
  - path:
19
  - ./Adobe/CJK
font_dataset/font.py CHANGED
@@ -1,6 +1,5 @@
1
  import yaml
2
  import os
3
- from typing import List
4
 
5
 
6
  from .utils import get_files
@@ -15,7 +14,7 @@ class DSFont:
15
  self.language = language
16
 
17
 
18
- def load_fonts(config_path="configs/font.yml") -> List[DSFont]:
19
  with open(config_path, "r", encoding="utf-8") as f:
20
  config = yaml.safe_load(f)
21
 
@@ -41,4 +40,14 @@ def load_fonts(config_path="configs/font.yml") -> List[DSFont]:
41
  font_list.append(DSFont(file, spec["language"]))
42
 
43
  font_list.sort(key=lambda x: x.path)
44
- return font_list
 
 
 
 
 
 
 
 
 
 
 
1
  import yaml
2
  import os
 
3
 
4
 
5
  from .utils import get_files
 
14
  self.language = language
15
 
16
 
17
+ def load_fonts(config_path="configs/font.yml"):
18
  with open(config_path, "r", encoding="utf-8") as f:
19
  config = yaml.safe_load(f)
20
 
 
40
  font_list.append(DSFont(file, spec["language"]))
41
 
42
  font_list.sort(key=lambda x: x.path)
43
+
44
+ exclusion_list = ds_config["exclusion"]
45
+ exclusion_list = [os.path.join(ds_path, path) for path in exclusion_list]
46
+
47
+ def exclusion_rule(font: DSFont):
48
+ for exclusion in exclusion_list:
49
+ if os.path.samefile(font.path, exclusion):
50
+ return True
51
+ return False
52
+
53
+ return font_list, exclusion_rule
font_ds_generate_script.py CHANGED
@@ -31,7 +31,7 @@ dataset_path = "./dataset/font_img"
31
  os.makedirs(dataset_path, exist_ok=True)
32
 
33
 
34
- fonts = load_fonts()
35
  corpus_manager = CorpusGeneratorManager()
36
  images = background_image_generator()
37
 
@@ -41,10 +41,17 @@ def generate_dataset(dataset_type: str, cnt: int):
41
  os.makedirs(dataset_bath_dir, exist_ok=True)
42
 
43
  def _generate_single(args):
 
 
 
 
 
 
 
 
 
44
  while True:
45
  try:
46
- i, j, font = args
47
-
48
  image_file_name = f"font_{i}_img_{j}.jpg"
49
  label_file_name = f"font_{i}_img_{j}.bin"
50
 
 
31
  os.makedirs(dataset_path, exist_ok=True)
32
 
33
 
34
+ fonts, exclusion_rule = load_fonts()
35
  corpus_manager = CorpusGeneratorManager()
36
  images = background_image_generator()
37
 
 
41
  os.makedirs(dataset_bath_dir, exist_ok=True)
42
 
43
  def _generate_single(args):
44
+ i, j, font = args
45
+ print(
46
+ f"Generating {dataset_type} font: {font.path} {i} / {len(fonts)}, image {j}"
47
+ )
48
+
49
+ if exclusion_rule(font):
50
+ print(f"Excluded font: {font.path}")
51
+ return
52
+
53
  while True:
54
  try:
 
 
55
  image_file_name = f"font_{i}_img_{j}.jpg"
56
  label_file_name = f"font_{i}_img_{j}.bin"
57
 
font_ds_stat.py CHANGED
@@ -23,12 +23,16 @@ test_cnt_cjk = int(test_cnt * cjk_ratio)
23
  dataset_path = "./dataset/font_img"
24
  os.makedirs(dataset_path, exist_ok=True)
25
 
26
- fonts = load_fonts()
27
 
28
 
29
  cnt = 0
30
 
31
  for font in fonts:
 
 
 
 
32
  if font.language == "CJK":
33
  cnt += cjk_ratio
34
  else:
 
23
  dataset_path = "./dataset/font_img"
24
  os.makedirs(dataset_path, exist_ok=True)
25
 
26
+ fonts, exclusion_rule = load_fonts()
27
 
28
 
29
  cnt = 0
30
 
31
  for font in fonts:
32
+ if exclusion_rule(font):
33
+ print(f"Excluded font: {font.path}")
34
+ continue
35
+
36
  if font.language == "CJK":
37
  cnt += cjk_ratio
38
  else: