-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscan_fonts.py
72 lines (52 loc) · 2.01 KB
/
scan_fonts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import PIL
import numpy as np
import pandas as pd
from PIL import ImageDraw, ImageFont
from fontTools.ttLib import TTFont
from tqdm.contrib.concurrent import process_map
from manga_ocr_dev.env import ASSETS_PATH, FONTS_ROOT
# Characters to probe in every font: the `char` column of the vocabulary CSV,
# loaded once when the module is imported.
vocab = pd.read_csv(ASSETS_PATH / 'vocab.csv')['char'].values
def has_glyph(font, glyph):
    """
    Tell whether any cmap table of `font` maps the code point of `glyph`.

    `font` is indexed with 'cmap' and the result's `tables` are scanned;
    `glyph` must be a single character, since it is passed to `ord()`.
    Returns True as soon as one table contains the code point, else False.
    """
    return any(ord(glyph) in subtable.cmap for subtable in font['cmap'].tables)
def process(font_path):
    """
    Get supported characters list for a given font.

    Font metadata is not always reliable, so every vocabulary character that the
    font's cmap claims to contain is also rendered, and it is kept only if at
    least one pixel was actually drawn.
    Still not perfect, because sometimes unsupported characters show up as rectangles.

    Args:
        font_path: Path (or string) of the font file to inspect.

    Returns:
        A string with the supported characters from `vocab`, in vocabulary order.
        If anything fails, the error is printed and an empty string is returned
        instead of raising.
    """
    ttfont = None
    try:
        font_path = str(font_path)
        # fontNumber=0 picks the first face of a font collection (.ttc); without
        # it fontTools refuses to open collections. It is ignored for
        # single-font files and matches the face Pillow loads by default (index 0).
        ttfont = TTFont(font_path, fontNumber=0)
        pil_font = ImageFont.truetype(font_path, 24)

        supported_chars = []
        for char in vocab:
            # Cheap metadata check first; only render what the cmap advertises.
            if not has_glyph(ttfont, char):
                continue

            # Draw the character in black on a white canvas.
            image = PIL.Image.new('L', (40, 40), 255)
            draw = ImageDraw.Draw(image)
            draw.text((10, 0), char, 0, font=pil_font)

            # An all-white canvas means the font rendered nothing for this character.
            if not (np.array(image) != 255).any():
                continue

            supported_chars.append(char)

        supported_chars = ''.join(supported_chars)

    except Exception as e:
        # Deliberate best-effort: report the broken font and carry on.
        print(f'Error while processing {font_path}: {e}')
        supported_chars = ''

    finally:
        # Release the file handle held by the TTFont reader.
        if ttfont is not None:
            ttfont.close()

    return supported_chars
def main():
    """
    Scan every font file under FONTS_ROOT and write the summary to `fonts.csv`.

    Each font is passed to `process` in a pool of worker processes. The output
    CSV (in ASSETS_PATH) has one row per font with the columns `font_path`
    (relative to FONTS_ROOT), `supported_chars`, `num_chars` and `label`
    (always 'regular').
    """
    path_in = FONTS_ROOT
    out_path = ASSETS_PATH / 'fonts.csv'

    # Compared against the lower-cased suffix so that '.TTF', '.OTF', '.Ttc', ...
    # are all picked up, not just the spellings that happen to be listed.
    suffixes = {'.otf', '.ttc', '.ttf'}

    # Sorted so that the row order of the CSV is reproducible between runs;
    # directories that merely look like fonts are skipped.
    font_paths = sorted(
        path for path in path_in.glob('**/*')
        if path.is_file() and path.suffix.lower() in suffixes
    )

    supported = process_map(process, font_paths, max_workers=16)

    data = pd.DataFrame({
        'font_path': [str(path.relative_to(path_in)) for path in font_paths],
        'supported_chars': supported,
    })
    data['num_chars'] = data.supported_chars.str.len()
    data['label'] = 'regular'
    data.to_csv(out_path, index=False)


if __name__ == '__main__':
    main()