-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
boris.py
56 lines (41 loc) · 1.72 KB
/
boris.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
from typing import Optional
import fitz # import the bindings
from utils import FileDescriptor
from processor import ContentProcessor
from interfaces import AbstractBoris
from backends import MuPDFBackend
class Boris(AbstractBoris, MuPDFBackend):
    """Document-processing entry point combining AbstractBoris and MuPDFBackend.

    Constructing an instance opens the source document with PyMuPDF
    (``fitz``), makes sure the output directory and its ``images``
    sub-directory exist, and prepares the unknown-fonts map file.
    """

    def __init__(self,
                 source_path: str,
                 output_dir_path: str,
                 from_page: int = 0,
                 to_page: int = 0):
        """Store the configuration and run the one-time setup.

        Args:
            source_path: Path of the document passed to ``fitz.open``.
            output_dir_path: Directory that receives the generated files;
                created (with any missing parents) when absent.
            from_page: Coerced with ``int()`` and stored as ``from_page``.
            to_page: Accepted for interface compatibility; currently
                neither stored nor used by this class.

        Raises:
            SystemExit: If ``fitz.open`` raises ``RuntimeError`` for
                ``source_path``.
        """
        self.source_path = source_path
        self.output_dir_path = output_dir_path
        self.from_page: int = int(from_page)
        # NOTE(review): ``to_page`` is ignored. The code that stored it and
        # defaulted it to the document's page count was disabled; confirm
        # whether an upper page bound should be restored.
        self.book_font_sizes = set()
        # Kept as a zero-argument callable so the path always reflects the
        # current value of ``output_dir_path`` at call time.
        self.unknown_fonts_map_filepath = lambda: os.path.join(
            self.output_dir_path, 'unknown_fonts_map.json'
        )
        self.unknown_fonts_file_descriptor: Optional[FileDescriptor] = None
        self.doc: Optional[fitz.Document] = None
        self.images_path = os.path.join(self.output_dir_path, 'images')
        self.initial_boris()
        # NOTE(review): this stores the ContentProcessor *class*, not an
        # instance, although the annotation suggests an instance. Left
        # unchanged because the constructor signature is not visible here;
        # confirm the intent. It is also assigned only after
        # ``initial_boris()`` has run.
        self.processor: Optional[ContentProcessor] = ContentProcessor

    def initial_boris(self) -> None:
        """Open the document and prepare the output location.

        Opens ``source_path`` with ``fitz.open``, creates the output and
        images directories, builds the ``FileDescriptor`` for the
        unknown-fonts map, and finally calls ``load_unknown_fonts()``.

        Raises:
            SystemExit: If ``fitz.open`` raises ``RuntimeError``.
        """
        try:
            self.doc = fitz.open(self.source_path)  # open document
        except RuntimeError as err:
            # ``raise SystemExit`` instead of the ``exit`` helper: ``exit`` is
            # injected by the ``site`` module for interactive use and may be
            # missing (e.g. ``python -S`` or frozen applications).
            raise SystemExit(
                f'pdf_path param is invalid. unknown path {self.source_path}'
            ) from err

        # ``exist_ok=True`` avoids the check-then-create race, and
        # ``makedirs`` also creates missing parent directories.
        os.makedirs(self.output_dir_path, exist_ok=True)
        os.makedirs(self.images_path, exist_ok=True)

        self.unknown_fonts_file_descriptor = FileDescriptor(
            filepath=self.unknown_fonts_map_filepath(), write_key='a'
        )
        # ``load_unknown_fonts`` is not defined in this class; presumably it
        # is provided by one of the base classes -- verify.
        self.load_unknown_fonts()