"""Abstract interfaces (interfaces.py).

Declares the enumerations and abstract base classes used for PDF content
nodes, node lists, processors, tags, arenas and PDF backends.
"""
from abc import ABC, abstractmethod
from collections.abc import Sequence
from enum import Enum, IntEnum
from typing import List, Optional, Union
import fitz
from fitz import Page
class PDFContentType(IntEnum):
    """Integer-valued enumeration of PDF content kinds."""

    # Textual content.
    TEXT = 0

    # Image content.
    IMAGE = 1
class AbstractNode(ABC):
    """Interface for a node that carries a body and links to neighbours.

    Both link setters are abstract, so only subclasses that implement
    them can be instantiated.
    """

    # Payload of the node: a string or a dict.
    body: Union[str, dict]

    # Declared neighbour references.
    # NOTE(review): the leading double underscore makes Python name-mangle
    # these per class, so a subclass does not share them under the same
    # name -- confirm that this is intended.
    __prev: object
    __next: object

    @abstractmethod
    def set_next_node(self, node: object):
        """Register ``node`` as the successor of this node.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    @abstractmethod
    def set_previous_node(self, node: object):
        """Register ``node`` as the predecessor of this node.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError
class BaseContentNode(AbstractNode):
    """Abstract node that additionally declares a ``content_type``.

    The body annotation, the neighbour annotations and both abstract link
    setters are declared again on this class, so it is still abstract.
    """

    # Kind of content held by the node, declared as a string.
    content_type: str

    # Payload of the node: a string or a dict.
    body: Union[str, dict]

    # Declared neighbour references (name-mangled per class because of the
    # leading double underscore).
    __prev: object
    __next: object

    @abstractmethod
    def set_next_node(self, node: object):
        """Register ``node`` as the successor of this node.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    @abstractmethod
    def set_previous_node(self, node: object):
        """Register ``node`` as the predecessor of this node.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError
class AbstractDoublyLinkedList(Sequence, ABC):
    """Abstract sequence of nodes backed by the ``nodes`` attribute.

    Subclasses must implement ``from_list``, ``append`` and ``pop``.
    Indexing and length are both answered from ``self.nodes``, which
    satisfies the two abstract methods required by
    ``collections.abc.Sequence``; the remaining Sequence mixin methods
    (iteration, ``in``, ``reversed``, ``index``, ``count``) build on them.
    """

    # Backing storage for the nodes.
    nodes: List[AbstractNode]

    @staticmethod
    @abstractmethod
    def from_list(data: list):
        """Build a list object from ``data``.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    @abstractmethod
    def append(self, data: AbstractNode):
        """Add the node ``data`` to the list.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    @abstractmethod
    def pop(self, index: int) -> AbstractNode:
        """Remove and return the node at ``index``.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def __getitem__(self, index: int) -> AbstractNode:
        """Return ``self.nodes[index]``."""
        return self.nodes[index]

    def __len__(self) -> int:
        """Return the number of entries in ``self.nodes``.

        Provided alongside ``__getitem__`` so that the Sequence protocol is
        complete; subclasses may still override it.
        """
        return len(self.nodes)
class AbstractProcessor(ABC):
    """Interface for a processor holding content, results and unknown fonts.

    All three methods are abstract; the base bodies do nothing and
    return ``None``.
    """

    # Nodes the processor works on.
    content: AbstractDoublyLinkedList[AbstractNode]

    # Collected results (element type is not specified here).
    result: List

    # Fonts that were not recognised (element type is not specified here).
    unknown_fonts: List

    @staticmethod
    @abstractmethod
    def pre_processor(text: str) -> str:
        """Transform ``text`` before processing and return the new string."""

    @staticmethod
    @abstractmethod
    def post_processor(text: str) -> str:
        """Transform ``text`` after processing and return the new string."""

    @abstractmethod
    def fetch_content(self):
        """Fetch the content; details are left to the implementation."""
class TagRenderFormat(Enum):
    """Enumeration of the ways a tag can be rendered.

    NOTE(review): the member meanings below are read off their names;
    confirm against the concrete tag implementations.
    """

    # Presumably: emit only the left (opening) tag.
    LEFT_TAG_ONLY = 1

    # Presumably: emit only the right (closing) tag.
    RIGHT_TAG_ONLY = 2

    # Presumably: emit both tags.
    BOTH_TAGS = 3

    # Presumably: emit no tags.
    NO_TAGS = 4
class AbstractTag(ABC):
    """Interface for a tag that renders text.

    All three methods are abstract; the base bodies do nothing and
    return ``None``.
    """

    # Buffer used by the tag (element type is not specified here).
    buf: list

    # Opening and closing tag strings; either may be None.
    open_tag: Optional[str]
    close_tag: Optional[str]

    # Rendering mode of the tag.
    render_format: TagRenderFormat

    @abstractmethod
    def render(self, text: str) -> str:
        """Render ``text`` and return the resulting string."""

    @abstractmethod
    def pre_processing(self, text: str) -> str:
        """Transform ``text`` before rendering and return the new string."""

    @abstractmethod
    def post_processing(self, rendered_string: str) -> str:
        """Transform ``rendered_string`` after rendering and return it."""
class AbstractArena(AbstractDoublyLinkedList, AbstractNode):
    """Abstract container that is both a node list and a node itself.

    It holds nodes, carries a tag, and declares rendering hooks. Every
    method declared here is abstract.
    """

    # Nodes contained in the arena.
    nodes: AbstractDoublyLinkedList[AbstractNode]

    # Tag associated with the arena.
    tag: AbstractTag

    # Body of the arena, declared as a list here.
    body: list

    @abstractmethod
    def append(self, node: AbstractNode):
        """Add ``node`` to the arena; the base body does nothing."""

    @staticmethod
    @abstractmethod
    def from_list(content: AbstractDoublyLinkedList):
        """Build an arena from ``content``.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    @abstractmethod
    def render(self) -> str:
        """Return the arena rendered as a string.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    @abstractmethod
    def prepare_body(self) -> None:
        """Prepare the ``body`` of the arena.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError
class AbstractPDFBackend(ABC):
    """Interface for a backend that reads a PDF and saves pages.

    Every method is abstract; the base bodies do nothing and return
    ``None``.
    """

    # Page bounds (presumably the range of pages to handle -- confirm
    # whether they are inclusive and zero- or one-based).
    from_page: int
    to_page: int

    # Location of the input and of the output directory.
    source_path: str
    output_dir_path: str

    # Processor used by the backend.
    processor: AbstractProcessor

    @abstractmethod
    def fetch_pages(self):
        """Fetch the pages; details are left to the implementation."""

    @abstractmethod
    def handle_block(self, doc, page, block) -> List[str]:
        """Handle one ``block`` of ``page`` in ``doc``; return strings."""

    @abstractmethod
    def get_image(self, image: bytes, filename: str, ext: str):
        """Handle the raw ``image`` bytes with the given name and extension."""

    @abstractmethod
    def formatted_image(self, image_path: str, description: str):
        """Format the image at ``image_path`` together with ``description``."""

    @abstractmethod
    def load_unknown_fonts(self):
        """Load the unknown fonts; details are left to the implementation."""

    @abstractmethod
    def create_page(self, page) -> str:
        """Create the string form of ``page`` and return it."""

    @abstractmethod
    def save_page(self, page_number: int, page: str):
        """Save the string ``page`` under ``page_number``."""
class AbstractBoris(ABC):
    """Interface exposing a single abstract hook, ``initial_boris``."""

    @abstractmethod
    def initial_boris(self):
        """Abstract hook; the base body does nothing and returns ``None``.

        NOTE(review): the purpose of this hook is not visible in this file.
        """