-
Notifications
You must be signed in to change notification settings - Fork 0
/
outline.py
90 lines (68 loc) · 2.08 KB
/
outline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import io
import re
from typing import List, TypedDict
class OutlineItem(TypedDict):
    """A single entry in a document outline (table of contents)."""

    # Title of the entry.
    name: str
    # Page number of the entry.
    page: int
    # Nested sub-entries of this entry.
    children: List["OutlineItem"]
class ParseError(Exception):
    """Raised when outline text cannot be parsed."""
def parse_str(s: str):
    """Parse outline text held in a string.

    Wraps *s* in an in-memory text stream and delegates to ``parse_reader``,
    returning whatever it returns and propagating any exception it raises.
    """
    stream = io.StringIO(s)
    return parse_reader(stream)
def parse_reader(r: io.TextIOBase) -> List[OutlineItem]:
    """Parse outline text from a text stream into a tree of outline items.

    The stream is read line by line. Lines that are empty after stripping
    whitespace and lines matched by ``_reg_comment`` are skipped; every other
    line is handed to ``_parse_line`` to obtain its depth, name and page.

    Args:
        r: Text stream to iterate over, one outline entry per line.

    Returns:
        The top-level (depth 0) entries; deeper entries are reachable through
        each entry's ``children`` list.

    Raises:
        ParseError: If a line's depth is greater than the number of entries
            currently on the ancestor chain (i.e. it is indented more than one
            level below the preceding entry), or if ``_parse_line`` rejects
            the line.
    """
    roots: List[OutlineItem] = []
    # ancestors[d] is the most recent entry seen at depth d on the current path.
    ancestors: List[OutlineItem] = []
    for line in r:
        if not line.strip() or _reg_comment.match(line):
            continue
        depth, name, page = _parse_line(line)
        if depth > len(ancestors):
            raise ParseError(f'标题缩进数量错误 {name}')
        entry = OutlineItem(name=name, page=page, children=[])
        # Depth 0 entries go to the result list, others to their parent.
        siblings = roots if depth == 0 else ancestors[depth - 1]['children']
        siblings.append(entry)
        # Drop everything at this depth or deeper, then make this entry the
        # current one for its depth.
        del ancestors[depth:]
        ancestors.append(entry)
    return roots
_reg_outline = re.compile(r'^(\t*)([^\s].*)\s+(-?\d+)$')
"""书签正则"""
_reg_comment = re.compile(r'^--')
"""注释正则"""
def _parse_line(s: str):
    """Split one outline line into its depth, name and page number.

    Args:
        s: A single line of outline text.

    Returns:
        A ``(depth, name, page)`` tuple, where ``depth`` is the number of tab
        characters in the leading-tab group, ``name`` is the title text and
        ``page`` is the page number converted to ``int``.

    Raises:
        ParseError: If the line does not match ``_reg_outline``.
    """
    matched = _reg_outline.match(s)
    if matched is None:
        raise ParseError(f'格式错误 {s}')
    tabs, title, page_text = matched.groups('')
    depth = tabs.count('\t')
    return depth, title, int(page_text)
def format_str(items: List[OutlineItem]) -> str:
    """Render outline items as text and return the result as a string.

    The items are written to an in-memory text buffer via ``format_writer``;
    the buffer is closed before returning.
    """
    buffer = io.StringIO()
    try:
        format_writer(buffer, items)
        text = buffer.getvalue()
    finally:
        buffer.close()
    return text
def format_writer(w: io.TextIOBase, items: List[OutlineItem]):
    """Write outline items to a text stream, one entry per line.

    Each entry is written as ``<tabs><name> <page>`` followed by a newline,
    with one tab per nesting level. An entry's children are written
    immediately after it, before its next sibling.

    Args:
        w: Stream whose ``write`` method receives each formatted line.
        items: Top-level outline entries to format.
    """
    def render(nodes, depth):
        # Yield the line for each node, followed by the lines of its subtree.
        prefix = '\t' * depth
        for node in nodes:
            yield f"{prefix}{node['name']} {node['page']}\n"
            yield from render(node['children'], depth + 1)

    for line in render(items, 0):
        w.write(line)