-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgeometry_helpers.py
121 lines (98 loc) · 4.02 KB
/
geometry_helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import math
from copy import copy
import cv2
import numpy as np
class AlignerConfig:
def __init__(self, args):
self.debug = args['debug']
self.min_keypoint_match_ratio = args['min_keypoint_match_ratio']
self.MAX_PIXEL_DISTANCE = args["MAX_PIXEL_DISTANCE"]
self.NUM_KEYPOINTS_FOR_ALIGNMENT = args["NUM_KEYPOINTS_FOR_ALIGNMENT"]
self.PARTIAL_AFFINE = args["PARTIAL_AFFINE"]
self.maxIters = args["maxIters"]
self.refineIters = args["refineIters"]
self.confidence = args["confidence"]
self.ransacReprojThreshold = args["ransacReprojThreshold"]
self.TEXT_EXTRACTION_ATOL = args["TEXT_EXTRACTION_ATOL"]
self.NEWLINE_PIXELS_THRESHOLD = args["NEWLINE_PIXELS_THRESHOLD"]
self.granularity = args["granularity"]
self.old_engine_folder_name = args["source_folder"]
self.new_engine_folder_name = args["target_folder"]
def pixel_distance(pos1, pos2):
x1 = (pos1["left"] + pos1["right"]) / 2
y1 = (pos1["top"] + pos1["bottom"]) / 2
x2 = (pos2["left"] + pos2["right"]) / 2
y2 = (pos2["top"] + pos2["bottom"]) / 2
return math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
def normalize_position(position, page_width, page_height):
return {
"top": position["top"] / page_height,
"bottom": position["bottom"] / page_height,
"left": position["left"] / page_width,
"right": position["right"] / page_width,
}
def in_box(pos, box, atol=0):
return (
box["left"] < float(pos["left"]) + atol
and box["right"] > float(pos["right"]) - atol
and box["top"] < float(pos["top"]) + atol * 0.5
and box["bottom"] > float(pos["bottom"]) - atol * 0.5
)
def normalize_tokens(tokens, page_size):
for token in tokens:
token["nposition"] = normalize_position(
token["position"], page_size["width"], page_size["height"]
)
token["text_lower"] = token["text"].lower()
return tokens
def apply_transform(left, top, right, bot, H):
bbox = np.array(
[[[left, bot]], [[right, bot]], [[left, top]], [[right, top]]], dtype=np.float32
)
transformed = np.ceil(cv2.perspectiveTransform(bbox, H))
left = int(min(transformed[0, 0][0], transformed[2, 0][0]))
bot = int(max(transformed[0, 0][1], transformed[1, 0][1]))
right = int(max(transformed[1, 0][0], transformed[3, 0][0]))
top = int(min(transformed[2, 0][1], transformed[3, 0][1]))
return left, top, right, bot
def transform_tokens(H, token_list):
transformed_tokens = []
for t in token_list:
transformed = copy(t)
bbLeft_t, bbTop_t, bbRight_t, bbBot_t = apply_transform(
t["position"]["bbLeft"],
t["position"]["bbTop"],
t["position"]["bbRight"],
t["position"]["bbBot"],
H,
)
transformed["position"]["bbLeft"] = bbLeft_t
transformed["position"]["bbTop"] = bbTop_t
transformed["position"]["bbRight"] = bbRight_t
transformed["position"]["bbBot"] = bbBot_t
left, top, right, bot = apply_transform(
t["position"]["left"],
t["position"]["top"],
t["position"]["right"],
t["position"]["bottom"],
H,
)
transformed["position"]["left"] = left
transformed["position"]["top"] = top
transformed["position"]["right"] = right
transformed["position"]["bottom"] = bot
transformed_tokens.append(transformed)
return transformed_tokens
def get_word_match_dict(old_tokens, new_tokens, match_words):
"""Find all words that occur in both the template and the image. Returns
dictionaries mapping these words to their respective tokens in each doc."""
old_matches = {}
new_matches = {}
for k in match_words:
old_matches[k] = [
tok for tok in old_tokens if tok["text"].lower() == k.lower()
]
new_matches[k] = [
tok for tok in new_tokens if tok["text"].lower() == k.lower()
]
return old_matches, new_matches