Skip to content

Commit

Permalink
Merge pull request #585 from aleju/fix_perspt_inaccuracy
Browse files Browse the repository at this point in the history
Fix inaccuracy in PerspectiveTransformation
  • Loading branch information
aleju authored Jan 18, 2020
2 parents 0bf705e + 12ae82c commit 339babc
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 25 deletions.
5 changes: 5 additions & 0 deletions changelogs/master/fixed/20200118_perspt_inaccuracy.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
* Fixed an inaccuracy in `PerspectiveTransform` that could lead to slightly
misaligned transformations between images and coordinate-based
augmentables (e.g. bounding boxes). The problem was more significant the
smaller the images and the larger the `scale` values were. It was also
worsened by using `fit_output`. #585
18 changes: 11 additions & 7 deletions imgaug/augmenters/geometric.py
Original file line number Diff line number Diff line change
Expand Up @@ -3765,11 +3765,13 @@ def _draw_samples(self, shapes, random_state):
# (i.e. top-down view) of the image, again specifying points
# in the top-left, top-right, bottom-right, and bottom-left
# order
# do not use width-1 or height-1 here: e.g. for width=3, height=2,
# the bottom right coordinate is at (3.0, 2.0) and not (2.0, 1.0)
dst = np.array([
[0, 0],
[max_width - 1, 0],
[max_width - 1, max_height - 1],
[0, max_height - 1]
[max_width, 0],
[max_width, max_height],
[0, max_height]
], dtype=np.float32)

# compute the perspective transform matrix and then apply it
Expand Down Expand Up @@ -3814,11 +3816,13 @@ def _order_points(cls, pts):
@classmethod
def _expand_transform(cls, matrix, shape):
height, width = shape
# do not use width-1 or height-1 here: e.g. for width=3, height=2,
# the bottom right coordinate is at (3.0, 2.0) and not (2.0, 1.0)
rect = np.array([
[0, 0],
[width - 1, 0],
[width - 1, height - 1],
[0, height - 1]], dtype=np.float32)
[width, 0],
[width, height],
[0, height]], dtype=np.float32)
dst = cv2.perspectiveTransform(np.array([rect]), matrix)[0]

# get min x, y over transformed 4 points
Expand All @@ -3828,7 +3832,7 @@ def _expand_transform(cls, matrix, shape):
dst = np.around(dst, decimals=0)

matrix_expanded = cv2.getPerspectiveTransform(rect, dst)
max_width, max_height = dst.max(axis=0) + 1
max_width, max_height = dst.max(axis=0)
return matrix_expanded, max_width, max_height

def get_parameters(self):
Expand Down
46 changes: 28 additions & 18 deletions test/augmenters/test_geometric.py
Original file line number Diff line number Diff line change
Expand Up @@ -5601,22 +5601,25 @@ def test_heatmaps_smaller_than_image_without_keep_size(self):
aug = iaa.PerspectiveTransform(scale=0.2, keep_size=False)
aug.jitter = iap.Deterministic(0.2)

y1 = int(30*0.2)
y2 = int(30*0.8)
x1 = int(30*0.2)
x2 = int(30*0.8)
x1_small = int(25*0.2)
x2_small = int(25*0.8)
y1_small = int(20*0.2)
y2_small = int(20*0.8)
height, width = 300, 200
height_small, width_small = 150, 100

y1 = int(height*0.2)
y2 = int(height*0.8)
x1 = int(width*0.2)
x2 = int(width*0.8)
y1_small = int(height_small*0.2)
y2_small = int(height_small*0.8)
x1_small = int(width_small*0.2)
x2_small = int(width_small*0.8)

img_small = ia.imresize_single_image(
self.image,
(20, 25),
(height_small, width_small),
interpolation="cubic")
hm = ia.HeatmapsOnImage(
img_small.astype(np.float32)/255.0,
shape=(30, 30))
shape=(height, width))

img_aug = aug.augment_image(self.image)
hm_aug = aug.augment_heatmaps([hm])[0]
Expand All @@ -5635,7 +5638,7 @@ def test_heatmaps_smaller_than_image_without_keep_size(self):
])
img_aug_mask = img_aug > 255*0.1
hm_aug_mask = ia.imresize_single_image(
hm_aug.arr_0to1, img_aug.shape[0:2], interpolation="cubic"
hm_aug.arr_0to1, img_aug.shape[0:2], interpolation="linear"
) > 0.1
same = np.sum(img_aug_mask == hm_aug_mask[:, :, 0])
assert (same / img_aug_mask.size) >= 0.96
Expand Down Expand Up @@ -6319,7 +6322,12 @@ def test_mode_constant_uses_cval(self):
img_aug0 = aug0.augment_image(img)

assert (img_aug255 == 255).all()
assert not (img_aug0 == 255).all()
# TODO This was originally "assert not (...)", but since
# PerspectiveTransform has become more precise, there are no
# filled pixels anymore at the edges. That is because PerspT
# currently only zooms in and not out. Filled pixels at the sides
# were previously due to a bug.
assert (img_aug0 == 255).all()

# ---------
# fit_output
Expand All @@ -6345,7 +6353,7 @@ def test_fit_output_with_fixed_jitter(self):
x2 = np.argmax(image_aug[h-1, :, 2])

# same shape
assert image_aug.shape != image.shape
assert image_aug.shape == image.shape

# corners roughly still at top-left, top-right, bottom-right
assert 0 <= y0 <= 3
Expand Down Expand Up @@ -6429,7 +6437,7 @@ def test_fit_output_with_fixed_jitter__keypoints(self):
(0, 50)
], shape=(50, 50, 3))

for _ in sm.xrange(10):
for i in sm.xrange(10):
kpsoi_aug = aug(keypoints=kpsoi)

h, w = kpsoi_aug.shape[0:2]
Expand All @@ -6443,10 +6451,12 @@ def test_fit_output_with_fixed_jitter__keypoints(self):
x_min = min([x0, x1, x2, x3])
x_max = max([x0, x1, x2, x3])
tol = 0.5
assert 0-tol <= y_min <= tol
assert 0-tol <= x_min <= tol
assert h-tol <= y_max <= h+tol
assert w-tol <= x_max <= w+tol
assert 0-tol <= y_min <= tol, "Got y_min=%.4f at %d" % (y_min, i)
assert 0-tol <= x_min <= tol, "Got x_min=%.4f at %d" % (x_min, i)
assert h-tol <= y_max <= h+tol, (
"Got y_max=%.4f for h=%.2f at %d" % (y_max, h, i))
assert w-tol <= x_max <= w+tol, (
"Got x_max=%.4f for w=%.2f at %d" % (x_max, w, i))

# ---------
# unusual channel numbers
Expand Down

0 comments on commit 339babc

Please sign in to comment.