
Commit

Merge pull request #456 from aleju/improve_perspective_transform
Improve PerspectiveTransform
aleju authored Oct 25, 2019
2 parents f4d9109 + a3502ea commit 2a31615
Showing 3 changed files with 408 additions and 81 deletions.
7 changes: 7 additions & 0 deletions changelogs/master/20191610_perspective_transform.md
@@ -0,0 +1,7 @@
# Changes to PerspectiveTransform #452 #456

* [rarely breaking] `PerspectiveTransform` now has a `fit_output` parameter,
similar to `Affine`. This change may break code that relied on the order of
arguments to `__init__`.
* The sampling code of `PerspectiveTransform` was reworked and should now
be faster.
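
A minimal usage sketch for the new parameter (illustrative values; assumes the
conventional `imgaug.augmenters` alias `iaa` and a user-supplied batch `images`):

import imgaug.augmenters as iaa

# passing arguments by keyword sidesteps the positional-order break noted above
aug = iaa.PerspectiveTransform(scale=(0.05, 0.15), fit_output=True,
                               keep_size=False)
# keep_size=False retains the enlarged plane produced by fit_output=True
images_aug = aug.augment_images(images)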
175 changes: 98 additions & 77 deletions imgaug/augmenters/geometric.py
@@ -2496,6 +2496,15 @@ class PerspectiveTransform(meta.Augmenter):
that parameter per image, i.e. it must return only the above
mentioned strings.
fit_output : bool, optional
If ``True``, the image plane size and position will be adjusted
to still capture the whole image after perspective transformation.
(Followed by image resizing if `keep_size` is set to ``True``.)
Otherwise, parts of the transformed image may be outside of the image
plane.
This should not be set to ``True`` when using large `scale`
values, as it can lead to very large output images.
polygon_recoverer : 'auto' or None or imgaug.augmentables.polygons._ConcavePolygonRecoverer, optional
The class to use to repair invalid polygons.
If ``"auto"``, a new instance of
@@ -2534,8 +2543,13 @@ class PerspectiveTransform(meta.Augmenter):
"""

def __init__(self, scale=0, cval=0, mode='constant', keep_size=True,
polygon_recoverer="auto", fit_output=False,
_BORDER_MODE_STR_TO_INT = {
"replicate": cv2.BORDER_REPLICATE,
"constant": cv2.BORDER_CONSTANT
}
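# NOTE: this mapping was hoisted from _draw_samples() to a class attribute
# (see the removed TODO further down), so it is no longer rebuilt on every call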

def __init__(self, scale=0, cval=0, mode="constant", keep_size=True,
fit_output=False, polygon_recoverer="auto",
name=None, deterministic=False, random_state=None):
super(PerspectiveTransform, self).__init__(
name=name, deterministic=deterministic, random_state=random_state)
@@ -2544,25 +2558,23 @@ def __init__(self, scale=0, cval=0, mode='constant', keep_size=True,
scale, "scale", value_range=(0, None), tuple_to_uniform=True,
list_to_choice=True)
self.jitter = iap.Normal(loc=0, scale=self.scale)
self.keep_size = keep_size

# setting these to 1x1 caused problems for large scales and polygon
# augmentation
# TODO there is now a recoverer for polygons - are these minima still
# needed/sensible?
self.min_width = 2
self.min_height = 2
self.shift_step_size = 0.5

self.cval = _handle_cval_arg(cval)
self.mode = self._handle_mode_arg(mode)
self.keep_size = keep_size
self.fit_output = fit_output

self.polygon_recoverer = polygon_recoverer
if polygon_recoverer == "auto":
self.polygon_recoverer = _ConcavePolygonRecoverer()

self.fit_output = fit_output

# Special order, mode and cval parameters for heatmaps and
# segmentation maps. These may either be None or a fixed value.
# Stochastic parameters are currently *not* supported.
@@ -2828,88 +2840,90 @@ def _augment_keypoints_by_samples(self, kpsois, samples_images):
return result

def _draw_samples(self, shapes, random_state):
# TODO change these to class attributes
mode_str_to_int = {
"replicate": cv2.BORDER_REPLICATE,
"constant": cv2.BORDER_CONSTANT
}

matrices = []
max_heights = []
max_widths = []
nb_images = len(shapes)
rngs = random_state.duplicate(2+nb_images)
rngs = random_state.duplicate(3)

cval_samples = self.cval.draw_samples((nb_images, 3),
random_state=rngs[0])
mode_samples = self.mode.draw_samples((nb_images,),
random_state=rngs[1])
jitter = self.jitter.draw_samples((nb_images, 4, 2),
random_state=rngs[2])
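# NOTE: jitter for all images is now drawn in a single call; the removed
# code below drew one (4, 2) sample per image inside a python loop, which
# is a main source of the speedup mentioned in the changelog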

# cv2.warpPerspective() doesn't accept numpy arrays as cval (border value)
cval_samples_cv2 = cval_samples.tolist()

# if border modes are represented by strings, convert them to cv2
# border mode integers
if mode_samples.dtype.kind not in ["i", "u"]:
for mode, mapped_mode in self._BORDER_MODE_STR_TO_INT.items():
mode_samples[mode_samples == mode] = mapped_mode
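# e.g. the string "constant" is replaced by the integer
# cv2.BORDER_CONSTANT, so the samples can later be fed to
# cv2.warpPerspective() without further conversion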

# convert jitter into the four corner point coordinates;
# some x/y values have to be changed from `jitter` to `1-jitter`
# for that
# TODO remove the abs() here. It currently only allows zooming in,
# not zooming out.
points = np.mod(np.abs(jitter), 1)

# top left -- no changes needed, just use jitter
# top right
points[:, 1, 0] = 1.0 - points[:, 1, 0] # w = 1.0 - jitter
# bottom right
points[:, 2, 0] = 1.0 - points[:, 2, 0] # w = 1.0 - jitter
points[:, 2, 1] = 1.0 - points[:, 2, 1] # h = 1.0 - jitter
# bottom left
points[:, 3, 1] = 1.0 - points[:, 3, 1] # h = 1.0 - jitter
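# e.g. a constant jitter of 0.1 yields the fractional corners
# (0.1, 0.1), (0.9, 0.1), (0.9, 0.9) and (0.1, 0.9), i.e. every corner
# is pulled towards the image center ("zoom-in")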

for shape, points_i in zip(shapes, points):
h, w = shape[0:2]

points_i[:, 0] *= w
points_i[:, 1] *= h

# Obtain a consistent order of the points and unpack them
# individually.
# Warning: don't just do (tl, tr, br, bl) = _order_points(...)
# here, because the reordered points_i is used further below.
points_i = self._order_points(points_i)
(tl, tr, br, bl) = points_i

cval_samples_cv2 = []

for i in sm.xrange(nb_images):
mode = mode_samples[i]
mode_samples[i] = (
mode if ia.is_single_integer(mode) else mode_str_to_int[mode])

cval_samples_cv2.append([int(cval_i) for cval_i in cval_samples[i]])

h, w = shapes[i][0:2]

points = self.jitter.draw_samples((4, 2), random_state=rngs[2+i])
points = np.mod(np.abs(points), 1)

# modify jitter to the four corner point coordinates
# some x/y values have to be changed from `jitter` to `1-jitter`
# for that

# top left -- no changes needed, just use jitter
# top right
points[2, 0] = 1.0 - points[2, 0] # h = 1.0 - jitter
# bottom right
points[1, 0] = 1.0 - points[1, 0] # w = 1.0 - jitter
points[1, 1] = 1.0 - points[1, 1] # h = 1.0 - jitter
# bottom left
points[0, 1] = 1.0 - points[0, 1] # h = 1.0 - jitter

points[:, 0] = points[:, 0] * w
points[:, 1] = points[:, 1] * h

# obtain a consistent order of the points and unpack them
# individually
points = self._order_points(points)
(tl, tr, br, bl) = points

# TODO remove these loops
# compute the width of the new image, which will be the
# maximum distance between bottom-right and bottom-left
x-coordinates or the top-right and top-left x-coordinates
min_width = None
max_width = None
while min_width is None or min_width < self.min_width:
width_a = np.sqrt(((br[0]-bl[0])**2) + ((br[1]-bl[1])**2))
width_b = np.sqrt(((tr[0]-tl[0])**2) + ((tr[1]-tl[1])**2))
max_width = max(int(width_a), int(width_b))
min_width = min(int(width_a), int(width_b))
width_top = np.sqrt(((tr[0]-tl[0])**2) + ((tr[1]-tl[1])**2))
width_bottom = np.sqrt(((br[0]-bl[0])**2) + ((br[1]-bl[1])**2))
max_width = int(max(width_top, width_bottom))
min_width = int(min(width_top, width_bottom))
if min_width < self.min_width:
tl[0] -= self.shift_step_size
tr[0] += self.shift_step_size
bl[0] -= self.shift_step_size
br[0] += self.shift_step_size
step_size = (self.min_width - min_width)/2
tl[0] -= step_size
tr[0] += step_size
bl[0] -= step_size
br[0] += step_size
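# NOTE: shifting each x-coordinate outwards by half the deficit widens
# both the top and the bottom edge by roughly the full deficit; the
# enclosing while-loop re-measures in case the euclidean widths still
# fall short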

# compute the height of the new image, which will be the
# maximum distance between the top-right and bottom-right
# y-coordinates or the top-left and bottom-left y-coordinates
min_height = None
max_height = None
while min_height is None or min_height < self.min_height:
height_a = np.sqrt(((tr[0]-br[0])**2) + ((tr[1]-br[1])**2))
height_b = np.sqrt(((tl[0]-bl[0])**2) + ((tl[1]-bl[1])**2))
max_height = max(int(height_a), int(height_b))
min_height = min(int(height_a), int(height_b))
height_right = np.sqrt(((tr[0]-br[0])**2) + ((tr[1]-br[1])**2))
height_left = np.sqrt(((tl[0]-bl[0])**2) + ((tl[1]-bl[1])**2))
max_height = int(max(height_right, height_left))
min_height = int(min(height_right, height_left))
if min_height < self.min_height:
tl[1] -= self.shift_step_size
tr[1] -= self.shift_step_size
bl[1] += self.shift_step_size
br[1] += self.shift_step_size
step_size = (self.min_height - min_height)/2
tl[1] -= step_size
tr[1] -= step_size
bl[1] += step_size
br[1] += step_size
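# NOTE: analogous to the width correction above, applied along the y-axis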

# now that we have the dimensions of the new image, construct
# the set of destination points to obtain a "birds eye view",
@@ -2921,10 +2935,10 @@ def _draw_samples(self, shapes, random_state):
[max_width - 1, 0],
[max_width - 1, max_height - 1],
[0, max_height - 1]
], dtype="float32")
], dtype=np.float32)

# compute the perspective transform matrix and then apply it
m = cv2.getPerspectiveTransform(points, dst)
m = cv2.getPerspectiveTransform(points_i, dst)

if self.fit_output:
m, max_width, max_height = self._expand_transform(m, (h, w))
@@ -2933,7 +2947,7 @@ def _draw_samples(self, shapes, random_state):
max_heights.append(max_height)
max_widths.append(max_width)

mode_samples = mode_samples.astype(int)
mode_samples = mode_samples.astype(np.int32)
return _PerspectiveTransformSamplingResult(
matrices, max_heights, max_widths, cval_samples_cv2,
mode_samples)
@@ -2944,7 +2958,7 @@ def _order_points(cls, pts):
# such that the first entry in the list is the top-left,
# the second entry is the top-right, the third is the
# bottom-right, and the fourth is the bottom-left
pts_ordered = np.zeros((4, 2), dtype="float32")
pts_ordered = np.zeros((4, 2), dtype=np.float32)

# the top-left point will have the smallest sum, whereas
# the bottom-right point will have the largest sum
@@ -2962,22 +2976,29 @@ def _order_points(cls, pts):
# return the ordered coordinates
return pts_ordered

def _expand_transform(self, M, shape):
imgHeight, imgWidth = shape
@classmethod
def _expand_transform(cls, M, shape):
height, width = shape
rect = np.array([
[0, 0],
[imgWidth - 1, 0],
[imgWidth - 1, imgHeight - 1],
[0, imgHeight - 1]], dtype='float32')
[width - 1, 0],
[width - 1, height - 1],
[0, height - 1]], dtype=np.float32)
dst = cv2.perspectiveTransform(np.array([rect]), M)[0]

# get min x, y over transformed 4 points
# then modify target points by subtracting these minima
# => shift to (0, 0)
dst -= dst.min(axis=0, keepdims=True)
dst = np.around(dst, decimals=0)
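# NOTE: the transform matrix is then recomputed against the shifted
# corner targets, and the new plane size is the maximum corner
# coordinate plus one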

M_expanded = cv2.getPerspectiveTransform(rect, dst)
maxWidth, maxHeight = dst.max(axis=0) + 1
return M_expanded, maxWidth, maxHeight
max_width, max_height = dst.max(axis=0) + 1
return M_expanded, max_width, max_height

def get_parameters(self):
return [self.jitter, self.keep_size, self.cval, self.mode]
return [self.jitter, self.keep_size, self.cval, self.mode,
self.fit_output]


class _ElasticTransformationSamplingResult(object):