"""Augmenters that apply affine or similar transformations.
Do not import directly from this file, as the categorization is not final.
Use instead ::
from imgaug import augmenters as iaa
and then e.g. ::
seq = iaa.Sequential([
iaa.Affine(...),
iaa.PerspectiveTransform(...)
])
List of augmenters:
* Affine
* AffineCv2
* PiecewiseAffine
* PerspectiveTransform
* ElasticTransformation
* Rot90
"""
from __future__ import print_function, division, absolute_import
import math
from functools import partial
import numpy as np
from scipy import ndimage
from skimage import transform as tf
import cv2
import six.moves as sm
from . import meta
from . import blur as blur_lib
import imgaug as ia
from imgaug.augmentables.polys import _ConcavePolygonRecoverer
from .. import parameters as iap
from .. import dtypes as iadt
# Names of the dtypes for which the cv2 backend is considered usable when
# the interpolation order is not 0.
_VALID_DTYPES_CV2_ORDER_NOT_0 = {
    "bool",
    "uint8", "uint16",
    "int8", "int16",
    "float16", "float32", "float64",
}
# With order 0 the same dtypes are accepted, plus int32.
_VALID_DTYPES_CV2_ORDER_0 = _VALID_DTYPES_CV2_ORDER_NOT_0 | {"int32"}
# Interpolation orders, skimage -> cv2:
#   skimage | cv2
#   0       | cv2.INTER_NEAREST
#   1       | cv2.INTER_LINEAR
#   2       | - (no direct counterpart, mapped to cv2.INTER_CUBIC below)
#   3       | cv2.INTER_CUBIC
#   4       | - (no direct counterpart, mapped to cv2.INTER_CUBIC below)
_AFFINE_INTERPOLATION_ORDER_SKIMAGE_TO_CV2 = {
    0: cv2.INTER_NEAREST,
    1: cv2.INTER_LINEAR,
}
_AFFINE_INTERPOLATION_ORDER_SKIMAGE_TO_CV2.update(
    dict.fromkeys((2, 3, 4), cv2.INTER_CUBIC))
# Border modes (constant, edge, symmetric, reflect, wrap), skimage -> cv2:
#   skimage   | cv2
#   constant  | cv2.BORDER_CONSTANT
#   edge      | cv2.BORDER_REPLICATE
#   symmetric | cv2.BORDER_REFLECT
#   reflect   | cv2.BORDER_REFLECT_101
#   wrap      | cv2.BORDER_WRAP
_AFFINE_MODE_SKIMAGE_TO_CV2 = dict(
    constant=cv2.BORDER_CONSTANT,
    edge=cv2.BORDER_REPLICATE,
    symmetric=cv2.BORDER_REFLECT,
    reflect=cv2.BORDER_REFLECT_101,
    wrap=cv2.BORDER_WRAP,
)
def _handle_order_arg(order, backend):
    """Normalize an ``order`` argument to a parameter object.

    Parameters
    ----------
    order : imgaug.ALL or int or list of int or imgaug.parameters.StochasticParameter
        Interpolation order(s). Integers have to be in the interval
        ``[0, 5]`` and, if `backend` is ``"cv2"``, one of ``0``, ``1``
        or ``3``.

    backend : str
        Name of the backend. ``"auto"`` and ``"cv2"`` limit ``imgaug.ALL``
        to the orders ``0``, ``1`` and ``3``.

    Returns
    -------
    object
        ``iap.Choice`` for ``imgaug.ALL`` and lists, ``iap.Deterministic``
        for a single integer, or the unchanged stochastic parameter.

    """
    # Performance in skimage for Affine:
    #   1.0x order 0
    #   1.5x order 1
    #   3.0x order 3
    #  30.0x order 4
    #  60.0x order 5
    # measurement based on 256x256x3 batches, difference is smaller
    # on smaller images (seems to grow more like exponentially with image
    # size)
    if order == ia.ALL:
        if backend in ("auto", "cv2"):
            return iap.Choice([0, 1, 3])
        # dont use order=2 (bi-quadratic) because that is apparently
        # currently not recommended (and throws a warning)
        return iap.Choice([0, 1, 3, 4, 5])

    if ia.is_single_integer(order):
        assert 0 <= order <= 5, (
            "Expected order's integer value to be in the interval [0, 5], "
            "got %d." % (order,))
        if backend == "cv2":
            assert order in [0, 1, 3], (
                "Backend \"cv2\" and order=%d was chosen, but cv2 backend "
                "can only handle order 0, 1 or 3." % (order,))
        return iap.Deterministic(order)

    if isinstance(order, list):
        assert all(ia.is_single_integer(entry) for entry in order), (
            "Expected order list to only contain integers, "
            "got types %s." % (str([type(val) for val in order]),))
        assert all(0 <= entry <= 5 for entry in order), (
            "Expected all of order's integer values to be in range "
            "0 <= x <= 5, got %s." % (str(order),))
        if backend == "cv2":
            assert all(entry in [0, 1, 3] for entry in order), (
                "cv2 backend can only handle order 0, 1 or 3. Got order "
                "list of %s." % (order,))
        return iap.Choice(order)

    if isinstance(order, iap.StochasticParameter):
        return order

    raise Exception(
        "Expected order to be imgaug.ALL, int, list of int or "
        "StochasticParameter, got %s." % (type(order),))
def _handle_cval_arg(cval):
    """Normalize a ``cval`` argument to a parameter object.

    ``imgaug.ALL`` becomes ``iap.Uniform(0, 255)``. Every other input is
    handed to ``iap.handle_continuous_param`` without a value range, with
    tuples treated as uniform intervals and lists as choices.

    """
    if cval == ia.ALL:
        # TODO change this so that it is dynamically created per image
        #      (or once per dtype)
        # skimage transform expects float
        return iap.Uniform(0, 255)

    conversion_settings = dict(
        value_range=None, tuple_to_uniform=True, list_to_choice=True)
    return iap.handle_continuous_param(cval, "cval", **conversion_settings)
# currently used for Affine and PiecewiseAffine
def _handle_mode_arg(mode):
    """Normalize a ``mode`` argument to a parameter object.

    Parameters
    ----------
    mode : imgaug.ALL or str or list of str or imgaug.parameters.StochasticParameter
        Border mode(s) to convert.

    Returns
    -------
    object
        ``iap.Choice`` for ``imgaug.ALL`` and lists, ``iap.Deterministic``
        for a single string, or the unchanged stochastic parameter.

    """
    if mode == ia.ALL:
        all_modes = ["constant", "edge", "symmetric", "reflect", "wrap"]
        return iap.Choice(all_modes)

    if ia.is_string(mode):
        return iap.Deterministic(mode)

    if isinstance(mode, list):
        assert all(ia.is_string(entry) for entry in mode), (
            "Expected list of modes to only contain strings, got "
            "types %s" % (", ".join(str(type(v)) for v in mode)))
        return iap.Choice(mode)

    if isinstance(mode, iap.StochasticParameter):
        return mode

    raise Exception(
        "Expected mode to be imgaug.ALL, a string, a list of strings "
        "or StochasticParameter, got %s." % (type(mode),))
def _warp_affine_arr(arr, matrix, order=1, mode="constant", cval=0,
                     output_shape=None, backend="auto"):
    """Warp an array with an affine matrix using cv2 or skimage.

    Parameters
    ----------
    arr : ndarray
        Array to warp. Zero-sized arrays are returned unchanged.

    matrix : object
        Transformation that is passed on unchanged to the backend-specific
        warp function.

    order : int, optional
        Interpolation order. The cv2 backend is only considered possible
        for the orders ``0``, ``1`` and ``3``.

    mode : str, optional
        Border mode, passed on to the backend-specific warp function.

    cval : number or iterable of number, optional
        Fill value(s). A single number is repeated once per channel of
        `arr`. The skimage backend only uses the first value.

    output_shape : None or tuple of int, optional
        Shape of the output, passed on to the backend-specific function.

    backend : str, optional
        ``"skimage"`` forces skimage. ``"auto"`` uses skimage only if the
        order or dtype rules out cv2. Any other value uses cv2.

    Returns
    -------
    ndarray
        The warped array, or `arr` itself if it is zero-sized.

    """
    # no changes to zero-sized arrays; checked first so that empty inputs
    # never reach the cval/dtype handling below
    if arr.size == 0:
        return arr

    if ia.is_single_integer(cval) or ia.is_single_float(cval):
        # One fill value per channel. `arr.shape[2]` already is the channel
        # count (an int), so it must not be wrapped in len().
        nb_channels = arr.shape[2] if arr.ndim >= 3 else 1
        cval = [cval] * nb_channels

    min_value, _center_value, max_value = \
        iadt.get_value_range_of_dtype(arr.dtype)

    cv2_bad_order = order not in [0, 1, 3]
    if order == 0:
        cv2_bad_dtype = arr.dtype.name not in _VALID_DTYPES_CV2_ORDER_0
    else:
        cv2_bad_dtype = arr.dtype.name not in _VALID_DTYPES_CV2_ORDER_NOT_0
    cv2_impossible = cv2_bad_order or cv2_bad_dtype

    use_skimage = (
        backend == "skimage"
        or (backend == "auto" and cv2_impossible)
    )

    if use_skimage:
        # cval contains 3 values as cv2 can handle 3, but
        # skimage only 1
        cval = cval[0]
        # skimage does not clip automatically
        cval = max(min(cval, max_value), min_value)
        return _warp_affine_arr_skimage(
            arr,
            matrix,
            cval=cval,
            mode=mode,
            order=order,
            output_shape=output_shape
        )

    # The message has to be a plain string; a tuple here would be shown
    # as "(False, '...')" in the AssertionError.
    assert not cv2_bad_dtype, (
        "cv2 backend in Affine got a dtype %s, which it "
        "cannot handle. Try using a different dtype or set "
        "order=0." % (
            arr.dtype,))
    return _warp_affine_arr_cv2(
        arr,
        matrix,
        cval=tuple([int(v) for v in cval]),
        mode=mode,
        order=order,
        output_shape=output_shape
    )
def _warp_affine_arr_skimage(arr, matrix, cval, mode, order, output_shape):
    """Warp an array via ``skimage.transform.warp`` and restore its dtype.

    The dtype of `arr` is gated first. The warp itself is called with
    ``matrix.inverse`` and ``preserve_range=True``. Boolean inputs are
    re-binarized at a threshold of ``0.5``, all other inputs are converted
    back to their input dtype.

    """
    supported_dtypes = ["bool",
                        "uint8", "uint16", "uint32",
                        "int8", "int16", "int32",
                        "float16", "float32", "float64"]
    unsupported_dtypes = ["uint64", "uint128", "uint256",
                          "int64", "int128", "int256",
                          "float96", "float128", "float256"]
    iadt.gate_dtypes(
        arr,
        allowed=supported_dtypes,
        disallowed=unsupported_dtypes,
        augmenter=None)

    dtype_before = arr.dtype

    warped = tf.warp(
        arr,
        matrix.inverse,
        order=order,
        mode=mode,
        cval=cval,
        preserve_range=True,
        output_shape=output_shape,
    )

    # tf.warp changes all dtypes to float64, including uint8
    if dtype_before == np.bool_:
        return warped > 0.5
    return iadt.restore_dtypes_(warped, dtype_before)
def _warp_affine_arr_cv2(arr, matrix, cval, mode, order, output_shape):
    """Warp an array via ``cv2.warpAffine`` and restore its dtype.

    ``bool`` and ``float16`` inputs are warped as ``float32``, ``int8``
    inputs as ``int16`` unless `order` is ``0``. Arrays whose last axis
    has at most three entries are warped in a single call; otherwise each
    channel is warped on its own using only ``cval[0]`` as fill value.

    """
    iadt.gate_dtypes(
        arr,
        allowed=["bool",
                 "uint8", "uint16",
                 "int8", "int16", "int32",
                 "float16", "float32", "float64"],
        disallowed=["uint32", "uint64", "uint128", "uint256",
                    "int64", "int128", "int256",
                    "float96", "float128", "float256"],
        augmenter=None)

    if order != 0:
        assert arr.dtype.name != "int32", (
            "Affine only supports cv2-based transformations of int32 "
            "arrays when using order=0, but order was set to %d." % (
                order,))

    input_dtype = arr.dtype
    if input_dtype == np.bool_ or input_dtype == np.float16:
        arr = arr.astype(np.float32)
    elif input_dtype == np.int8 and order != 0:
        arr = arr.astype(np.int16)

    # cv2 expects the target size as (width, height)
    dsize = tuple(int(np.round(output_shape[axis])) for axis in (1, 0))

    # map key X from skimage to cv2 or fall back to key X
    warp_settings = dict(
        dsize=dsize,
        flags=_AFFINE_INTERPOLATION_ORDER_SKIMAGE_TO_CV2.get(order, order),
        borderMode=_AFFINE_MODE_SKIMAGE_TO_CV2.get(mode, mode),
    )
    matrix_2x3 = matrix.params[:2]

    # TODO this uses always a tuple of 3 values for cval, even if
    #      #chans != 3, works with 1d but what in other cases?
    nb_channels = arr.shape[-1]
    if nb_channels > 3:
        # warp each channel on its own, re-add channel axis, then stack
        # the result from a list of [H, W, 1] to (H, W, C).
        first_cval_only = (cval[0],)
        warped_channels = [
            cv2.warpAffine(
                arr[:, :, channel_idx],
                matrix_2x3,
                borderValue=first_cval_only,
                **warp_settings
            )
            for channel_idx in sm.xrange(nb_channels)]
        warped = np.stack(warped_channels, axis=-1)
    else:
        # TODO this block can also be when order==0 for any nb_channels,
        #      but was deactivated for now, because cval would always
        #      contain 3 values and not nb_channels values
        warped = cv2.warpAffine(
            arr,
            matrix_2x3,
            borderValue=cval,
            **warp_settings
        )

        # cv2 warp drops last axis if shape is (H, W, 1)
        if warped.ndim == 2:
            warped = warped[..., np.newaxis]

    dtype_name = input_dtype.name
    if dtype_name == "bool":
        return warped > 0.5
    if dtype_name in ["int8", "float16"]:
        return iadt.restore_dtypes_(warped, input_dtype)
    return warped
def _compute_affine_warp_output_shape(matrix, input_shape):
    """Compute the output shape and adjusted matrix that fit a warped image.

    The four image corners are transformed with `matrix`; the bounding box
    of the result defines the output height and width. A translation that
    moves the bounding box to the origin is appended to `matrix`.

    Parameters
    ----------
    matrix : object
        Callable transformation that maps ``(N, 2)`` xy-coordinates and
        supports ``+`` with a ``SimilarityTransform``.

    input_shape : tuple of int
        Shape of the array to be warped, starting with height and width.

    Returns
    -------
    tuple
        ``(matrix, output_shape)``. Both inputs are returned unchanged if
        the height or width is zero.

    """
    height, width = input_shape[:2]
    if height == 0 or width == 0:
        return matrix, input_shape

    # determine shape of output image
    last_col, last_row = width - 1, height - 1
    warped_corners = matrix(np.array([
        [0, 0],
        [0, last_row],
        [last_col, last_row],
        [last_col, 0]
    ]))
    cols, rows = warped_corners[:, 0], warped_corners[:, 1]
    min_col, min_row = cols.min(), rows.min()
    max_col, max_row = cols.max(), rows.max()

    dims = [max_row - min_row + 1, max_col - min_col + 1]
    if len(input_shape) == 3:
        dims.append(input_shape[2])
    output_shape = tuple(int(dim) for dim in np.ceil(dims).tolist())

    # fit output image in new shape
    shift_to_origin = tf.SimilarityTransform(
        translation=(-min_col, -min_row))
    return matrix + shift_to_origin, output_shape
class _AffineSamplingResult(object):
    """Container for the per-image values sampled for an affine warp.

    ``scale`` and ``translate`` are indexed as ``[axis][image_idx]`` with
    axis ``0`` being x and ``1`` being y; ``rotate`` and ``shear`` are
    indexed as ``[image_idx]``.

    """

    def __init__(self, scale=None, translate=None, rotate=None, shear=None,
                 cval=None, mode=None, order=None):
        self.scale = scale
        self.translate = translate
        self.rotate = rotate
        self.shear = shear
        self.cval = cval
        self.mode = mode
        self.order = order

    def to_matrix(self, idx, arr_shape, fit_output):
        """Build the transformation matrix for the image with index `idx`.

        Scaling, translation, rotation and shear are applied relative to
        the image center. Float translations are interpreted as fractions
        of the axis size and rounded to pixels. Rotation and shear are
        converted from degrees to radians.

        Returns
        -------
        tuple
            ``(matrix, output_shape)``. The output shape equals
            `arr_shape` unless `fit_output` is true.

        """
        def _to_pixels(value, axis_size):
            # floats are fractions of the axis size, anything else is
            # taken as a pixel value already
            if ia.is_single_float(value):
                return int(np.round(value * axis_size))
            return value

        height, width = arr_shape[0:2]
        center_x = width / 2.0 - 0.5
        center_y = height / 2.0 - 0.5

        scale_xy = (self.scale[0][idx], self.scale[1][idx])
        translate_x = self.translate[0][idx]
        translate_y = self.translate[1][idx]
        translate_y_px = _to_pixels(translate_y, height)
        translate_x_px = _to_pixels(translate_x, width)

        rotation_rad, shear_rad = np.deg2rad([
            self.rotate[idx], self.shear[idx]])

        move_center_to_origin = tf.SimilarityTransform(
            translation=[-center_x, -center_y])
        transforms = tf.AffineTransform(
            scale=scale_xy,
            translation=(translate_x_px, translate_y_px),
            rotation=rotation_rad,
            shear=shear_rad
        )
        move_origin_to_center = tf.SimilarityTransform(
            translation=[center_x, center_y])

        matrix = move_center_to_origin + transforms + move_origin_to_center
        if not fit_output:
            return matrix, arr_shape
        return _compute_affine_warp_output_shape(matrix, arr_shape)
def _is_identity_matrix(matrix, eps=1e-4):
    """Estimate whether a transformation is (almost) the identity.

    Parameters
    ----------
    matrix : object
        Transformation whose ``params`` attribute is compared against the
        3x3 identity matrix.

    eps : float, optional
        Maximum allowed average absolute difference per matrix cell.

    Returns
    -------
    bool
        Whether the average absolute deviation is at most `eps`.

    """
    deviation = np.abs(np.eye(3, dtype=np.float32) - matrix.params)
    return np.average(deviation) <= eps
[docs]class Affine(meta.Augmenter):
"""
Augmenter to apply affine transformations to images.
This is mostly a wrapper around the corresponding classes and functions
in OpenCV and skimage.
Affine transformations involve:
- Translation ("move" image on the x-/y-axis)
- Rotation
- Scaling ("zoom" in/out)
- Shear (move one side of the image, turning a square into a trapezoid)
All such transformations can create "new" pixels in the image without a
defined content, e.g. if the image is translated to the left, pixels
are created on the right.
A method has to be defined to deal with these pixel values. The
parameters `cval` and `mode` of this class deal with this.
Some transformations involve interpolations between several pixels
of the input image to generate output pixel values. The parameter `order`
deals with the method of interpolation used for this.
dtype support::
if (backend="skimage", order in [0, 1])::
* ``uint8``: yes; tested
* ``uint16``: yes; tested
* ``uint32``: yes; tested (1)
* ``uint64``: no (2)
* ``int8``: yes; tested
* ``int16``: yes; tested
* ``int32``: yes; tested (1)
* ``int64``: no (2)
* ``float16``: yes; tested
* ``float32``: yes; tested
* ``float64``: yes; tested
* ``float128``: no (2)
* ``bool``: yes; tested
- (1) scikit-image converts internally to float64, which might
affect the accuracy of large integers. In tests this seemed
to not be an issue.
- (2) results too inaccurate
if (backend="skimage", order in [3, 4])::
* ``uint8``: yes; tested
* ``uint16``: yes; tested
* ``uint32``: yes; tested (1)
* ``uint64``: no (2)
* ``int8``: yes; tested
* ``int16``: yes; tested
* ``int32``: yes; tested (1)
* ``int64``: no (2)
* ``float16``: yes; tested
* ``float32``: yes; tested
* ``float64``: limited; tested (3)
* ``float128``: no (2)
* ``bool``: yes; tested
- (1) scikit-image converts internally to float64, which might
affect the accuracy of large integers. In tests this seemed
to not be an issue.
- (2) results too inaccurate
- (3) ``NaN`` around minimum and maximum of float64 value range
if (backend="skimage", order=5)::
* ``uint8``: yes; tested
* ``uint16``: yes; tested
* ``uint32``: yes; tested (1)
* ``uint64``: no (2)
* ``int8``: yes; tested
* ``int16``: yes; tested
* ``int32``: yes; tested (1)
* ``int64``: no (2)
* ``float16``: yes; tested
* ``float32``: yes; tested
* ``float64``: limited; not tested (3)
* ``float128``: no (2)
* ``bool``: yes; tested
- (1) scikit-image converts internally to ``float64``, which
might affect the accuracy of large integers. In tests
this seemed to not be an issue.
- (2) results too inaccurate
- (3) ``NaN`` around minimum and maximum of float64 value range
if (backend="cv2", order=0)::
* ``uint8``: yes; tested
* ``uint16``: yes; tested
* ``uint32``: no (1)
* ``uint64``: no (2)
* ``int8``: yes; tested
* ``int16``: yes; tested
* ``int32``: yes; tested
* ``int64``: no (2)
* ``float16``: yes; tested (3)
* ``float32``: yes; tested
* ``float64``: yes; tested
* ``float128``: no (1)
* ``bool``: yes; tested (3)
- (1) rejected by cv2
- (2) changed to ``int32`` by cv2
- (3) mapped internally to ``float32``
if (backend="cv2", order=1)::
* ``uint8``: yes; fully tested
* ``uint16``: yes; tested
* ``uint32``: no (1)
* ``uint64``: no (2)
* ``int8``: yes; tested (3)
* ``int16``: yes; tested
* ``int32``: no (2)
* ``int64``: no (2)
* ``float16``: yes; tested (4)
* ``float32``: yes; tested
* ``float64``: yes; tested
* ``float128``: no (1)
* ``bool``: yes; tested (4)
- (1) rejected by cv2
- (2) causes cv2 error: ``cv2.error: OpenCV(3.4.4)
(...)imgwarp.cpp:1805: error:
(-215:Assertion failed) ifunc != 0 in function 'remap'``
- (3) mapped internally to ``int16``
- (4) mapped internally to ``float32``
if (backend="cv2", order=3)::
* ``uint8``: yes; tested
* ``uint16``: yes; tested
* ``uint32``: no (1)
* ``uint64``: no (2)
* ``int8``: yes; tested (3)
* ``int16``: yes; tested
* ``int32``: no (2)
* ``int64``: no (2)
* ``float16``: yes; tested (4)
* ``float32``: yes; tested
* ``float64``: yes; tested
* ``float128``: no (1)
* ``bool``: yes; tested (4)
- (1) rejected by cv2
- (2) causes cv2 error: ``cv2.error: OpenCV(3.4.4)
(...)imgwarp.cpp:1805: error:
(-215:Assertion failed) ifunc != 0 in function 'remap'``
- (3) mapped internally to ``int16``
- (4) mapped internally to ``float32``
Parameters
----------
scale : number or tuple of number or list of number or imgaug.parameters.StochasticParameter or dict {"x": number/tuple/list/StochasticParameter, "y": number/tuple/list/StochasticParameter}, optional
Scaling factor to use, where ``1.0`` denotes "no change" and
``0.5`` is zoomed out to ``50`` percent of the original size.
* If a single number, then that value will be used for all images.
* If a tuple ``(a, b)``, then a value will be uniformly sampled
per image from the interval ``[a, b]``. That value will be
used identically for both x- and y-axis.
* If a list, then a random value will be sampled from that list
per image (again, used for both x- and y-axis).
* If a ``StochasticParameter``, then from that parameter a value
will be sampled per image (again, used for both x- and y-axis).
* If a dictionary, then it is expected to have the keys ``x``
and/or ``y``. Each of these keys can have the same values as
described above. Using a dictionary allows to set different
values for the two axis and sampling will then happen
*independently* per axis, resulting in samples that differ
between the axes.
translate_percent : None or number or tuple of number or list of number or imgaug.parameters.StochasticParameter or dict {"x": number/tuple/list/StochasticParameter, "y": number/tuple/list/StochasticParameter}, optional
Translation as a fraction of the image height/width (x-translation,
y-translation), where ``0`` denotes "no change" and ``0.5`` denotes
"half of the axis size".
* If ``None`` then equivalent to ``0.0`` unless `translate_px` has
a value other than ``None``.
* If a single number, then that value will be used for all images.
* If a tuple ``(a, b)``, then a value will be uniformly sampled
per image from the interval ``[a, b]``. That sampled fraction
value will be used identically for both x- and y-axis.
* If a list, then a random value will be sampled from that list
per image (again, used for both x- and y-axis).
* If a ``StochasticParameter``, then from that parameter a value
will be sampled per image (again, used for both x- and y-axis).
* If a dictionary, then it is expected to have the keys ``x``
and/or ``y``. Each of these keys can have the same values as
described above. Using a dictionary allows to set different
values for the two axis and sampling will then happen
*independently* per axis, resulting in samples that differ
between the axes.
translate_px : None or int or tuple of int or list of int or imgaug.parameters.StochasticParameter or dict {"x": int/tuple/list/StochasticParameter, "y": int/tuple/list/StochasticParameter}, optional
Translation in pixels.
* If ``None`` then equivalent to ``0`` unless `translate_percent`
has a value other than ``None``.
* If a single int, then that value will be used for all images.
* If a tuple ``(a, b)``, then a value will be uniformly sampled
per image from the discrete interval ``[a..b]``. That number
will be used identically for both x- and y-axis.
* If a list, then a random value will be sampled from that list
per image (again, used for both x- and y-axis).
* If a ``StochasticParameter``, then from that parameter a value
will be sampled per image (again, used for both x- and y-axis).
* If a dictionary, then it is expected to have the keys ``x``
and/or ``y``. Each of these keys can have the same values as
described above. Using a dictionary allows to set different
values for the two axis and sampling will then happen
*independently* per axis, resulting in samples that differ
between the axes.
rotate : number or tuple of number or list of number or imgaug.parameters.StochasticParameter, optional
Rotation in degrees (**NOT** radians), i.e. expected value range is
around ``[-360, 360]``. Rotation happens around the *center* of the
image, not the top left corner as in some other frameworks.
* If a number, then that value will be used for all images.
* If a tuple ``(a, b)``, then a value will be uniformly sampled
per image from the interval ``[a, b]`` and used as the rotation
value.
* If a list, then a random value will be sampled from that list
per image.
* If a ``StochasticParameter``, then this parameter will be used to
sample the rotation value per image.
shear : number or tuple of number or list of number or imgaug.parameters.StochasticParameter, optional
Shear in degrees (**NOT** radians), i.e. expected value range is
around ``[-360, 360]``.
* If a number, then that value will be used for all images.
* If a tuple ``(a, b)``, then a value will be uniformly sampled
per image from the interval ``[a, b]`` and be used as the
shear value.
* If a list, then a random value will be sampled from that list
per image.
* If a ``StochasticParameter``, then this parameter will be used
to sample the shear value per image.
order : int or iterable of int or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
Interpolation order to use. Same meaning as in ``skimage``:
* ``0``: ``Nearest-neighbor``
* ``1``: ``Bi-linear`` (default)
* ``2``: ``Bi-quadratic`` (not recommended by skimage)
* ``3``: ``Bi-cubic``
* ``4``: ``Bi-quartic``
* ``5``: ``Bi-quintic``
Method ``0`` and ``1`` are fast, ``3`` is a bit slower, ``4`` and
``5`` are very slow. If the backend is ``cv2``, the mapping to
OpenCV's interpolation modes is as follows:
* ``0`` -> ``cv2.INTER_NEAREST``
* ``1`` -> ``cv2.INTER_LINEAR``
* ``2`` -> ``cv2.INTER_CUBIC``
* ``3`` -> ``cv2.INTER_CUBIC``
* ``4`` -> ``cv2.INTER_CUBIC``
As datatypes this parameter accepts:
* If a single ``int``, then that order will be used for all images.
* If a list, then a random value will be sampled from that list
per image.
* If ``imgaug.ALL``, then equivalent to list ``[0, 1, 3, 4, 5]``
in case of ``backend=skimage`` and otherwise ``[0, 1, 3]``.
* If ``StochasticParameter``, then that parameter is queried per
image to sample the order value to use.
cval : number or tuple of number or list of number or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
The constant value to use when filling in newly created pixels.
(E.g. translating by 1px to the right will create a new 1px-wide
column of pixels on the left of the image). The value is only used
when `mode=constant`. The expected value range is ``[0, 255]`` for
``uint8`` images. It may be a float value.
* If this is a single number, then that value will be used
(e.g. 0 results in black pixels).
* If a tuple ``(a, b)``, then three values (for three image
channels) will be uniformly sampled per image from the
interval ``[a, b]``.
* If a list, then a random value will be sampled from that list
per image.
* If ``imgaug.ALL`` then equivalent to tuple ``(0, 255)``.
* If a ``StochasticParameter``, a new value will be sampled from
the parameter per image.
mode : str or list of str or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
Method to use when filling in newly created pixels.
Same meaning as in ``skimage`` (and :func:`numpy.pad`):
* ``constant``: Pads with a constant value
* ``edge``: Pads with the edge values of array
* ``symmetric``: Pads with the reflection of the vector mirrored
along the edge of the array.
* ``reflect``: Pads with the reflection of the vector mirrored on
the first and last values of the vector along each axis.
* ``wrap``: Pads with the wrap of the vector along the axis.
The first values are used to pad the end and the end values
are used to pad the beginning.
If ``cv2`` is chosen as the backend the mapping is as follows:
* ``constant`` -> ``cv2.BORDER_CONSTANT``
* ``edge`` -> ``cv2.BORDER_REPLICATE``
* ``symmetric`` -> ``cv2.BORDER_REFLECT``
* ``reflect`` -> ``cv2.BORDER_REFLECT_101``
* ``wrap`` -> ``cv2.BORDER_WRAP``
The datatype of the parameter may be:
* If a single string, then that mode will be used for all images.
* If a list of strings, then a random mode will be picked
from that list per image.
* If ``imgaug.ALL``, then a random mode from all possible modes
will be picked.
* If ``StochasticParameter``, then the mode will be sampled from
that parameter per image, i.e. it must return only the above
mentioned strings.
fit_output : bool, optional
Whether to modify the affine transformation so that the whole output
image is always contained in the image plane (``True``) or accept
parts of the image being outside the image plane (``False``).
This can be thought of as first applying the affine transformation
and then applying a second transformation to "zoom in" on the new
image so that it fits the image plane.
This is useful to avoid corners of the image being outside of the image
plane after applying rotations. It will however negate translation
and scaling.
Note also that activating this may lead to image sizes differing from
the input image sizes. To avoid this, wrap ``Affine`` in
:class:`imgaug.augmenters.size.KeepSizeByResize`,
e.g. ``KeepSizeByResize(Affine(...))``.
backend : str, optional
Framework to use as a backend. Valid values are ``auto``, ``skimage``
(scikit-image's warp) and ``cv2`` (OpenCV's warp).
If ``auto`` is used, the augmenter will automatically try
to use ``cv2`` whenever possible (order must be in ``[0, 1, 3]``). It
will silently fall back to skimage if order/dtype is not supported by
cv2. cv2 is generally faster than skimage. It also supports RGB cvals,
while skimage will resort to intensity cvals (i.e. 3x the same value
as RGB). If ``cv2`` is chosen and order is ``2`` or ``4``, it will
automatically fall back to order ``3``.
name : None or str, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
deterministic : bool, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
random_state : None or int or imgaug.random.RNG or numpy.random.Generator or numpy.random.bit_generator.BitGenerator or numpy.random.SeedSequence or numpy.random.RandomState, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
Examples
--------
>>> import imgaug.augmenters as iaa
>>> aug = iaa.Affine(scale=2.0)
Zoom in on all images by a factor of ``2``.
>>> aug = iaa.Affine(translate_px=16)
Translate all images on the x- and y-axis by 16 pixels (towards the
bottom right) and fill up any new pixels with zero (black values).
>>> aug = iaa.Affine(translate_percent=0.1)
Translate all images on the x- and y-axis by ``10`` percent of their
width/height (towards the bottom right). The pixel values are computed
per axis based on that axis' size. Fill up any new pixels with zero
(black values).
>>> aug = iaa.Affine(rotate=35)
Rotate all images by ``35`` *degrees*. Fill up any new pixels with zero
(black values).
>>> aug = iaa.Affine(shear=15)
Shear all images by ``15`` *degrees*. Fill up any new pixels with zero
(black values).
>>> aug = iaa.Affine(translate_px=(-16, 16))
Translate all images on the x- and y-axis by a random value
between ``-16`` and ``16`` pixels (to the bottom right) and fill up any new
pixels with zero (black values). The translation value is sampled once
per image and is the same for both axis.
>>> aug = iaa.Affine(translate_px={"x": (-16, 16), "y": (-4, 4)})
Translate all images on the x-axis by a random value
between ``-16`` and ``16`` pixels (to the right) and on the y-axis by a
random value between ``-4`` and ``4`` pixels to the bottom. The sampling
happens independently per axis, so even if both intervals were identical,
the sampled axis-wise values would likely be different.
This also fills up any new pixels with zero (black values).
>>> aug = iaa.Affine(scale=2.0, order=[0, 1])
Same as in the above `scale` example, but uses (randomly) either
nearest neighbour interpolation or linear interpolation. If `order` is
not specified, ``order=1`` would be used by default.
>>> aug = iaa.Affine(translate_px=16, cval=(0, 255))
Same as in the `translate_px` example above, but newly created pixels
are now filled with a random color (sampled once per image and the
same for all newly created pixels within that image).
>>> aug = iaa.Affine(translate_px=16, mode=["constant", "edge"])
Similar to the previous example, but the newly created pixels are
filled with black pixels in half of all images (mode ``constant`` with
default `cval` being ``0``) and in the other half of all images using
``edge`` mode, which repeats the color of the spatially closest pixel
of the corresponding image edge.
"""
def __init__(self, scale=1.0, translate_percent=None, translate_px=None,
             rotate=0.0, shear=0.0, order=1, cval=0, mode="constant",
             fit_output=False, backend="auto",
             name=None, deterministic=False, random_state=None):
    # Validate the backend and convert all user-facing arguments into
    # parameter objects that are sampled once per image.
    super(Affine, self).__init__(
        name=name, deterministic=deterministic, random_state=random_state)

    valid_backends = ["auto", "skimage", "cv2"]
    assert backend in valid_backends, (
        "Expected 'backend' to be \"auto\", \"skimage\" or \"cv2\", "
        "got %s." % (backend,))
    self.backend = backend

    self.order = _handle_order_arg(order, backend)
    self.cval = _handle_cval_arg(cval)
    self.mode = _handle_mode_arg(mode)
    self.scale = self._handle_scale_arg(scale)
    self.translate = self._handle_translate_arg(
        translate_px, translate_percent)

    # rotate and shear share the same conversion settings
    angle_settings = dict(
        value_range=None, tuple_to_uniform=True, list_to_choice=True)
    self.rotate = iap.handle_continuous_param(
        rotate, "rotate", **angle_settings)
    self.shear = iap.handle_continuous_param(
        shear, "shear", **angle_settings)

    self.fit_output = fit_output

    # Special order, mode and cval parameters for heatmaps and
    # segmentation maps. These may either be None or a fixed value.
    # Stochastic parameters are currently *not* supported.
    # If set to None, the same values as for images will be used.
    # That is really not recommended for the cval parameter.
    #
    # Segmentation map augmentation by default always pads with a
    # constant value of 0 (background class id), and always uses nearest
    # neighbour interpolation. While other pad modes and BG class ids
    # could be used, the interpolation mode has to be NN as any other
    # mode would lead to averaging class ids, which makes no sense to do.
    self._order_heatmaps, self._order_segmentation_maps = 3, 0
    self._mode_heatmaps, self._mode_segmentation_maps = (
        "constant", "constant")
    self._cval_heatmaps, self._cval_segmentation_maps = 0, 0
@classmethod
def _handle_scale_arg(cls, scale):
    """Convert the ``scale`` argument to parameter object(s).

    A dictionary leads to a tuple ``(x_param, y_param)`` in which a
    missing axis defaults to ``1.0``. Any other input leads to a single
    parameter. All values have to be at least ``1e-4``.

    """
    def _as_param(value, label):
        return iap.handle_continuous_param(
            value, label, value_range=(1e-4, None),
            tuple_to_uniform=True, list_to_choice=True)

    if not isinstance(scale, dict):
        return _as_param(scale, "scale")

    assert "x" in scale or "y" in scale, (
        "Expected scale dictionary to contain at least key \"x\" or "
        "key \"y\". Found neither of them.")
    return (
        _as_param(scale.get("x", 1.0), "scale['x']"),
        _as_param(scale.get("y", 1.0), "scale['y']")
    )
@classmethod
def _handle_translate_arg(cls, translate_px, translate_percent):
    """Convert the translation arguments to parameter object(s).

    Parameters
    ----------
    translate_px : None or int or tuple or list or dict or imgaug.parameters.StochasticParameter
        Translation in pixels, converted as a discrete parameter that
        does not allow floats.

    translate_percent : None or number or tuple or list or dict or imgaug.parameters.StochasticParameter
        Translation as a fraction of the axis size, converted as a
        continuous parameter.

    Returns
    -------
    object or tuple
        A single parameter, or a tuple ``(x_param, y_param)`` if the
        given argument was a dictionary. A missing axis defaults to ``0``.

    Raises
    ------
    AssertionError
        If both arguments are set, or if a dictionary contains neither
        the key ``x`` nor the key ``y``.

    """
    if translate_percent is None and translate_px is None:
        translate_px = 0

    # Both being None was handled above, so this can only fail if the
    # caller provided both arguments at the same time.
    assert translate_percent is None or translate_px is None, (
        "Expected only one of translate_percent and translate_px to be "
        "provided, but got both of them.")

    if translate_percent is not None:
        # translate by percent
        value, arg_name = translate_percent, "translate_percent"

        def _as_param(axis_value, label):
            return iap.handle_continuous_param(
                axis_value, label, value_range=None,
                tuple_to_uniform=True, list_to_choice=True)
    else:
        # translate by pixels
        value, arg_name = translate_px, "translate_px"

        def _as_param(axis_value, label):
            return iap.handle_discrete_param(
                axis_value, label, value_range=None,
                tuple_to_uniform=True, list_to_choice=True,
                allow_floats=False)

    if not isinstance(value, dict):
        return _as_param(value, arg_name)

    assert "x" in value or "y" in value, (
        "Expected %s dictionary to contain at "
        "least key \"x\" or key \"y\". Found neither of them." % (
            arg_name,))
    return (
        _as_param(value.get("x", 0), "%s['x']" % (arg_name,)),
        _as_param(value.get("y", 0), "%s['y']" % (arg_name,))
    )
def _augment_images(self, images, random_state, parents, hooks):
    # Sample one set of affine parameters per image, then warp the
    # images with exactly these samples.
    samples = self._draw_samples(len(images), random_state)
    return self._augment_images_by_samples(images, samples)
def _augment_images_by_samples(self, images, samples,
                               return_matrices=False):
    """Warp every image with the affine matrix derived from its sample.

    Parameters
    ----------
    images : ndarray or list
        The images to warp. Indexed per image via ``images[i]``.

    samples : object
        Sampling result providing ``to_matrix()`` as well as the
        per-image sequences ``cval``, ``mode`` and ``order``.

    return_matrices : bool, optional
        Whether to additionally return the matrix used for each image.

    Returns
    -------
    ndarray or list
        The warped images. An array is only returned if the input was an
        array and all output images share the same shape.

    list
        The per-image matrices. Only returned if `return_matrices` is
        ``True``, in which case the result is ``(images, matrices)``.

    """
    array_input = ia.is_np_array(images)
    dtype_in = images.dtype if array_input else None
    matrices = [None] * len(images) if return_matrices else None

    warped = []
    for idx, image in enumerate(images):
        matrix, output_shape = samples.to_matrix(idx, image.shape,
                                                 self.fit_output)
        cval_i = samples.cval[idx]
        mode_i = samples.mode[idx]
        order_i = samples.order[idx]

        if _is_identity_matrix(matrix):
            # nothing to do, keep the input image as it is
            warped.append(image)
        else:
            warped.append(_warp_affine_arr(
                image, matrix,
                order=order_i, mode=mode_i, cval=cval_i,
                output_shape=output_shape, backend=self.backend))

        if return_matrices:
            matrices[idx] = matrix

    # fit_output may lead to differing shapes, in which case the images
    # cannot be merged into a single array anymore, even if the input
    # was an array
    if array_input:
        distinct_shapes = {image.shape for image in warped}
        if len(distinct_shapes) == 1:
            warped = np.array(warped, dtype_in)

    if return_matrices:
        return warped, matrices
    return warped
def _augment_heatmaps(self, heatmaps, random_state, parents, hooks):
return self._augment_hms_and_segmaps(
heatmaps, random_state, "arr_0to1",
self._cval_heatmaps, self._mode_heatmaps,
self._order_heatmaps, "float32")
def _augment_segmentation_maps(self, segmaps, random_state, parents, hooks):
return self._augment_hms_and_segmaps(
segmaps, random_state, "arr",
self._cval_segmentation_maps, self._mode_segmentation_maps,
self._order_segmentation_maps, "int32")
def _augment_hms_and_segmaps(self, augmentables, random_state,
arr_attr_name, cval, mode, order, cval_dtype):
nb_images = len(augmentables)
samples = self._draw_samples(nb_images, random_state)
if cval is not None:
samples.cval = np.full((nb_images, 1), cval, dtype=cval_dtype)
if mode is not None:
samples.mode = [mode] * nb_images
if order is not None:
samples.order = [order] * nb_images
arrs = [getattr(augmentable, arr_attr_name)
for augmentable in augmentables]
arrs_aug, matrices = self._augment_images_by_samples(
arrs, samples, return_matrices=True)
gen = zip(augmentables, arrs_aug, matrices, samples.order)
for augmentable_i, arr_aug, matrix, order_i in gen:
# skip augmented HM/SM arrs for which the images were not
# augmented due to being zero-sized
if 0 in augmentable_i.shape:
continue
# order=3 matches cubic interpolation and can cause values to go
# outside of the range [0.0, 1.0] not clear whether 4+ also do that
# We don't clip here for Segmentation Maps, because for these
# the value range isn't clearly limited to [0, 1] (and they should
# also never use order=3 to begin with).
# TODO add test for this
if order_i >= 3 and isinstance(augmentable_i, ia.HeatmapsOnImage):
arr_aug = np.clip(arr_aug, 0.0, 1.0, out=arr_aug)
setattr(augmentable_i, arr_attr_name, arr_aug)
if self.fit_output:
_, output_shape_i = _compute_affine_warp_output_shape(
matrix, augmentable_i.shape)
else:
output_shape_i = augmentable_i.shape
augmentable_i.shape = output_shape_i
return augmentables
def _augment_keypoints(self, keypoints_on_images, random_state, parents,
hooks):
result = []
nb_images = len(keypoints_on_images)
samples = self._draw_samples(nb_images, random_state)
for i, keypoints_on_image in enumerate(keypoints_on_images):
matrix, output_shape = samples.to_matrix(
i, keypoints_on_image.shape, self.fit_output)
kps = keypoints_on_image.keypoints
if (not _is_identity_matrix(matrix)
and not keypoints_on_image.empty
and not (0 in keypoints_on_image.shape)):
coords = keypoints_on_image.to_xy_array()
coords_aug = tf.matrix_transform(coords, matrix.params)
kps = [kp.deepcopy(x=coords[0], y=coords[1])
for kp, coords
in zip(keypoints_on_image.keypoints, coords_aug)]
result.append(keypoints_on_image.deepcopy(
keypoints=kps, shape=output_shape))
return result
def _augment_polygons(self, polygons_on_images, random_state, parents,
hooks):
return self._augment_polygons_as_keypoints(
polygons_on_images, random_state, parents, hooks)
def _draw_samples(self, nb_samples, random_state):
    """Sample all affine parameters for `nb_samples` images.

    Each parameter uses its own entry of the ``11`` duplicated random
    states. Scale and translate are sampled per axis if they are stored
    as tuples; otherwise a single draw is shared by both axes.

    Returns
    -------
    _AffineSamplingResult
        Container holding the scale, translate, rotate, shear, cval,
        mode and order samples.

    """
    rngs = random_state.duplicate(11)
    size = (nb_samples,)

    scale = self.scale
    if isinstance(scale, tuple):
        scale_x = scale[0].draw_samples(size, random_state=rngs[0])
        scale_y = scale[1].draw_samples(size, random_state=rngs[1])
    else:
        # one draw, used identically for the x- and y-axis
        scale_x = scale_y = scale.draw_samples(size, random_state=rngs[2])

    translate = self.translate
    if isinstance(translate, tuple):
        translate_x = translate[0].draw_samples(size, random_state=rngs[3])
        translate_y = translate[1].draw_samples(size, random_state=rngs[4])
    else:
        # one draw, used identically for the x- and y-axis
        translate_x = translate_y = translate.draw_samples(
            size, random_state=rngs[5])

    rotate = self.rotate.draw_samples(size, random_state=rngs[6])
    shear = self.shear.draw_samples(size, random_state=rngs[7])
    # three cval values are drawn per image
    cval = self.cval.draw_samples((nb_samples, 3), random_state=rngs[8])
    mode = self.mode.draw_samples(size, random_state=rngs[9])
    order = self.order.draw_samples(size, random_state=rngs[10])

    return _AffineSamplingResult(
        scale=(scale_x, scale_y),
        translate=(translate_x, translate_y),
        rotate=rotate,
        shear=shear,
        cval=cval,
        mode=mode,
        order=order)
def get_parameters(self):
    """Return this augmenter's parameters as a list.

    The order is: scale, translate, rotate, shear, order, cval, mode,
    backend, fit_output.
    """
    geometry = [self.scale, self.translate, self.rotate, self.shear]
    sampling = [self.order, self.cval, self.mode]
    settings = [self.backend, self.fit_output]
    return geometry + sampling + settings
[docs]class AffineCv2(meta.Augmenter):
"""
Augmenter to apply affine transformations to images using cv2 (i.e. opencv)
backend.
.. warning ::
This augmenter might be removed in the future as ``Affine``
already offers a cv2 backend (use ``backend="cv2"``).
Affine transformations
involve:
- Translation ("move" image on the x-/y-axis)
- Rotation
- Scaling ("zoom" in/out)
- Shear (move one side of the image, turning a square into a trapezoid)
All such transformations can create "new" pixels in the image without a
defined content, e.g. if the image is translated to the left, pixels
are created on the right.
A method has to be defined to deal with these pixel values. The
parameters `cval` and `mode` of this class deal with this.
Some transformations involve interpolations between several pixels
of the input image to generate output pixel values. The parameter `order`
deals with the method of interpolation used for this.
dtype support::
* ``uint8``: yes; fully tested
* ``uint16``: ?
* ``uint32``: ?
* ``uint64``: ?
* ``int8``: ?
* ``int16``: ?
* ``int32``: ?
* ``int64``: ?
* ``float16``: ?
* ``float32``: ?
* ``float64``: ?
* ``float128``: ?
* ``bool``: ?
Parameters
----------
scale : number or tuple of number or list of number or imgaug.parameters.StochasticParameter or dict {"x": number/tuple/list/StochasticParameter, "y": number/tuple/list/StochasticParameter}, optional
Scaling factor to use, where ``1.0`` denotes \"no change\" and
``0.5`` is zoomed out to ``50`` percent of the original size.
* If a single number, then that value will be used for all images.
* If a tuple ``(a, b)``, then a value will be uniformly sampled
per image from the interval ``[a, b]``. That value will be
used identically for both x- and y-axis.
* If a list, then a random value will be sampled from that list
per image (again, used for both x- and y-axis).
* If a ``StochasticParameter``, then from that parameter a value
will be sampled per image (again, used for both x- and y-axis).
* If a dictionary, then it is expected to have the keys ``x``
and/or ``y``. Each of these keys can have the same values as
described above. Using a dictionary allows to set different
values for the two axis and sampling will then happen
*independently* per axis, resulting in samples that differ
between the axes.
translate_percent : number or tuple of number or list of number or imgaug.parameters.StochasticParameter or dict {"x": number/tuple/list/StochasticParameter, "y": number/tuple/list/StochasticParameter}, optional
Translation as a fraction of the image height/width (x-translation,
y-translation), where ``0`` denotes "no change" and ``0.5`` denotes
"half of the axis size".
* If ``None`` then equivalent to ``0.0`` unless `translate_px` has
a value other than ``None``.
* If a single number, then that value will be used for all images.
* If a tuple ``(a, b)``, then a value will be uniformly sampled
per image from the interval ``[a, b]``. That sampled fraction
value will be used identically for both x- and y-axis.
* If a list, then a random value will be sampled from that list
per image (again, used for both x- and y-axis).
* If a ``StochasticParameter``, then from that parameter a value
will be sampled per image (again, used for both x- and y-axis).
* If a dictionary, then it is expected to have the keys ``x``
and/or ``y``. Each of these keys can have the same values as
described above. Using a dictionary allows to set different
values for the two axis and sampling will then happen
*independently* per axis, resulting in samples that differ
between the axes.
translate_px : int or tuple of int or list of int or imgaug.parameters.StochasticParameter or dict {"x": int/tuple/list/StochasticParameter, "y": int/tuple/list/StochasticParameter}, optional
Translation in pixels.
* If ``None`` then equivalent to ``0`` unless `translate_percent`
has a value other than ``None``.
* If a single int, then that value will be used for all images.
* If a tuple ``(a, b)``, then a value will be uniformly sampled
per image from the discrete interval ``[a..b]``. That number
will be used identically for both x- and y-axis.
* If a list, then a random value will be sampled from that list
per image (again, used for both x- and y-axis).
* If a ``StochasticParameter``, then from that parameter a value
will be sampled per image (again, used for both x- and y-axis).
* If a dictionary, then it is expected to have the keys ``x``
and/or ``y``. Each of these keys can have the same values as
described above. Using a dictionary allows to set different
values for the two axis and sampling will then happen
*independently* per axis, resulting in samples that differ
between the axes.
rotate : number or tuple of number or list of number or imgaug.parameters.StochasticParameter, optional
Rotation in degrees (**NOT** radians), i.e. expected value range is
around ``[-360, 360]``. Rotation happens around the *center* of the
image, not the top left corner as in some other frameworks.
* If a number, then that value will be used for all images.
* If a tuple ``(a, b)``, then a value will be uniformly sampled
per image from the interval ``[a, b]`` and used as the rotation
value.
* If a list, then a random value will be sampled from that list
per image.
* If a ``StochasticParameter``, then this parameter will be used to
sample the rotation value per image.
shear : number or tuple of number or list of number or imgaug.parameters.StochasticParameter, optional
Shear in degrees (**NOT** radians), i.e. expected value range is
around ``[-360, 360]``.
* If a number, then that value will be used for all images.
* If a tuple ``(a, b)``, then a value will be uniformly sampled
per image from the interval ``[a, b]`` and be used as the
shear value.
* If a list, then a random value will be sampled from that list
per image.
* If a ``StochasticParameter``, then this parameter will be used
to sample the shear value per image.
order : int or list of int or str or list of str or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
Interpolation order to use. Allowed are:
* ``cv2.INTER_NEAREST`` (nearest-neighbor interpolation)
* ``cv2.INTER_LINEAR`` (bilinear interpolation, used by default)
* ``cv2.INTER_CUBIC`` (bicubic interpolation over ``4x4`` pixel
neighborhood)
* ``cv2.INTER_LANCZOS4``
* string ``nearest`` (same as ``cv2.INTER_NEAREST``)
* string ``linear`` (same as ``cv2.INTER_LINEAR``)
* string ``cubic`` (same as ``cv2.INTER_CUBIC``)
* string ``lanczos4`` (same as ``cv2.INTER_LANCZOS4``)
``INTER_NEAREST`` (nearest neighbour interpolation) and
``INTER_LINEAR`` (linear interpolation) are the fastest.
* If a single ``int``, then that order will be used for all images.
* If a string, then it must be one of: ``nearest``, ``linear``,
``cubic``, ``lanczos4``.
* If an iterable of ``int``/``str``, then for each image a random
value will be sampled from that iterable (i.e. list of allowed
order values).
* If ``imgaug.ALL``, then equivalent to list ``[cv2.INTER_NEAREST,
cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]``.
* If ``StochasticParameter``, then that parameter is queried per
image to sample the order value to use.
cval : number or tuple of number or list of number or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
The constant value to use when filling in newly created pixels.
(E.g. translating by 1px to the right will create a new 1px-wide
column of pixels on the left of the image). The value is only used
when `mode=constant`. The expected value range is ``[0, 255]`` for
``uint8`` images. It may be a float value.
* If this is a single number, then that value will be used
(e.g. 0 results in black pixels).
* If a tuple ``(a, b)``, then three values (for three image
channels) will be uniformly sampled per image from the
interval ``[a, b]``.
* If a list, then a random value will be sampled from that list
per image.
* If ``imgaug.ALL`` then equivalent to tuple ``(0, 255)``.
* If a ``StochasticParameter``, a new value will be sampled from
the parameter per image.
mode : int or str or list of str or list of int or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
Method to use when filling in newly created pixels.
Same meaning as in OpenCV's border mode. Let ``abcdefgh`` be an image's
content and ``|`` be an image boundary after which new pixels are
filled in, then the valid modes and their behaviour are the following:
* ``cv2.BORDER_REPLICATE``: ``aaaaaa|abcdefgh|hhhhhhh``
* ``cv2.BORDER_REFLECT``: ``fedcba|abcdefgh|hgfedcb``
* ``cv2.BORDER_REFLECT_101``: ``gfedcb|abcdefgh|gfedcba``
* ``cv2.BORDER_WRAP``: ``cdefgh|abcdefgh|abcdefg``
* ``cv2.BORDER_CONSTANT``: ``iiiiii|abcdefgh|iiiiiii``,
where ``i`` is the defined cval.
* ``replicate``: Same as ``cv2.BORDER_REPLICATE``.
* ``reflect``: Same as ``cv2.BORDER_REFLECT``.
* ``reflect_101``: Same as ``cv2.BORDER_REFLECT_101``.
* ``wrap``: Same as ``cv2.BORDER_WRAP``.
* ``constant``: Same as ``cv2.BORDER_CONSTANT``.
The datatype of the parameter may be:
* If a single ``int``, then it must be one of the ``cv2.BORDER_*``
constants.
* If a single string, then it must be one of: ``replicate``,
``reflect``, ``reflect_101``, ``wrap``, ``constant``.
* If a list of ``int``/``str``, then per image a random mode will
be picked from that list.
* If ``imgaug.ALL``, then a random mode from all possible modes
will be picked.
* If ``StochasticParameter``, then the mode will be sampled from
that parameter per image, i.e. it must return only the above
mentioned strings.
name : None or str, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
deterministic : bool, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
random_state : None or int or imgaug.random.RNG or numpy.random.Generator or numpy.random.bit_generator.BitGenerator or numpy.random.SeedSequence or numpy.random.RandomState, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
Examples
--------
>>> import imgaug.augmenters as iaa
>>> aug = iaa.AffineCv2(scale=2.0)
Zoom in on all images by a factor of ``2``.
>>> aug = iaa.AffineCv2(translate_px=16)
Translate all images on the x- and y-axis by 16 pixels (towards the
bottom right) and fill up any new pixels with zero (black values).
>>> aug = iaa.AffineCv2(translate_percent=0.1)
Translate all images on the x- and y-axis by ``10`` percent of their
width/height (towards the bottom right). The pixel values are computed
per axis based on that axis' size. Fill up any new pixels with zero
(black values).
>>> aug = iaa.AffineCv2(rotate=35)
Rotate all images by ``35`` *degrees*. Fill up any new pixels with zero
(black values).
>>> aug = iaa.AffineCv2(shear=15)
Shear all images by ``15`` *degrees*. Fill up any new pixels with zero
(black values).
>>> aug = iaa.AffineCv2(translate_px=(-16, 16))
Translate all images on the x- and y-axis by a random value
between ``-16`` and ``16`` pixels (to the bottom right) and fill up any new
pixels with zero (black values). The translation value is sampled once
per image and is the same for both axis.
>>> aug = iaa.AffineCv2(translate_px={"x": (-16, 16), "y": (-4, 4)})
Translate all images on the x-axis by a random value
between ``-16`` and ``16`` pixels (to the right) and on the y-axis by a
random value between ``-4`` and ``4`` pixels to the bottom. The sampling
happens independently per axis, so even if both intervals were identical,
the sampled axis-wise values would likely be different.
This also fills up any new pixels with zero (black values).
>>> aug = iaa.AffineCv2(scale=2.0, order=[0, 1])
Same as in the above `scale` example, but uses (randomly) either
nearest neighbour interpolation or linear interpolation. If `order` is
not specified, ``order=1`` would be used by default.
>>> aug = iaa.AffineCv2(translate_px=16, cval=(0, 255))
Same as in the `translate_px` example above, but newly created pixels
are now filled with a random color (sampled once per image and the
same for all newly created pixels within that image).
>>> aug = iaa.AffineCv2(translate_px=16, mode=["constant", "replicate"])
Similar to the previous example, but the newly created pixels are
filled with black pixels in half of all images (mode ``constant`` with
default `cval` being ``0``) and in the other half of all images using
``replicate`` mode, which repeats the color of the spatially closest pixel
of the corresponding image edge.
"""
def __init__(self, scale=1.0, translate_percent=None, translate_px=None,
             rotate=0.0, shear=0.0, order=cv2.INTER_LINEAR, cval=0,
             mode=cv2.BORDER_CONSTANT,
             name=None, deterministic=False, random_state=None):
    """Validate the arguments and store them as parameter attributes.

    The accepted values of each argument are described in the class
    docstring. The results are stored in ``self.order``, ``self.cval``,
    ``self.mode``, ``self.scale``, ``self.translate``, ``self.rotate``
    and ``self.shear``. ``self.scale`` and ``self.translate`` are tuples
    ``(x, y)`` if the respective argument was a dictionary.

    Raises
    ------
    AssertionError
        If `order` or `mode` contain unknown values, if a dictionary
        argument contains neither ``x`` nor ``y``, or if both
        `translate_percent` and `translate_px` are provided.

    Exception
        If `order` or `mode` have an unsupported type.

    """
    super(AffineCv2, self).__init__(
        name=name, deterministic=deterministic, random_state=random_state)

    # order (interpolation method)
    available_orders = [cv2.INTER_NEAREST, cv2.INTER_LINEAR,
                        cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]
    available_orders_str = ["nearest", "linear", "cubic", "lanczos4"]

    if order == ia.ALL:
        self.order = iap.Choice(available_orders)
    elif ia.is_single_integer(order):
        assert order in available_orders, (
            "Expected order's integer value to be in %s, got %d." % (
                str(available_orders), order))
        self.order = iap.Deterministic(order)
    elif ia.is_string(order):
        assert order in available_orders_str, (
            "Expected order to be in %s, got %s." % (
                str(available_orders_str), order))
        self.order = iap.Deterministic(order)
    elif isinstance(order, list):
        valid_types = all(
            [ia.is_single_integer(val) or ia.is_string(val)
             for val in order])
        assert valid_types, (
            "Expected order list to only contain integers/strings, got "
            "types %s." % (str([type(val) for val in order]),))
        valid_orders = all(
            [val in available_orders + available_orders_str
             for val in order])
        assert valid_orders, (
            "Expected all order values to be in %s, got %s." % (
                available_orders + available_orders_str, str(order),))
        self.order = iap.Choice(order)
    elif isinstance(order, iap.StochasticParameter):
        self.order = order
    else:
        # the two message parts were previously joined without a space
        # ("a list ofint/string")
        raise Exception(
            "Expected order to be imgaug.ALL, int, string, a list of "
            "int/string or StochasticParameter, got %s." % (type(order),))

    # cval (fill value for mode=constant)
    if cval == ia.ALL:
        self.cval = iap.DiscreteUniform(0, 255)
    else:
        self.cval = iap.handle_discrete_param(
            cval, "cval", value_range=(0, 255), tuple_to_uniform=True,
            list_to_choice=True, allow_floats=True)

    # mode (border handling)
    available_modes = [cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT,
                       cv2.BORDER_REFLECT_101, cv2.BORDER_WRAP,
                       cv2.BORDER_CONSTANT]
    available_modes_str = ["replicate", "reflect", "reflect_101",
                           "wrap", "constant"]
    if mode == ia.ALL:
        self.mode = iap.Choice(available_modes)
    elif ia.is_single_integer(mode):
        assert mode in available_modes, (
            "Expected mode to be in %s, got %d." % (
                str(available_modes), mode))
        self.mode = iap.Deterministic(mode)
    elif ia.is_string(mode):
        assert mode in available_modes_str, (
            "Expected mode to be in %s, got %s." % (
                str(available_modes_str), mode))
        self.mode = iap.Deterministic(mode)
    elif isinstance(mode, list):
        all_valid_types = all([
            ia.is_single_integer(val) or ia.is_string(val) for val in mode])
        assert all_valid_types, (
            "Expected mode list to only contain integers/strings, "
            "got types %s." % (str([type(val) for val in mode]),))
        all_valid_modes = all([
            val in available_modes + available_modes_str for val in mode])
        assert all_valid_modes, (
            "Expected all mode values to be in %s, got %s." % (
                str(available_modes + available_modes_str), str(mode)))
        self.mode = iap.Choice(mode)
    elif isinstance(mode, iap.StochasticParameter):
        self.mode = mode
    else:
        raise Exception(
            "Expected mode to be imgaug.ALL, an int, a string, a list of "
            "int/strings or StochasticParameter, got %s." % (type(mode),))

    # scale
    if isinstance(scale, dict):
        assert "x" in scale or "y" in scale, (
            "Expected scale dictionary to contain at "
            "least key \"x\" or key \"y\". Found neither of them.")
        x = scale.get("x", 1.0)
        y = scale.get("y", 1.0)
        self.scale = (
            iap.handle_continuous_param(
                x, "scale['x']", value_range=(0+1e-4, None),
                tuple_to_uniform=True, list_to_choice=True),
            iap.handle_continuous_param(
                y, "scale['y']", value_range=(0+1e-4, None),
                tuple_to_uniform=True, list_to_choice=True)
        )
    else:
        self.scale = iap.handle_continuous_param(
            scale, "scale", value_range=(0+1e-4, None),
            tuple_to_uniform=True, list_to_choice=True)

    # translate
    if translate_percent is None and translate_px is None:
        translate_px = 0

    # This check fails only if *both* arguments were provided; the message
    # previously claimed that neither of them was.
    assert translate_percent is None or translate_px is None, (
        "Expected either translate_percent or translate_px to be "
        "provided, but both of them were.")

    if translate_percent is not None:
        # translate by percent
        if isinstance(translate_percent, dict):
            assert "x" in translate_percent or "y" in translate_percent, (
                "Expected translate_percent dictionary to contain at "
                "least key \"x\" or key \"y\". Found neither of them.")
            x = translate_percent.get("x", 0)
            y = translate_percent.get("y", 0)
            self.translate = (
                iap.handle_continuous_param(
                    x, "translate_percent['x']", value_range=None,
                    tuple_to_uniform=True, list_to_choice=True),
                iap.handle_continuous_param(
                    y, "translate_percent['y']", value_range=None,
                    tuple_to_uniform=True, list_to_choice=True)
            )
        else:
            self.translate = iap.handle_continuous_param(
                translate_percent, "translate_percent", value_range=None,
                tuple_to_uniform=True, list_to_choice=True)
    else:
        # translate by pixels
        if isinstance(translate_px, dict):
            assert "x" in translate_px or "y" in translate_px, (
                "Expected translate_px dictionary to contain at "
                "least key \"x\" or key \"y\". Found neither of them.")
            x = translate_px.get("x", 0)
            y = translate_px.get("y", 0)
            self.translate = (
                iap.handle_discrete_param(
                    x, "translate_px['x']", value_range=None,
                    tuple_to_uniform=True, list_to_choice=True,
                    allow_floats=False),
                iap.handle_discrete_param(
                    y, "translate_px['y']", value_range=None,
                    tuple_to_uniform=True, list_to_choice=True,
                    allow_floats=False)
            )
        else:
            self.translate = iap.handle_discrete_param(
                translate_px, "translate_px", value_range=None,
                tuple_to_uniform=True, list_to_choice=True,
                allow_floats=False)

    # rotate and shear (both in degrees)
    self.rotate = iap.handle_continuous_param(
        rotate, "rotate", value_range=None, tuple_to_uniform=True,
        list_to_choice=True)
    self.shear = iap.handle_continuous_param(
        shear, "shear", value_range=None, tuple_to_uniform=True,
        list_to_choice=True)
def _augment_images(self, images, random_state, parents, hooks):
nb_images = len(images)
scale_samples, translate_samples, rotate_samples, shear_samples, \
cval_samples, mode_samples, order_samples = self._draw_samples(
nb_images, random_state)
result = self._augment_images_by_samples(
images, scale_samples, translate_samples, rotate_samples,
shear_samples, cval_samples, mode_samples, order_samples)
return result
@classmethod
def _augment_images_by_samples(cls, images, scale_samples,
                               translate_samples, rotate_samples,
                               shear_samples, cval_samples, mode_samples,
                               order_samples):
    """Warp images with ``cv2.warpAffine`` according to the samples.

    Parameters
    ----------
    images : ndarray or list of ndarray
        Images to warp. Entries are replaced in-place by their warped
        versions and the same container is returned.

    scale_samples, translate_samples : tuple
        Pairs ``(x, y)`` of per-image samples. Float translations are
        multiplied with the axis size; other values are used as pixels.

    rotate_samples, shear_samples : sequence
        Per-image angles in degrees.

    cval_samples : sequence
        Per-image iterables of fill values, each cast to ``int``.

    mode_samples, order_samples : sequence
        Per-image cv2 constants or their string names.

    Returns
    -------
    ndarray or list of ndarray
        The container `images` with warped content.

    """
    # TODO change these to class attributes
    interpolation_by_name = {
        "nearest": cv2.INTER_NEAREST,
        "linear": cv2.INTER_LINEAR,
        "cubic": cv2.INTER_CUBIC,
        "lanczos4": cv2.INTER_LANCZOS4
    }
    border_by_name = {
        "replicate": cv2.BORDER_REPLICATE,
        "reflect": cv2.BORDER_REFLECT,
        "reflect_101": cv2.BORDER_REFLECT_101,
        "wrap": cv2.BORDER_WRAP,
        "constant": cv2.BORDER_CONSTANT
    }

    result = images

    for idx in sm.xrange(len(images)):
        image = images[idx]
        height, width = image.shape[0], image.shape[1]

        # offsets of the image center, around which the image is
        # transformed
        center_x = width / 2.0 - 0.5
        center_y = height / 2.0 - 0.5

        scale_x = scale_samples[0][idx]
        scale_y = scale_samples[1][idx]

        # float translations denote fractions of the axis size
        shift_x = translate_samples[0][idx]
        shift_y = translate_samples[1][idx]
        shift_y_px = shift_y
        if ia.is_single_float(shift_y):
            shift_y_px = int(np.round(shift_y * image.shape[0]))
        shift_x_px = shift_x
        if ia.is_single_float(shift_x):
            shift_x_px = int(np.round(shift_x * image.shape[1]))

        rotate = rotate_samples[idx]
        shear = shear_samples[idx]
        cval = cval_samples[idx]
        mode = mode_samples[idx]
        order = order_samples[idx]

        # map string names to the respective cv2 constants
        if not ia.is_single_integer(mode):
            mode = border_by_name[mode]
        if not ia.is_single_integer(order):
            order = interpolation_by_name[order]

        is_noop = (
            scale_x == 1.0 and scale_y == 1.0
            and shift_x_px == 0 and shift_y_px == 0
            and rotate == 0 and shear == 0
        )
        if is_noop:
            result[idx] = image
            continue

        # move the center to the top left corner, transform, move back
        to_topleft = tf.SimilarityTransform(
            translation=[-center_x, -center_y])
        transforms = tf.AffineTransform(
            scale=(scale_x, scale_y),
            translation=(shift_x_px, shift_y_px),
            rotation=math.radians(rotate),
            shear=math.radians(shear)
        )
        to_center = tf.SimilarityTransform(
            translation=[center_x, center_y])
        matrix = to_topleft + transforms + to_center

        warped = cv2.warpAffine(
            image,
            matrix.params[:2],
            dsize=(width, height),
            flags=order,
            borderMode=mode,
            borderValue=tuple(int(value) for value in cval)
        )

        # cv2 warp drops last axis if shape is (H, W, 1)
        if warped.ndim == 2:
            warped = warped[..., np.newaxis]

        result[idx] = warped

    return result
def _augment_heatmaps(self, heatmaps, random_state, parents, hooks):
nb_images = len(heatmaps)
scale_samples, translate_samples, rotate_samples, shear_samples, \
cval_samples, mode_samples, order_samples = self._draw_samples(
nb_images, random_state)
cval_samples = np.zeros((cval_samples.shape[0], 1), dtype=np.float32)
mode_samples = ["constant"] * len(mode_samples)
arrs = [heatmap_i.arr_0to1 for heatmap_i in heatmaps]
arrs_aug = self._augment_images_by_samples(
arrs, scale_samples, translate_samples, rotate_samples,
shear_samples, cval_samples, mode_samples, order_samples)
for heatmap_i, arr_aug in zip(heatmaps, arrs_aug):
heatmap_i.arr_0to1 = arr_aug
return heatmaps
def _augment_segmentation_maps(self, segmaps, random_state, parents, hooks):
nb_images = len(segmaps)
scale_samples, translate_samples, rotate_samples, shear_samples, \
cval_samples, mode_samples, order_samples = self._draw_samples(
nb_images, random_state)
cval_samples = np.zeros((cval_samples.shape[0], 1), dtype=np.float32)
mode_samples = ["constant"] * len(mode_samples)
order_samples = [0] * len(order_samples)
arrs = [segmaps_i.arr for segmaps_i in segmaps]
arrs_aug = self._augment_images_by_samples(
arrs, scale_samples, translate_samples, rotate_samples,
shear_samples, cval_samples, mode_samples, order_samples)
for segmaps_i, arr_aug in zip(segmaps, arrs_aug):
segmaps_i.arr = arr_aug
return segmaps
def _augment_keypoints(self, keypoints_on_images, random_state, parents,
hooks):
result = []
nb_images = len(keypoints_on_images)
scale_samples, translate_samples, rotate_samples, shear_samples, \
_cval_samples, _mode_samples, _order_samples = self._draw_samples(
nb_images, random_state)
for i, keypoints_on_image in enumerate(keypoints_on_images):
if not keypoints_on_image.keypoints:
# AffineCv2 does not change the image shape, hence we can skip
# all steps below if there are no keypoints
result.append(keypoints_on_image)
continue
height, width = keypoints_on_image.height, keypoints_on_image.width
shift_x = width / 2.0 - 0.5
shift_y = height / 2.0 - 0.5
scale_x, scale_y = scale_samples[0][i], scale_samples[1][i]
translate_x = translate_samples[0][i]
translate_y = translate_samples[1][i]
if ia.is_single_float(translate_y):
translate_y_px = int(
np.round(translate_y * keypoints_on_image.shape[0]))
else:
translate_y_px = translate_y
if ia.is_single_float(translate_x):
translate_x_px = int(
np.round(translate_x * keypoints_on_image.shape[1]))
else:
translate_x_px = translate_x
rotate = rotate_samples[i]
shear = shear_samples[i]
any_change = (
scale_x != 1.0 or scale_y != 1.0
or translate_x_px != 0 or translate_y_px != 0
or rotate != 0 or shear != 0
)
if any_change:
matrix_to_topleft = tf.SimilarityTransform(
translation=[-shift_x, -shift_y])
matrix_transforms = tf.AffineTransform(
scale=(scale_x, scale_y),
translation=(translate_x_px, translate_y_px),
rotation=math.radians(rotate),
shear=math.radians(shear)
)
matrix_to_center = tf.SimilarityTransform(
translation=[shift_x, shift_y])
matrix = (matrix_to_topleft
+ matrix_transforms
+ matrix_to_center)
coords = keypoints_on_image.to_xy_array()
coords_aug = tf.matrix_transform(coords, matrix.params)
kps_new = [kp.deepcopy(x=coords[0], y=coords[1])
for kp, coords
in zip(keypoints_on_image.keypoints, coords_aug)]
result.append(keypoints_on_image.deepcopy(
keypoints=kps_new,
shape=keypoints_on_image.shape
))
else:
result.append(keypoints_on_image)
return result
def _augment_polygons(self, polygons_on_images, random_state, parents,
hooks):
return self._augment_polygons_as_keypoints(
polygons_on_images, random_state, parents, hooks)
def get_parameters(self):
    """Return this augmenter's parameters as a list.

    The order is: scale, translate, rotate, shear, order, cval, mode.
    """
    geometry = [self.scale, self.translate, self.rotate, self.shear]
    sampling = [self.order, self.cval, self.mode]
    return geometry + sampling
def _draw_samples(self, nb_samples, random_state):
    """Sample all affine parameters for `nb_samples` images.

    Each parameter uses its own entry of the ``11`` duplicated random
    states. Scale and translate are sampled per axis if they are stored
    as tuples; otherwise a single draw is shared by both axes.

    Returns
    -------
    tuple
        ``(scale, translate, rotate, shear, cval, mode, order)`` samples,
        where ``scale`` and ``translate`` are ``(x, y)`` tuples.

    Raises
    ------
    AssertionError
        If the translate samples are not of dtype ``int32``, ``int64``,
        ``float32`` or ``float64``.

    """
    rngs = random_state.duplicate(11)
    size = (nb_samples,)

    scale = self.scale
    if isinstance(scale, tuple):
        scale_x = scale[0].draw_samples(size, random_state=rngs[0])
        scale_y = scale[1].draw_samples(size, random_state=rngs[1])
    else:
        # one draw, used identically for the x- and y-axis
        scale_x = scale_y = scale.draw_samples(size, random_state=rngs[2])
    scale_samples = (scale_x, scale_y)

    translate = self.translate
    if isinstance(translate, tuple):
        translate_x = translate[0].draw_samples(size, random_state=rngs[3])
        translate_y = translate[1].draw_samples(size, random_state=rngs[4])
    else:
        # one draw, used identically for the x- and y-axis
        translate_x = translate_y = translate.draw_samples(
            size, random_state=rngs[5])
    translate_samples = (translate_x, translate_y)

    valid_dts = ["int32", "int64", "float32", "float64"]
    for axis_samples in translate_samples:
        assert axis_samples.dtype.name in valid_dts, (
            "Expected translate_samples to have any dtype of %s. "
            "Got %s." % (str(valid_dts), axis_samples.dtype.name,))

    rotate_samples = self.rotate.draw_samples(size, random_state=rngs[6])
    shear_samples = self.shear.draw_samples(size, random_state=rngs[7])
    # three cval values are drawn per image
    cval_samples = self.cval.draw_samples((nb_samples, 3),
                                          random_state=rngs[8])
    mode_samples = self.mode.draw_samples(size, random_state=rngs[9])
    order_samples = self.order.draw_samples(size, random_state=rngs[10])

    return (
        scale_samples, translate_samples, rotate_samples, shear_samples,
        cval_samples, mode_samples, order_samples
    )
class _PiecewiseAffineSamplingResult(object):
    """Container for the values sampled by ``PiecewiseAffine``.

    The constructor arguments are stored as attributes of the same name.
    """

    def __init__(self, nb_rows, nb_cols, order, cval, mode):
        self.nb_rows, self.nb_cols = nb_rows, nb_cols
        self.order, self.cval, self.mode = order, cval, mode

    def get_clipped_cval(self, idx, dtype):
        """Return ``cval[idx]`` limited to the value range of `dtype`.

        The range is taken from the first and third item returned by
        ``iadt.get_value_range_of_dtype(dtype)``.
        """
        lower, _, upper = iadt.get_value_range_of_dtype(dtype)
        capped = min(self.cval[idx], upper)
        return max(capped, lower)
[docs]class PiecewiseAffine(meta.Augmenter):
"""
Apply affine transformations that differ between local neighbourhoods.
This augmenter places a regular grid of points on an image and randomly
moves the neighbourhood of these points around via affine transformations.
This leads to local distortions.
This is mostly a wrapper around scikit-image's ``PiecewiseAffine``.
See also ``Affine`` for a similar technique.
.. note::
This augmenter is very slow. See :ref:`performance`.
Try to use ``ElasticTransformation`` instead, which is at least 10x
faster.
.. note::
For coordinate-based inputs (keypoints, bounding boxes, polygons,
...), this augmenter still has to perform an image-based augmentation,
which will make it significantly slower for such inputs than other
augmenters. See :ref:`performance`.
dtype support::
* ``uint8``: yes; fully tested
* ``uint16``: yes; tested (1)
* ``uint32``: yes; tested (1) (2)
* ``uint64``: no (3)
* ``int8``: yes; tested (1)
* ``int16``: yes; tested (1)
* ``int32``: yes; tested (1) (2)
* ``int64``: no (3)
* ``float16``: yes; tested (1)
* ``float32``: yes; tested (1)
* ``float64``: yes; tested (1)
* ``float128``: no (3)
* ``bool``: yes; tested (1) (4)
- (1) Only tested with `order` set to ``0``.
- (2) scikit-image converts internally to ``float64``, which might
introduce inaccuracies. Tests showed that these inaccuracies
seemed to not be an issue.
- (3) Results too inaccurate.
- (4) Mapped internally to ``float64``.
Parameters
----------
scale : float or tuple of float or imgaug.parameters.StochasticParameter, optional
Each point on the regular grid is moved around via a normal
distribution. This scale factor is equivalent to the normal
distribution's sigma. Note that the jitter (how far each point is
moved in which direction) is multiplied by the height/width of the
image if ``absolute_scale=False`` (default), so this scale can be
the same for different sized images.
Recommended values are in the range ``0.01`` to ``0.05`` (weak to
strong augmentations).
* If a single ``float``, then that value will always be used as
the scale.
* If a tuple ``(a, b)`` of ``float`` s, then a random value will
be uniformly sampled per image from the interval ``[a, b]``.
* If a list, then a random value will be picked from that list
per image.
* If a ``StochasticParameter``, then that parameter will be
queried to draw one value per image.
nb_rows : int or tuple of int or imgaug.parameters.StochasticParameter, optional
Number of rows of points that the regular grid should have.
Must be at least ``2``. For large images, you might want to pick a
higher value than ``4``. You might have to then adjust scale to lower
values.
* If a single ``int``, then that value will always be used as the
number of rows.
* If a tuple ``(a, b)``, then a value from the discrete interval
``[a..b]`` will be uniformly sampled per image.
* If a list, then a random value will be picked from that list
per image.
* If a StochasticParameter, then that parameter will be queried to
draw one value per image.
nb_cols : int or tuple of int or imgaug.parameters.StochasticParameter, optional
Number of columns. Analogous to `nb_rows`.
order : int or list of int or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
See :func:`imgaug.augmenters.geometric.Affine.__init__`.
cval : int or float or tuple of float or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
See :func:`imgaug.augmenters.geometric.Affine.__init__`.
mode : str or list of str or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
See :func:`imgaug.augmenters.geometric.Affine.__init__`.
absolute_scale : bool, optional
Take `scale` as an absolute value rather than a relative value.
polygon_recoverer : 'auto' or None or imgaug.augmentables.polys._ConcavePolygonRecoverer, optional
The class to use to repair invalid polygons.
If ``"auto"``, a new instance of
:class:`imgaug.augmentables.polys._ConcavePolygonRecoverer`
will be created.
If ``None``, no polygon recoverer will be used.
If an object, then that object will be used and must provide a
``recover_from()`` method, similar to
:class:`imgaug.augmentables.polys._ConcavePolygonRecoverer`.
name : None or str, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
deterministic : bool, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
random_state : None or int or imgaug.random.RNG or numpy.random.Generator or numpy.random.bit_generator.BitGenerator or numpy.random.SeedSequence or numpy.random.RandomState, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
Examples
--------
>>> import imgaug.augmenters as iaa
>>> aug = iaa.PiecewiseAffine(scale=(0.01, 0.05))
Place a regular grid of points on each image and then randomly move each
point around by ``1`` to ``5`` percent (with respect to the image
height/width). Pixels between these points will be moved accordingly.
>>> aug = iaa.PiecewiseAffine(scale=(0.01, 0.05), nb_rows=8, nb_cols=8)
Same as the previous example, but uses a denser grid of ``8x8`` points
(default is ``4x4``). This can be useful for large images.
"""
def __init__(self, scale=0, nb_rows=4, nb_cols=4, order=1, cval=0,
             mode="constant", absolute_scale=False, polygon_recoverer=None,
             name=None, deterministic=False, random_state=None):
    """Create the augmenter and convert its arguments to parameters."""
    super(PiecewiseAffine, self).__init__(
        name=name, deterministic=deterministic, random_state=random_state)

    # ``scale`` is used as the sigma of the zero-centered normal
    # distribution from which the jitter is drawn.
    self.scale = iap.handle_continuous_param(
        scale, "scale", value_range=(0, None), tuple_to_uniform=True,
        list_to_choice=True)
    self.jitter = iap.Normal(loc=0, scale=self.scale)

    # Both grid dimensions are handled with identical settings.
    grid_kwargs = dict(
        value_range=(2, None), tuple_to_uniform=True,
        list_to_choice=True, allow_floats=False)
    self.nb_rows = iap.handle_discrete_param(
        nb_rows, "nb_rows", **grid_kwargs)
    self.nb_cols = iap.handle_discrete_param(
        nb_cols, "nb_cols", **grid_kwargs)

    self.order = _handle_order_arg(order, backend="skimage")
    self.cval = _handle_cval_arg(cval)
    self.mode = _handle_mode_arg(mode)

    self.absolute_scale = absolute_scale

    # "auto" is replaced by a fresh default recoverer; any other value
    # (including None) is stored as given.
    if polygon_recoverer == "auto":
        polygon_recoverer = _ConcavePolygonRecoverer()
    self.polygon_recoverer = polygon_recoverer

    # Special order, mode and cval parameters for heatmaps and
    # segmentation maps. These may either be None or a fixed value.
    # Stochastic parameters are currently *not* supported.
    # If set to None, the same values as for images will be used.
    # That is really not recommended for the cval parameter.
    self._order_heatmaps, self._order_segmentation_maps = 3, 0
    self._mode_heatmaps = self._mode_segmentation_maps = "constant"
    self._cval_heatmaps = self._cval_segmentation_maps = 0
def _augment_images(self, images, random_state, parents, hooks):
    """Warp each image with its own random piecewise affine transform.

    The input container is modified in-place and returned. Images for
    which ``_get_transformer()`` returns ``None`` are left untouched.
    """
    iadt.gate_dtypes(
        images,
        allowed=["bool",
                 "uint8", "uint16", "uint32",
                 "int8", "int16", "int32",
                 "float16", "float32", "float64"],
        disallowed=["uint64", "uint128", "uint256",
                    "int64", "int128", "int256",
                    "float96", "float128", "float256"],
        augmenter=self)

    nb_images = len(images)
    samples = self._draw_samples(nb_images, random_state)
    rss = random_state.duplicate(nb_images)

    for i, image in enumerate(images):
        transformer = self._get_transformer(
            image.shape, image.shape, samples.nb_rows[i],
            samples.nb_cols[i], rss[i])
        if transformer is None:
            continue

        input_dtype = image.dtype
        output_shape = image.shape
        is_bool = (input_dtype.kind == "b")
        if is_bool:
            # bool images are warped as float64 and thresholded back to
            # bool afterwards
            image = image.astype(np.float64)

        image_warped = tf.warp(
            image,
            transformer,
            order=samples.order[i],
            mode=samples.mode[i],
            # clipped against the dtype that is actually given to warp()
            cval=samples.get_clipped_cval(i, image.dtype),
            preserve_range=True,
            output_shape=output_shape
        )

        if is_bool:
            image_warped = image_warped > 0.5
        else:
            # warp seems to change everything to float64, including
            # uint8, making this necessary
            image_warped = iadt.restore_dtypes_(image_warped, input_dtype)

        images[i] = image_warped

    return images
def _augment_heatmaps(self, heatmaps, random_state, parents, hooks):
    """Augment heatmaps (attribute ``arr_0to1``).

    Uses the heatmap-specific cval, mode and order stored on the
    augmenter instead of the image parameters.
    """
    return self._augment_hms_and_segmaps(
        heatmaps, random_state, arr_attr_name="arr_0to1",
        cval=self._cval_heatmaps, mode=self._mode_heatmaps,
        order=self._order_heatmaps)
def _augment_segmentation_maps(self, segmaps, random_state, parents, hooks):
    """Augment segmentation maps (attribute ``arr``).

    Uses the segmap-specific cval, mode and order stored on the
    augmenter instead of the image parameters.
    """
    return self._augment_hms_and_segmaps(
        segmaps, random_state, arr_attr_name="arr",
        cval=self._cval_segmentation_maps,
        mode=self._mode_segmentation_maps,
        order=self._order_segmentation_maps)
def _augment_hms_and_segmaps(self, augmentables, random_state,
                             arr_attr_name, cval, mode, order):
    """Warp the arrays of heatmaps or segmentation maps.

    Parameters
    ----------
    augmentables : sequence
        Objects whose array attribute is to be warped. They are modified
        in-place.
    random_state
        RNG used to draw the samples and the per-augmentable RNGs.
    arr_attr_name : str
        Name of the attribute holding the array, e.g. ``"arr_0to1"`` or
        ``"arr"``.
    cval, mode, order : None or fixed value
        Fixed warp settings. For each that is ``None``, the value sampled
        for the corresponding image is used instead.

    Returns
    -------
    sequence
        The input `augmentables` (same container object).

    """
    nb_images = len(augmentables)
    samples = self._draw_samples(nb_images, random_state)
    rss = random_state.duplicate(nb_images)

    for i, augmentable in enumerate(augmentables):
        arr = getattr(augmentable, arr_attr_name)

        transformer = self._get_transformer(
            arr.shape, augmentable.shape, samples.nb_rows[i],
            samples.nb_cols[i], rss[i])
        if transformer is None:
            continue

        # Resolve the settings that are really used for this map. The
        # resolved order (not the raw argument, which may be None) must
        # also drive the clipping decision below.
        order_i = order if order is not None else samples.order[i]
        mode_i = mode if mode is not None else samples.mode[i]
        cval_i = cval if cval is not None else samples.cval[i]

        arr_warped = tf.warp(
            arr,
            transformer,
            order=order_i,
            mode=mode_i,
            cval=cval_i,
            preserve_range=True,
            output_shape=arr.shape
        )

        # skimage converts to float64
        arr_warped = arr_warped.astype(arr.dtype)

        # TODO not entirely clear whether this breaks the value
        #      range -- Affine does
        # TODO add test for this
        # order=3 matches cubic interpolation and can cause values
        # to go outside of the range [0.0, 1.0] not clear whether
        # 4+ also do that
        # We don't modify segmaps here, because they don't have a
        # clear value range of [0, 1]
        if order_i >= 3 and isinstance(augmentable, ia.HeatmapsOnImage):
            arr_warped = np.clip(arr_warped, 0.0, 1.0, out=arr_warped)

        setattr(augmentable, arr_attr_name, arr_warped)

    return augmentables
def _augment_keypoints(self, keypoints_on_images, random_state, parents,
                       hooks):
    """Augment keypoints by warping distance maps of them.

    Each keypoint is drawn as an (inverted) distance map, the maps are
    warped with the sampled transformer and the new keypoint locations
    are read back from the warped maps. Keypoints that were outside of
    the image plane before augmentation keep their original coordinates.

    Returns a list with one keypoints-on-image instance per input.
    """
    nb_images = len(keypoints_on_images)
    samples = self._draw_samples(nb_images, random_state)
    rss = random_state.duplicate(nb_images)

    result = []
    for i, kpsoi in enumerate(keypoints_on_images):
        if not kpsoi.keypoints:
            # PiecewiseAffine does not change the image shape, so we can
            # just reuse the old keypoints
            result.append(kpsoi)
            continue

        height, width = kpsoi.shape[0:2]
        transformer = self._get_transformer(
            kpsoi.shape, kpsoi.shape, samples.nb_rows[i],
            samples.nb_cols[i], rss[i])
        if transformer is None or len(kpsoi.keypoints) == 0:
            result.append(kpsoi)
            continue

        # A purely coordinate-based routine, i.e. applying
        # ``transformer.inverse()`` to the keypoint coordinates, would be
        # far more efficient. It is not used, because for large scale
        # parameters (lots of jitter/distortion) the resulting
        # coordinates were often wildly off. The reason for that is
        # unknown.
        #
        # Image based augmentation routine. Draws the keypoints on
        # the image plane using distance maps (more accurate than
        # just marking the points), then augments these images, then
        # searches for the new (visual) location of the keypoints.
        # Much slower than directly augmenting the coordinates, but
        # here the only method that reliably works.
        dist_maps = kpsoi.to_distance_maps(inverted=True)
        dist_maps_warped = tf.warp(
            dist_maps,
            transformer,
            order=1,
            preserve_range=True,
            output_shape=(kpsoi.shape[0], kpsoi.shape[1],
                          len(kpsoi.keypoints))
        )

        nb_channels = None if len(kpsoi.shape) < 3 else kpsoi.shape[2]
        kps_aug = ia.KeypointsOnImage.from_distance_maps(
            dist_maps_warped,
            inverted=True,
            threshold=0.01,
            if_not_found_coords={"x": -1, "y": -1},
            nb_channels=nb_channels
        )

        # use deepcopy() to copy old instance states as much as
        # possible
        keypoints_moved = [
            kp.deepcopy(x=kp_aug.x, y=kp_aug.y)
            for kp, kp_aug
            in zip(kpsoi.keypoints, kps_aug.keypoints)]
        kps_aug_post = kpsoi.deepcopy(keypoints=keypoints_moved)

        # Keypoints that were outside of the image plane before the
        # augmentation will be replaced with (-1, -1) by default (as
        # they can't be drawn on the keypoint images). They are now
        # replaced by their old coordinates values.
        was_outside = [
            not (0 <= kp.x < width and 0 <= kp.y < height)
            for kp in kpsoi.keypoints]
        for kp_idx in sm.xrange(len(kps_aug_post.keypoints)):
            if was_outside[kp_idx]:
                kps_aug_post.keypoints[kp_idx] = kpsoi.keypoints[kp_idx]

        result.append(kps_aug_post)

    return result
def _augment_polygons(self, polygons_on_images, random_state, parents,
                      hooks):
    """Augment polygons by routing them through the keypoint-based path.

    The arguments are forwarded to
    ``self._augment_polygons_as_keypoints()`` together with this
    augmenter's ``polygon_recoverer``.
    """
    recoverer = self.polygon_recoverer
    return self._augment_polygons_as_keypoints(
        polygons_on_images, random_state, parents, hooks,
        recoverer=recoverer)
def _draw_samples(self, nb_images, random_state):
    """Sample grid size, order, cval and mode for ``nb_images`` inputs.

    Five RNGs are derived from `random_state`; the last five entries of
    the returned sequence are used, in the order nb_rows, nb_cols, order,
    cval, mode. The jitter is not sampled here.

    Returns a ``_PiecewiseAffineSamplingResult``.
    """
    rss = random_state.duplicate(5)
    rs_rows, rs_cols, rs_order, rs_cval, rs_mode = rss[-5:]
    size = (nb_images,)

    return _PiecewiseAffineSamplingResult(
        nb_rows=self.nb_rows.draw_samples(size, random_state=rs_rows),
        nb_cols=self.nb_cols.draw_samples(size, random_state=rs_cols),
        order=self.order.draw_samples(size, random_state=rs_order),
        cval=self.cval.draw_samples(size, random_state=rs_cval),
        mode=self.mode.draw_samples(size, random_state=rs_mode))
def _get_transformer(self, augmentable_shape, image_shape, nb_rows,
                     nb_cols, random_state):
    """Estimate a piecewise affine transform for one augmentable.

    A regular grid of ``nb_rows x nb_cols`` points (each at least 2) is
    spread from ``0`` to the height/width of `augmentable_shape`. Every
    point is moved by jitter drawn from ``self.jitter`` and the moved
    points are clipped to the image plane.

    Returns
    -------
    None or skimage.transform.PiecewiseAffineTransform
        ``None`` if all sampled jitter values are zero, if height or
        width of `augmentable_shape` is ``<= 1`` or if any of the two
        shapes has three axes of which the last one is zero. Otherwise
        the estimated transform.

    """
    nb_rows = max(nb_rows, 2)
    nb_cols = max(nb_cols, 2)
    height, width = augmentable_shape[0], augmentable_shape[1]

    # source grid as (y, x) rows with shape (nb_rows*nb_cols, 2)
    ys = np.linspace(0, height, nb_rows)
    xs = np.linspace(0, width, nb_cols)
    xx_src, yy_src = np.meshgrid(xs, ys)
    points_src = np.dstack([yy_src.flat, xx_src.flat])[0]

    jitter_img = self.jitter.draw_samples(points_src.shape,
                                          random_state=random_state)
    if np.count_nonzero(jitter_img) == 0:
        # no point would be moved
        return None

    if self.absolute_scale:
        # absolute jitter is first normalized by the image size ...
        for axis in (0, 1):
            if image_shape[axis] > 0:
                jitter_img[:, axis] = (
                    jitter_img[:, axis] / image_shape[axis])
            else:
                jitter_img[:, axis] = 0.0

    # ... and relative jitter is scaled to the size of the augmentable
    for axis in (0, 1):
        jitter_img[:, axis] = (
            jitter_img[:, axis] * augmentable_shape[axis])

    points_dest = points_src + jitter_img

    # Restrict all destination points to be inside the image plane.
    # This is necessary, as otherwise keypoints could be augmented
    # outside of the image plane and these would be replaced by
    # (-1, -1), which would not conform with the behaviour of the
    # other augmenters.
    points_dest[:, 0] = np.clip(points_dest[:, 0], 0, height - 1)
    points_dest[:, 1] = np.clip(points_dest[:, 1], 0, width - 1)

    def _has_zero_channels(shape):
        return shape is not None and len(shape) == 3 and shape[-1] == 0

    # tf.warp() results in qhull error if the points are identical,
    # which is mainly the case if any axis is 0
    has_low_axis = any([axis <= 1 for axis in augmentable_shape[0:2]])
    has_zero_channels = (
        _has_zero_channels(augmentable_shape)
        or _has_zero_channels(image_shape))
    if has_low_axis or has_zero_channels:
        return None

    # estimate() is given (x, y) points, hence the reversed columns
    matrix = tf.PiecewiseAffineTransform()
    matrix.estimate(points_src[:, ::-1], points_dest[:, ::-1])
    return matrix
def get_parameters(self):
    """Return this augmenter's parameters as a list.

    The order is: scale, nb_rows, nb_cols, order, cval, mode,
    absolute_scale.
    """
    attr_names = ("scale", "nb_rows", "nb_cols", "order", "cval", "mode",
                  "absolute_scale")
    return [getattr(self, attr_name) for attr_name in attr_names]
class _PerspectiveTransformSamplingResult(object):
    """Plain container for sampled perspective transform values.

    The constructor arguments are stored as attributes of the same name.
    """

    def __init__(self, matrices, max_heights, max_widths, cvals, modes):
        self.matrices = matrices
        self.max_heights, self.max_widths = max_heights, max_widths
        self.cvals, self.modes = cvals, modes
# TODO add arg for image interpolation
class _ElasticTransformationSamplingResult(object):
    """Plain container for sampled elastic transformation values.

    The constructor arguments are stored as attributes of the same name.
    """

    def __init__(self, random_states, alphas, sigmas, orders, cvals, modes):
        self.random_states = random_states
        self.alphas, self.sigmas = alphas, sigmas
        self.orders, self.cvals, self.modes = orders, cvals, modes
# TODO add independent sigmas for x/y
# TODO add independent alphas for x/y
# TODO add backend arg
[docs]class Rot90(meta.Augmenter):
"""
Rotate images clockwise by multiples of 90 degrees.
This could also be achieved using ``Affine``, but ``Rot90`` is
significantly more efficient.
dtype support::
if (keep_size=False)::
* ``uint8``: yes; fully tested
* ``uint16``: yes; tested
* ``uint32``: yes; tested
* ``uint64``: yes; tested
* ``int8``: yes; tested
* ``int16``: yes; tested
* ``int32``: yes; tested
* ``int64``: yes; tested
* ``float16``: yes; tested
* ``float32``: yes; tested
* ``float64``: yes; tested
* ``float128``: yes; tested
* ``bool``: yes; tested
if (keep_size=True)::
minimum of (
``imgaug.augmenters.geometric.Rot90(keep_size=False)``,
:func:`imgaug.imgaug.imresize_many_images`
)
Parameters
----------
k : int or list of int or tuple of int or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
How often to rotate clockwise by 90 degrees.
* If a single ``int``, then that value will be used for all images.
* If a tuple ``(a, b)``, then a random value will be uniformly
sampled per image from the discrete interval ``[a..b]``.
* If a list, then for each image a random value will be sampled
from that list.
* If ``imgaug.ALL``, then equivalent to list ``[0, 1, 2, 3]``.
* If ``StochasticParameter``, then that parameter is queried per
image to sample the value to use.
keep_size : bool, optional
After rotation by an odd-valued `k` (e.g. 1 or 3), the resulting image
may have a different height/width than the original image.
If this parameter is set to ``True``, then the rotated
image will be resized to the input image's size. Note that this might
also cause the augmented image to look distorted.
name : None or str, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
deterministic : bool, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
random_state : None or int or imgaug.random.RNG or numpy.random.Generator or numpy.random.bit_generator.BitGenerator or numpy.random.SeedSequence or numpy.random.RandomState, optional
See :func:`imgaug.augmenters.meta.Augmenter.__init__`.
Examples
--------
>>> import imgaug.augmenters as iaa
>>> aug = iaa.Rot90(1)
Rotate all images by 90 degrees.
Resize these images afterwards to keep the size that they had before
augmentation.
This may cause the images to look distorted.
>>> aug = iaa.Rot90([1, 3])
Rotate all images by 90 or 270 degrees.
Resize these images afterwards to keep the size that they had before
augmentation.
This may cause the images to look distorted.
>>> aug = iaa.Rot90((1, 3))
Rotate all images by 90, 180 or 270 degrees.
Resize these images afterwards to keep the size that they had before
augmentation.
This may cause the images to look distorted.
>>> aug = iaa.Rot90((1, 3), keep_size=False)
Rotate all images by 90, 180 or 270 degrees.
Does not resize to the original image size afterwards, i.e. each image's
size may change.
"""
def __init__(self, k, keep_size=True, name=None, deterministic=False,
             random_state=None):
    """Create the augmenter and convert `k` to a discrete parameter."""
    super(Rot90, self).__init__(
        name=name, deterministic=deterministic, random_state=random_state)

    # imgaug.ALL is shorthand for all four possible rotations
    k_values = [0, 1, 2, 3] if k == ia.ALL else k
    self.k = iap.handle_discrete_param(
        k_values, "k", value_range=None, tuple_to_uniform=True,
        list_to_choice=True, allow_floats=False)

    self.keep_size = keep_size
def _draw_samples(self, nb_images, random_state):
    """Sample one value of ``k`` per image from ``self.k``."""
    sample_shape = (nb_images,)
    return self.k.draw_samples(sample_shape, random_state=random_state)
def _augment_arrays(self, arrs, random_state, resize_func):
    """Sample ``k`` for every array and rotate the arrays accordingly.

    Returns a tuple ``(arrs_aug, ks)`` of the rotated arrays and the
    sampled rotation counts.
    """
    ks = self._draw_samples(len(arrs), random_state)
    arrs_aug = self._augment_arrays_by_samples(
        arrs, ks, self.keep_size, resize_func)
    return arrs_aug, ks
@classmethod
def _augment_arrays_by_samples(cls, arrs, ks, keep_size, resize_func):
    """Rotate each array clockwise by its ``k`` times 90 degrees.

    If `keep_size` is true and `resize_func` is not ``None``, arrays
    whose shape changed are passed through
    ``resize_func(arr, (height, width))`` of the unrotated array.

    Returns a list of arrays. If `keep_size` is true, the input was a
    numpy array and all results share one shape, the list is converted
    to an array of the input dtype.
    """
    input_was_array = ia.is_np_array(arrs)
    input_dtype = arrs.dtype if input_was_array else None

    def _rotate(arr, k):
        # adding axes here rotates clock-wise instead of ccw
        rotated = np.rot90(arr, k, axes=(1, 0))
        shape_changed = (arr.shape != rotated.shape)
        if keep_size and resize_func is not None and shape_changed:
            rotated = resize_func(rotated, arr.shape[0:2])
        return rotated

    arrs_aug = [_rotate(arr, k_i) for arr, k_i in zip(arrs, ks)]

    if keep_size and input_was_array:
        unique_shapes = {arr.shape for arr in arrs_aug}
        if len(unique_shapes) == 1:
            arrs_aug = np.array(arrs_aug, dtype=input_dtype)
    return arrs_aug
def _augment_images(self, images, random_state, parents, hooks):
    """Rotate images; ``ia.imresize_single_image`` is the resize function.

    The sampled rotation counts are discarded, only the rotated images
    are returned.
    """
    images_aug, _ks = self._augment_arrays(
        images, random_state, ia.imresize_single_image)
    return images_aug
def _augment_heatmaps(self, heatmaps, random_state, parents, hooks):
    """Rotate heatmaps (attribute ``arr_0to1``)."""
    return self._augment_hms_and_segmaps(
        heatmaps, arr_attr_name="arr_0to1", random_state=random_state)
def _augment_segmentation_maps(self, segmaps, random_state, parents, hooks):
    """Rotate segmentation maps (attribute ``arr``)."""
    return self._augment_hms_and_segmaps(
        segmaps, arr_attr_name="arr", random_state=random_state)
def _augment_hms_and_segmaps(self, augmentables, arr_attr_name,
                             random_state):
    """Rotate the arrays of heatmaps or segmentation maps.

    The rotated array is written back to attribute `arr_attr_name` of
    each augmentable. With ``keep_size`` the augmentable is afterwards
    resized to the height/width of its unrotated array. Without it,
    height and width of its ``shape`` are swapped for odd ``k``.

    Returns a list of the augmented objects.
    """
    arrs = [getattr(augmentable, arr_attr_name)
            for augmentable in augmentables]
    # no resize function: resizing is done via the augmentables below
    arrs_aug, ks = self._augment_arrays(arrs, random_state, None)

    result = []
    for augmentable, arr, arr_aug, k_i in zip(augmentables, arrs,
                                              arrs_aug, ks):
        setattr(augmentable, arr_attr_name, arr_aug)

        if self.keep_size:
            augmentable = augmentable.resize(arr.shape[0:2])
        elif k_i % 2 == 1:
            height, width = augmentable.shape[0:2]
            augmentable.shape = (
                (width, height) + tuple(augmentable.shape[2:]))
        # else: keep_size was False, but rotated by a multiple of 2,
        # hence height and width do not change

        result.append(augmentable)
    return result
def _augment_keypoints(self, keypoints_on_images, random_state, parents,
                       hooks):
    """Rotate keypoints clockwise by the sampled multiples of 90 degrees.

    Inputs whose ``k`` is a multiple of 4 are returned unchanged (same
    object). For all others a copy with rotated coordinates and rotated
    shape is created. With ``keep_size`` and a changed height/width, that
    copy is projected back onto the input shape.

    Returns a list with one keypoints-on-image instance per input.
    """
    def _rotate_xy(x, y, height, nb_turns):
        # A single clockwise turn maps (x, y) to (h - y, x), where h is
        # the height before that turn. For int coordinates this would
        # instead be ((h - 1) - y, x); here we assume that coordinates
        # are always subpixel-accurate.
        # Only the height enters the formula. After each turn the new
        # height is the previous width, which equals the x-extent that
        # the previous turn was computed against.
        cur_h, cur_w = height, None
        for turn in range(nb_turns):
            if turn == 0:
                cur_w = width
            x, y = cur_h - y, x
            cur_h, cur_w = cur_w, cur_h
        return x, y

    ks = self._draw_samples(len(keypoints_on_images), random_state)

    result = []
    for kpsoi, k_i in zip(keypoints_on_images, ks):
        if (k_i % 4) == 0:
            result.append(kpsoi)
            continue

        nb_turns = int(k_i) % 4  # this is also correct when k_i is negative
        height, width = kpsoi.shape[0:2]
        if (nb_turns % 2) == 0:
            h_aug, w_aug = height, width
        else:
            h_aug, w_aug = width, height

        kps_aug = []
        for kp in kpsoi.keypoints:
            x_rot, y_rot = _rotate_xy(kp.x, kp.y, height, nb_turns)
            kps_aug.append(kp.deepcopy(x=x_rot, y=y_rot))

        shape_aug = (h_aug, w_aug) + tuple(kpsoi.shape[2:])
        kpsoi_aug = kpsoi.deepcopy(keypoints=kps_aug, shape=shape_aug)
        if self.keep_size and (height, width) != (h_aug, w_aug):
            kpsoi_aug = kpsoi_aug.on(kpsoi.shape)
            kpsoi_aug.shape = kpsoi.shape
        result.append(kpsoi_aug)
    return result
def _augment_polygons(self, polygons_on_images, random_state, parents,
                      hooks):
    """Rotate polygons by routing them through the keypoint-based path.

    All four arguments are forwarded unchanged to
    ``self._augment_polygons_as_keypoints()`` and its result is returned.
    """
    augment_as_keypoints = self._augment_polygons_as_keypoints
    return augment_as_keypoints(
        polygons_on_images, random_state, parents, hooks)
def get_parameters(self):
    """Return this augmenter's parameters as ``[k, keep_size]``."""
    attr_names = ("k", "keep_size")
    return [getattr(self, attr_name) for attr_name in attr_names]