Source code for imgaug.augmenters.blend

"""
Augmenters that blend two images with each other.

Do not import directly from this file, as the categorization is not final.
Use instead ::

    from imgaug import augmenters as iaa

and then e.g. ::

    seq = iaa.Sequential([
        iaa.Alpha(0.5, iaa.Add((-5, 5)))
    ])

List of augmenters:

    * Alpha
    * AlphaElementwise
    * SimplexNoiseAlpha
    * FrequencyNoiseAlpha

"""
from __future__ import print_function, division, absolute_import

import numpy as np
import six.moves as sm

from . import meta
import imgaug as ia
from .. import parameters as iap
from .. import dtypes as iadt
from ..augmentables import utils as augm_utils


def blend_alpha(image_fg, image_bg, alpha, eps=1e-2):
    """
    Blend two images using an alpha blending.

    In alpha blending, the two images are naively mixed using a multiplier.
    Let ``A`` be the foreground image and ``B`` the background image and
    ``a`` is the alpha value. Each pixel intensity is then computed as
    ``a * A_ij + (1-a) * B_ij``.

    dtype support::

        * ``uint8``: yes; fully tested
        * ``uint16``: yes; fully tested
        * ``uint32``: yes; fully tested
        * ``uint64``: yes; fully tested (1)
        * ``int8``: yes; fully tested
        * ``int16``: yes; fully tested
        * ``int32``: yes; fully tested
        * ``int64``: yes; fully tested (1)
        * ``float16``: yes; fully tested
        * ``float32``: yes; fully tested
        * ``float64``: yes; fully tested (1)
        * ``float128``: no (2)
        * ``bool``: yes; fully tested (3)

        - (1) Tests show that these dtypes work, but a conversion to
              ``float128`` happens, which only has 96 bits of size instead of
              true 128 bits and hence not twice as much resolution. It is
              possible that these dtypes result in inaccuracies, though the
              tests did not indicate that. If the platform offers no 16-byte
              float dtype, ``numpy.longdouble`` is used instead.
        - (2) Not available due to the input dtype having to be increased to
              an equivalent float dtype with two times the input resolution.
        - (3) Mapped internally to ``float32``. The blended result is
              converted back to bool by thresholding at ``0.5``.

    Parameters
    ----------
    image_fg : (H,W,[C]) ndarray
        Foreground image. Shape and dtype kind must match the one of the
        background image.

    image_bg : (H,W,[C]) ndarray
        Background image. Shape and dtype kind must match the one of the
        foreground image.

    alpha : number or iterable of number or ndarray
        The blending factor, between ``0.0`` and ``1.0``. Can be interpreted
        as the opacity of the foreground image. Values around ``1.0`` result
        in only the foreground image being visible. Values around ``0.0``
        result in only the background image being visible. Multiple alphas
        may be provided. In these cases, there must be exactly one alpha per
        channel in the foreground/background image. Alternatively, for
        ``(H,W,C)`` images, either one ``(H,W)`` array or an ``(H,W,C)``
        array of alphas may be provided, denoting the elementwise alpha
        value.

    eps : number, optional
        Controls when an alpha is to be interpreted as exactly ``1.0`` or
        exactly ``0.0``, resulting in only the foreground/background being
        visible and skipping the actual computation.

    Returns
    -------
    image_blend : (H,W,[C]) ndarray
        Blend of foreground and background image. Has a channel axis only if
        the input images had one.

    Raises
    ------
    AssertionError
        If the image shapes or dtype kinds differ, if an image is
        ``float128``, if an ``(H,W)`` or ``(H,W,C)`` alpha array does not
        match the image size, or if the first alpha value lies outside of
        ``[0.0, 1.0]`` (only checked when no shortcut was taken).

    """
    assert image_fg.shape == image_bg.shape, (
        "Expected foreground and background images to have the same shape. "
        "Got %s and %s." % (image_fg.shape, image_bg.shape))
    assert image_fg.dtype.kind == image_bg.dtype.kind, (
        "Expected foreground and background images to have the same dtype "
        "kind. Got %s and %s." % (image_fg.dtype.kind, image_bg.dtype.kind))
    # TODO switch to gate_dtypes()
    assert image_fg.dtype.name not in ["float128"], (
        "Foreground image was float128, but blend_alpha() cannot handle that "
        "dtype.")
    assert image_bg.dtype.name not in ["float128"], (
        "Background image was float128, but blend_alpha() cannot handle that "
        "dtype.")

    # work on (H,W,C) internally, the channel axis is removed again at the end
    input_was_2d = (image_fg.ndim == 2)
    if input_was_2d:
        image_fg = image_fg[..., np.newaxis]
        image_bg = image_bg[..., np.newaxis]

    input_was_bool = False
    if image_fg.dtype.kind == "b":
        input_was_bool = True
        # use float32 instead of float16 here because it seems to be faster
        image_fg = image_fg.astype(np.float32)
        image_bg = image_bg.astype(np.float32)

    alpha = np.array(alpha, dtype=np.float64)
    if alpha.size == 1:
        pass
    else:
        if alpha.ndim == 2:
            assert alpha.shape == image_fg.shape[0:2], (
                "'alpha' given as an array must match the height and width "
                "of the foreground and background image. Got shape %s vs "
                "foreground/background shape %s." % (
                    alpha.shape, image_fg.shape))
            alpha = alpha.reshape((alpha.shape[0], alpha.shape[1], 1))
        elif alpha.ndim == 3:
            assert (
                alpha.shape == image_fg.shape
                or alpha.shape == image_fg.shape[0:2] + (1,)), (
                    "'alpha' given as an array must match the height and "
                    "width of the foreground and background image. Got "
                    "shape %s vs foreground/background shape %s." % (
                        alpha.shape, image_fg.shape))
        else:
            # e.g. a flat list of per-channel alphas
            alpha = alpha.reshape((1, 1, -1))
        if alpha.shape[2] != image_fg.shape[2]:
            alpha = np.tile(alpha, (1, 1, image_fg.shape[2]))

    # Shortcuts for (almost) fully opaque/transparent foregrounds. They are
    # skipped for bool inputs, as these were already converted to float32
    # above and still have to be thresholded back to bool.
    if not input_was_bool:
        if np.all(alpha >= 1.0 - eps):
            if input_was_2d:
                image_fg = image_fg[..., 0]
            return np.copy(image_fg)
        elif np.all(alpha <= eps):
            if input_was_2d:
                image_bg = image_bg[..., 0]
            return np.copy(image_bg)

    # for efficiency reasons, only test one value of alpha here, even if alpha
    # is much larger
    if alpha.size > 0:
        assert 0 <= alpha.item(0) <= 1.0, (
            "Expected 'alpha' value(s) to be in the interval [0.0, 1.0]. "
            "Got min %.4f and max %.4f." % (np.min(alpha), np.max(alpha)))

    dt_images = iadt.get_minimal_dtype([image_fg, image_bg])

    # doing the below itemsize increase only for non-float images led to
    # inaccuracies for large float values
    # we also use a minimum of 4 bytes (=float32), as float32 tends to be
    # faster than float16
    isize = dt_images.itemsize * 2
    isize = max(isize, 4)
    try:
        dt_blend = np.dtype("f%d" % (isize,))
    except TypeError:
        # A 16-byte float ("f16") is only defined by numpy on platforms whose
        # long double occupies 16 bytes. Everywhere else fall back to the
        # widest float dtype that numpy offers instead of failing for 64bit
        # inputs.
        dt_blend = np.dtype(np.longdouble)

    if alpha.dtype.name != dt_blend.name:
        alpha = alpha.astype(dt_blend)
    if image_fg.dtype.name != dt_blend.name:
        image_fg = image_fg.astype(dt_blend)
    if image_bg.dtype.name != dt_blend.name:
        image_bg = image_bg.astype(dt_blend)

    # the following is equivalent to
    #     image_blend = alpha * image_fg + (1 - alpha) * image_bg
    # but supposedly faster
    image_blend = image_bg + alpha * (image_fg - image_bg)

    if input_was_bool:
        image_blend = image_blend > 0.5
    else:
        # skip clip, because alpha is expected to be in range [0.0, 1.0] and
        # both images must have same dtype
        # dont skip round, because otherwise it is very unlikely to hit the
        # image's max possible value
        image_blend = iadt.restore_dtypes_(
            image_blend, dt_images, clip=False, round=True)

    if input_was_2d:
        return image_blend[:, :, 0]
    return image_blend
class Alpha(meta.Augmenter):
    """
    Alpha-blend two image sources using an alpha/opacity value.

    The two image sources can be imagined as branches.
    If a source is not given, it is automatically the same as the input.
    Let A be the first branch and B be the second branch.
    Then the result images are defined as ``factor * A + (1-factor) * B``,
    where ``factor`` is an overlay factor.

    .. note::

        It is not recommended to use ``Alpha`` with augmenters
        that change the geometry of images (e.g. horizontal flips, affine
        transformations) if you *also* want to augment coordinates (e.g.
        keypoints, polygons, ...), as it is unclear which of the two
        coordinate results (first or second branch) should be used as the
        coordinates after augmentation.

        Currently, if ``factor >= 0.5`` (per image), the results of the first
        branch are used as the new coordinates, otherwise the results of the
        second branch.

    dtype support::

        See :func:`imgaug.augmenters.blend.blend_alpha`.

    Parameters
    ----------
    factor : number or tuple of number or list of number or imgaug.parameters.StochasticParameter, optional
        Weighting of the results of the first branch. Values close to ``0``
        mean that the results from the second branch (see parameter `second`)
        make up most of the final image.

            * If float, then that value will be used for all images.
            * If tuple ``(a, b)``, then a random value from the interval
              ``[a, b]`` will be sampled per image.
            * If a list, then a random value will be picked from that list per
              image.
            * If ``StochasticParameter``, then that parameter will be used to
              sample a value per image.

    first : None or imgaug.augmenters.meta.Augmenter or iterable of imgaug.augmenters.meta.Augmenter, optional
        Augmenter(s) that make up the first of the two branches.

            * If ``None``, then the input images will be reused as the output
              of the first branch.
            * If ``Augmenter``, then that augmenter will be used as the branch.
            * If iterable of ``Augmenter``, then that iterable will be
              converted into a ``Sequential`` and used as the augmenter.

    second : None or imgaug.augmenters.meta.Augmenter or iterable of imgaug.augmenters.meta.Augmenter, optional
        Augmenter(s) that make up the second of the two branches.

            * If ``None``, then the input images will be reused as the output
              of the second branch.
            * If ``Augmenter``, then that augmenter will be used as the branch.
            * If iterable of ``Augmenter``, then that iterable will be
              converted into a ``Sequential`` and used as the augmenter.

    per_channel : bool or float, optional
        Whether to use the same factor for all channels (``False``)
        or to sample a new value for each channel (``True``).
        If this value is a float ``p``, then for ``p`` percent of all images
        `per_channel` will be treated as True, otherwise as False.

    name : None or str, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    deterministic : bool, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    random_state : None or int or imgaug.random.RNG or numpy.random.Generator or numpy.random.bit_generator.BitGenerator or numpy.random.SeedSequence or numpy.random.RandomState, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    Examples
    --------
    >>> import imgaug.augmenters as iaa
    >>> aug = iaa.Alpha(0.5, iaa.Grayscale(1.0))

    Convert each image to pure grayscale and alpha-blend the result with the
    original image using an alpha of ``50%``, thereby removing about ``50%`` of
    all color. This is equivalent to ``iaa.Grayscale(0.5)``.

    >>> aug = iaa.Alpha((0.0, 1.0), iaa.Grayscale(1.0))

    Same as in the previous example, but the alpha factor is sampled uniformly
    from the interval ``[0.0, 1.0]`` once per image, thereby removing a random
    fraction of all colors. This is equivalent to
    ``iaa.Grayscale((0.0, 1.0))``.

    >>> aug = iaa.Alpha(
    >>>     (0.0, 1.0),
    >>>     iaa.Affine(rotate=(-20, 20)),
    >>>     per_channel=0.5)

    First, rotate each image by a random degree sampled uniformly from the
    interval ``[-20, 20]``. Then, alpha-blend that new image with the original
    one using a random factor sampled uniformly from the interval
    ``[0.0, 1.0]``. For ``50%`` of all images, the blending happens
    channel-wise and the factor is sampled independently per channel
    (``per_channel=0.5``). As a result, e.g. the red channel may look visibly
    rotated (factor near ``1.0``), while the green and blue channels may not
    look rotated (factors near ``0.0``).

    >>> aug = iaa.Alpha(
    >>>     (0.0, 1.0),
    >>>     first=iaa.Add(100),
    >>>     second=iaa.Multiply(0.2))

    Apply two branches of augmenters -- ``A`` and ``B`` -- *independently*
    to input images and alpha-blend the results of these branches using a
    factor ``f``. Branch ``A`` increases image pixel intensities by ``100``
    and ``B`` multiplies the pixel intensities by ``0.2``. ``f`` is sampled
    uniformly from the interval ``[0.0, 1.0]`` per image. The resulting images
    contain a bit of ``A`` and a bit of ``B``.

    >>> aug = iaa.Alpha([0.25, 0.75], iaa.MedianBlur(13))

    Apply median blur to each image and alpha-blend the result with the
    original image using an alpha factor of either exactly ``0.25`` or
    exactly ``0.75`` (sampled once per image).

    """

    # TODO rename first/second to foreground/background?
    def __init__(self, factor=0, first=None, second=None, per_channel=False,
                 name=None, deterministic=False, random_state=None):
        super(Alpha, self).__init__(
            name=name, deterministic=deterministic, random_state=random_state)

        self.factor = iap.handle_continuous_param(
            factor, "factor", value_range=(0, 1.0), tuple_to_uniform=True,
            list_to_choice=True)

        assert first is not None or second is not None, (
            "Expected 'first' and/or 'second' to not be None (i.e. at least "
            "one Augmenter), but got two None values.")
        self.first = meta.handle_children_list(first, self.name, "first",
                                               default=None)
        self.second = meta.handle_children_list(second, self.name, "second",
                                                default=None)

        self.per_channel = iap.handle_probability_param(per_channel,
                                                        "per_channel")

        # passed to blend_alpha() as `eps`
        self.epsilon = 1e-2

    def _augment_images(self, images, random_state, parents, hooks):
        """Blend the image outputs of both branches, one alpha per image or
        per channel."""
        outputs_first, outputs_second = self._generate_branch_outputs(
            images, "augment_images", hooks, parents)

        nb_images = len(images)
        # same early exit as in _augment_nonimages(): nothing to sample or
        # blend for an empty batch
        if nb_images == 0:
            return images

        nb_channels = meta.estimate_max_number_of_channels(images)
        rngs = random_state.duplicate(2)
        per_channel = self.per_channel.draw_samples(nb_images,
                                                    random_state=rngs[0])
        alphas = self.factor.draw_samples((nb_images, nb_channels),
                                          random_state=rngs[1])
        result = images
        gen = enumerate(zip(outputs_first, outputs_second))
        for i, (image_first, image_second) in gen:
            if per_channel[i] > 0.5:
                # (H,W) images have no channel axis and are treated as having
                # a single channel, same as in _augment_nonimages()
                nb_channels_i = (
                    image_first.shape[2]
                    if len(image_first.shape) >= 3
                    else 1)
                alphas_i = alphas[i, 0:nb_channels_i]
            else:
                # We catch here the case of alphas[i] being empty, which can
                # happen if all images have 0 channels.
                # In that case the alpha value doesn't matter as the image
                # contains zero values anyways.
                alphas_i = alphas[i, 0] if alphas[i].size > 0 else 0

            result[i] = blend_alpha(image_first, image_second, alphas_i,
                                    eps=self.epsilon)
        return result

    def _augment_heatmaps(self, heatmaps, random_state, parents, hooks):
        # This currently uses either branch A's results or branch B's results.
        # TODO Make this flexible to allow gradual blending of heatmaps here
        #      as used for images. Heatmaps are probably the only augmentable
        #      where this makes sense.
        return self._augment_nonimages(
            heatmaps, random_state, parents, hooks,
            "augment_heatmaps")

    def _augment_segmentation_maps(self, segmaps, random_state, parents,
                                   hooks):
        return self._augment_nonimages(
            segmaps, random_state, parents, hooks,
            "augment_segmentation_maps")

    def _augment_keypoints(self, keypoints_on_images, random_state, parents,
                           hooks):
        return self._augment_nonimages(
            keypoints_on_images, random_state, parents, hooks,
            "augment_keypoints")

    def _augment_polygons(self, polygons_on_images, random_state, parents,
                          hooks):
        return self._augment_nonimages(
            polygons_on_images, random_state, parents, hooks,
            "augment_polygons")

    def _augment_nonimages(self, augmentables, random_state, parents, hooks,
                           augfunc_name):
        """Pick, per augmentable, the output of the first or second branch
        based on the sampled alpha."""
        outputs_first, outputs_second = self._generate_branch_outputs(
            augmentables, augfunc_name, hooks, parents)

        nb_images = len(augmentables)
        if nb_images == 0:
            return augmentables

        nb_channels = meta.estimate_max_number_of_channels(augmentables)
        rngs = random_state.duplicate(2)
        per_channel = self.per_channel.draw_samples(nb_images,
                                                    random_state=rngs[0])
        alphas = self.factor.draw_samples((nb_images, nb_channels),
                                          random_state=rngs[1])

        result = augmentables
        gen = enumerate(zip(outputs_first, outputs_second))
        for i, (outputs_first_i, outputs_second_i) in gen:
            # nonimage augmentation also works channel-wise -- even though
            # e.g. keypoints do not have channels -- in order to keep the
            # random samples properly synchronized with the image augmentation
            if per_channel[i] > 0.5:
                nb_channels_i = (
                    augmentables[i].shape[2]
                    if len(augmentables[i].shape) >= 3
                    else 1)

                alphas_i = alphas[i, 0:nb_channels_i]

                # the condition is required here if all images have a channel
                # axis of size 0
                alpha = np.average(alphas_i) if alphas_i.size > 0 else 1.0
            else:
                # the condition is required here if all images have a channel
                # axis of size 0
                alpha = alphas[i, 0] if alphas.size > 0 else 1.0
            assert 0 <= alpha <= 1.0, (
                "Expected 'alpha' to be in the interval [0.0, 1.0]. "
                "Got %.4f." % (alpha,))

            # We decide here for one branch output or another.
            # For coordinate-based augmentables there is really no good other
            # way here as you can't just choose "a bit" of one coordinate
            # (averaging almost never makes sense). For heatmaps we could
            # do some blending, but choose here to not do that to better
            # align with segmentation maps.
            # So if the alpha is >= 0.5 (branch A is more visible than
            # branch B), we pick the results of branch A and only A,
            # otherwise the result of branch B.
            if alpha >= 0.5:
                result[i] = outputs_first_i
            else:
                result[i] = outputs_second_i

        return result

    def _generate_branch_outputs(self, augmentables, augfunc_name, hooks,
                                 parents):
        """Run both branches on copies of the inputs and return their
        outputs.

        A branch that is ``None`` -- or both branches, if the hooks stop the
        propagation -- yields the unchanged input augmentables.
        """
        outputs_first = augmentables
        outputs_second = augmentables

        if self._is_propagating(augmentables, hooks, parents):
            if self.first is not None:
                outputs_first = getattr(self.first, augfunc_name)(
                    augm_utils.copy_augmentables(augmentables),
                    parents=parents + [self],
                    hooks=hooks
                )

            if self.second is not None:
                outputs_second = getattr(self.second, augfunc_name)(
                    augm_utils.copy_augmentables(augmentables),
                    parents=parents + [self],
                    hooks=hooks
                )

        return outputs_first, outputs_second

    def _is_propagating(self, augmentables, hooks, parents):
        """Return whether the child augmenters are to be executed, which is
        always the case if no hooks were provided."""
        return (
            hooks is None
            or hooks.is_propagating(augmentables, augmenter=self,
                                    parents=parents, default=True))

    def _to_deterministic(self):
        """Return a deterministic copy of this augmenter with deterministic
        branches."""
        aug = self.copy()
        aug.first = (
            aug.first.to_deterministic() if aug.first is not None else None)
        aug.second = (
            aug.second.to_deterministic() if aug.second is not None else None)
        aug.deterministic = True
        aug.random_state = self.random_state.derive_rng_()
        return aug

    def get_parameters(self):
        """Return the stochastic parameters ``[factor, per_channel]``."""
        return [self.factor, self.per_channel]

    def get_children_lists(self):
        """Return those of the two branches that are not ``None``."""
        return [lst for lst in [self.first, self.second] if lst is not None]

    def __str__(self):
        pattern = (
            "%s("
            "factor=%s, per_channel=%s, name=%s, first=%s, second=%s, "
            "deterministic=%s"
            ")"
        )
        return pattern % (
            self.__class__.__name__, self.factor, self.per_channel, self.name,
            self.first, self.second, self.deterministic)
# FIXME the output of the third example makes it look like per_channel isn't
#       working
# TODO switch line strings to either-or approach, like polygons
class AlphaElementwise(Alpha):
    """
    Alpha-blend two image sources using alpha/opacity values sampled per pixel.

    This is the same as :class:`Alpha`, except that the opacity factor is
    sampled once per *pixel* instead of once per *image* (or a few times per
    image, if ``Alpha.per_channel`` is set to ``True``).

    See :class:`Alpha` for more details.

    .. note::

        It is not recommended to use ``AlphaElementwise`` with augmenters
        that change the geometry of images (e.g. horizontal flips, affine
        transformations) if you *also* want to augment coordinates (e.g.
        keypoints, polygons, ...), as it is unclear which of the two
        coordinate results (first or second branch) should be used as the
        coordinates after augmentation.

        Currently, for keypoints and line strings the results of the first
        and second branch will be mixed. For each coordinate, the augmented
        one from the first or second branch will be picked based on the
        average alpha mask value at the corresponding spatial location.

        For polygons, only all polygons of the first or all of the second
        branch will be used, based on the average over the whole alpha mask.

    dtype support::

        See :func:`imgaug.augmenters.blend.blend_alpha`.

    Parameters
    ----------
    factor : number or tuple of number or list of number or imgaug.parameters.StochasticParameter, optional
        Weighting of the results of the first branch. Values close to 0 mean
        that the results from the second branch (see parameter `second`)
        make up most of the final image.

            * If float, then that value will be used for all images.
            * If tuple ``(a, b)``, then a random value from the interval
              ``[a, b]`` will be sampled per image.
            * If a list, then a random value will be picked from that list per
              image.
            * If ``StochasticParameter``, then that parameter will be used to
              sample a value per image.

    first : None or imgaug.augmenters.meta.Augmenter or iterable of imgaug.augmenters.meta.Augmenter, optional
        Augmenter(s) that make up the first of the two branches.

            * If ``None``, then the input images will be reused as the output
              of the first branch.
            * If ``Augmenter``, then that augmenter will be used as the branch.
            * If iterable of ``Augmenter``, then that iterable will be
              converted into a ``Sequential`` and used as the augmenter.

    second : None or imgaug.augmenters.meta.Augmenter or iterable of imgaug.augmenters.meta.Augmenter, optional
        Augmenter(s) that make up the second of the two branches.

            * If ``None``, then the input images will be reused as the output
              of the second branch.
            * If ``Augmenter``, then that augmenter will be used as the branch.
            * If iterable of ``Augmenter``, then that iterable will be
              converted into a ``Sequential`` and used as the augmenter.

    per_channel : bool or float, optional
        Whether to use the same factor for all channels (``False``)
        or to sample a new value for each channel (``True``).
        If this value is a float ``p``, then for ``p`` percent of all images
        `per_channel` will be treated as True, otherwise as False.

    name : None or str, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    deterministic : bool, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    random_state : None or int or imgaug.random.RNG or numpy.random.Generator or numpy.random.bit_generator.BitGenerator or numpy.random.SeedSequence or numpy.random.RandomState, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    Examples
    --------
    >>> import imgaug.augmenters as iaa
    >>> aug = iaa.AlphaElementwise(0.5, iaa.Grayscale(1.0))

    Convert each image to pure grayscale and alpha-blend the result with the
    original image using an alpha of ``50%`` for all pixels, thereby removing
    about ``50%`` of all color. This is equivalent to ``iaa.Grayscale(0.5)``.
    This is also equivalent to ``iaa.Alpha(0.5, iaa.Grayscale(1.0))``, as
    the opacity has a fixed value of ``0.5`` and is hence identical for all
    pixels.

    >>> aug = iaa.AlphaElementwise((0, 1.0), iaa.Grayscale(1.0))

    Same as in the previous example, but the alpha factor is sampled uniformly
    from the interval ``[0.0, 1.0]`` once per pixel, thereby removing a random
    fraction of all colors from each pixel. This is equivalent to
    ``iaa.Grayscale((0.0, 1.0))``.

    >>> aug = iaa.AlphaElementwise(
    >>>     (0.0, 1.0),
    >>>     iaa.Affine(rotate=(-20, 20)),
    >>>     per_channel=0.5)

    First, rotate each image by a random degree sampled uniformly from the
    interval ``[-20, 20]``. Then, alpha-blend that new image with the original
    one using a random factor sampled uniformly from the interval
    ``[0.0, 1.0]`` per pixel. For ``50%`` of all images, the blending happens
    channel-wise and the factor is sampled independently per pixel *and*
    channel (``per_channel=0.5``). As a result, e.g. the red channel may look
    visibly rotated (factor near ``1.0``), while the green and blue channels
    may not look rotated (factors near ``0.0``).

    >>> aug = iaa.AlphaElementwise(
    >>>     (0.0, 1.0),
    >>>     first=iaa.Add(100),
    >>>     second=iaa.Multiply(0.2))

    Apply two branches of augmenters -- ``A`` and ``B`` -- *independently*
    to input images and alpha-blend the results of these branches using a
    factor ``f``. Branch ``A`` increases image pixel intensities by ``100``
    and ``B`` multiplies the pixel intensities by ``0.2``. ``f`` is sampled
    uniformly from the interval ``[0.0, 1.0]`` per pixel. The resulting images
    contain a bit of ``A`` and a bit of ``B``.

    >>> aug = iaa.AlphaElementwise([0.25, 0.75], iaa.MedianBlur(13))

    Apply median blur to each image and alpha-blend the result with the
    original image using an alpha factor of either exactly ``0.25`` or
    exactly ``0.75`` (sampled once per pixel).

    """

    # Currently the mode is only used for keypoint augmentation.
    # either or: use all keypoints from first or all from second branch (based
    #   on average of the whole mask).
    # pointwise: decide for each point whether to use the first or second
    #   branch's keypoint (based on the average mask value at the point's
    #   xy-location).
    _MODE_EITHER_OR = "either-or"
    _MODE_POINTWISE = "pointwise"
    _MODES = [_MODE_POINTWISE, _MODE_EITHER_OR]

    def __init__(self, factor=0, first=None, second=None, per_channel=False,
                 name=None, deterministic=False, random_state=None):
        super(AlphaElementwise, self).__init__(
            factor=factor,
            first=first,
            second=second,
            per_channel=per_channel,
            name=name,
            deterministic=deterministic,
            random_state=random_state
        )

        # this controls how keypoints and polygons are augmented
        # keypoint mode currently also affects line strings and bounding boxes
        self._keypoints_mode = self._MODE_POINTWISE
        self._polygons_mode = self._MODE_EITHER_OR

    def _augment_images(self, images, random_state, parents, hooks):
        """Blend the image outputs of both branches using one sampled alpha
        mask per image."""
        outputs_first, outputs_second = self._generate_branch_outputs(
            images, "augment_images", hooks, parents)

        nb_images = len(images)
        # one RNG per image for the mask, the last one for per_channel
        rngs = random_state.duplicate(nb_images+1)
        per_channel = self.per_channel.draw_samples(nb_images,
                                                    random_state=rngs[-1])

        result = images
        gen = enumerate(zip(images, outputs_first, outputs_second))
        for i, (image, image_first, image_second) in gen:
            h, w, nb_channels = image.shape[0:3]
            mask = self._sample_mask(h, w, nb_channels, per_channel[i],
                                     rngs[i])

            result[i] = blend_alpha(image_first, image_second, mask,
                                    eps=self.epsilon)
        return result

    def _sample_mask(self, height, width, nb_channels, per_channel, rng):
        """Sample an ``(H,W,C)`` alpha mask from ``self.factor``.

        If `per_channel` is above ``0.5``, one ``(H,W)`` mask is sampled per
        channel, otherwise a single ``(H,W)`` mask is repeated for all
        channels.
        """
        if per_channel > 0.5:
            mask = [
                self.factor.draw_samples((height, width), random_state=rng)
                for _ in sm.xrange(nb_channels)]
            mask = np.stack(mask, axis=-1).astype(np.float64)
        else:
            # TODO When this was wrongly sampled directly as (H,W,C) no
            #      test for AlphaElementwise ended up failing. That should not
            #      happen.
            # note that this should not be (H,W,1) as otherwise
            # SimplexNoiseAlpha fails as noise params expected a call of (H,W)
            mask = self.factor.draw_samples((height, width), random_state=rng)
            mask = np.tile(mask[..., np.newaxis], (1, 1, nb_channels))

        # mask has no elements if height or width is 0
        if mask.size > 0:
            assert 0 <= mask.item(0) <= 1.0, (
                "Expected 'factor' samples to be in the interval "
                "[0.0, 1.0]. Got min %.4f and max %.4f." % (
                    np.min(mask), np.max(mask),))

        return mask

    def _augment_heatmaps(self, heatmaps, random_state, parents, hooks):
        return self._augment_hms_and_segmaps(
            heatmaps, random_state, parents, hooks,
            "augment_heatmaps", "arr_0to1")

    def _augment_segmentation_maps(self, segmaps, random_state, parents,
                                   hooks):
        return self._augment_hms_and_segmaps(
            segmaps, random_state, parents, hooks,
            "augment_segmentation_maps", "arr")

    def _augment_hms_and_segmaps(self, augmentables, random_state, parents,
                                 hooks, augfunc_name, arr_attr_name):
        """Mix the arrays of both branches cell-wise using a binarized alpha
        mask.

        `arr_attr_name` is the name of the attribute that holds the array of
        each augmentable.
        """
        outputs_first, outputs_second = self._generate_branch_outputs(
            augmentables, augfunc_name, hooks, parents)

        result = augmentables
        nb_images = len(augmentables)
        rngs = random_state.duplicate(nb_images+1)
        per_channel = self.per_channel.draw_samples(nb_images,
                                                    random_state=rngs[-1])

        gen = enumerate(zip(augmentables, outputs_first, outputs_second))
        for i, (augmentable_i, outputs_first_i, outputs_second_i) in gen:
            arr_i = getattr(augmentable_i, arr_attr_name)
            arr_first_i = getattr(outputs_first_i, arr_attr_name)
            arr_second_i = getattr(outputs_second_i, arr_attr_name)

            h_img, w_img = augmentable_i.shape[0:2]
            h_arr, w_arr = arr_i.shape[0:2]
            nb_channels_img = (
                augmentable_i.shape[2]
                if len(augmentable_i.shape) >= 3
                else 1)

            per_channel_i = per_channel[i]

            mask_image = self._sample_mask(h_img, w_img, nb_channels_img,
                                           per_channel_i, rngs[i])

            # Average over channels, resize to heatmap/segmap array size
            # (+clip for cubic interpolation). We can use non-NN interpolation
            # for segmaps here as this is just the mask and not the segmap
            # array.
            mask_3d = np.atleast_3d(mask_image)
            mask_avg = (
                np.average(mask_3d, axis=2) if mask_3d.shape[2] > 0 else 1.0)
            mask_arr = iadt.clip_(
                ia.imresize_single_image(mask_avg, (h_arr, w_arr)),
                0, 1.0)

            mask_arr_binarized = (mask_arr >= 0.5)
            arr_i_aug = blend_alpha(
                arr_first_i, arr_second_i, mask_arr_binarized,
                eps=self.epsilon)

            setattr(result[i], arr_attr_name, arr_i_aug)

        return result

    def _augment_keypoints(self, keypoints_on_images, random_state, parents,
                           hooks):
        def _get_coords_func(augmentable):
            return [(kp.x, kp.y) for kp in augmentable.keypoints]

        def _assign_coords_func(augmentable, coords):
            kps = [ia.Keypoint(x=x, y=y) for x, y in coords]
            return augmentable.deepcopy(keypoints=kps)

        return self._augment_coordinate_based(
            keypoints_on_images, random_state, parents, hooks,
            "augment_keypoints", _get_coords_func, _assign_coords_func)

    def _augment_polygons(self, polygons_on_images, random_state, parents,
                          hooks):
        # This currently uses an either-or approach.
        # Using pointwise augmentation is problematic here, because the order
        # of the points may have changed (e.g. from clockwise to
        # counter-clockwise). For polygons, it is also overall more likely
        # that some child-augmenter added/deleted points and we would need a
        # polygon recoverer.
        # Overall it seems to be the better approach to use all polygons
        # from one branch or the other, which guarantees their validity.
        # TODO decide the either-or not based on the whole average mask
        #      value but on the average mask value within the polygon's area?
        with _switch_keypoint_mode_temporarily(self, self._polygons_mode):
            return self._augment_polygons_as_keypoints(
                polygons_on_images, random_state, parents, hooks)

    def _augment_coordinate_based(self, inputs, random_state, parents, hooks,
                                  augfunc_name, get_coords_func,
                                  assign_coords_func):
        """Mix the coordinates of both branches according to
        ``self._keypoints_mode``.

        `get_coords_func` extracts a list of ``(x, y)`` coordinates from an
        augmentable and `assign_coords_func` builds a new augmentable from
        an augmentable and a list of coordinates.
        """
        # The message must read the attribute that is actually checked
        # (`_keypoints_mode`). A misspelled attribute name would turn a
        # failing check into an AttributeError instead of an AssertionError.
        assert self._keypoints_mode in self._MODES, (
            "Expected _keypoints_mode to be one of: %s. Got: %s." % (
                self._MODES, self._keypoints_mode))

        outputs_first, outputs_second = self._generate_branch_outputs(
            inputs, augfunc_name, hooks, parents)

        result = inputs
        nb_images = len(inputs)
        rngs = random_state.duplicate(nb_images+1)
        per_channel = self.per_channel.draw_samples(nb_images,
                                                    random_state=rngs[-1])

        gen = enumerate(zip(inputs, outputs_first, outputs_second))
        for i, (augmentable_i, outputs_first_i, outputs_second_i) in gen:
            coords = get_coords_func(augmentable_i)
            coords_first = get_coords_func(outputs_first_i)
            coords_second = get_coords_func(outputs_second_i)

            h_img, w_img = augmentable_i.shape[0:2]
            nb_channels_img = (
                augmentable_i.shape[2]
                if len(augmentable_i.shape) >= 3
                else 1)

            per_channel_i = per_channel[i]

            mask_image = self._sample_mask(h_img, w_img, nb_channels_img,
                                           per_channel_i, rngs[i])

            if self._keypoints_mode == self._MODE_POINTWISE:
                # Augment pointwise, i.e. check for each point and its
                # xy-location the average mask value and pick based on that
                # either the point from the first or second branch.
                assert len(coords_first) == len(coords_second), (
                    "Got different numbers of coordinates before/after "
                    "augmentation in AlphaElementwise. The number of "
                    "coordinates is currently not allowed to change for this "
                    "augmenter. Input contained %d coordinates, first branch "
                    "%d, second branch %d." % (
                        len(coords), len(coords_first), len(coords_second)))

                coords_aug = []
                subgen = zip(coords, coords_first, coords_second)
                for coord, coord_first, coord_second in subgen:
                    x_int = int(np.round(coord[0]))
                    y_int = int(np.round(coord[1]))
                    if 0 <= y_int < h_img and 0 <= x_int < w_img:
                        alphas_i = mask_image[y_int, x_int, :]
                        alpha = (
                            np.average(alphas_i) if alphas_i.size > 0 else 1.0)
                        if alpha > 0.5:
                            coords_aug.append(coord_first)
                        else:
                            coords_aug.append(coord_second)
                    else:
                        # NOTE(review): points outside of the image plane
                        # keep their *rounded input* coordinate, i.e. the
                        # result of neither branch is used for them.
                        coords_aug.append((x_int, y_int))
            else:
                # Augment with an either-or approach over all points, i.e.
                # based on the average of the whole mask, either all points
                # from the first or all points from the second branch are
                # used.
                # Note that we ensured above that _keypoints_mode must be
                # _MODE_EITHER_OR if it wasn't _MODE_POINTWISE.
                mask_image_avg = (
                    np.average(mask_image) if mask_image.size > 0 else 1.0)
                if mask_image_avg > 0.5:
                    coords_aug = coords_first
                else:
                    coords_aug = coords_second

            result[i] = assign_coords_func(result[i], coords_aug)

        return result
# Context manager used by AlphaElementwise to swap the keypoint augmentation
# mode of an augmenter for the duration of a ``with`` block.
class _switch_keypoint_mode_temporarily(object):
    """Temporarily replace ``augmenter._keypoints_mode`` by `mode`.

    On entering the ``with`` block the augmenter's current mode is stored
    in ``mode_orig`` and `mode` is set on the augmenter. On leaving the
    block -- also via an exception, which is not suppressed -- the stored
    mode is written back.
    """

    def __init__(self, augmenter, mode):
        # the previous mode is unknown until the block is entered
        self.mode_orig = None
        self.mode = mode
        self.augmenter = augmenter

    def __enter__(self):
        # pylint:disable=protected-access
        target = self.augmenter
        self.mode_orig = target._keypoints_mode
        target._keypoints_mode = self.mode

    def __exit__(self, exception_type, exception_value, traceback):
        # pylint:disable=protected-access
        previous_mode = self.mode_orig
        self.augmenter._keypoints_mode = previous_mode
class SimplexNoiseAlpha(AlphaElementwise):
    """Alpha-blend two image sources using simplex noise alpha masks.

    The alpha masks are drawn from simplex noise, which roughly yields
    connected blobs of ``1`` s surrounded by ``0`` s. With nearest
    neighbour upsampling these blobs can become rectangles with sharp
    edges.

    dtype support::

        See ``imgaug.augmenters.blend.AlphaElementwise``.

    Parameters
    ----------
    first : None or imgaug.augmenters.meta.Augmenter or iterable of imgaug.augmenters.meta.Augmenter, optional
        Augmenter(s) that make up the first of the two branches.

            * If ``None``, the input images are reused as the output of
              the first branch.
            * If ``Augmenter``, that augmenter is used as the branch.
            * If iterable of ``Augmenter``, the iterable is converted into
              a ``Sequential`` and used as the augmenter.

    second : None or imgaug.augmenters.meta.Augmenter or iterable of imgaug.augmenters.meta.Augmenter, optional
        Augmenter(s) that make up the second of the two branches.

            * If ``None``, the input images are reused as the output of
              the second branch.
            * If ``Augmenter``, that augmenter is used as the branch.
            * If iterable of ``Augmenter``, the iterable is converted into
              a ``Sequential`` and used as the augmenter.

    per_channel : bool or float, optional
        Whether to use the same factor for all channels (``False``) or to
        sample a new value for each channel (``True``). If this value is
        a float ``p``, then for ``p`` percent of all images `per_channel`
        is treated as ``True``, otherwise as ``False``.

    size_px_max : int or tuple of int or list of int or imgaug.parameters.StochasticParameter, optional
        The simplex noise is always generated in a low resolution
        environment. This parameter defines the maximum size of that
        environment (in pixels). The environment starts at the size of
        the input image and is then downscaled so that no side exceeds
        `size_px_max` (aspect ratio is kept).

            * If ``int``, that number is used as the size for all
              iterations.
            * If tuple of two ``int`` s ``(a, b)``, a value is sampled per
              iteration from the discrete interval ``[a..b]``.
            * If a list of ``int`` s, a value is picked per iteration at
              random from that list.
            * If a ``StochasticParameter``, a value is sampled from that
              parameter per iteration.

    upscale_method : None or imgaug.ALL or str or list of str or imgaug.parameters.StochasticParameter, optional
        Method used to upscale the low resolution noise maps to the input
        image size.

            * If ``None``, one of ``nearest``, ``linear`` or ``cubic`` is
              picked. Most weight is put on ``linear``, followed by
              ``cubic``.
            * If ``imgaug.ALL``, one of ``nearest``, ``linear``, ``area``
              or ``cubic`` is picked per iteration (all with the same
              probability).
            * If a string, that value is used as the method (must be
              ``nearest``, ``linear``, ``area`` or ``cubic``).
            * If list of string, a random value is picked from that list
              per iteration.
            * If ``StochasticParameter``, a random value is sampled from
              that parameter per iteration.

    iterations : int or tuple of int or list of int or imgaug.parameters.StochasticParameter, optional
        How often to repeat the simplex noise generation process per
        image. If this is exactly ``1``, no aggregation step is added and
        `aggregation_method` is not used.

            * If ``int``, that number is used as the iterations for all
              images.
            * If tuple of two ``int`` s ``(a, b)``, a value is sampled per
              image from the discrete interval ``[a..b]``.
            * If a list of ``int`` s, a value is picked per image at
              random from that list.
            * If a ``StochasticParameter``, a value is sampled from that
              parameter per image.

    aggregation_method : imgaug.ALL or str or list of str or imgaug.parameters.StochasticParameter, optional
        The noise maps (one per iteration) are combined into a single
        noise map. This parameter defines the method used for that.
        Valid methods are ``min``, ``max`` and ``avg``: ``min`` takes the
        elementwise minimum over the results of all iterations, ``max``
        the elementwise maximum and ``avg`` the elementwise average.

            * If ``imgaug.ALL``, a random value is picked per image from
              the valid ones.
            * If a string, that value is always used as the method.
            * If a list of string, a random value is picked from that
              list per image.
            * If a ``StochasticParameter``, a random value is sampled
              from that parameter per image.

    sigmoid : bool or number, optional
        Whether to apply a sigmoid function to the final noise maps,
        resulting in maps that have more extreme values (close to ``0.0``
        or ``1.0``).

            * If ``bool``, a sigmoid is always (``True``) or never
              (``False``) applied.
            * If a number ``p`` with ``0<=p<=1``, a sigmoid is applied to
              ``p`` percent of all final noise maps. Numbers below
              ``0.01`` are treated like ``False``.

    sigmoid_thresh : None or number or tuple of number or imgaug.parameters.StochasticParameter, optional
        Threshold of the sigmoid, when applied. Thresholds above zero
        (e.g. ``5.0``) move the saddle point towards the right, leading
        to more values close to ``0.0``.

            * If ``None``, ``Normal(0, 5.0)`` is used.
            * If number, that threshold is used for all images.
            * If tuple of two numbers ``(a, b)``, a random value is
              sampled per image from the interval ``[a, b]``.
            * If ``StochasticParameter``, a random value is sampled from
              that parameter per image.

    name : None or str, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    deterministic : bool, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    random_state : None or int or imgaug.random.RNG or numpy.random.Generator or numpy.random.bit_generator.BitGenerator or numpy.random.SeedSequence or numpy.random.RandomState, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    Examples
    --------
    >>> import imgaug.augmenters as iaa
    >>> import imgaug.parameters as iap
    >>> aug = iaa.SimplexNoiseAlpha(iaa.EdgeDetect(1.0))

    Detect per image all edges, mark them in a black and white image and
    then alpha-blend the result with the original image using simplex
    noise masks.

    >>> aug = iaa.SimplexNoiseAlpha(
    >>>     iaa.EdgeDetect(1.0),
    >>>     upscale_method="nearest")

    Same as in the previous example, but using only nearest neighbour
    upscaling to scale the simplex noise masks to the final image sizes,
    i.e. no linear upsampling is used. This leads to rectangles with
    sharp edges.

    >>> aug = iaa.SimplexNoiseAlpha(
    >>>     iaa.EdgeDetect(1.0),
    >>>     upscale_method="linear")

    Same as in the previous example, but using only linear upscaling to
    scale the simplex noise masks to the final image sizes, i.e. no
    nearest neighbour upsampling is used. This leads to rectangles with
    smooth edges.

    >>> aug = iaa.SimplexNoiseAlpha(
    >>>     iaa.EdgeDetect(1.0),
    >>>     sigmoid_thresh=iap.Normal(10.0, 5.0))

    Same as in the first example, but using a threshold for the sigmoid
    function that is further to the right. This is more conservative,
    i.e. the generated noise masks will be mostly black (values around
    ``0.0``), which means that most of the original images
    (parameter/branch `second`) will be kept, rather than using the
    results of the augmentation (parameter/branch `first`).

    """

    def __init__(self, first=None, second=None, per_channel=False,
                 size_px_max=(2, 16), upscale_method=None,
                 iterations=(1, 3), aggregation_method="max",
                 sigmoid=True, sigmoid_thresh=None,
                 name=None, deterministic=False, random_state=None):
        # Replace unset arguments by the documented default distributions.
        if upscale_method is None:
            upscale_method = iap.Choice(
                ["nearest", "linear", "cubic"], p=[0.05, 0.6, 0.35])
        if sigmoid_thresh is None:
            sigmoid_thresh = iap.Normal(0.0, 5.0)

        mask_param = iap.SimplexNoise(
            size_px_max=size_px_max, upscale_method=upscale_method)

        # A constant single iteration needs no aggregation wrapper.
        if iterations != 1:
            mask_param = iap.IterativeNoiseAggregator(
                mask_param,
                iterations=iterations,
                aggregation_method=aggregation_method)

        # The sigmoid is only wired in when it is always on (True) or has
        # a non-negligible activation probability.
        if sigmoid is True:
            apply_sigmoid = True
        else:
            apply_sigmoid = (
                ia.is_single_number(sigmoid) and sigmoid >= 0.01)

        if apply_sigmoid:
            mask_param = iap.Sigmoid.create_for_noise(
                mask_param, threshold=sigmoid_thresh, activated=sigmoid)

        super(SimplexNoiseAlpha, self).__init__(
            factor=mask_param,
            first=first,
            second=second,
            per_channel=per_channel,
            name=name,
            deterministic=deterministic,
            random_state=random_state)
class FrequencyNoiseAlpha(AlphaElementwise):
    """Alpha-blend two image sources using frequency noise masks.

    The alpha masks are sampled using frequency noise of varying scales,
    which can sometimes create large connected blobs of ``1`` s surrounded
    by ``0`` s and other times results in smaller patterns. If nearest
    neighbour upsampling is used, these blobs can be rectangular with sharp
    edges.

    dtype support::

        See ``imgaug.augmenters.blend.AlphaElementwise``.

    Parameters
    ----------
    exponent : number or tuple of number or list of number or imgaug.parameters.StochasticParameter, optional
        Exponent to use when scaling in the frequency domain.
        Sane values are in the range ``-4`` (large blobs) to ``4`` (small
        patterns). To generate cloud-like structures, use roughly ``-2``.

            * If number, then that number will be used as the exponent for
              all iterations.
            * If tuple of two numbers ``(a, b)``, then a value will be
              sampled per iteration from the interval ``[a, b]``.
            * If a list of numbers, then a value will be picked per
              iteration at random from that list.
            * If a ``StochasticParameter``, then a value will be sampled
              from that parameter per iteration.

    first : None or imgaug.augmenters.meta.Augmenter or iterable of imgaug.augmenters.meta.Augmenter, optional
        Augmenter(s) that make up the first of the two branches.

            * If ``None``, then the input images will be reused as the
              output of the first branch.
            * If ``Augmenter``, then that augmenter will be used as the
              branch.
            * If iterable of ``Augmenter``, then that iterable will be
              converted into a ``Sequential`` and used as the augmenter.

    second : None or imgaug.augmenters.meta.Augmenter or iterable of imgaug.augmenters.meta.Augmenter, optional
        Augmenter(s) that make up the second of the two branches.

            * If ``None``, then the input images will be reused as the
              output of the second branch.
            * If ``Augmenter``, then that augmenter will be used as the
              branch.
            * If iterable of ``Augmenter``, then that iterable will be
              converted into a ``Sequential`` and used as the augmenter.

    per_channel : bool or float, optional
        Whether to use the same factor for all channels (``False``)
        or to sample a new value for each channel (``True``).
        If this value is a float ``p``, then for ``p`` percent of all
        images `per_channel` will be treated as ``True``, otherwise as
        ``False``.

    size_px_max : int or tuple of int or list of int or imgaug.parameters.StochasticParameter, optional
        The noise is generated in a low resolution environment.
        This parameter defines the maximum size of that environment (in
        pixels). The environment is initialized at the same size as the
        input image and then downscaled, so that no side exceeds
        `size_px_max` (aspect ratio is kept).

            * If ``int``, then that number will be used as the size for
              all iterations.
            * If tuple of two ``int`` s ``(a, b)``, then a value will be
              sampled per iteration from the discrete interval ``[a..b]``.
            * If a list of ``int`` s, then a value will be picked per
              iteration at random from that list.
            * If a ``StochasticParameter``, then a value will be sampled
              from that parameter per iteration.

    upscale_method : None or imgaug.ALL or str or list of str or imgaug.parameters.StochasticParameter, optional
        After generating the noise maps in low resolution environments,
        they have to be upscaled to the input image size. This parameter
        controls the upscaling method.

            * If ``None``, then either ``nearest`` or ``linear`` or
              ``cubic`` is picked. Most weight is put on ``linear``,
              followed by ``cubic``.
            * If ``imgaug.ALL``, then either ``nearest`` or ``linear`` or
              ``area`` or ``cubic`` is picked per iteration (all same
              probability).
            * If string, then that value will be used as the method (must
              be ``nearest`` or ``linear`` or ``area`` or ``cubic``).
            * If list of string, then a random value will be picked from
              that list per iteration.
            * If ``StochasticParameter``, then a random value will be
              sampled from that parameter per iteration.

    iterations : int or tuple of int or list of int or imgaug.parameters.StochasticParameter, optional
        How often to repeat the frequency noise generation process per
        image. If this is exactly ``1``, no aggregation step is added and
        `aggregation_method` is not used.

            * If ``int``, then that number will be used as the iterations
              for all images.
            * If tuple of two ``int`` s ``(a, b)``, then a value will be
              sampled per image from the discrete interval ``[a..b]``.
            * If a list of ``int`` s, then a value will be picked per
              image at random from that list.
            * If a ``StochasticParameter``, then a value will be sampled
              from that parameter per image.

    aggregation_method : imgaug.ALL or str or list of str or imgaug.parameters.StochasticParameter, optional
        The noise maps (from each iteration) are combined to one noise map
        using an aggregation process. This parameter defines the method
        used for that process. Valid methods are ``min``, ``max`` or
        ``avg``, where ``min`` combines the noise maps by taking the
        (elementwise) minimum over all iteration's results, ``max`` the
        (elementwise) maximum and ``avg`` the (elementwise) average.
        A list passed here is copied, so later changes to the caller's
        list do not affect this augmenter.

            * If ``imgaug.ALL``, then a random value will be picked per
              image from the valid ones.
            * If a string, then that value will always be used as the
              method.
            * If a list of string, then a random value will be picked from
              that list per image.
            * If a ``StochasticParameter``, then a random value will be
              sampled from that parameter per image.

    sigmoid : bool or number, optional
        Whether to apply a sigmoid function to the final noise maps,
        resulting in maps that have more extreme values (close to ``0.0``
        or ``1.0``).

            * If ``bool``, then a sigmoid will always (``True``) or never
              (``False``) be applied.
            * If a number ``p`` with ``0<=p<=1``, then a sigmoid will be
              applied to ``p`` percent of all final noise maps. Numbers
              below ``0.01`` are treated like ``False``.

    sigmoid_thresh : None or number or tuple of number or imgaug.parameters.StochasticParameter, optional
        Threshold of the sigmoid, when applied. Thresholds above zero
        (e.g. ``5.0``) will move the saddle point towards the right,
        leading to more values close to ``0.0``.

            * If ``None``, then ``Normal(0, 5.0)`` will be used.
            * If number, then that threshold will be used for all images.
            * If tuple of two numbers ``(a, b)``, then a random value will
              be sampled per image from the range ``[a, b]``.
            * If ``StochasticParameter``, then a random value will be
              sampled from that parameter per image.

    name : None or str, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    deterministic : bool, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    random_state : None or int or imgaug.random.RNG or numpy.random.Generator or numpy.random.bit_generator.BitGenerator or numpy.random.SeedSequence or numpy.random.RandomState, optional
        See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

    Examples
    --------
    >>> import imgaug.augmenters as iaa
    >>> import imgaug.parameters as iap
    >>> aug = iaa.FrequencyNoiseAlpha(first=iaa.EdgeDetect(1.0))

    Detect per image all edges, mark them in a black and white image and
    then alpha-blend the result with the original image using frequency
    noise masks.

    >>> aug = iaa.FrequencyNoiseAlpha(
    >>>     first=iaa.EdgeDetect(1.0),
    >>>     upscale_method="nearest")

    Same as the first example, but using only nearest neighbour upscaling
    to scale the frequency noise masks to the final image sizes, i.e. no
    linear or cubic upsampling is used. This results in rectangles with
    sharp edges.

    >>> aug = iaa.FrequencyNoiseAlpha(
    >>>     first=iaa.EdgeDetect(1.0),
    >>>     upscale_method="linear")

    Same as the first example, but using only linear upscaling to
    scale the frequency noise masks to the final image sizes, i.e. no
    nearest neighbour upsampling is used. This results in smooth edges.

    >>> aug = iaa.FrequencyNoiseAlpha(
    >>>     first=iaa.EdgeDetect(1.0),
    >>>     upscale_method="linear",
    >>>     exponent=-2,
    >>>     sigmoid=False)

    Same as in the previous example, but with the exponent set to a
    constant ``-2`` and the sigmoid deactivated, resulting in cloud-like
    patterns without sharp edges.

    >>> aug = iaa.FrequencyNoiseAlpha(
    >>>     first=iaa.EdgeDetect(1.0),
    >>>     sigmoid_thresh=iap.Normal(10.0, 5.0))

    Same as the first example, but using a threshold for the sigmoid
    function that is further to the right. This is more conservative,
    i.e. the generated noise masks will be mostly black (values around
    ``0.0``), which means that most of the original images
    (parameter/branch `second`) will be kept, rather than using the
    results of the augmentation (parameter/branch `first`).

    """

    def __init__(self, exponent=(-4, 4), first=None, second=None,
                 per_channel=False, size_px_max=(4, 16), upscale_method=None,
                 iterations=(1, 3), aggregation_method=["avg", "max"],
                 sigmoid=0.5, sigmoid_thresh=None,
                 name=None, deterministic=False, random_state=None):
        # pylint: disable=dangerous-default-value
        # The default of `aggregation_method` is a single list object that
        # is shared by every call relying on the default. Pass a private
        # copy on, so that no object built below can alias (or mutate) the
        # shared default or the caller's own list.
        if isinstance(aggregation_method, list):
            aggregation_method = list(aggregation_method)

        upscale_method_default = iap.Choice(["nearest", "linear", "cubic"],
                                            p=[0.05, 0.6, 0.35])
        sigmoid_thresh_default = iap.Normal(0.0, 5.0)

        noise = iap.FrequencyNoise(
            exponent=exponent,
            size_px_max=size_px_max,
            upscale_method=(upscale_method
                            if upscale_method is not None
                            else upscale_method_default)
        )

        # A constant single iteration needs no aggregation wrapper.
        if iterations != 1:
            noise = iap.IterativeNoiseAggregator(
                noise,
                iterations=iterations,
                aggregation_method=aggregation_method
            )

        # The sigmoid is only wired in when it is always on (True) or has
        # a non-negligible activation probability.
        use_sigmoid = (
            sigmoid is True
            or (ia.is_single_number(sigmoid) and sigmoid >= 0.01))
        if use_sigmoid:
            noise = iap.Sigmoid.create_for_noise(
                noise,
                threshold=(sigmoid_thresh
                           if sigmoid_thresh is not None
                           else sigmoid_thresh_default),
                activated=sigmoid
            )

        super(FrequencyNoiseAlpha, self).__init__(
            factor=noise, first=first, second=second,
            per_channel=per_channel, name=name, deterministic=deterministic,
            random_state=random_state
        )