Transform YAML References
#
Available Pre-processing Transforms#
BinarizeLabelValuestype: object
category: Labels only
description:
Transforms the labels to have only 1 and 0 values. If image labels are a list (in a multi-label case) then the behaviour is to binarize with [] and [1]. For bboxes_labels, all of them will be converted to 0, since there's no "background" bbox.
properties:
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
Blurtype: object
category: Pixel level
description:
Blur the input image using a random-size kernel.
properties:
lower_limit
:- type: integer
- default: 3
- minimum: 3
- maximum: 100
upper_limit
:- type: integer
- default: 7
- minimum: 5
- maximum: 100
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
Casttype: object
category: Others
description:
Casts a given set of keys to the desired type.
properties:
image
:- type: string
- default: uint8
- enum: ['bool', 'uint8', 'uint16', 'int8', 'uint32', 'int16', 'int32', 'int64', 'float32', 'float64']
label
:- type: string
- default: int32
- enum: ['bool', 'uint8', 'uint16', 'int8', 'uint32', 'int16', 'int32', 'int64', 'float32', 'float64']
mask
:- type: string
- default: int32
- enum: ['bool', 'uint8', 'uint16', 'int8', 'uint32', 'int16', 'int32', 'int64', 'float32', 'float64']
bboxes
:- type: string
- default: int32
- enum: ['bool', 'uint8', 'uint16', 'int8', 'uint32', 'int16', 'int32', 'int64', 'float32', 'float64']
bboxes_labels
:- type: string
- default: int32
- enum: ['bool', 'uint8', 'uint16', 'int8', 'uint32', 'int16', 'int32', 'int64', 'float32', 'float64']
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
ClassnameToIndextype: object
category: Labels only
description:
Transform the string labels to indices by applying a mapping between class names and class indices.
properties:
label_map
:- type: object
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
Composetype: object
category: Compose
description:
Compose a sequence of transforms.
properties:
transforms
:- type: array
- items:
- type: object
bbox_params
:anyOf:
type: null
type: object
keypoint_params
:anyOf:
type: null
type: object
additional_targets
:anyOf:
type: null
type: object
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
ConvertColortype: object
category: Pixel level
description:
Converts an image to another color space.
properties:
color_mode
:- type: string
- default: None
- enum: ['RGB2GRAY', 'RGB2HSV', 'GRAY2RGB', 'RGB2LAB', 'RGB2BGR', 'RGBA2RGB']
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
Croptype: object
category: Spatial level
description:
Crop a rectangular region from image.
properties:
x_min
:- type: integer
- default: None
- minimum: 0
y_min
:- type: integer
- default: None
- minimum: 0
x_max
:- type: integer
- default: None
- minimum: 0
y_max
:- type: integer
- default: None
- minimum: 0
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
Downscaletype: object
category: Pixel level
description:
Decreases image quality by downscaling and upscaling back.
properties:
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
ExpandBoxestype: object
category: Labels only
description:
This transform changes the shape of the bounding boxes by a certain amount, trying to keep the object centered.
properties:
height_delta
:- type: integer
- default: None
- minimum: 0
width_delta
:- type: integer
- default: None
- minimum: 0
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
ExpandBoxesTotype: object
category: Labels only
description:
This transform changes the shape of the bounding boxes to a predefined width and height, trying to keep the object centered.
properties:
height
:- type: integer
- default: None
- minimum: 0
width
:- type: integer
- default: None
- minimum: 0
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
ExpandBoxesToFittype: object
category: Labels only
description:
Preprocessing transform that expands bounding boxes with extreme aspect ratios (h/w and w/h) or with a size smaller than a minimum size, so that it is easier to fit them with anchors in an object detection task.
properties:
min_size
:- type: integer
- default: None
- minimum: 0
max_aspect_ratio
:- type: number
- default: 6
- minimum: 0
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
GaussianBlurtype: object
category: Pixel level
description:
Blur the input image using a Gaussian filter with a random kernel size.
properties:
lower_limit
:- type: integer
- default: 3
- minimum: 3
- maximum: 100
upper_limit
:- type: integer
- default: 7
- minimum: 5
- maximum: 100
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
HorizontalFliptype: object
category: Spatial level
description:
Flips the input horizontally.
properties:
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
HueSaturationValuetype: object
category: Pixel level
description:
Randomly change hue, saturation and value of the input image.
properties:
hue_shift_lower_limit
:- type: integer
- default: -20
- minimum: -255
- maximum: 255
hue_shift_upper_limit
:- type: integer
- default: 20
- minimum: -255
- maximum: 255
sat_shift_lower_limit
:- type: integer
- default: -30
- minimum: -255
- maximum: 255
sat_shift_upper_limit
:- type: integer
- default: 30
- minimum: -255
- maximum: 255
val_shift_lower_limit
:- type: integer
- default: -20
- minimum: -255
- maximum: 255
val_shift_upper_limit
:- type: integer
- default: 20
- minimum: -255
- maximum: 255
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
IgnoreLabelValuestype: object
category: Labels only
description:
Transforms the labels to ignore some values. In the case of masks, the ignored values are turned to zero. For bboxes, they're removed. For image labels, if label is a list they're removed, if it's a single label it's turned to zero.
properties:
to_ignore
:- type: array
- items:
- type: integer
- minimum: 1
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
IndexToClassnametype: object
category: Labels only
description:
Transform the int labels to strings by applying a mapping between class indices and class names.
properties:
label_map
:- type: object
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
MotionBlurtype: object
category: Pixel level
description:
Apply motion blur to the input image using a random-size kernel.
properties:
lower_limit
:- type: integer
- default: 3
- minimum: 3
- maximum: 100
upper_limit
:- type: integer
- default: 7
- minimum: 5
- maximum: 100
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
Normalizetype: object
category: Pixel level
description:
Divide pixel values by 255, subtract mean per channel and divide by std per channel. Your images need to be in RGB format.
properties:
mean
:- type: array
- default: (0.485, 0.456, 0.406)
- items:
- type: number
- exclusiveMinimum: 0
- exclusiveMaximum: 1
std
:- type: array
- default: (0.229, 0.224, 0.225)
- items:
- type: number
- exclusiveMinimum: 0
- exclusiveMaximum: 1
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
NormalizeRangetype: object
category: Pixel level
description:
Normalizes the image so that its values go from the range `input_range` to the range `output_range`. For example, the `input_range` is usually [0, 255], and with this transform you can convert the image to the range [-1, 1].
properties:
output_range
:- type: array
- items:
- type: number
input_range
:- type: array
- default: (0, 255)
- items:
- type: number
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
OneHottype: object
category: Labels only
description:
Transform the labels to a one-hot representation.
properties:
depth
:- type: integer
- default: None
- minimum: 1
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
OneOftype: object
category: Compose
description:
Select one of the transforms to apply. Probabilities will be weights.
properties:
transforms
:- type: array
- items:
- type: object
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
RandAugmenttype: object
category: Pixel level
description:
This transform implements the data augmentation policy from this paper: https://arxiv.org/abs/1909.13719
In summary, there are 12 available augmentations: [Identity, Equalize, Rotate, Solarize, Posterize, RandomContrast, RandomBrightness, GaussianBlur, ShearX, ShearY, TranslateX, TranslateY]. Every time the transform is applied, a set of `number_transforms` augmentations is chosen from this list, e.g. [Posterize, TranslateY], and these are applied with magnitude `magnitude`.
Example
-------
>>> transform = tr.RandAugment(number_transforms=2, magnitude=13)
>>> transformed = transform(image=image, bboxes=bboxes, bboxes_labels=bboxes_labels, mask=mask)
properties:
number_transforms
:- type: integer
- default: 2
- minimum: 1
- maximum: 12
magnitude
:- type: number
- default: 4
- minimum: 0
- maximum: 10
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
RandomBrightnesstype: object
category: Pixel level
description:
Randomly change brightness of the input image.
properties:
lower_limit
:- type: number
- default: -0.2
- minimum: -1
- maximum: 1
upper_limit
:- type: number
- default: 0.2
- minimum: -1
- maximum: 1
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
RandomBrightnessContrasttype: object
category: Pixel level
description:
Randomly change brightness and contrast of the input image.
properties:
brightness_lower_limit
:- type: number
- default: -0.2
- minimum: -1
- maximum: 1
brightness_upper_limit
:- type: number
- default: 0.2
- minimum: -1
- maximum: 1
contrast_lower_limit
:- type: number
- default: -0.2
- minimum: -1
- maximum: 1
contrast_upper_limit
:- type: number
- default: 0.2
- minimum: -1
- maximum: 1
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
RandomContrasttype: object
category: Pixel level
description:
Randomly change contrast of the input image.
properties:
lower_limit
:- type: number
- default: -0.2
- minimum: -1
- maximum: 1
upper_limit
:- type: number
- default: 0.2
- minimum: -1
- maximum: 1
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
RandomCroptype: object
category: Spatial level
description:
Crop a random rectangle of the input.
properties:
height
:- type: integer
- default: None
- minimum: 0
width
:- type: integer
- default: None
- minimum: 0
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
RandomRotate90type: object
category: Spatial level
description:
Randomly rotate the input by 90 degrees zero or more times.
properties:
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
RandomScaletype: object
category: Spatial level
description:
Randomly resize the input. Output image size is different from the input size.
properties:
lower_limit
:- type: number
- default: 0.9
- minimum: 0
- maximum: 2
upper_limit
:- type: number
- default: 1.1
- minimum: 0
- maximum: 4
interpolation
:- type: string
- default: LINEAR
- enum: ['LINEAR', 'AREA', 'CUBIC', 'NEAREST', 'LANCZOS4']
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
RemapLabelValuestype: object
category: Labels only
description:
Transforms the values in the labels given a mapping between current values and desired values.
properties:
label_map
:- type: object
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
Resizetype: object
category: Spatial level
description:
Resize the input to the given height and width.
properties:
height
:- type: integer
- default: None
- minimum: 1
width
:- type: integer
- default: None
- minimum: 1
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
Rotatetype: object
category: Spatial level
description:
Rotate the input by an angle selected randomly from the uniform distribution.
properties:
lower_limit
:- type: integer
- default: -90
- minimum: -180
- maximum: 0
upper_limit
:- type: integer
- default: 90
- minimum: 0
- maximum: 180
interpolation
:- type: string
- default: LINEAR
- enum: ['LINEAR', 'AREA', 'CUBIC', 'NEAREST', 'LANCZOS4']
border_mode
:- type: string
- default: REFLECT_101
- enum: ['CONSTANT', 'REPLICATE', 'REFLECT', 'WRAP', 'REFLECT_101']
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
ShiftScaleRotatetype: object
category: Spatial level
description:
Randomly apply affine transforms: translate, scale and rotate the input.
properties:
shift_lower_limit
:- type: number
- default: -0.0625
- minimum: -1
- maximum: 0
shift_upper_limit
:- type: number
- default: 0.0625
- minimum: 0
- maximum: 1
scale_lower_limit
:- type: number
- default: -0.1
- maximum: 0
scale_upper_limit
:- type: number
- default: 0.1
- minimum: 0
rotate_lower_limit
:- type: integer
- default: -45
- minimum: -360
- maximum: 0
rotate_upper_limit
:- type: integer
- default: 45
- minimum: 0
- maximum: 360
interpolation
:- type: string
- default: LINEAR
- enum: ['LINEAR', 'AREA', 'CUBIC', 'NEAREST', 'LANCZOS4']
border_mode
:- type: string
- default: CONSTANT
- enum: ['CONSTANT', 'REPLICATE', 'REFLECT', 'WRAP', 'REFLECT_101']
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
VerticalFliptype: object
category: Spatial level
description:
Flips the input vertically.
properties:
p
:- type: number
- default: 0.5
- minimum: 0
- maximum: 1
#
Available Post-processing Transforms#
BinaryPixelThresholdRuletype: object
category: Postprocessing
description:
Converts a binary segmentation mask to an image-level OK/NG result based on the percentage of pixels greater than a probability threshold.
properties:
probability_threshold
:- type: number
- default: None
- minimum: 0
- maximum: 1
area_percent_threshold
:- type: number
- default: None
- minimum: 0
- maximum: 1
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
FilterBBoxesDimensionstype: object
category: Postprocessing
description:
Transform that filters boxes of a particular class depending on the dimensions.
properties:
defect_map
:- type: string
height_threshold
:- type: integer
- default: None
- minimum: 0
width_threshold
:- type: integer
- default: None
- minimum: 0
to_ignore
:- type: array
- items:
- type: string
filter_slim
:- type: boolean
- default: True
filter_short
:- type: boolean
- default: True
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
FilterMaskDimensionstype: object
category: Postprocessing
description:
Zeroes out mask values above 0 whose area is below a certain threshold. The threshold can be applied per category.
properties:
area_percent_threshold
:anyOf:
type: number
- default: 0.0
- minimum: 0
- maximum: 1
type: array
- minItems: 1
- default: [0.0]
- items:
- type: number
- minimum: 0
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
FilterMaskScoretype: object
category: Postprocessing
description:
Zeroes out mask_scores that are below a certain threshold. The threshold can be applied per category.
properties:
score_threshold
:anyOf:
type: number
- default: 0.5
- minimum: 0
- maximum: 1
type: array
- minItems: 1
- default: [0.5]
- items:
- type: number
- minimum: 0
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
ObjectDetectionToClassificationtype: object
category: Postprocessing
description:
Transforms an object detection output into a classification output by simply taking the bounding box with the highest score. If the key 'label' is passed as a keyword argument, this transform will ignore it.
properties:
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
SegmentationToClassificationtype: object
category: Postprocessing
description:
Transforms a multiclass segmentation output into a classification output.
The segmentation output can be given either as probabilities (using the 'mask_scores' keyword argument) or as indices (using the 'mask' keyword argument). If both 'mask' and 'mask_scores' are provided, 'mask' is used to compute this transform. If the key 'label' is passed as a keyword argument, this transform will ignore it.
Beware that this transform doesn't check that the pixels are connected before computing the predicted area.
properties:
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
WarpPerspectivetype: object
category: Postprocessing
description:
WarpPerspective performs a 4-point perspective transformation of a region in the input image. Reference: https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/
properties:
original_roi
:- type: array
- default: 0
- items:
- type: number
- minimum: 0
- maximum: 1
dest_roi
:- type: array
- default: 0
- items:
- type: number
- minimum: 0
- maximum: 1
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1
#
Custom Transform#
CustomTransformtype: object
category: Others
description:
Custom Transform takes any callable class.
properties:
transform
:- type: string
params
:- type: object
always_apply
:- type: boolean
- default: False
p
:- type: number
- default: 1.0
- minimum: 0
- maximum: 1