Skip to content
3 changes: 2 additions & 1 deletion dali/python/nvidia/dali/experimental/torchvision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from .v2.normalize import Normalize
from .v2.pad import Pad
from .v2.rand_apply import RandomApply
from .v2.randomcrop import RandomCrop
from .v2.randomcrop import RandomCrop, RandomResizedCrop
from .v2.resize import Resize
from .v2.totensor import ToPureTensor, PILToTensor, ToPILImage

Expand All @@ -37,6 +37,7 @@
"RandomCrop",
"RandomGrayscale",
"RandomHorizontalFlip",
"RandomResizedCrop",
"RandomVerticalFlip",
"Resize",
"ToPILImage",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from .centercrop import center_crop
from .color import to_grayscale, rgb_to_grayscale
from .crop import crop
from .crop import crop, resized_crop
from .flips import horizontal_flip, vertical_flip
from .gaussian_blur import gaussian_blur
from .image_metadata import get_dimensions, get_image_size, get_size
Expand All @@ -35,6 +35,7 @@
"pad",
"pil_to_tensor",
"resize",
"resized_crop",
"rgb_to_grayscale",
"to_grayscale",
"to_pil_image",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,22 @@
# limitations under the License.

import operator
from typing import List

import nvidia.dali.experimental.dynamic as ndd
from torchvision.transforms import InterpolationMode

from nvidia.dali._typing import TensorLike
from nvidia.dali.experimental.dynamic._device import DeviceLike

from ..operator import adjust_input
from ..randomcrop import RandomCrop
from ..resize import Resize


def _verify_crop_coordinate(value, name: str) -> None:
if not isinstance(value, int):
raise TypeError(f"{name} must be int, got {type(value)}")


def _validate_integer_param(value, name: str) -> int:
Expand Down Expand Up @@ -61,6 +70,26 @@ def _validate_crop_params(inpt, top, left, height, width) -> tuple[int, int, int
)


def _crop(
    inpt: ndd.Tensor | ndd.Batch,
    top: int,
    left: int,
    height: int,
    width: int,
    device: DeviceLike = "cpu",
) -> ndd.Tensor | ndd.Batch:
    """
    Slice a ``height`` x ``width`` window anchored at ``(top, left)`` out of the input.

    The two sliced axes are ``[-3, -2]`` when ``_is_pil_image_layout(inpt)`` is true and
    ``[-2, -1]`` otherwise. The slice is requested with ``out_of_bounds_policy="pad"`` and
    ``fill_values=0``.

    Parameters
    ----------
    inpt : ndd.Tensor or ndd.Batch
        Data to crop.
    top, left : int
        Anchor of the window along the first and second sliced axis, respectively.
    height, width : int
        Extent of the window along the first and second sliced axis, respectively.
    device : DeviceLike, optional, default = "cpu"
        Forwarded unchanged to ``ndd.slice``.

    Returns
    -------
    ndd.Tensor or ndd.Batch
        Result of ``ndd.slice``.
    """
    if _is_pil_image_layout(inpt):
        spatial_axes = [-3, -2]
    else:
        spatial_axes = [-2, -1]

    anchor = (top, left)
    extent = (height, width)
    return ndd.slice(
        inpt,
        anchor,
        extent,
        axes=spatial_axes,
        out_of_bounds_policy="pad",
        fill_values=0,
        device=device,
    )


@adjust_input
def crop(
inpt: TensorLike | ndd.Batch,
Expand All @@ -82,13 +111,48 @@ def crop(
fill=0,
)

return ndd.slice(
inpt,
[float(left), float(top)],
[float(width), float(height)],
normalized_anchor=False,
normalized_shape=False,
out_of_bounds_policy="pad",
fill_values=0,
return _crop(inpt, top, left, height, width, device=device)


@adjust_input
def resized_crop(
    inpt: TensorLike | ndd.Batch,
    top: int,
    left: int,
    height: int,
    width: int,
    size: int | List[int],
    interpolation: InterpolationMode | int = InterpolationMode.BILINEAR,
    antialias: bool = True,
    device: DeviceLike = "cpu",
) -> ndd.Tensor | ndd.Batch:
    """
    Crop the input at location (top, left) with dimensions (height, width),
    then resize the crop to the given size.

    Parameters
    ----------
    inpt : TensorLike or ndd.Batch
        Data to crop and resize.
    top, left : int
        Anchor of the crop window.
    height, width : int
        Extent of the crop window.
    size : int or list of int
        Requested output size; validated by ``Resize.verify_args`` and interpreted by
        ``Resize.infer_effective_size``.
    interpolation : InterpolationMode or int, optional, default = InterpolationMode.BILINEAR
        Interpolation used for the resize. Integer values are first converted by
        ``Resize.normalize_interpolation``.
    antialias : bool, optional, default = True
        Forwarded to ``ndd.resize``.
    device : DeviceLike, optional, default = "cpu"
        Forwarded to both the crop and the resize.

    Returns
    -------
    ndd.Tensor or ndd.Batch
        The cropped and resized data.
    """
    # Validate the crop window first, so crop errors are reported before resize errors.
    top, left, height, width = _validate_crop_params(inpt, top, left, height, width)
    crop_extent = (height, width)
    RandomCrop.verify_args(
        size=crop_extent,
        padding=None,
        pad_if_needed=False,
        padding_mode="constant",
        fill=0,
    )

    # Then validate the resize arguments.
    mode = Resize.normalize_interpolation(interpolation)
    Resize.verify_args(size=size, max_size=None, interpolation=mode, antialias=antialias)

    effective_size = Resize.infer_effective_size(size)
    interp_type = Resize.interpolation_modes[mode]

    patch = _crop(inpt, top, left, height, width, device=device)

    # The target size is derived from the crop window's dimensions, not from the input's.
    out_h, out_w = Resize.calculate_target_size_dynamic_mode(crop_extent, effective_size, None)

    return ndd.resize(
        patch,
        device=device,
        size=(out_h, out_w),
        interp_type=interp_type,
        antialias=antialias,
    )
119 changes: 119 additions & 0 deletions dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import nvidia.dali as dali
import nvidia.dali.fn as fn
from torchvision.transforms import InterpolationMode

from .centercrop import CenterCrop
from .operator import (
Expand All @@ -27,6 +28,7 @@
get_HWC_from_layout_pipeline,
)
from .pad import PADDING_CLASS, _ValidatePaddingMode
from .resize import Resize


class _ValidateCropSize(_ArgumentValidateRule):
Expand Down Expand Up @@ -86,6 +88,41 @@ def verify(cls, *, fill, **_) -> None:
raise TypeError(f"fill must be a number, sequence of numbers, or None, got {fill!r}")


class _ValidateRandomResizedCropScaleRatio(_ArgumentValidateRule):
    """
    Verify RandomResizedCrop scale and ratio arguments.

    Each argument must be a list or tuple of exactly two positive numbers ordered as
    (min, max). NaN entries are rejected.
    """

    @classmethod
    def _verify_range(cls, value, name: str) -> None:
        """
        Validate a single (min, max) range.

        Parameters
        ----------
        value : Any
            Candidate range.
        name : str
            Argument name used in error messages.

        Raises
        ------
        TypeError
            If ``value`` is not a list/tuple of length two, or an element is not a number.
        ValueError
            If an element is not strictly positive (including NaN) or if min > max.
        """
        if not isinstance(value, (list, tuple)) or len(value) != 2:
            raise TypeError(f"{name} should be a sequence of two numbers")
        if any(not isinstance(elem, numbers.Number) for elem in value):
            raise TypeError(f"{name} values must be numbers, got {value}")
        # Written as "not (elem > 0)" instead of "elem <= 0": every ordering comparison
        # against NaN is False, so the negated form is the one that rejects NaN.
        if any(not (elem > 0) for elem in value):
            raise ValueError(f"{name} values must be positive, got {value}")
        lower, upper = value
        # Same NaN-safe formulation for the ordering check.
        if not (lower <= upper):
            raise ValueError(f"{name} should be a (min, max) range, got {value}")

    @classmethod
    def verify(cls, *, scale, ratio, **_) -> None:
        """
        Validate the ``scale`` and ``ratio`` arguments; other keyword arguments are ignored.
        """
        cls._verify_range(scale, "scale")
        cls._verify_range(ratio, "ratio")


class _ValidateRandomResizedCropInterpolation(_ArgumentValidateRule):
    """
    Verify RandomResizedCrop interpolation argument.

    The value is checked against the mode tables defined on ``Resize``.
    """

    @classmethod
    def verify(cls, *, interpolation, **_) -> None:
        """
        Raise ``NotImplementedError`` for modes listed as not supported by ``Resize`` and
        ``ValueError`` for values missing from ``Resize.interpolation_modes``.
        """
        unsupported = Resize.not_supported_interpolation_modes
        if interpolation in unsupported:
            raise NotImplementedError(f"Interpolation mode: {interpolation} is not supported")

        if interpolation in Resize.interpolation_modes:
            return
        raise ValueError(f"Interpolation {interpolation!r} is not supported")

Comment thread
mdabek-nvidia marked this conversation as resolved.

class RandomCrop(Operator):
"""
Crop the input at a random location.
Expand Down Expand Up @@ -229,3 +266,85 @@ def _kernel(self, data_input):
fn.stack(crop_w, crop_h),
**slice_kwargs,
)


class RandomResizedCrop(Operator):
    """
    Crop a random portion of the input and resize it to a given size.

    If the input is a ``torch.Tensor`` it can have an arbitrary number of leading batch
    dimensions. For example, the image tensor can have [..., C, H, W] shape.

    Parameters
    ----------
    size : sequence or int
        Expected output size of the crop. If size is an int instead of sequence like (h, w),
        a square output size (size, size) is made. If provided a sequence of length 1, it will
        be interpreted as (size[0], size[0]).
    scale : tuple of float, optional, default = (0.08, 1.0)
        Lower and upper bounds for the random crop area, relative to the input image area.
    ratio : tuple of float, optional, default = (3 / 4, 4 / 3)
        Lower and upper bounds for the random crop aspect ratio, width / height.
    interpolation : InterpolationMode or int, optional, default = InterpolationMode.BILINEAR
        Interpolation mode to use for resizing. Legacy PIL integer codes
        (``0`` = NEAREST, ``1`` = LANCZOS, ``2`` = BILINEAR, ``3`` = BICUBIC,
        ``4`` = BOX, ``5`` = HAMMING) are accepted for torchvision compatibility.
    antialias : bool, optional, default = True
        Whether to apply antialiasing during resize.
    device : Literal["cpu", "gpu"], optional, default = "cpu"
        Device to use for the crop. Can be ``"cpu"`` or ``"gpu"``.
    """

    # Validation rules for the constructor arguments.
    arg_rules = [
        _ValidateSizeDescriptor,
        _ValidateCropSize,
        _ValidateRandomResizedCropScaleRatio,
        _ValidateRandomResizedCropInterpolation,
    ]
    preprocess_data = get_HWC_from_layout_pipeline

    @classmethod
    def adjust_size(cls, size: int | Sequence[int]) -> Sequence[int]:
        """Normalize ``size`` by delegating to ``CenterCrop.adjust_size``."""
        return CenterCrop.adjust_size(size)

    def __init__(
        self,
        size: int | Sequence[int],
        scale: tuple[float, float] = (0.08, 1.0),
        ratio: tuple[float, float] = (3.0 / 4.0, 4.0 / 3.0),
        interpolation: InterpolationMode | int = InterpolationMode.BILINEAR,
        antialias: bool | None = True,
        device: Literal["cpu", "gpu"] = "cpu",
    ):
        # Convert legacy integer codes before the base class sees the argument.
        mode = Resize.normalize_interpolation(interpolation)

        super().__init__(
            device=device,
            size=size,
            scale=scale,
            ratio=ratio,
            interpolation=mode,
        )

        self.size = RandomResizedCrop.adjust_size(size)
        self.scale = tuple(scale)
        self.ratio = tuple(ratio)
        # Store the value that ``Resize.interpolation_modes`` maps this mode to.
        self.interpolation = Resize.interpolation_modes[mode]
        self.antialias = antialias

    def _kernel(self, data_input):
        """
        Applies random resized crop to the input data.

        ``data_input`` is unpacked into four items; only the last one is used.
        """
        _, _, _, image = data_input

        crop_kwargs = dict(
            device=self.device,
            size=self.size,
            random_area=self.scale,
            random_aspect_ratio=self.ratio,
            interp_type=self.interpolation,
            antialias=self.antialias,
            num_attempts=10,
        )
        return fn.random_resized_crop(image, **crop_kwargs)
34 changes: 34 additions & 0 deletions dali/python/nvidia/dali/experimental/torchvision/v2/resize.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ def verify(cls, *, size, max_size, interpolation, **_):
edge, i.e. size should be an int"
)

if isinstance(size, int) and size <= 0:
raise ValueError(f"size must be positive, got {size}")
if isinstance(size, (tuple, list)):
if len(size) not in (1, 2):
raise ValueError(f"size sequence must have length 1 or 2, got {len(size)}")
if any(not isinstance(s, int) for s in size):
raise ValueError(f"size values must be integers, got {size}")
if any(s <= 0 for s in size):
raise ValueError(f"size values must be positive, got {size}")

if interpolation in Resize.not_supported_interpolation_modes:
raise NotImplementedError(f"Interpolation mode: {interpolation} is not supported")

Expand Down Expand Up @@ -98,9 +108,32 @@ class Resize(Operator):
InterpolationMode.HAMMING,
]

# Legacy PIL integer codes accepted by torchvision for back-compat
# (mirrors torchvision.transforms.functional._interpolation_modes_from_int).
# ``normalize_interpolation`` looks integer arguments up in this table to obtain the
# corresponding ``InterpolationMode`` member.
int_to_interpolation_mode = {
    0: InterpolationMode.NEAREST,
    1: InterpolationMode.LANCZOS,
    2: InterpolationMode.BILINEAR,
    3: InterpolationMode.BICUBIC,
    4: InterpolationMode.BOX,
    5: InterpolationMode.HAMMING,
}

arg_rules = [_ValidateSize]
preprocess_data = get_HWC_from_layout_pipeline

@classmethod
def normalize_interpolation(cls, interpolation):
    """
    Convert a legacy PIL integer interpolation code to an ``InterpolationMode``.

    Parameters
    ----------
    interpolation : InterpolationMode or int
        Interpolation argument as supplied by the caller.

    Returns
    -------
    The ``InterpolationMode`` mapped from the integer code via
    ``int_to_interpolation_mode``; any non-integer argument is returned unchanged.

    Raises
    ------
    ValueError
        If ``interpolation`` is an int that is not a key of ``int_to_interpolation_mode``.
    """
    if isinstance(interpolation, int) and not isinstance(interpolation, InterpolationMode):
        try:
            return cls.int_to_interpolation_mode[interpolation]
        except KeyError:
            # ``from None`` hides the internal KeyError, which is a lookup detail, so the
            # caller's traceback shows only the ValueError.
            raise ValueError(
                f"Interpolation int {interpolation} is not a valid PIL code; "
                f"expected one of {sorted(cls.int_to_interpolation_mode)}"
            ) from None
    return interpolation

@classmethod
def infer_effective_size(
cls,
Expand Down Expand Up @@ -228,6 +261,7 @@ def __init__(
antialias: Optional[bool] = True,
device: Literal["cpu", "gpu"] = "cpu",
):
interpolation = Resize.normalize_interpolation(interpolation)

super().__init__(
device=device,
Expand Down
18 changes: 9 additions & 9 deletions dali/test/python/torchvision/test_tv_crop.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@ def test_crop_preserves_tensor_dtype(dtype):
dict(top=0, left=0, height=1, width=1.0),
)
def test_crop_tensor_rejects_float_parameters(crop_kwargs):
with assert_raises(TypeError):
with assert_raises(TypeError, glob="*integer*"):
_ = tv_fn.crop(make_test_tensor(), **crop_kwargs)
with assert_raises(TypeError):
with assert_raises(TypeError, glob="*integer*"):
_ = crop(make_test_tensor(), **crop_kwargs)


Expand All @@ -144,16 +144,16 @@ def test_crop_tensor_rejects_float_parameters(crop_kwargs):
)
def test_crop_pil_rejects_non_numeric_parameters(crop_kwargs):
pil_image = _make_pil_image("RGB")
with assert_raises(TypeError):
with assert_raises(TypeError, glob="*str*"):
_ = tv_fn.crop(pil_image, **crop_kwargs)
with assert_raises(TypeError):
with assert_raises(TypeError, glob="*real numbers*"):
_ = crop(pil_image, **crop_kwargs)


def test_crop_invalid_input_type():
with assert_raises(TypeError):
with assert_raises(TypeError, glob="*support*"):
_ = tv_fn.crop([1, 2, 3], top=0, left=0, height=1, width=1)
with assert_raises(TypeError):
with assert_raises(TypeError, glob="*support*"):
_ = crop([1, 2, 3], top=0, left=0, height=1, width=1)


Expand All @@ -166,7 +166,7 @@ def test_crop_invalid_input_type():
(1, 1.0),
)
def test_crop_invalid_output_size(height, width):
with assert_raises((TypeError, ValueError)):
with assert_raises((TypeError, ValueError), glob="*must be*"):
_ = crop(make_test_tensor(), top=0, left=0, height=height, width=width)


Expand All @@ -177,7 +177,7 @@ def test_crop_invalid_output_size(height, width):
(0, "0"),
)
def test_crop_invalid_coordinates(top, left):
with assert_raises(TypeError):
with assert_raises(TypeError, glob="*int*"):
_ = tv_fn.crop(make_test_tensor(), top=top, left=left, height=1, width=1)
with assert_raises(TypeError):
with assert_raises(TypeError, glob="*int*"):
_ = crop(make_test_tensor(), top=top, left=left, height=1, width=1)
Loading