NVIDIA · mdabek-nvidia · May 18, 2026 · May 26, 2026 · May 25, 2026 · Apr 8, 2026
diff --git a/dali/python/nvidia/dali/experimental/torchvision/__init__.py b/dali/python/nvidia/dali/experimental/torchvision/__init__.py
@@ -20,7 +20,7 @@
 from .v2.normalize import Normalize
 from .v2.pad import Pad
 from .v2.rand_apply import RandomApply
-from .v2.randomcrop import RandomCrop
+from .v2.randomcrop import RandomCrop, RandomResizedCrop
 from .v2.resize import Resize
 from .v2.totensor import ToPureTensor, PILToTensor, ToPILImage
 
@@ -37,6 +37,7 @@
     "RandomCrop",
     "RandomGrayscale",
     "RandomHorizontalFlip",
+    "RandomResizedCrop",
     "RandomVerticalFlip",
     "Resize",
     "ToPILImage",

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py
@@ -14,7 +14,7 @@
 
 from .centercrop import center_crop
 from .color import to_grayscale, rgb_to_grayscale
-from .crop import crop
+from .crop import crop, resized_crop
 from .flips import horizontal_flip, vertical_flip
 from .gaussian_blur import gaussian_blur
 from .image_metadata import get_dimensions, get_image_size, get_size
@@ -35,6 +35,7 @@
     "pad",
     "pil_to_tensor",
     "resize",
+    "resized_crop",
     "rgb_to_grayscale",
     "to_grayscale",
     "to_pil_image",

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/crop.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/crop.py
@@ -13,13 +13,22 @@
 # limitations under the License.
 
 import operator
+from typing import List
 
 import nvidia.dali.experimental.dynamic as ndd
+from torchvision.transforms import InterpolationMode
+
 from nvidia.dali._typing import TensorLike
 from nvidia.dali.experimental.dynamic._device import DeviceLike
 
 from ..operator import adjust_input
 from ..randomcrop import RandomCrop
+from ..resize import Resize
+
+
+def _verify_crop_coordinate(value, name: str) -> None:  # NOTE(review): unused in visible hunks
+    if not isinstance(value, int):  # NOTE(review): bool is an int subclass, so it passes
+        raise TypeError(f"{name} must be int, got {type(value)}")  # name labels the argument
 
 
 def _validate_integer_param(value, name: str) -> int:
@@ -61,6 +70,26 @@ def _validate_crop_params(inpt, top, left, height, width) -> tuple[int, int, int
     )
 
 
+def _crop(
+    inpt: ndd.Tensor | ndd.Batch,
+    top: int,
+    left: int,
+    height: int,
+    width: int,
+    device: DeviceLike = "cpu",
+) -> ndd.Tensor | ndd.Batch:
+    """
+    Slice a (height, width) window anchored at (top, left) out of ``inpt``.
+
+    The two sliced axes are [-3, -2] when ``_is_pil_image_layout(inpt)`` holds and [-2, -1]
+    otherwise. The slice uses the "pad" out-of-bounds policy with a fill value of 0.
+    """
+    spatial_axes = [-3, -2] if _is_pil_image_layout(inpt) else [-2, -1]
+    anchor, extent = (top, left), (height, width)
+    padding = dict(out_of_bounds_policy="pad", fill_values=0)
+    return ndd.slice(inpt, anchor, extent, axes=spatial_axes, device=device, **padding)
+
+
 @adjust_input
 def crop(
     inpt: TensorLike | ndd.Batch,
@@ -82,13 +111,48 @@ def crop(
         fill=0,
     )
 
-    return ndd.slice(
-        inpt,
-        [float(left), float(top)],
-        [float(width), float(height)],
-        normalized_anchor=False,
-        normalized_shape=False,
-        out_of_bounds_policy="pad",
-        fill_values=0,
+    return _crop(inpt, top, left, height, width, device=device)
+
+
+@adjust_input
+def resized_crop(
+    inpt: TensorLike | ndd.Batch,
+    top: int,
+    left: int,
+    height: int,
+    width: int,
+    size: int | List[int],
+    interpolation: InterpolationMode | int = InterpolationMode.BILINEAR,
+    antialias: bool = True,
+    device: DeviceLike = "cpu",
+) -> ndd.Tensor | ndd.Batch:
+    """
+    Crop ``inpt`` at (top, left) with extent (height, width), then resize the crop to ``size``.
+    ``interpolation`` may be an ``InterpolationMode`` or an int code; ``device`` runs both steps.
+    """
+    top, left, height, width = _validate_crop_params(inpt, top, left, height, width)
+    RandomCrop.verify_args(  # reuse RandomCrop's argument checks for the crop extent
+        size=(height, width),
+        padding=None,
+        pad_if_needed=False,
+        padding_mode="constant",
+        fill=0,
+    )
+    interpolation = Resize.normalize_interpolation(interpolation)
+    Resize.verify_args(size=size, max_size=None, interpolation=interpolation, antialias=antialias)
+    # Look up Resize's effective size and its table entry for the interpolation mode.
+    size_normalized = Resize.infer_effective_size(size)
+    interpolation = Resize.interpolation_modes[interpolation]
+    # The target size is derived from the requested (height, width), not from ``cropped``.
+    cropped = _crop(inpt, top, left, height, width, device=device)
+    target_h, target_w = Resize.calculate_target_size_dynamic_mode(
+        (height, width), size_normalized, None
+    )
+
+    return ndd.resize(
+        cropped,
         device=device,
+        size=(target_h, target_w),
+        interp_type=interpolation,
+        antialias=antialias,
     )
diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py b/dali/python/nvidia/dali/experimental/torchvision/v2/randomcrop.py
@@ -17,6 +17,7 @@
 
 import nvidia.dali as dali
 import nvidia.dali.fn as fn
+from torchvision.transforms import InterpolationMode
 
 from .centercrop import CenterCrop
 from .operator import (
@@ -27,6 +28,7 @@
     get_HWC_from_layout_pipeline,
 )
 from .pad import PADDING_CLASS, _ValidatePaddingMode
+from .resize import Resize
 
 
 class _ValidateCropSize(_ArgumentValidateRule):
@@ -86,6 +88,41 @@ def verify(cls, *, fill, **_) -> None:
         raise TypeError(f"fill must be a number, sequence of numbers, or None, got {fill!r}")
 
 
+class _ValidateRandomResizedCropScaleRatio(_ArgumentValidateRule):
+    """Check that ``scale`` and ``ratio`` are each a positive (min, max) pair of numbers."""
+
+    @classmethod
+    def _verify_range(cls, value, name: str) -> None:
+        """Raise TypeError/ValueError unless ``value`` is a valid two-element range."""
+        if not (isinstance(value, (list, tuple)) and len(value) == 2):
+            raise TypeError(f"{name} should be a sequence of two numbers")
+        if not all(isinstance(elem, numbers.Number) for elem in value):
+            raise TypeError(f"{name} values must be numbers, got {value}")
+        lower, upper = value
+        if lower <= 0 or upper <= 0:
+            raise ValueError(f"{name} values must be positive, got {value}")
+        if lower > upper:
+            raise ValueError(f"{name} should be a (min, max) range, got {value}")
+
+    @classmethod
+    def verify(cls, *, scale, ratio, **_) -> None:
+        for label, bounds in (("scale", scale), ("ratio", ratio)):
+            cls._verify_range(bounds, label)
+
+
+class _ValidateRandomResizedCropInterpolation(_ArgumentValidateRule):
+    """Check ``interpolation`` against the modes that ``Resize`` declares."""
+
+    @classmethod
+    def verify(cls, *, interpolation, **_) -> None:
+        """Raise for modes Resize lists as not supported or has no mapping for."""
+        rejected = Resize.not_supported_interpolation_modes
+        if interpolation in rejected:
+            raise NotImplementedError(f"Interpolation mode: {interpolation} is not supported")
+        if interpolation not in Resize.interpolation_modes:
+            raise ValueError(f"Interpolation {interpolation!r} is not supported")
+
+
 class RandomCrop(Operator):
     """
     Crop the input at a random location.
@@ -229,3 +266,85 @@ def _kernel(self, data_input):
             fn.stack(crop_w, crop_h),
             **slice_kwargs,
         )
+
+
+class RandomResizedCrop(Operator):
+    """
+    Crop a random portion of the input and resize it to a given size.
+
+    If the input is a ``torch.Tensor`` it can have an arbitrary number of leading batch dimensions.
+    For example, the image tensor can have [..., C, H, W] shape.
+
+    Parameters
+    ----------
+    size : sequence or int
+        Expected output size of the crop. If size is an int instead of sequence like (h, w),
+        a square output size (size, size) is made. If provided a sequence of length 1, it will be
+        interpreted as (size[0], size[0]).
+    scale : tuple of float, optional, default = (0.08, 1.0)
+        Lower and upper bounds for the random crop area, relative to the input image area.
+    ratio : tuple of float, optional, default = (3 / 4, 4 / 3)
+        Lower and upper bounds for the random crop aspect ratio, width / height.
+    interpolation : InterpolationMode or int, optional, default = InterpolationMode.BILINEAR
+        Interpolation mode to use for resizing. Legacy PIL integer codes (``0`` = NEAREST,
+        ``1`` = LANCZOS, ``2`` = BILINEAR, ``3`` = BICUBIC, ``4`` = BOX, ``5`` = HAMMING) are
+        translated to ``InterpolationMode``; the mode must be in ``Resize.interpolation_modes``.
+    antialias : bool or None, optional, default = True
+        Whether to apply antialiasing during resize; forwarded as-is to the DALI operator.
+    device : Literal["cpu", "gpu"], optional, default = "cpu"
+        Device to run the random resized crop on. Can be ``"cpu"`` or ``"gpu"``.
+    """
+
+    arg_rules = [  # NOTE(review): presumably applied by Operator to the __init__ kwargs
+        _ValidateSizeDescriptor,
+        _ValidateCropSize,
+        _ValidateRandomResizedCropScaleRatio,
+        _ValidateRandomResizedCropInterpolation,
+    ]
+    preprocess_data = get_HWC_from_layout_pipeline
+
+    @classmethod
+    def adjust_size(cls, size: int | Sequence[int]) -> Sequence[int]:
+        return CenterCrop.adjust_size(size)  # same size normalization as CenterCrop
+
+    def __init__(
+        self,
+        size: int | Sequence[int],
+        scale: tuple[float, float] = (0.08, 1.0),
+        ratio: tuple[float, float] = (3.0 / 4.0, 4.0 / 3.0),
+        interpolation: InterpolationMode | int = InterpolationMode.BILINEAR,
+        antialias: bool | None = True,
+        device: Literal["cpu", "gpu"] = "cpu",
+    ):
+        interpolation = Resize.normalize_interpolation(interpolation)  # int code -> enum
+        # ``antialias`` is not passed to the base constructor, only stored below.
+        super().__init__(
+            device=device,
+            size=size,
+            scale=scale,
+            ratio=ratio,
+            interpolation=interpolation,
+        )
+
+        self.size = RandomResizedCrop.adjust_size(size)
+        self.scale = tuple(scale)
+        self.ratio = tuple(ratio)
+        self.interpolation = Resize.interpolation_modes[interpolation]  # KeyError if unmapped
+        self.antialias = antialias
+
+    def _kernel(self, data_input):
+        """
+        Run ``fn.random_resized_crop`` on the tensor item of ``data_input``.
+        """
+        _, _, _, tensor = data_input  # four-item input; only the last item is used
+
+        return fn.random_resized_crop(
+            tensor,
+            device=self.device,
+            size=self.size,
+            random_area=self.scale,
+            random_aspect_ratio=self.ratio,
+            interp_type=self.interpolation,
+            antialias=self.antialias,
+            num_attempts=10,
+        )
diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/resize.py b/dali/python/nvidia/dali/experimental/torchvision/v2/resize.py
@@ -44,6 +44,16 @@ def verify(cls, *, size, max_size, interpolation, **_):
                  edge, i.e. size should be an int"
             )
 
+        if isinstance(size, int) and size <= 0:
+            raise ValueError(f"size must be positive, got {size}")
+        if isinstance(size, (tuple, list)):
+            if len(size) not in (1, 2):
+                raise ValueError(f"size sequence must have length 1 or 2, got {len(size)}")
+            if any(not isinstance(s, int) for s in size):
+                raise ValueError(f"size values must be integers, got {size}")
+            if any(s <= 0 for s in size):
+                raise ValueError(f"size values must be positive, got {size}")
+
         if interpolation in Resize.not_supported_interpolation_modes:
             raise NotImplementedError(f"Interpolation mode: {interpolation} is not supported")
 
@@ -98,9 +108,32 @@ class Resize(Operator):
         InterpolationMode.HAMMING,
     ]
 
+    # Legacy PIL integer codes accepted by torchvision for back-compat
+    # (mirrors torchvision.transforms.functional._interpolation_modes_from_int).
+    int_to_interpolation_mode = {
+        0: InterpolationMode.NEAREST,
+        1: InterpolationMode.LANCZOS,
+        2: InterpolationMode.BILINEAR,
+        3: InterpolationMode.BICUBIC,
+        4: InterpolationMode.BOX,
+        5: InterpolationMode.HAMMING,
+    }
+
     arg_rules = [_ValidateSize]
     preprocess_data = get_HWC_from_layout_pipeline
 
+    @classmethod
+    def normalize_interpolation(cls, interpolation):
+        """Map a legacy PIL int code to ``InterpolationMode``; return other values unchanged."""
+        if not isinstance(interpolation, int) or isinstance(interpolation, InterpolationMode):
+            return interpolation
+        if interpolation in cls.int_to_interpolation_mode:
+            return cls.int_to_interpolation_mode[interpolation]
+        raise ValueError(  # unknown code; raised directly, without a chained KeyError
+            f"Interpolation int {interpolation} is not a valid PIL code; "
+            f"expected one of {sorted(cls.int_to_interpolation_mode)}"
+        )
+
     @classmethod
     def infer_effective_size(
         cls,
@@ -228,6 +261,7 @@ def __init__(
         antialias: Optional[bool] = True,
         device: Literal["cpu", "gpu"] = "cpu",
     ):
+        interpolation = Resize.normalize_interpolation(interpolation)
 
         super().__init__(
             device=device,

diff --git a/dali/test/python/torchvision/test_tv_crop.py b/dali/test/python/torchvision/test_tv_crop.py
@@ -130,9 +130,9 @@ def test_crop_preserves_tensor_dtype(dtype):
     dict(top=0, left=0, height=1, width=1.0),
 )
 def test_crop_tensor_rejects_float_parameters(crop_kwargs):
-    with assert_raises(TypeError):
+    with assert_raises(TypeError, glob="*integer*"):
         _ = tv_fn.crop(make_test_tensor(), **crop_kwargs)
-    with assert_raises(TypeError):
+    with assert_raises(TypeError, glob="*integer*"):
         _ = crop(make_test_tensor(), **crop_kwargs)
 
 
@@ -144,16 +144,16 @@ def test_crop_tensor_rejects_float_parameters(crop_kwargs):
 )
 def test_crop_pil_rejects_non_numeric_parameters(crop_kwargs):
     pil_image = _make_pil_image("RGB")
-    with assert_raises(TypeError):
+    with assert_raises(TypeError, glob="*str*"):
         _ = tv_fn.crop(pil_image, **crop_kwargs)
-    with assert_raises(TypeError):
+    with assert_raises(TypeError, glob="*real numbers*"):
         _ = crop(pil_image, **crop_kwargs)
 
 
 def test_crop_invalid_input_type():
-    with assert_raises(TypeError):
+    with assert_raises(TypeError, glob="*support*"):
         _ = tv_fn.crop([1, 2, 3], top=0, left=0, height=1, width=1)
-    with assert_raises(TypeError):
+    with assert_raises(TypeError, glob="*support*"):
         _ = crop([1, 2, 3], top=0, left=0, height=1, width=1)
 
 
@@ -166,7 +166,7 @@ def test_crop_invalid_input_type():
     (1, 1.0),
 )
 def test_crop_invalid_output_size(height, width):
-    with assert_raises((TypeError, ValueError)):
+    with assert_raises((TypeError, ValueError), glob="*must be*"):
         _ = crop(make_test_tensor(), top=0, left=0, height=height, width=width)
 
 
@@ -177,7 +177,7 @@ def test_crop_invalid_output_size(height, width):
     (0, "0"),
 )
 def test_crop_invalid_coordinates(top, left):
-    with assert_raises(TypeError):
+    with assert_raises(TypeError, glob="*int*"):
         _ = tv_fn.crop(make_test_tensor(), top=top, left=left, height=1, width=1)
-    with assert_raises(TypeError):
+    with assert_raises(TypeError, glob="*int*"):
         _ = crop(make_test_tensor(), top=top, left=left, height=1, width=1)