NVIDIA · mdabek-nvidia · Apr 8, 2026 · May 8, 2026 · May 13, 2026 · May 13, 2026
diff --git a/dali/python/nvidia/dali/experimental/torchvision/__init__.py b/dali/python/nvidia/dali/experimental/torchvision/__init__.py
@@ -20,6 +20,7 @@
 from .v2.normalize import Normalize
 from .v2.pad import Pad
 from .v2.rand_apply import RandomApply
+from .v2.randomcrop import RandomCrop
 from .v2.resize import Resize
 from .v2.totensor import ToPureTensor, PILToTensor, ToPILImage
 
@@ -33,6 +34,7 @@
     "Pad",
     "PILToTensor",
     "RandomApply",
+    "RandomCrop",
     "RandomGrayscale",
     "RandomHorizontalFlip",
     "RandomVerticalFlip",

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py
@@ -14,16 +14,21 @@
 
 from .centercrop import center_crop
 from .color import to_grayscale, rgb_to_grayscale
+from .crop import crop
 from .flips import horizontal_flip, vertical_flip
 from .gaussian_blur import gaussian_blur
+from .image_metadata import get_dimensions, get_image_size
 from .normalize import normalize
 from .pad import pad
 from .resize import resize
 from .totensor import pil_to_tensor, to_tensor, to_pil_image
 
 __all__ = [
     "center_crop",
+    "crop",
     "gaussian_blur",
+    "get_dimensions",
+    "get_image_size",
     "horizontal_flip",
     "normalize",
     "pad",

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/crop.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/crop.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import operator
+
+import nvidia.dali.experimental.dynamic as ndd
+from nvidia.dali._typing import TensorLike
+from nvidia.dali.experimental.dynamic._device import DeviceLike
+
+from ..operator import adjust_input
+from ..randomcrop import RandomCrop
+
+
+def _validate_integer_param(value, name: str) -> int:
+    """
+    Convert ``value`` to an integer through ``operator.index``.
+
+    Values for which ``operator.index`` raises ``TypeError`` are rejected with a
+    ``TypeError`` that names the offending parameter (``name``) and the type received.
+    """
+    try:
+        as_int = operator.index(value)
+    except TypeError as err:
+        message = f"{name} must be an integer, got {type(value)}"
+        raise TypeError(message) from err
+    return as_int
+
+
+def _round_pil_box(top, left, height, width) -> tuple[int, int, int, int]:
+    """
+    Round a crop box given as ``(top, left, height, width)`` to integers.
+
+    The four edges (``top``, ``left``, ``top + height``, ``left + width``) are each
+    rounded with the built-in ``round``. The returned height and width are the
+    differences between the rounded edges, so they can differ from ``round(height)``
+    and ``round(width)``.
+
+    Returns the integer tuple ``(top, left, height, width)``. A ``TypeError`` raised
+    while adding or rounding the inputs is re-raised with a uniform message.
+    """
+    try:
+        # Edge order matches the unrounded box: upper/left first, then lower/right.
+        y_start, x_start = int(round(top)), int(round(left))
+        y_end, x_end = int(round(top + height)), int(round(left + width))
+    except TypeError as err:
+        raise TypeError("top, left, height, and width must be real numbers") from err
+
+    return y_start, x_start, y_end - y_start, x_end - x_start
+
+
+def _is_pil_image_layout(inpt: TensorLike | ndd.Batch) -> bool:
+    """Return whether the last three entries of ``inpt.layout`` equal ``"HWC"``."""
+    trailing_axes = inpt.layout[-3:]
+    return trailing_axes == "HWC"
+
+
+def _validate_crop_params(inpt, top, left, height, width) -> tuple[int, int, int, int]:
+    """
+    Normalize the crop box to integers ``(top, left, height, width)``.
+
+    Inputs whose layout ends with ``"HWC"`` (see ``_is_pil_image_layout``) are handled
+    by ``_round_pil_box``, which rounds real-valued coordinates. Every other input must
+    already carry integer-like values; each one is checked with
+    ``_validate_integer_param`` in the order top, left, height, width.
+    """
+    if not _is_pil_image_layout(inpt):
+        named_values = (("top", top), ("left", left), ("height", height), ("width", width))
+        return tuple(_validate_integer_param(value, label) for label, value in named_values)
+    return _round_pil_box(top, left, height, width)
+
+
+@adjust_input
+def crop(
+    inpt: TensorLike | ndd.Batch,
+    top: int | float,
+    left: int | float,
+    height: int | float,
+    width: int | float,
+    device: DeviceLike = "cpu",
+) -> ndd.Tensor | ndd.Batch:
+    """
+    Crop ``inpt`` to the window given by ``top``, ``left``, ``height`` and ``width``.
+
+    Please refer to the ``RandomCrop`` operator for more details.
+
+    Parameters
+    ----------
+    inpt : TensorLike or ndd.Batch
+        Input to crop.
+    top, left : int or float
+        Anchor of the crop window.  They are passed to ``ndd.slice`` as the
+        absolute (non-normalized) anchor ``[left, top]``.
+    height, width : int or float
+        Size of the crop window.  They are passed to ``ndd.slice`` as the
+        absolute (non-normalized) shape ``[width, height]``.
+    device : DeviceLike
+        Forwarded to ``ndd.slice``; defaults to ``"cpu"``.
+
+    Notes
+    -----
+    The box is first normalized by ``_validate_crop_params``: real-valued boxes are
+    rounded for inputs whose layout ends with ``"HWC"``, while other inputs require
+    integer-like values.  The slice is requested with
+    ``out_of_bounds_policy="pad"`` and ``fill_values=0``.
+    """
+    top, left, height, width = _validate_crop_params(inpt, top, left, height, width)
+
+    # Run the RandomCrop argument checks on the requested size with padding turned off.
+    size_check = dict(
+        size=(height, width),
+        padding=None,
+        pad_if_needed=False,
+        padding_mode="constant",
+        fill=0,
+    )
+    RandomCrop.verify_args(**size_check)
+
+    # ndd.slice receives the anchor as (left, top) and the shape as (width, height).
+    anchor = [float(left), float(top)]
+    extent = [float(width), float(height)]
+    return ndd.slice(
+        inpt,
+        anchor,
+        extent,
+        normalized_anchor=False,
+        normalized_shape=False,
+        out_of_bounds_policy="pad",
+        fill_values=0,
+        device=device,
+    )
diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+from PIL import Image
+import torch
+
+
+def get_image_size(inpt: Image.Image | torch.Tensor) -> List[int]:
+    """
+    Report the spatial extent of an image as ``[width, height]``.
+
+    Mirrors ``torchvision.transforms.v2.functional.get_image_size``.
+
+    .. note::
+        Kept for compatibility.  The torchvision successor ``get_size`` uses the
+        opposite order, ``[height, width]``.
+
+    Parameters
+    ----------
+    inpt : PIL Image or torch.Tensor
+        Input image.  For tensors the last two dimensions are read as
+        ``[…, H, W]``; any leading channel / batch dimensions are ignored.
+
+    Returns
+    -------
+    List[int]
+        ``[width, height]``
+
+    Raises
+    ------
+    TypeError
+        If ``inpt`` is neither a PIL image nor a tensor, or if a tensor has fewer
+        than 2 dimensions.
+    """
+    if isinstance(inpt, Image.Image):
+        # PIL already reports its size as (W, H).
+        return [*inpt.size]
+
+    if not isinstance(inpt, torch.Tensor):
+        raise TypeError(f"Unsupported input type: {type(inpt)}.")
+
+    if inpt.ndim < 2:
+        raise TypeError(
+            f"get_image_size requires a tensor with at least 2 dimensions, got {inpt.ndim}."
+        )
+
+    dims = inpt.shape
+    return [dims[-1], dims[-2]]  # [W, H]
+
+
+def get_dimensions(inpt: Image.Image | torch.Tensor) -> List[int]:
+    """
+    Report an image's channel count and spatial extent as
+    ``[channels, height, width]``.
+
+    Mirrors ``torchvision.transforms.v2.functional.get_dimensions``.
+
+    Parameters
+    ----------
+    inpt : PIL Image or torch.Tensor
+        Input image.  Tensors are read as ``[H, W]`` (reported with one channel)
+        or ``[…, C, H, W]``; any leading batch dimensions are ignored.
+
+    Returns
+    -------
+    List[int]
+        ``[channels, height, width]``
+
+    Raises
+    ------
+    TypeError
+        If ``inpt`` is neither a PIL image nor a tensor, or if a tensor has fewer
+        than 2 dimensions.
+    """
+    if isinstance(inpt, Image.Image):
+        # PIL reports size as (W, H); the channel count is the number of bands.
+        width, height = inpt.size
+        channels = len(inpt.getbands())
+        return [channels, height, width]
+
+    if not isinstance(inpt, torch.Tensor):
+        raise TypeError(f"Unsupported input type: {type(inpt)}.")
+
+    if inpt.ndim < 2:
+        raise TypeError(
+            f"get_dimensions requires a tensor with at least 2 dimensions, got {inpt.ndim}."
+        )
+
+    # A 2-D tensor has no channel axis and is reported as single-channel.
+    channels = 1 if inpt.ndim == 2 else inpt.shape[-3]
+    return [channels, inpt.shape[-2], inpt.shape[-1]]  # [C, H, W]