Skip to content
2 changes: 2 additions & 0 deletions dali/python/nvidia/dali/experimental/torchvision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .v2.normalize import Normalize
from .v2.pad import Pad
from .v2.rand_apply import RandomApply
from .v2.randomcrop import RandomCrop
from .v2.resize import Resize
from .v2.totensor import ToPureTensor, PILToTensor, ToPILImage

Expand All @@ -33,6 +34,7 @@
"Pad",
"PILToTensor",
"RandomApply",
"RandomCrop",
"RandomGrayscale",
"RandomHorizontalFlip",
"RandomVerticalFlip",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,21 @@

from .centercrop import center_crop
from .color import to_grayscale, rgb_to_grayscale
from .crop import crop
from .flips import horizontal_flip, vertical_flip
from .gaussian_blur import gaussian_blur
from .image_metadata import get_dimensions, get_image_size
from .normalize import normalize
from .pad import pad
from .resize import resize
from .totensor import pil_to_tensor, to_tensor, to_pil_image

__all__ = [
"center_crop",
"crop",
"gaussian_blur",
"get_dimensions",
"get_image_size",
"horizontal_flip",
"normalize",
"pad",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import operator

import nvidia.dali.experimental.dynamic as ndd
from nvidia.dali._typing import TensorLike
from nvidia.dali.experimental.dynamic._device import DeviceLike

from ..operator import adjust_input
from ..randomcrop import RandomCrop


def _validate_integer_param(value, name: str) -> int:
try:
return operator.index(value)
except TypeError as err:
raise TypeError(f"{name} must be an integer, got {type(value)}") from err


def _round_pil_box(top, left, height, width) -> tuple[int, int, int, int]:
try:
rounded_top = int(round(top))
rounded_left = int(round(left))
rounded_bottom = int(round(top + height))
rounded_right = int(round(left + width))
except TypeError as err:
raise TypeError("top, left, height, and width must be real numbers") from err

return (
rounded_top,
rounded_left,
rounded_bottom - rounded_top,
rounded_right - rounded_left,
)


def _is_pil_image_layout(inpt: TensorLike | ndd.Batch) -> bool:
return inpt.layout[-3:] == "HWC"


def _validate_crop_params(inpt, top, left, height, width) -> tuple[int, int, int, int]:
    """
    Normalize the crop box to integers according to the input layout.

    When ``_is_pil_image_layout(inpt)`` holds, the box is rounded with
    ``_round_pil_box``. Otherwise every value must already be integer-like
    and is passed through ``_validate_integer_param`` in the order
    top, left, height, width.

    Returns
    -------
    tuple[int, int, int, int]
        ``(top, left, height, width)``.

    Raises
    ------
    TypeError
        Propagated from the helper that rejects the offending value.
    """
    if not _is_pil_image_layout(inpt):
        named_values = (
            (top, "top"),
            (left, "left"),
            (height, "height"),
            (width, "width"),
        )
        return tuple(_validate_integer_param(value, label) for value, label in named_values)
    return _round_pil_box(top, left, height, width)


@adjust_input
def crop(
    inpt: TensorLike | ndd.Batch,
    top: int | float,
    left: int | float,
    height: int | float,
    width: int | float,
    device: DeviceLike = "cpu",
) -> ndd.Tensor | ndd.Batch:
    """
    Cut the box described by ``top``, ``left``, ``height`` and ``width`` out of ``inpt``.

    The box is first converted to integers by ``_validate_crop_params`` and the
    resulting size is checked with ``RandomCrop.verify_args``. The crop itself is
    a single ``ndd.slice`` call using absolute (non-normalized) coordinates, with
    ``out_of_bounds_policy="pad"`` and ``fill_values=0`` for regions that fall
    outside the input.

    Please refer to the ``RandomCrop`` operator for more details.

    Parameters
    ----------
    inpt : TensorLike or ndd.Batch
        Data to crop.
    top, left : int or float
        Position of the upper-left corner of the box.
    height, width : int or float
        Size of the box.
    device : DeviceLike, default "cpu"
        Forwarded unchanged to ``ndd.slice``.

    Returns
    -------
    ndd.Tensor or ndd.Batch
        The value returned by ``ndd.slice``.
    """
    box_top, box_left, box_height, box_width = _validate_crop_params(
        inpt, top, left, height, width
    )
    RandomCrop.verify_args(
        size=(box_height, box_width),
        padding=None,
        pad_if_needed=False,
        padding_mode="constant",
        fill=0,
    )

    # Anchor and extent are handed over horizontal-first: [left, top] / [width, height].
    anchor = [float(box_left), float(box_top)]
    extent = [float(box_width), float(box_height)]
    return ndd.slice(
        inpt,
        anchor,
        extent,
        normalized_anchor=False,
        normalized_shape=False,
        out_of_bounds_policy="pad",
        fill_values=0,
        device=device,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List

from PIL import Image
import torch


def get_image_size(inpt: Image.Image | torch.Tensor) -> List[int]:
    """
    Report the spatial extent of an image, width first.

    Counterpart of ``torchvision.transforms.v2.functional.get_image_size``.

    .. note::
        Kept for compatibility. Its torchvision successor, ``get_size``,
        uses the opposite order: ``[height, width]``.

    Parameters
    ----------
    inpt : PIL Image or torch.Tensor
        Image to inspect. For tensors the last two dimensions are read as
        ``[..., H, W]``; any leading channel / batch dimensions are ignored.

    Returns
    -------
    List[int]
        ``[width, height]``

    Raises
    ------
    TypeError
        If ``inpt`` is neither a PIL image nor a tensor, or if it is a tensor
        with fewer than two dimensions.
    """
    if isinstance(inpt, Image.Image):
        # PIL already stores its size as (W, H).
        return list(inpt.size)

    if not isinstance(inpt, torch.Tensor):
        raise TypeError(f"Unsupported input type: {type(inpt)}.")

    if inpt.ndim < 2:
        raise TypeError(
            f"get_image_size requires a tensor with at least 2 dimensions, got {inpt.ndim}."
        )

    height, width = inpt.shape[-2], inpt.shape[-1]
    return [width, height]


def get_dimensions(inpt: Image.Image | torch.Tensor) -> List[int]:
    """
    Return the number of channels, height, and width of an image as
    ``[channels, height, width]``.

    Mirrors ``torchvision.transforms.v2.functional.get_dimensions``.

    Parameters
    ----------
    inpt : PIL Image or torch.Tensor
        Input image. Tensors are expected in ``[H, W]`` or ``[..., C, H, W]`` layout
        (leading batch dimensions are ignored). A 2-D tensor is reported with a
        single channel.

    Returns
    -------
    List[int]
        ``[channels, height, width]``

    Raises
    ------
    TypeError
        If ``inpt`` is neither a PIL image nor a tensor, or if it is a tensor
        with fewer than two dimensions.
    """
    if isinstance(inpt, Image.Image):
        w, h = inpt.size  # PIL .size is (W, H)
        return [len(inpt.getbands()), h, w]
    elif isinstance(inpt, torch.Tensor):
        if inpt.ndim < 2:
            raise TypeError(
                f"get_dimensions requires a tensor with at least 2 dimensions, got {inpt.ndim}."
            )
        if inpt.ndim == 2:
            # No channel axis in a plain [H, W] tensor: report one channel.
            return [1, inpt.shape[-2], inpt.shape[-1]]
        return [inpt.shape[-3], inpt.shape[-2], inpt.shape[-1]]  # [C, H, W]
    raise TypeError(f"Unsupported input type: {type(inpt)}.")
Loading