diff --git a/evals/hub/preprocessor.py b/evals/hub/preprocessor.py
index 42c28a77..2c2fc186 100644
--- a/evals/hub/preprocessor.py
+++ b/evals/hub/preprocessor.py
@@ -4,12 +4,12 @@
 # LICENSE file in the root directory of this source tree.
 
 
-def _make_transforms(crop_size=256):
+def _make_transforms(crop_size=256, preserve_border=False):
     from ..video_classification_frozen.utils import make_transforms
 
-    return make_transforms(crop_size=crop_size, training=False)
+    return make_transforms(crop_size=crop_size, training=False, preserve_border=preserve_border)
 
 
-def vjepa2_preprocessor(*, pretrained: bool = True, **kwargs):
+def vjepa2_preprocessor(*, pretrained: bool = True, preserve_border: bool = False, **kwargs):
     crop_size = kwargs.get("crop_size", 256)
-    return _make_transforms(crop_size=crop_size)
+    return _make_transforms(crop_size=crop_size, preserve_border=preserve_border)
diff --git a/evals/video_classification_frozen/utils.py b/evals/video_classification_frozen/utils.py
index 91b29a22..3b8969ae 100644
--- a/evals/video_classification_frozen/utils.py
+++ b/evals/video_classification_frozen/utils.py
@@ -23,6 +23,7 @@ def make_transforms(
     crop_size=224,
     num_views_per_clip=1,
     normalize=((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+    preserve_border=False,
 ):
 
     if not training and num_views_per_clip > 1:
@@ -44,6 +45,7 @@ def make_transforms(
         motion_shift=motion_shift,
         crop_size=crop_size,
         normalize=normalize,
+        preserve_border=preserve_border,
     )
 
     return _frames_augmentation
@@ -61,11 +63,12 @@ def __init__(
         motion_shift=False,
         crop_size=224,
         normalize=((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+        preserve_border=False,
     ):
 
         self.training = training
 
-        short_side_size = int(crop_size * 256 / 224)
+        short_side_size = int(crop_size * 256 / 224) if not preserve_border else crop_size
         self.eval_transform = video_transforms.Compose(
             [
                 video_transforms.Resize(short_side_size, interpolation="bilinear"),