diff --git a/smartcrop/__init__.py b/smartcrop/__init__.py index 56ba90d..7a6a2d9 100644 --- a/smartcrop/__init__.py +++ b/smartcrop/__init__.py @@ -1,3 +1,3 @@ -from .library import SmartCrop, saturation, thirds +from .library import SmartCrop, saturation -__all__ = ['SmartCrop', 'saturation', 'thirds'] +__all__ = ['SmartCrop', 'saturation'] diff --git a/smartcrop/library.py b/smartcrop/library.py index 2af8381..3c0ba4d 100644 --- a/smartcrop/library.py +++ b/smartcrop/library.py @@ -1,6 +1,5 @@ from __future__ import annotations from dataclasses import dataclass -from functools import lru_cache import math import sys @@ -26,14 +25,6 @@ def saturation(image) -> np.ndarray: return d / s # [0.0; 1.0] -@lru_cache(maxsize=4096) -def thirds(x) -> float: - """gets value in the range of [0, 1] where 0 is the center of the pictures - returns weight of rule of thirds [0, 1]""" - x = 8 * (x + 2 / 3) - 8 # 8*x-8/3 is even simpler, but with ~e-16 floating error - return max(1 - x * x, 0) - - # a quite odd workaround for using slots for python > 3.9 @dataclass(eq=False, **{"slots": True} if sys.version_info.minor > 9 else {}) class SmartCrop: # pylint:disable=too-many-instance-attributes @@ -75,13 +66,17 @@ def analyse( # pylint:disable=too-many-arguments,too-many-locals image = image.convert('RGB') analyse_image = self.prepare_features_image(image) - score_image = analyse_image.resize( + downsampled_features = analyse_image.resize( ( int(math.ceil(image.size[0] / self.score_down_sample)), int(math.ceil(image.size[1] / self.score_down_sample)) ), Image.Resampling.LANCZOS) + precomputed_features = self.precompute_features(downsampled_features) + features_sum = np.sum(precomputed_features, axis=(0, 1)) + prescore = features_sum * self.outside_importance + crops = self.crops( image, crop_width, @@ -89,10 +84,27 @@ def analyse( # pylint:disable=too-many-arguments,too-many-locals max_scale=max_scale, min_scale=min_scale, scale_step=scale_step, - step=step) + step=step + ) + + 
cached_importances = {} + inv_down_sample = 1 / self.score_down_sample for crop in crops: - crop['score'] = self.score(score_image, crop) + cx, cy, cw, ch = map( + lambda val: int(val * inv_down_sample), + [crop['x'], crop['y'], crop['width'], crop['height']] + ) + + if (cw, ch) not in cached_importances: + cached_importances[(cw, ch)] = self.get_importance( + width=cw, height=ch + ) + importance = cached_importances[(cw, ch)] + + crop['score'] = self.score( + precomputed_features, prescore, (cx, cy, cw, ch), importance + ) top_crop = max(crops, key=lambda c: c['score']['total']) @@ -185,39 +197,37 @@ def crops( # pylint:disable=too-many-arguments return crops def debug_crop(self, analyse_image, crop: dict, orig_size: tuple[int, int]) -> Image: - debug_image = analyse_image.copy() - debug_pixels = debug_image.getdata() - - ratio_horizontal = debug_image.size[0] / orig_size[0] - ratio_vertical = debug_image.size[1] / orig_size[1] - fake_crop = { - 'x': crop['x'] * ratio_horizontal, - 'y': crop['y'] * ratio_vertical, - 'width': crop['width'] * ratio_horizontal, - 'height': crop['height'] * ratio_vertical, - } - - for y in range(analyse_image.size[1]): # height - for x in range(analyse_image.size[0]): # width - index = y * analyse_image.size[0] + x - importance = self.importance(fake_crop, x, y) - redder, greener = (-64, 0) if importance < 0 else (0, 32) - debug_pixels.putpixel( - (x, y), - ( - debug_pixels[index][0] + int(importance * redder), - debug_pixels[index][1] + int(importance * greener), - debug_pixels[index][2] - )) - - # in case you want a whitish outline to mark the crop - # ImageDraw.Draw(debug_image).rectangle([fake_crop['x'], - # fake_crop['y'], - # fake_crop['x'] + fake_crop['width'], - # fake_crop['y'] + fake_crop['height']], - # outline=(175, 175, 175), width=2) - - return debug_image + """ + Creates a debug visualization showing how importance weights affect a + specific crop region. This function is intended to be used for internal + debugging. 
The original image dimensions `orig_size` are required to + correctly prescale the crop coordinates. + """ + ratio_horizontal = analyse_image.size[0] / orig_size[0] + ratio_vertical = analyse_image.size[1] / orig_size[1] + i_x, i_width, = map( + lambda n: int(n * ratio_horizontal), (crop['x'], crop['width']) + ) + i_y, i_height = map( + lambda n: int(n * ratio_vertical), (crop['y'], crop['height']) + ) + + features_data = np.array(analyse_image).astype(np.float32) + importance_map = self.get_importance(height=i_height, width=i_width) + + # window where the importance is applied + i_window = features_data[i_y : i_y + i_height, i_x : i_x + i_width] # noqa: E203 + + # place the outside importance + features_data += np.array([-64 * self.outside_importance, 0, 0]) + + # apply the importance on the window + mask = importance_map > 0 + i_window[~mask, 0] += -64 * importance_map[~mask] # redder + i_window[mask, 1] += 32 * importance_map[mask] # greener + features_data[i_y : i_y + i_height, i_x : i_x + i_width] = i_window # noqa: E203 + + return Image.fromarray(np.clip(features_data, 0, 255).astype(np.uint8)) def prepare_features_image(self, image: Image) -> Image: # luminance @@ -270,63 +280,86 @@ def detect_skin(self, cie_array: np.ndarray, source_image) -> Image: return Image.fromarray(skin_data.astype('uint8')) - def importance(self, crop: dict, x: int, y: int) -> float: - if ( - crop['x'] > x or x >= crop['x'] + crop['width'] or - crop['y'] > y or y >= crop['y'] + crop['height'] - ): - return self.outside_importance - - x = (x - crop['x']) / crop['width'] - y = (y - crop['y']) / crop['height'] - px, py = abs(0.5 - x) * 2, abs(0.5 - y) * 2 # pylint:disable=invalid-name - - # distance from edge - dx = max(px - 1 + self.edge_radius, 0) # pylint:disable=invalid-name - dy = max(py - 1 + self.edge_radius, 0) # pylint:disable=invalid-name - d = (dx * dx + dy * dy) * self.edge_weight # pylint:disable=invalid-name - s = 1.41 - math.sqrt(px * px + py * py) # 
pylint:disable=invalid-name + def get_importance(self, height, width) -> np.ndarray: + """ + Generate composite weighting map for a scoring crop. + """ + # the original importance has a scaling that does not include 1.0 + xx = np.linspace(0.0, 1.0, width, endpoint=False) + yy = np.linspace(0.0, 1.0, height, endpoint=False) + px = np.abs(0.5 - xx) * 2 + py = np.abs(0.5 - yy) * 2 + edge_threshold = 1.0 - self.edge_radius + dx = np.maximum(px - edge_threshold, 0.0) + dy = np.maximum(py - edge_threshold, 0.0) + d = (np.square(dy[:, np.newaxis]) + np.square(dx)) * self.edge_weight + # 1.41 is just an approximation of the square root of 2, no magic + s = 1.41 - np.sqrt(np.square(py[:, np.newaxis]) + np.square(px)) if self.rule_of_thirds: - # pylint:disable=invalid-name - s += (max(0, s + d + 0.5) * 1.2) * (thirds(px) + thirds(py)) + def thirds(t): + # that's a kind of parabola centered at 1/3 + t = 1.0 - 64.0 * np.square(t - 1.0 / 3) + return np.maximum(t, 0.0) + # 1.2 is pure magic from original js code + thirds_weight = (thirds(py)[:, np.newaxis] + thirds(px)) * 1.2 + intermediate = s + d + 0.5 + s += np.maximum(intermediate, 0.0) * thirds_weight return s + d - def score(self, target_image, crop: dict) -> dict: # pylint:disable=too-many-locals - score = { - 'detail': 0, - 'saturation': 0, - 'skin': 0, - 'total': 0, - } - target_data = target_image.getdata() - target_width, target_height = target_image.size - - down_sample = self.score_down_sample - inv_down_sample = 1 / down_sample - target_width_down_sample = target_width * down_sample - target_height_down_sample = target_height * down_sample - - for y in range(0, target_height_down_sample, down_sample): - for x in range(0, target_width_down_sample, down_sample): - index = int( - math.floor(y * inv_down_sample) * target_width + - math.floor(x * inv_down_sample) - ) - importance = self.importance(crop, x, y) - detail = target_data[index][1] / 255 - score['skin'] += ( - target_data[index][0] / 255 * (detail + self.skin_bias) * 
importance - ) - score['detail'] += detail * importance - score['saturation'] += ( - target_data[index][2] / 255 * (detail + self.saturation_bias) * importance - ) - score['total'] = ( - score['detail'] * self.detail_weight + - score['skin'] * self.skin_weight + - score['saturation'] * self.saturation_weight - ) / (crop['width'] * crop['height']) + def precompute_features(self, features_image: Image) -> np.ndarray: + """ + Apply scaling, biasing, and weighting transformations to image features. + """ + features = np.array(features_image) + + skin = features[..., 0] + detail = features[..., 1] + satur = features[..., 2] + + detail = detail / 255 + skin = skin / 255 * (detail + self.skin_bias) + satur = satur / 255 * (detail + self.saturation_bias) + + precomputed = np.stack( + [ + skin * self.skin_weight, + detail * self.detail_weight, + satur * self.saturation_weight + ], + axis=2) + + return precomputed + + def score( + self, + features_data: np.ndarray, + prescore: np.ndarray, + crop_dimensions: tuple[int, int, int, int], # (x, y, w, h) + importance: np.ndarray + ) -> dict: # pylint:disable=too-many-locals + """ + Calculate region scores for skin, detail, and saturation features. + Returns a dictionary with individual channel scores and total score. + """ + score = {} + inv_down_sample = 1 / self.score_down_sample + x, y, w, h = crop_dimensions + + scores = prescore + np.sum( + features_data[y: y + h, x: x + w] * + (importance - self.outside_importance)[..., np.newaxis], + axis=(0, 1) + ) + + # Last factor of squared inv_down_sample is not mandatory for finding + # max score, it's here to match the score magnitude of previous version. + # To be honest, that can lead to some inaccuracies, as it brings the + # values even closer to zero. Recommend to drop it later. 
+ total = np.sum(scores) / (w * h) * inv_down_sample * inv_down_sample + + score['skin'], score['detail'], score['saturation'] = scores + score['total'] = total return score diff --git a/tests/test_smartcrop.py b/tests/test_smartcrop.py index 3d235c6..bab5075 100644 --- a/tests/test_smartcrop.py +++ b/tests/test_smartcrop.py @@ -13,9 +13,9 @@ def load_image(name): @pytest.mark.parametrize('image, crop', [ ('business-work-1.jpg', (41, 0, 1193, 1152)), - ('nature-1.jpg', (705, 235, 3639, 3169)), + ('nature-1.jpg', (822, 235, 3756, 3169)), ('travel-1.jpg', (52, 52, 1370, 1370)), - ('orientation.jpg', (972, 216, 3669, 2913)) + ('orientation.jpg', (972, 0, 3969, 2997)) ]) def test_square_thumbs(image, crop): cropper = SmartCrop()