-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcoordinate_utils.py
More file actions
484 lines (393 loc) · 17 KB
/
coordinate_utils.py
File metadata and controls
484 lines (393 loc) · 17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
"""
Pure coordinate transformation utilities for GIMP AI Plugin.
These functions contain no GIMP dependencies and can be unit tested independently.
All coordinate calculations for context extraction, masking, and placement are here.
"""
def get_optimal_openai_shape(width, height):
    """
    Pick the OpenAI canvas shape that best matches an image's aspect ratio.

    Args:
        width: Image width in pixels
        height: Image height in pixels

    Returns:
        tuple: (target_width, target_height) - one of (1024, 1024),
               (1536, 1024), (1024, 1536)
    """
    # Non-positive dimensions give no meaningful ratio; fall back to square.
    if width <= 0 or height <= 0:
        return (1024, 1024)

    ratio = width / height
    if ratio > 1.3:
        return (1536, 1024)  # clearly landscape
    if ratio < 0.77:
        return (1024, 1536)  # clearly portrait
    return (1024, 1024)      # square or near-square
def calculate_padding_for_shape(current_width, current_height, target_width, target_height):
    """
    Calculate padding needed to fit content into target OpenAI shape.

    The content is scaled uniformly (aspect ratio preserved) to fit inside
    the target, then centered with padding on all four sides.

    Args:
        current_width: Current content width in pixels
        current_height: Current content height in pixels
        target_width: Target width (1024 or 1536)
        target_height: Target height (1024 or 1536)

    Returns:
        dict: {
            'scale_factor': Applied scaling factor,
            'scaled_size': (scaled_width, scaled_height),
            'padding': (left, top, right, bottom)
        }
    """
    # Guard against zero/negative content dimensions, which previously raised
    # ZeroDivisionError; degrade gracefully to "no content, all padding",
    # mirroring get_optimal_openai_shape()'s defensive default.
    if current_width <= 0 or current_height <= 0:
        pad_left = target_width // 2
        pad_top = target_height // 2
        return {
            'scale_factor': 1.0,
            'scaled_size': (0, 0),
            'padding': (pad_left, pad_top,
                        target_width - pad_left, target_height - pad_top)
        }

    # Uniform scale so the content fits entirely within the target.
    scale_x = target_width / current_width
    scale_y = target_height / current_height
    scale = min(scale_x, scale_y)

    # Scale dimensions (int() truncation may leave a 1px remainder).
    scaled_width = int(current_width * scale)
    scaled_height = int(current_height * scale)

    # Center the scaled content; right/bottom absorb any odd-pixel remainder.
    pad_left = (target_width - scaled_width) // 2
    pad_top = (target_height - scaled_height) // 2
    pad_right = target_width - scaled_width - pad_left
    pad_bottom = target_height - scaled_height - pad_top
    return {
        'scale_factor': scale,
        'scaled_size': (scaled_width, scaled_height),
        'padding': (pad_left, pad_top, pad_right, pad_bottom)
    }
def extract_context_with_selection(img_width, img_height, sel_x1, sel_y1, sel_x2, sel_y2,
                                   mode='focused', has_selection=True):
    """
    Extract context region around selection for inpainting with optimal shape.

    Args:
        img_width: Source image width in pixels
        img_height: Source image height in pixels
        sel_x1, sel_y1, sel_x2, sel_y2: Selection bounds (ignored and replaced
            by a centered default square when has_selection is False)
        mode: 'focused' for partial extraction around the selection,
            'full' for the whole image
        has_selection: Whether there's an active selection

    Returns:
        dict: Context extraction parameters with optimal shape. Always contains
        'mode', 'selection_bounds', 'extract_region', 'target_shape',
        'needs_padding', 'padding_info', 'has_selection'; focused mode also
        adds 'selection_in_extract' (selection translated into extract-region
        coordinates). Note 'extract_region' is (x1, y1, width, height).
    """
    if not has_selection:
        # No selection: synthesize a centered square selection of up to 512px,
        # clamped to the image size.
        # NOTE(review): this assignment is dead — target_shape is recomputed
        # below in both the 'full' and focused paths.
        target_shape = get_optimal_openai_shape(img_width, img_height)
        # Create a default selection in center
        size = min(img_width, img_height, 512)
        sel_x1 = (img_width - size) // 2
        sel_y1 = (img_height - size) // 2
        sel_x2 = sel_x1 + size
        sel_y2 = sel_y1 + size
    sel_width = sel_x2 - sel_x1
    sel_height = sel_y2 - sel_y1
    if mode == 'full':
        # Full mode: send the entire image; padding fits it to the target shape.
        target_shape = get_optimal_openai_shape(img_width, img_height)
        padding_info = calculate_padding_for_shape(img_width, img_height,
                                                   target_shape[0], target_shape[1])
        return {
            'mode': 'full',
            'selection_bounds': (sel_x1, sel_y1, sel_x2, sel_y2),
            'extract_region': (0, 0, img_width, img_height),
            'target_shape': target_shape,
            # Always True here even if the image already matches the target
            # shape exactly; padding_info then reports zero padding.
            'needs_padding': True,
            'padding_info': padding_info,
            'has_selection': has_selection
        }
    # Focused mode: extract region around selection.
    # Context padding is 40% of the selection's larger side, clamped to [50, 300] px.
    context_pad = max(50, min(300, int(max(sel_width, sel_height) * 0.4)))
    # Initial context bounds (may extend beyond the image; fixed up below).
    ctx_x1 = sel_x1 - context_pad
    ctx_y1 = sel_y1 - context_pad
    ctx_x2 = sel_x2 + context_pad
    ctx_y2 = sel_y2 + context_pad
    # Smart boundary handling: when a side overflows the image, shift the
    # overflow to the opposite side instead of shrinking the region.
    if ctx_x1 < 0:
        shift = -ctx_x1
        ctx_x1 = 0
        ctx_x2 = min(img_width, ctx_x2 + shift)
    if ctx_y1 < 0:
        shift = -ctx_y1
        ctx_y1 = 0
        ctx_y2 = min(img_height, ctx_y2 + shift)
    if ctx_x2 > img_width:
        shift = ctx_x2 - img_width
        ctx_x2 = img_width
        ctx_x1 = max(0, ctx_x1 - shift)
    if ctx_y2 > img_height:
        shift = ctx_y2 - img_height
        ctx_y2 = img_height
        ctx_y1 = max(0, ctx_y1 - shift)
    ctx_width = ctx_x2 - ctx_x1
    ctx_height = ctx_y2 - ctx_y1
    # Determine optimal shape for the clamped context region.
    target_shape = get_optimal_openai_shape(ctx_width, ctx_height)
    target_aspect = target_shape[0] / target_shape[1]
    current_aspect = ctx_width / ctx_height if ctx_height > 0 else 1.0
    # Try to extend the extract region to match the target aspect ratio.
    # This avoids padding when possible by using more of the available image.
    if abs(current_aspect - target_aspect) > 0.01:  # only if aspect ratios differ significantly
        if target_aspect > current_aspect:
            # Need a wider region: extend horizontally if possible.
            target_width = int(ctx_height * target_aspect)
            width_diff = target_width - ctx_width
            # Try to extend equally on both sides.
            left_extend = width_diff // 2
            right_extend = width_diff - left_extend
            new_ctx_x1 = max(0, ctx_x1 - left_extend)
            new_ctx_x2 = min(img_width, ctx_x2 + right_extend)
            # If we hit a boundary, push the remainder onto the open side.
            if new_ctx_x1 == 0 and new_ctx_x2 < img_width:
                # Hit left boundary, extend right more.
                remaining = target_width - (new_ctx_x2 - new_ctx_x1)
                new_ctx_x2 = min(img_width, new_ctx_x2 + remaining)
            elif new_ctx_x2 == img_width and new_ctx_x1 > 0:
                # Hit right boundary, extend left more.
                remaining = target_width - (new_ctx_x2 - new_ctx_x1)
                new_ctx_x1 = max(0, new_ctx_x1 - remaining)
            ctx_x1, ctx_x2 = new_ctx_x1, new_ctx_x2
        else:
            # Need a taller region: extend vertically if possible.
            target_height = int(ctx_width / target_aspect)
            height_diff = target_height - ctx_height
            # Try to extend equally on both sides.
            top_extend = height_diff // 2
            bottom_extend = height_diff - top_extend
            new_ctx_y1 = max(0, ctx_y1 - top_extend)
            new_ctx_y2 = min(img_height, ctx_y2 + bottom_extend)
            # If we hit a boundary, push the remainder onto the open side.
            if new_ctx_y1 == 0 and new_ctx_y2 < img_height:
                # Hit top boundary, extend bottom more.
                remaining = target_height - (new_ctx_y2 - new_ctx_y1)
                new_ctx_y2 = min(img_height, new_ctx_y2 + remaining)
            elif new_ctx_y2 == img_height and new_ctx_y1 > 0:
                # Hit bottom boundary, extend top more.
                remaining = target_height - (new_ctx_y2 - new_ctx_y1)
                new_ctx_y1 = max(0, new_ctx_y1 - remaining)
            ctx_y1, ctx_y2 = new_ctx_y1, new_ctx_y2
    # Recalculate final dimensions after any aspect-driven extension.
    ctx_width = ctx_x2 - ctx_x1
    ctx_height = ctx_y2 - ctx_y1
    padding_info = calculate_padding_for_shape(ctx_width, ctx_height,
                                               target_shape[0], target_shape[1])
    return {
        'mode': 'focused',
        'selection_bounds': (sel_x1, sel_y1, sel_x2, sel_y2),
        'extract_region': (ctx_x1, ctx_y1, ctx_width, ctx_height),
        # Selection translated into extract-region-local coordinates.
        'selection_in_extract': (
            sel_x1 - ctx_x1,
            sel_y1 - ctx_y1,
            sel_x2 - ctx_x1,
            sel_y2 - ctx_y1
        ),
        'target_shape': target_shape,
        'needs_padding': ctx_width != target_shape[0] or ctx_height != target_shape[1],
        'padding_info': padding_info,
        'has_selection': has_selection
    }
def calculate_result_placement(result_shape, original_shape, context_info):
    """
    Calculate placement for AI result back into original image.

    Args:
        result_shape: (width, height) of AI result
        original_shape: (width, height) of original image
        context_info: Context extraction info used for generation

    Returns:
        dict: Placement parameters with 'placement_mode', 'scale',
        'position', and 'size' keys.
    """
    if context_info['mode'] != 'full':
        # Focused mode: map the result back onto the extract region.
        region = context_info['extract_region']
        shape = context_info['target_shape']
        return {
            'placement_mode': 'composite',
            # Scale from the generated canvas back to the extract size.
            'scale': (region[2] / shape[0], region[3] / shape[1]),
            'position': (region[0], region[1]),
            'size': (region[2], region[3])
        }
    # Full image mode: scale the entire result to cover the original.
    return {
        'placement_mode': 'replace',
        'scale': (original_shape[0] / result_shape[0],
                  original_shape[1] / result_shape[1]),
        'position': (0, 0),
        'size': original_shape
    }
def calculate_scale_from_shape(source_shape, target_shape):
    """
    Calculate scaling factors between two shapes.

    Args:
        source_shape: (width, height) tuple
        target_shape: (width, height) tuple

    Returns:
        dict: {
            'scale_x': Horizontal scale factor,
            'scale_y': Vertical scale factor,
            'uniform_scale': Min of scale_x and scale_y (preserves aspect ratio)
        }
    """
    src_w, src_h = source_shape
    tgt_w, tgt_h = target_shape
    # Zero-sized source axes fall back to a neutral 1.0 scale.
    sx = tgt_w / src_w if src_w > 0 else 1.0
    sy = tgt_h / src_h if src_h > 0 else 1.0
    return {'scale_x': sx, 'scale_y': sy, 'uniform_scale': min(sx, sy)}
def calculate_mask_coordinates(context_info, target_size):
    """
    Calculate mask coordinates for selection within extract region.

    Args:
        context_info: Context extraction info from extract_context_with_selection()
        target_size: Target size for the mask (e.g. 1024)

    Returns:
        dict with mask coordinates: a centered circle when there is no
        selection, otherwise the selection rectangle scaled into the
        target_size coordinate space.
    """
    if not context_info['has_selection']:
        # No selection: default to a circle centered in the mask.
        mid = target_size // 2
        return {
            'mask_type': 'circle',
            'center_x': mid,
            'center_y': mid,
            'radius': target_size // 4,
            'target_size': target_size
        }

    sel_x1, sel_y1, sel_x2, sel_y2 = context_info['selection_bounds']
    ext_x1, ext_y1, ext_width, ext_height = context_info['extract_region']

    # Scale factor from extract-region space to mask space
    # (based on the larger extract dimension).
    scale = target_size / max(ext_width, ext_height)

    # Translate the selection into extract-local coordinates, then scale,
    # then clamp into the mask bounds (x1/y1 strictly inside, x2/y2 inclusive).
    def _to_mask(value, origin, upper):
        return max(0, min(upper, int((value - origin) * scale)))

    return {
        'mask_type': 'rectangle',
        'x1': _to_mask(sel_x1, ext_x1, target_size - 1),
        'y1': _to_mask(sel_y1, ext_y1, target_size - 1),
        'x2': _to_mask(sel_x2, ext_x1, target_size),
        'y2': _to_mask(sel_y2, ext_y1, target_size),
        'target_size': target_size,
        'scale_factor': scale
    }
def calculate_placement_coordinates(context_info):
    """
    Calculate where to place the AI result back in the original image.

    Args:
        context_info: Context extraction info from extract_context_with_selection()

    Returns:
        dict with placement coordinates ('paste_x', 'paste_y',
        'result_width', 'result_height')
    """
    # extract_region is (x1, y1, width, height); the result is pasted at the
    # region's origin and sized to the region.
    x, y, width, height = context_info['extract_region']
    return {
        'paste_x': x,
        'paste_y': y,
        'result_width': width,
        'result_height': height
    }
def validate_context_info(context_info):
    """
    Validate that context_info contains all required fields with valid values.

    Args:
        context_info: Context info dict to validate

    Returns:
        tuple: (is_valid: bool, error_message: str) - error_message is ""
        when valid, otherwise describes the first failed check.
    """
    # Presence of required keys.
    for field in ('selection_bounds', 'extract_region', 'target_shape',
                  'has_selection'):
        if field not in context_info:
            return False, f"Missing required field: {field}"

    # Selection bounds must be a non-degenerate (x1, y1, x2, y2) rectangle.
    sel_bounds = context_info['selection_bounds']
    if len(sel_bounds) != 4:
        return False, "selection_bounds must have 4 values (x1, y1, x2, y2)"
    sel_x1, sel_y1, sel_x2, sel_y2 = sel_bounds
    if sel_x2 <= sel_x1 or sel_y2 <= sel_y1:
        return False, "Invalid selection bounds: x2 <= x1 or y2 <= y1"

    # Extract region is (x1, y1, width, height) with positive size.
    extract_region = context_info['extract_region']
    if len(extract_region) != 4:
        return False, "extract_region must have 4 values (x1, y1, width, height)"
    ext_x1, ext_y1, ext_width, ext_height = extract_region
    if ext_width <= 0 or ext_height <= 0:
        return False, "Extract region dimensions must be positive"

    # In focused mode the extract region must fully enclose the selection.
    if context_info.get('mode') == 'focused':
        contains = (ext_x1 <= sel_x1 and ext_y1 <= sel_y1
                    and ext_x1 + ext_width >= sel_x2
                    and ext_y1 + ext_height >= sel_y2)
        if not contains:
            return False, "Extract region must contain the selection"

    # Target shape must be one of the supported OpenAI canvas sizes.
    target_shape = context_info['target_shape']
    if not (isinstance(target_shape, tuple) and len(target_shape) == 2):
        return False, "target_shape must be a tuple of (width, height)"
    valid_shapes = [(1024, 1024), (1536, 1024), (1024, 1536)]
    if target_shape not in valid_shapes:
        return False, f"target_shape must be one of {valid_shapes}"

    return True, ""
def check_coordinate_properties(img_width, img_height, sel_x1, sel_y1, sel_x2, sel_y2):
    """
    Test that coordinate calculations satisfy expected mathematical properties.

    Runs the extraction pipeline for the given image/selection and checks
    three invariants: the extract region contains the selection, the mask
    rectangle lies within bounds, and the placement covers the selection.

    Returns:
        dict with test results
    """
    info = extract_context_with_selection(img_width, img_height,
                                          sel_x1, sel_y1, sel_x2, sel_y2)
    target_size = max(info['target_shape'])
    mask = calculate_mask_coordinates(info, target_size)
    placement = calculate_placement_coordinates(info)

    is_valid, error_msg = validate_context_info(info)
    results = {
        'validation_passed': is_valid,
        'validation_error': error_msg,
        # Optimistic defaults; refined below when validation passes.
        'context_contains_selection': True,
        'mask_coordinates_valid': True,
        'placement_covers_selection': True
    }
    if not is_valid:
        return results

    # Invariant 1: extract region fully encloses the selection.
    ext_x1, ext_y1, ext_w, ext_h = info['extract_region']
    results['context_contains_selection'] = (
        ext_x1 <= sel_x1 and ext_y1 <= sel_y1
        and ext_x1 + ext_w >= sel_x2 and ext_y1 + ext_h >= sel_y2
    )

    # Invariant 2: rectangle masks stay inside [0, target_size] and are
    # non-degenerate (circle masks are trivially valid).
    if mask['mask_type'] == 'rectangle':
        mx1, my1, mx2, my2 = mask['x1'], mask['y1'], mask['x2'], mask['y2']
        results['mask_coordinates_valid'] = (
            0 <= mx1 < target_size and 0 <= my1 < target_size
            and 0 < mx2 <= target_size and 0 < my2 <= target_size
            and mx1 < mx2 and my1 < my2
        )

    # Invariant 3: pasting the result covers the whole selection.
    px, py = placement['paste_x'], placement['paste_y']
    pw, ph = placement['result_width'], placement['result_height']
    results['placement_covers_selection'] = (
        px <= sel_x1 and py <= sel_y1
        and px + pw >= sel_x2 and py + ph >= sel_y2
    )
    return results