-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcoordinate_utils.py
More file actions
484 lines (393 loc) · 17 KB
/
coordinate_utils.py
File metadata and controls
484 lines (393 loc) · 17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
"""
Pure coordinate transformation utilities for GIMP AI Plugin.
These functions contain no GIMP dependencies and can be unit tested independently.
All coordinate calculations for context extraction, masking, and placement are here.
"""
def get_optimal_openai_shape(width, height):
    """
    Pick the OpenAI canvas shape that best matches an image's aspect ratio.

    Args:
        width: Image width in pixels
        height: Image height in pixels

    Returns:
        tuple: (target_width, target_height) - one of (1024, 1024),
               (1536, 1024), (1024, 1536)
    """
    # Non-positive dimensions give no meaningful ratio; fall back to square.
    if width <= 0 or height <= 0:
        return (1024, 1024)

    ratio = width / height
    if ratio > 1.3:
        return (1536, 1024)  # clearly landscape
    if ratio < 0.77:
        return (1024, 1536)  # clearly portrait
    return (1024, 1024)      # square or near-square
def calculate_padding_for_shape(current_width, current_height, target_width, target_height):
    """
    Calculate padding needed to fit content into target OpenAI shape.

    The content is scaled uniformly (aspect ratio preserved) to fit inside
    the target, then centered with padding on all four sides.

    Args:
        current_width: Current content width in pixels
        current_height: Current content height in pixels
        target_width: Target width (1024 or 1536)
        target_height: Target height (1024 or 1536)

    Returns:
        dict: {
            'scale_factor': Applied scaling factor,
            'scaled_size': (scaled_width, scaled_height),
            'padding': (left, top, right, bottom)
        }
    """
    # Guard against zero/negative content dimensions, which previously raised
    # ZeroDivisionError; degrade gracefully to "no content, all padding",
    # mirroring get_optimal_openai_shape()'s defensive default.
    if current_width <= 0 or current_height <= 0:
        pad_left = target_width // 2
        pad_top = target_height // 2
        return {
            'scale_factor': 1.0,
            'scaled_size': (0, 0),
            'padding': (pad_left, pad_top,
                        target_width - pad_left, target_height - pad_top)
        }

    # Uniform scale so the content fits entirely within the target.
    scale_x = target_width / current_width
    scale_y = target_height / current_height
    scale = min(scale_x, scale_y)

    # Scale dimensions (int() truncation may leave a 1px remainder).
    scaled_width = int(current_width * scale)
    scaled_height = int(current_height * scale)

    # Center the scaled content; right/bottom absorb any odd-pixel remainder.
    pad_left = (target_width - scaled_width) // 2
    pad_top = (target_height - scaled_height) // 2
    pad_right = target_width - scaled_width - pad_left
    pad_bottom = target_height - scaled_height - pad_top
    return {
        'scale_factor': scale,
        'scaled_size': (scaled_width, scaled_height),
        'padding': (pad_left, pad_top, pad_right, pad_bottom)
    }
def extract_context_with_selection(img_width, img_height, sel_x1, sel_y1, sel_x2, sel_y2,
                                   mode='focused', has_selection=True):
    """
    Extract context region around selection for inpainting with optimal shape.

    Args:
        img_width: Source image width in pixels
        img_height: Source image height in pixels
        sel_x1, sel_y1, sel_x2, sel_y2: Selection bounds (ignored and replaced
            by a centered default square when has_selection is False)
        mode: 'focused' for partial extraction around the selection,
            'full' for the whole image
        has_selection: Whether there's an active selection

    Returns:
        dict: Context extraction parameters with optimal shape. Always contains
        'mode', 'selection_bounds', 'extract_region', 'target_shape',
        'needs_padding', 'padding_info', 'has_selection'; focused mode also
        adds 'selection_in_extract' (selection translated into extract-region
        coordinates). Note 'extract_region' is (x1, y1, width, height).
    """
    if not has_selection:
        # No selection: synthesize a centered square selection of up to 512px,
        # clamped to the image size.
        # NOTE(review): this assignment is dead — target_shape is recomputed
        # below in both the 'full' and focused paths.
        target_shape = get_optimal_openai_shape(img_width, img_height)
        # Create a default selection in center
        size = min(img_width, img_height, 512)
        sel_x1 = (img_width - size) // 2
        sel_y1 = (img_height - size) // 2
        sel_x2 = sel_x1 + size
        sel_y2 = sel_y1 + size
    sel_width = sel_x2 - sel_x1
    sel_height = sel_y2 - sel_y1
    if mode == 'full':
        # Full mode: send the entire image; padding fits it to the target shape.
        target_shape = get_optimal_openai_shape(img_width, img_height)
        padding_info = calculate_padding_for_shape(img_width, img_height,
                                                   target_shape[0], target_shape[1])
        return {
            'mode': 'full',
            'selection_bounds': (sel_x1, sel_y1, sel_x2, sel_y2),
            'extract_region': (0, 0, img_width, img_height),
            'target_shape': target_shape,
            # Always True here even if the image already matches the target
            # shape exactly; padding_info then reports zero padding.
            'needs_padding': True,
            'padding_info': padding_info,
            'has_selection': has_selection
        }
    # Focused mode: extract region around selection.
    # Context padding is 40% of the selection's larger side, clamped to [50, 300] px.
    context_pad = max(50, min(300, int(max(sel_width, sel_height) * 0.4)))
    # Initial context bounds (may extend beyond the image; fixed up below).
    ctx_x1 = sel_x1 - context_pad
    ctx_y1 = sel_y1 - context_pad
    ctx_x2 = sel_x2 + context_pad
    ctx_y2 = sel_y2 + context_pad
    # Smart boundary handling: when a side overflows the image, shift the
    # overflow to the opposite side instead of shrinking the region.
    if ctx_x1 < 0:
        shift = -ctx_x1
        ctx_x1 = 0
        ctx_x2 = min(img_width, ctx_x2 + shift)
    if ctx_y1 < 0:
        shift = -ctx_y1
        ctx_y1 = 0
        ctx_y2 = min(img_height, ctx_y2 + shift)
    if ctx_x2 > img_width:
        shift = ctx_x2 - img_width
        ctx_x2 = img_width
        ctx_x1 = max(0, ctx_x1 - shift)
    if ctx_y2 > img_height:
        shift = ctx_y2 - img_height
        ctx_y2 = img_height
        ctx_y1 = max(0, ctx_y1 - shift)
    ctx_width = ctx_x2 - ctx_x1
    ctx_height = ctx_y2 - ctx_y1
    # Determine optimal shape for the clamped context region.
    target_shape = get_optimal_openai_shape(ctx_width, ctx_height)
    target_aspect = target_shape[0] / target_shape[1]
    current_aspect = ctx_width / ctx_height if ctx_height > 0 else 1.0
    # Try to extend the extract region to match the target aspect ratio.
    # This avoids padding when possible by using more of the available image.
    if abs(current_aspect - target_aspect) > 0.01:  # only if aspect ratios differ significantly
        if target_aspect > current_aspect:
            # Need a wider region: extend horizontally if possible.
            target_width = int(ctx_height * target_aspect)
            width_diff = target_width - ctx_width
            # Try to extend equally on both sides.
            left_extend = width_diff // 2
            right_extend = width_diff - left_extend
            new_ctx_x1 = max(0, ctx_x1 - left_extend)
            new_ctx_x2 = min(img_width, ctx_x2 + right_extend)
            # If we hit a boundary, push the remainder onto the open side.
            if new_ctx_x1 == 0 and new_ctx_x2 < img_width:
                # Hit left boundary, extend right more.
                remaining = target_width - (new_ctx_x2 - new_ctx_x1)
                new_ctx_x2 = min(img_width, new_ctx_x2 + remaining)
            elif new_ctx_x2 == img_width and new_ctx_x1 > 0:
                # Hit right boundary, extend left more.
                remaining = target_width - (new_ctx_x2 - new_ctx_x1)
                new_ctx_x1 = max(0, new_ctx_x1 - remaining)
            ctx_x1, ctx_x2 = new_ctx_x1, new_ctx_x2
        else:
            # Need a taller region: extend vertically if possible.
            target_height = int(ctx_width / target_aspect)
            height_diff = target_height - ctx_height
            # Try to extend equally on both sides.
            top_extend = height_diff // 2
            bottom_extend = height_diff - top_extend
            new_ctx_y1 = max(0, ctx_y1 - top_extend)
            new_ctx_y2 = min(img_height, ctx_y2 + bottom_extend)
            # If we hit a boundary, push the remainder onto the open side.
            if new_ctx_y1 == 0 and new_ctx_y2 < img_height:
                # Hit top boundary, extend bottom more.
                remaining = target_height - (new_ctx_y2 - new_ctx_y1)
                new_ctx_y2 = min(img_height, new_ctx_y2 + remaining)
            elif new_ctx_y2 == img_height and new_ctx_y1 > 0:
                # Hit bottom boundary, extend top more.
                remaining = target_height - (new_ctx_y2 - new_ctx_y1)
                new_ctx_y1 = max(0, new_ctx_y1 - remaining)
            ctx_y1, ctx_y2 = new_ctx_y1, new_ctx_y2
    # Recalculate final dimensions after any aspect-driven extension.
    ctx_width = ctx_x2 - ctx_x1
    ctx_height = ctx_y2 - ctx_y1
    padding_info = calculate_padding_for_shape(ctx_width, ctx_height,
                                               target_shape[0], target_shape[1])
    return {
        'mode': 'focused',
        'selection_bounds': (sel_x1, sel_y1, sel_x2, sel_y2),
        'extract_region': (ctx_x1, ctx_y1, ctx_width, ctx_height),
        # Selection translated into extract-region-local coordinates.
        'selection_in_extract': (
            sel_x1 - ctx_x1,
            sel_y1 - ctx_y1,
            sel_x2 - ctx_x1,
            sel_y2 - ctx_y1
        ),
        'target_shape': target_shape,
        'needs_padding': ctx_width != target_shape[0] or ctx_height != target_shape[1],
        'padding_info': padding_info,
        'has_selection': has_selection
    }
def calculate_result_placement(result_shape, original_shape, context_info):
    """
    Calculate placement for AI result back into original image.

    Args:
        result_shape: (width, height) of AI result
        original_shape: (width, height) of original image
        context_info: Context extraction info used for generation

    Returns:
        dict: Placement parameters with 'placement_mode', 'scale',
        'position', and 'size' keys.
    """
    if context_info['mode'] != 'full':
        # Focused mode: map the result back onto the extract region.
        region = context_info['extract_region']
        shape = context_info['target_shape']
        return {
            'placement_mode': 'composite',
            # Scale from the generated canvas back to the extract size.
            'scale': (region[2] / shape[0], region[3] / shape[1]),
            'position': (region[0], region[1]),
            'size': (region[2], region[3])
        }
    # Full image mode: scale the entire result to cover the original.
    return {
        'placement_mode': 'replace',
        'scale': (original_shape[0] / result_shape[0],
                  original_shape[1] / result_shape[1]),
        'position': (0, 0),
        'size': original_shape
    }
def calculate_scale_from_shape(source_shape, target_shape):
    """
    Calculate scaling factors between two shapes.

    Args:
        source_shape: (width, height) tuple
        target_shape: (width, height) tuple

    Returns:
        dict: {
            'scale_x': Horizontal scale factor,
            'scale_y': Vertical scale factor,
            'uniform_scale': Min of scale_x and scale_y (preserves aspect ratio)
        }
    """
    src_w, src_h = source_shape
    tgt_w, tgt_h = target_shape
    # Zero-sized source axes fall back to a neutral 1.0 scale.
    sx = tgt_w / src_w if src_w > 0 else 1.0
    sy = tgt_h / src_h if src_h > 0 else 1.0
    return {'scale_x': sx, 'scale_y': sy, 'uniform_scale': min(sx, sy)}
def calculate_mask_coordinates(context_info, target_size):
    """
    Calculate mask coordinates for selection within extract region.

    Args:
        context_info: Context extraction info from extract_context_with_selection()
        target_size: Target size for the mask (e.g. 1024)

    Returns:
        dict with mask coordinates: a centered circle when there is no
        selection, otherwise the selection rectangle scaled into the
        target_size coordinate space.
    """
    if not context_info['has_selection']:
        # No selection: default to a circle centered in the mask.
        mid = target_size // 2
        return {
            'mask_type': 'circle',
            'center_x': mid,
            'center_y': mid,
            'radius': target_size // 4,
            'target_size': target_size
        }

    sel_x1, sel_y1, sel_x2, sel_y2 = context_info['selection_bounds']
    ext_x1, ext_y1, ext_width, ext_height = context_info['extract_region']

    # Scale factor from extract-region space to mask space
    # (based on the larger extract dimension).
    scale = target_size / max(ext_width, ext_height)

    # Translate the selection into extract-local coordinates, then scale,
    # then clamp into the mask bounds (x1/y1 strictly inside, x2/y2 inclusive).
    def _to_mask(value, origin, upper):
        return max(0, min(upper, int((value - origin) * scale)))

    return {
        'mask_type': 'rectangle',
        'x1': _to_mask(sel_x1, ext_x1, target_size - 1),
        'y1': _to_mask(sel_y1, ext_y1, target_size - 1),
        'x2': _to_mask(sel_x2, ext_x1, target_size),
        'y2': _to_mask(sel_y2, ext_y1, target_size),
        'target_size': target_size,
        'scale_factor': scale
    }
def calculate_placement_coordinates(context_info):
    """
    Calculate where to place the AI result back in the original image.

    Args:
        context_info: Context extraction info from extract_context_with_selection()

    Returns:
        dict with placement coordinates ('paste_x', 'paste_y',
        'result_width', 'result_height')
    """
    # extract_region is (x1, y1, width, height); the result is pasted at the
    # region's origin and sized to the region.
    x, y, width, height = context_info['extract_region']
    return {
        'paste_x': x,
        'paste_y': y,
        'result_width': width,
        'result_height': height
    }
def validate_context_info(context_info):
    """
    Validate that context_info contains all required fields with valid values.

    Args:
        context_info: Context info dict to validate

    Returns:
        tuple: (is_valid: bool, error_message: str) - error_message is ""
        when valid, otherwise describes the first failed check.
    """
    # Presence of required keys.
    for field in ('selection_bounds', 'extract_region', 'target_shape',
                  'has_selection'):
        if field not in context_info:
            return False, f"Missing required field: {field}"

    # Selection bounds must be a non-degenerate (x1, y1, x2, y2) rectangle.
    sel_bounds = context_info['selection_bounds']
    if len(sel_bounds) != 4:
        return False, "selection_bounds must have 4 values (x1, y1, x2, y2)"
    sel_x1, sel_y1, sel_x2, sel_y2 = sel_bounds
    if sel_x2 <= sel_x1 or sel_y2 <= sel_y1:
        return False, "Invalid selection bounds: x2 <= x1 or y2 <= y1"

    # Extract region is (x1, y1, width, height) with positive size.
    extract_region = context_info['extract_region']
    if len(extract_region) != 4:
        return False, "extract_region must have 4 values (x1, y1, width, height)"
    ext_x1, ext_y1, ext_width, ext_height = extract_region
    if ext_width <= 0 or ext_height <= 0:
        return False, "Extract region dimensions must be positive"

    # In focused mode the extract region must fully enclose the selection.
    if context_info.get('mode') == 'focused':
        contains = (ext_x1 <= sel_x1 and ext_y1 <= sel_y1
                    and ext_x1 + ext_width >= sel_x2
                    and ext_y1 + ext_height >= sel_y2)
        if not contains:
            return False, "Extract region must contain the selection"

    # Target shape must be one of the supported OpenAI canvas sizes.
    target_shape = context_info['target_shape']
    if not (isinstance(target_shape, tuple) and len(target_shape) == 2):
        return False, "target_shape must be a tuple of (width, height)"
    valid_shapes = [(1024, 1024), (1536, 1024), (1024, 1536)]
    if target_shape not in valid_shapes:
        return False, f"target_shape must be one of {valid_shapes}"

    return True, ""
def check_coordinate_properties(img_width, img_height, sel_x1, sel_y1, sel_x2, sel_y2):
    """
    Test that coordinate calculations satisfy expected mathematical properties.

    Runs the extraction pipeline for the given image/selection and checks
    three invariants: the extract region contains the selection, the mask
    rectangle lies within bounds, and the placement covers the selection.

    Returns:
        dict with test results
    """
    info = extract_context_with_selection(img_width, img_height,
                                          sel_x1, sel_y1, sel_x2, sel_y2)
    target_size = max(info['target_shape'])
    mask = calculate_mask_coordinates(info, target_size)
    placement = calculate_placement_coordinates(info)

    is_valid, error_msg = validate_context_info(info)
    results = {
        'validation_passed': is_valid,
        'validation_error': error_msg,
        # Optimistic defaults; refined below when validation passes.
        'context_contains_selection': True,
        'mask_coordinates_valid': True,
        'placement_covers_selection': True
    }
    if not is_valid:
        return results

    # Invariant 1: extract region fully encloses the selection.
    ext_x1, ext_y1, ext_w, ext_h = info['extract_region']
    results['context_contains_selection'] = (
        ext_x1 <= sel_x1 and ext_y1 <= sel_y1
        and ext_x1 + ext_w >= sel_x2 and ext_y1 + ext_h >= sel_y2
    )

    # Invariant 2: rectangle masks stay inside [0, target_size] and are
    # non-degenerate (circle masks are trivially valid).
    if mask['mask_type'] == 'rectangle':
        mx1, my1, mx2, my2 = mask['x1'], mask['y1'], mask['x2'], mask['y2']
        results['mask_coordinates_valid'] = (
            0 <= mx1 < target_size and 0 <= my1 < target_size
            and 0 < mx2 <= target_size and 0 < my2 <= target_size
            and mx1 < mx2 and my1 < my2
        )

    # Invariant 3: pasting the result covers the whole selection.
    px, py = placement['paste_x'], placement['paste_y']
    pw, ph = placement['result_width'], placement['result_height']
    results['placement_covers_selection'] = (
        px <= sel_x1 and py <= sel_y1
        and px + pw >= sel_x2 and py + ph >= sel_y2
    )
    return results