-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdump_layer_activations.py
More file actions
244 lines (205 loc) · 8.73 KB
/
dump_layer_activations.py
File metadata and controls
244 lines (205 loc) · 8.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.10,<3.14"
# dependencies = ["numpy", "tensorflow", "pillow"]
# ///
"""Dump intermediate activations from TFLite palm detection for layer-by-layer comparison.
Preprocesses the test image with exactly the same letterbox resize as our WebGPU code,
runs TFLite inference, and dumps key intermediate tensors to JSON.
"""
import json
import numpy as np
import tensorflow as tf
from pathlib import Path
from PIL import Image
# Directory that holds this script; the model file and the docs/ image folders
# are all resolved relative to it.
SCRIPT_DIR = Path(__file__).parents[0]
# Palm-detection TFLite model, expected to sit next to this script.
TFLITE_PATH = SCRIPT_DIR.joinpath("palm_detection.tflite")
def letterbox_resize(img_array, target_size=192):
    """Fit an image into a square canvas, centred on zero (black) padding.

    The image is scaled uniformly so that it fits inside a
    ``target_size`` x ``target_size`` square, resized with Pillow's bilinear
    filter, normalised by dividing by 255, and pasted into the middle of a
    zero-filled float32 canvas. Intended to mirror the WebGPU letterbox step.

    Args:
        img_array: Image array whose first two axes are height and width.
            Presumably uint8 RGB of shape (H, W, 3): the 3-channel canvas and
            the /255 normalisation assume it -- confirm against callers.
        target_size: Side length of the square output canvas, in pixels.

    Returns:
        Tuple ``(canvas, offset_x, offset_y, scale)`` where ``canvas`` is a
        float32 array of shape (target_size, target_size, 3), the offsets are
        the top-left corner of the pasted image, and ``scale`` is the resize
        factor that was applied.
    """
    src_h, src_w = img_array.shape[:2]

    # One factor for both axes keeps the aspect ratio; taking the smaller
    # ratio guarantees the scaled image fits inside the square.
    scale = min(target_size / src_w, target_size / src_h)

    # NOTE(review): Python's round() rounds exact halves to the nearest even
    # integer -- confirm the WebGPU side uses the same rounding rule.
    scaled_w, scaled_h = round(src_w * scale), round(src_h * scale)

    # Integer division centres the image; any odd leftover pixel ends up on
    # the right/bottom edge.
    offset_x = (target_size - scaled_w) // 2
    offset_y = (target_size - scaled_h) // 2

    # Bilinear resize through Pillow, meant to match GPU hardware bilinear.
    # NOTE(review): Pillow's BILINEAR filter takes all contributing source
    # pixels into account when downscaling -- verify this really matches the
    # GPU sampler for large reductions.
    shrunk = Image.fromarray(img_array).resize((scaled_w, scaled_h), Image.BILINEAR)
    pixels = np.array(shrunk).astype(np.float32) / 255.0

    # Paste the scaled image into the centre of the zero-filled canvas.
    canvas = np.zeros((target_size, target_size, 3), dtype=np.float32)
    canvas[offset_y:offset_y + scaled_h, offset_x:offset_x + scaled_w] = pixels
    return canvas, offset_x, offset_y, scale
# Op types whose outputs are candidates for dumping.
_KEY_OP_TYPES = frozenset(
    {'CONV_2D', 'DEPTHWISE_CONV_2D', 'ADD', 'RESIZE_BILINEAR', 'PRELU', 'PAD'}
)

# ADD outputs at these shapes mark backbone stages; several ADDs share a shape,
# so their labels get the op index appended to stay unique.
_STAGE_ADD_LABELS = {
    (1, 48, 48, 64): 'stage1_add',
    (1, 24, 24, 128): 'stage2_add',
    (1, 12, 12, 256): 'stage3_add',
    (1, 6, 6, 256): 'stage4_add',
}

# (op type, output shape) pairs that identify a layer with a fixed label.
_SHAPE_LABELS = {
    ('PRELU', (1, 96, 96, 32)): 'initConv',  # After initial conv + PReLU
    ('RESIZE_BILINEAR', (1, 12, 12, 256)): 'fpnUpsample6to12',
    ('RESIZE_BILINEAR', (1, 24, 24, 256)): 'fpnUpsample12to24',
}

# (tensor-name substring, op type, label), tested in order after the shape rules.
_NAME_LABELS = (
    ('conv2d_25', 'PRELU', 'fpn6to12Conv'),
    ('conv2d_28', 'PRELU', 'fpn12to24Conv'),
    # SSD heads
    ('classifier_palm_16', 'CONV_2D', 'cls16'),
    ('regressor_palm_16', 'CONV_2D', 'reg16'),
    ('classifier_palm_8', 'CONV_2D', 'cls8'),
    ('regressor_palm_8', 'CONV_2D', 'reg8'),
)


def _find_image(image_name):
    """Return the first existing path for *image_name* under docs/, or None."""
    for folder in (SCRIPT_DIR / "docs", SCRIPT_DIR / "docs" / "test-hands"):
        candidate = folder / image_name
        if candidate.exists():
            return candidate
    return None


def _label_for_layer(op_index, op_name, shape, name):
    """Return the dump label for an op output, or None if it is not a key layer."""
    if op_name == 'ADD' and shape in _STAGE_ADD_LABELS:
        return f'{_STAGE_ADD_LABELS[shape]}_{op_index}'
    if (op_name, shape) in _SHAPE_LABELS:
        return _SHAPE_LABELS[(op_name, shape)]
    for fragment, wanted_op, label in _NAME_LABELS:
        if op_name == wanted_op and fragment in name:
            return label
    return None


def _basic_stats(values):
    """Return min/max/mean of a numpy array as plain Python floats."""
    return {
        'min': float(values.min()),
        'max': float(values.max()),
        'mean': float(values.mean()),
    }


def main():
    """Run palm detection on one image and dump key activations to JSON.

    The image name comes from ``sys.argv[1]`` (default ``hand_07.jpg``) and is
    looked up in ``docs/`` and then ``docs/test-hands/`` next to this script.
    The image is letterboxed, fed through the TFLite model, and summary
    statistics (plus leading values) of the input, selected intermediate
    tensors and the final outputs are written to
    ``docs/tflite_activations_<image stem>.json``.

    Prints a message and returns without writing anything when the image
    cannot be found.
    """
    import sys

    image_name = sys.argv[1] if len(sys.argv) > 1 else "hand_07.jpg"

    # Find image
    img_path = _find_image(image_name)
    if img_path is None:
        print(f"Image not found: {image_name}")
        return

    # Load and preprocess. Force RGB so grayscale / RGBA / palette files fit
    # the 3-channel letterbox canvas, and close the file handle promptly.
    with Image.open(img_path) as img:
        img_array = np.array(img.convert("RGB"))
    print(f"Image: {image_name} shape={img_array.shape}")

    letterboxed, offset_x, offset_y, scale = letterbox_resize(img_array)
    print(f"Letterbox: offset=({offset_x},{offset_y}) scale={scale}")

    # Prepare input: TFLite expects [1, 192, 192, 3] in [0, 1]
    input_tensor = letterboxed[np.newaxis, ...]

    # By default TFLite reuses the memory of intermediate tensors, so reading
    # them after invoke() can return data overwritten by later ops.
    # experimental_preserve_all_tensors keeps every intermediate intact.
    interpreter = tf.lite.Interpreter(
        model_path=str(TFLITE_PATH),
        experimental_preserve_all_tensors=True,
    )
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Run inference
    interpreter.set_tensor(input_details[0]['index'], input_tensor)
    interpreter.invoke()

    # Tensor index -> details, used to resolve op outputs to shapes and names.
    tensor_map = {d['index']: d for d in interpreter.get_tensor_details()}

    result = {}

    # Input, converted to CHW [0,1] for comparison with our GPU format
    input_chw = interpreter.get_tensor(input_details[0]['index'])[0].transpose(2, 0, 1)
    result['input'] = {
        'shape': list(input_chw.shape),
        **_basic_stats(input_chw),
        'sample': input_chw.flatten()[:10].tolist(),
    }

    # Collect candidate ops. We want:
    #   1. After initial conv2d_0 + PReLU (96x96x32)
    #   2. After each stage boundary (stride-2 blocks)
    #   3. FPN intermediate tensors
    #   4. SSD head outputs
    print("\n=== Key ops ===")
    key_ops = []
    for op_index, op in enumerate(interpreter._get_ops_details()):
        op_name = op['op_name']
        # DEQUANTIZE (weight conversion) and other op types are not dumped.
        if op_name not in _KEY_OP_TYPES:
            continue
        outputs = list(op['outputs'])
        if not outputs or outputs[0] not in tensor_map:
            continue
        out_idx = outputs[0]
        # Plain ints so the shape hashes/compares against the tables above and
        # prints cleanly regardless of numpy's scalar repr.
        out_shape = tuple(int(s) for s in tensor_map[out_idx]['shape'])
        if not any(s > 1 for s in out_shape[1:3]):  # spatial dims > 1
            continue
        # NOTE(review): name matching below sees only the first 80 characters
        # of the tensor name, as in the original -- confirm this is intended
        # for long fused tensor names.
        out_name = tensor_map[out_idx]['name'][:80]
        key_ops.append((op_index, op_name, out_idx, out_shape, out_name))

    print("\n=== Reading intermediate activations ===")
    for op_index, op_name, out_idx, shape, name in key_ops:
        label = _label_for_layer(op_index, op_name, shape, name)
        # Ops are visited in graph order; keep the first match for a fixed
        # label (e.g. 'initConv') so later same-shaped layers do not silently
        # overwrite it.
        if label is None or label in result:
            continue

        try:
            data = interpreter.get_tensor(out_idx)
        except (ValueError, RuntimeError) as exc:
            # Best effort: report the unreadable tensor and keep going.
            print(f" Op {op_index:3d} [{label:25s}] skipped: {exc}")
            continue
        if data.size == 0:
            continue

        tensor = data[0]  # Remove batch dim
        # Convert HWC to CHW for comparison with our GPU format
        tensor_chw = tensor.transpose(2, 0, 1) if tensor.ndim == 3 else tensor
        flat = tensor_chw.flatten()
        stats = _basic_stats(flat)
        result[label] = {
            'shape': list(tensor_chw.shape),
            **stats,
            'nonZero': int(np.count_nonzero(flat[:1000])),
            'sample': flat[:10].tolist(),
            # Store first 500 values for detailed comparison
            'data500': flat[:500].tolist(),
        }
        print(
            f" Op {op_index:3d} [{label:25s}] shape={shape} "
            f"min={stats['min']:.6f} max={stats['max']:.6f} mean={stats['mean']:.6f}"
        )

    # Final outputs
    for od in output_details:
        data = interpreter.get_tensor(od['index'])
        name = od['name']
        flat = data.flatten()
        stats = _basic_stats(flat)
        result[f"output_{name}"] = {
            'shape': list(data.shape),
            **stats,
            'sample': flat[:20].tolist(),
        }
        print(f" Output [{name}] shape={data.shape} min={stats['min']:.6f} max={stats['max']:.6f}")

    # Write to JSON. Path.stem drops whatever extension the image has, not
    # only a literal ".jpg".
    out_path = SCRIPT_DIR / "docs" / f"tflite_activations_{Path(image_name).stem}.json"
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=2)
    print(f"\nWrote {out_path}")
# Run the dump only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()