# mano_visualizer.py (from the SPICExLAB/WiLoR repository): dual-hand MANO animation viewer and video exporter.
import torch
from manopth.manolayer import ManoLayer
import numpy as np
import trimesh
import pyrender
import os
import glob
import time
import json
import cv2
from tqdm import tqdm
import argparse

# Defaults for the command-line options defined in main().
DEFAULT_MANO_PATH = './mano_data'  # default for --model_path (directory with the MANO model files)
DEFAULT_SEQUENCE_PATH = './out/fancyhands'  # default for --sequence_path (parent of mano/left and mano/right)
DEFAULT_OUTPUT_VIDEO = 'mano_animation_both.mp4'  # default for --output

# Four-component colors assigned to each hand mesh's vertex_colors.
LEFT_HAND_COLOR = [0.6, 0.6, 0.8, 1.0]  # A light purple/blue
RIGHT_HAND_COLOR = [0.8, 0.6, 0.6, 1.0] # A light red/orange

def load_sequence(path):
    """Load a sequence of MANO parameters from a directory of JSON files.

    Files are processed in sorted filename order. Each file must contain
    either a dict, or a non-empty list whose first element is a dict, holding
    the keys ``Rh``, ``Th``, ``poses`` and ``shapes``. A file that cannot be
    read, parsed or converted to tensors is reported and skipped, so a single
    bad frame does not discard the rest of the sequence.

    Args:
        path: Directory containing one ``*.json`` file per frame.

    Returns:
        A list with one dict per usable frame. Each dict holds float32
        tensors reshaped to a single row: ``pose_params`` (``Rh`` followed by
        ``poses``), ``shape_params`` and ``trans_params``. The list may be
        empty when every file was skipped. Returns ``None`` when ``path``
        does not exist or contains no ``.json`` files.
    """
    if not os.path.exists(path):
        print(f"Info: Sequence path '{path}' not found. Skipping.")
        return None

    param_files = sorted(glob.glob(os.path.join(path, "*.json")))
    if not param_files:
        print(f"Info: No .json files found in '{path}'. Skipping.")
        return None

    required_keys = ['Rh', 'Th', 'poses', 'shapes']
    all_params = []
    for file_path in param_files:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both JSON syntax errors and undecodable bytes.
            print(f"Error processing {file_path}: {e}")
            continue

        # A list payload is reduced to its first entry.
        person_data = data[0] if isinstance(data, list) and data else data

        if not isinstance(person_data, dict) or not all(key in person_data for key in required_keys):
            print(f"Skipping {os.path.basename(file_path)}: missing one of the required keys {required_keys}")
            continue

        try:
            # manopth expects pose parameters to be concatenated
            rh_tensor = torch.tensor(person_data['Rh'], dtype=torch.float32).view(1, -1)
            poses_tensor = torch.tensor(person_data['poses'], dtype=torch.float32).view(1, -1)
            frame_params = {
                'pose_params': torch.cat([rh_tensor, poses_tensor], dim=1),
                # Ensure shape and translation params have a batch dimension
                'shape_params': torch.tensor(person_data['shapes'], dtype=torch.float32).view(1, -1),
                'trans_params': torch.tensor(person_data['Th'], dtype=torch.float32).view(1, -1),
            }
        except (ValueError, TypeError, RuntimeError) as e:
            # Values that are not numeric or not rectangular cannot become tensors.
            print(f"Error processing {file_path}: {e}")
            continue

        all_params.append(frame_params)

    print(f"Loaded {len(all_params)} frames from '{path}'.")
    return all_params

def precompute_all_vertices(model, mano_sequence):
    """Evaluate the hand model on every frame and stack the vertex arrays.

    Args:
        model: Callable hand layer exposing a ``side`` attribute. It is called
            with the pose, shape and translation tensors of one frame and
            must return a pair whose first element is the vertex tensor.
        mano_sequence: Sequence of per-frame dicts with the keys
            ``pose_params``, ``shape_params`` and ``trans_params``.

    Returns:
        A NumPy array with one squeezed vertex array per frame, or ``None``
        when either argument is missing or empty.
    """
    if not (model and mano_sequence):
        return None

    side = model.side
    print(f"Pre-computing vertex positions for {side} hand...")

    per_frame_vertices = []
    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        for frame_params in tqdm(mano_sequence, desc=f"Processing {side} hand"):
            # Only the first output (vertices) is used; the second is discarded.
            vertex_tensor, _ = model(
                frame_params['pose_params'],
                frame_params['shape_params'],
                frame_params['trans_params'],
            )
            frame_array = vertex_tensor.detach().cpu().numpy()
            per_frame_vertices.append(frame_array.squeeze())

    return np.array(per_frame_vertices)

def debug_hand_positions(left_vertices, right_vertices):
    """Print bounding-box statistics of the first frame of each hand.

    For every hand that is neither ``None`` nor empty, the per-axis minimum
    and maximum, the mean vertex position, the bounding-box size and the
    distance of the mean from the origin are printed. Only frame 0 is
    inspected.
    """
    def summarize_first_frame(vertices):
        # Returns None when there is nothing to report for this hand.
        if vertices is None or len(vertices) == 0:
            return None
        frame = vertices[0]
        lower = frame.min(axis=0)
        upper = frame.max(axis=0)
        center = frame.mean(axis=0)
        return lower, upper, center, upper - lower, np.linalg.norm(center)

    print("\n=== DEBUGGING HAND POSITIONS ===")

    left_summary = summarize_first_frame(left_vertices)
    if left_summary is not None:
        lower, upper, center, extent, distance = left_summary
        print(f"Left hand bounds: min={lower}, max={upper}")
        print(f"Left hand center: {center}")
        print(f"Left hand size: {extent}")
        print(f"Left hand distance from origin: {distance}")

    right_summary = summarize_first_frame(right_vertices)
    if right_summary is not None:
        lower, upper, center, extent, distance = right_summary
        print(f"Right hand bounds: min={lower}, max={upper}")
        print(f"Right hand center: {center}")
        print(f"Right hand size: {extent}")
        print(f"Right hand distance from origin: {distance}")

    print("===================================\n")


# Scene setup: background, reference axes, camera and lights.

def create_scene(aspect_ratio=16 / 9, show_grid=False):
    """Build the pyrender scene that the hand meshes are later added to.

    The scene has a light-grey background, ambient light, a small coordinate
    axis marker at the origin, one perspective camera and two directional
    lights.

    Args:
        aspect_ratio: Value passed to the camera's ``aspectRatio``. The
            default of 16/9 matches the script's default 1920x1080 output.
            Per the pyrender documentation, ``None`` makes the camera use
            the viewport's aspect ratio instead.
        show_grid: When true, also add a thin line grid in the Y=0 plane.
            Off by default, which matches the previous behavior where the
            grid code was present but disabled.

    Returns:
        The populated ``pyrender.Scene``.
    """
    scene = pyrender.Scene(bg_color=[0.85, 0.85, 0.85], ambient_light=[0.4, 0.4, 0.4])

    def create_grid_lines(size=5, spacing=0.1):
        """Create a grid of thin cylinders in the XZ plane (Y=0)."""
        half_extent = size * spacing
        lines = []
        for i in range(-size, size + 1):
            offset = i * spacing
            # Line parallel to the X axis at z = offset.
            lines.append(trimesh.creation.cylinder(
                radius=0.001,
                segment=[[-half_extent, 0, offset], [half_extent, 0, offset]],
            ))
            # Line parallel to the Z axis at x = offset.
            lines.append(trimesh.creation.cylinder(
                radius=0.001,
                segment=[[offset, 0, -half_extent], [offset, 0, half_extent]],
            ))

        grid_mesh = trimesh.util.concatenate(lines)
        grid_mesh.visual.vertex_colors = [150, 150, 150, 100]
        return grid_mesh

    if show_grid:
        grid_mesh = create_grid_lines(size=5, spacing=0.1)
        scene.add(pyrender.Mesh.from_trimesh(grid_mesh, smooth=False))

    # Coordinate axes (Red=X, Green=Y, Blue=Z)
    axis_mesh = trimesh.creation.axis(origin_size=0.01, axis_radius=0.005, axis_length=0.1)
    render_axis = pyrender.Mesh.from_trimesh(axis_mesh, smooth=False)
    scene.add(render_axis)

    # Camera-to-world transform. The upper-left 3x3 block is a rotation of
    # 150 degrees about the world X axis (cos = -0.866, sin = 0.5); its
    # columns are the camera's local X/Y/Z axes in world coordinates. The
    # last column places the camera at (0, 1, 2).
    # NOTE(review): pyrender cameras look along their local -Z axis, which
    # for this matrix is (0, 0.5, 0.866) in world space. Confirm that the
    # hands are actually in front of the camera with this pose.
    camera_pose = np.array([
        [1.0,  0.0,  0.0,  0.0],
        [0.0,  -0.866, -0.5, 1],
        [0.0, 0.5, -0.866, 2],
        [0.0,  0.0,  0.0,  1.0],
    ])

    camera = pyrender.PerspectiveCamera(yfov=np.pi / 4.0, aspectRatio=aspect_ratio, znear=0.001, zfar=10.0)
    scene.add(camera, pose=camera_pose)

    # Key light shares the camera pose, so it shines along the view direction.
    light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=3.0)
    scene.add(light, pose=camera_pose)

    # Weaker, slightly blue second light. The 3x3 block is a rotation of
    # 135 degrees about the world Y axis (cos = -0.707, sin = 0.707).
    light2_pose = np.array([
        [-0.707, 0.0, 0.707, 0.3],
        [0.0, 1.0, 0.0, 0.4],
        [-0.707, 0.0, -0.707, 0.2],
        [0.0, 0.0, 0.0, 1.0],
    ])
    light2 = pyrender.DirectionalLight(color=[0.8, 0.8, 1.0], intensity=1.5)
    scene.add(light2, pose=light2_pose)

    return scene


def normalize_hand_vertices(vertices, target_center=np.array([0, 0.1, 0]), target_scale=1.0):
    """Re-center and re-scale every frame of a hand vertex sequence.

    Each frame is processed independently. Its vertices are shifted so that
    their mean sits at the origin, uniformly scaled so that the diagonal of
    the axis-aligned bounding box has length ``0.2 * target_scale`` (scaling
    is skipped when that diagonal is zero), and finally translated by
    ``target_center``.

    Args:
        vertices: Iterable of per-frame vertex arrays, or ``None``.
        target_center: Offset added to every centered, scaled frame.
        target_scale: Multiplier applied to the 0.2 reference diagonal.

    Returns:
        A NumPy array stacking the normalized frames. ``None`` or an empty
        input is returned unchanged.
    """
    if vertices is None or len(vertices) == 0:
        return vertices

    def _normalize_frame(frame):
        diagonal = np.linalg.norm(frame.max(axis=0) - frame.min(axis=0))
        recentered = frame - frame.mean(axis=0)
        if diagonal > 0:
            recentered = recentered * ((0.2 * target_scale) / diagonal)
        return recentered + target_center

    return np.array([_normalize_frame(frame) for frame in vertices])

def export_to_video(faces, left_vertices, right_vertices, output_path, width, height, fps, num_frames):
    """Render the animation offscreen and save it as an MP4 video.

    Args:
        faces: Triangle index array used for both hand meshes.
        left_vertices: Per-frame vertex arrays of the left hand, or ``None``.
        right_vertices: Per-frame vertex arrays of the right hand, or ``None``.
        output_path: Destination video file; missing parent directories are
            created.
        width: Frame width in pixels.
        height: Frame height in pixels.
        fps: Frame rate written to the video.
        num_frames: Number of frames to render.

    The video writer and the offscreen renderer are always released, even
    when rendering fails part-way. If the writer cannot be opened, an error
    is printed and nothing is rendered.
    """
    print(f"\nExporting animation to video: {output_path}")

    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not video_writer.isOpened():
        # Without this check every write() is silently dropped and the
        # function would report success for a file that was never written.
        video_writer.release()
        print(f"Error: Could not open video writer for '{output_path}'.")
        return

    hands = (
        ('left', left_vertices, LEFT_HAND_COLOR),
        ('right', right_vertices, RIGHT_HAND_COLOR),
    )
    # Scene node currently showing each hand (None when not in the scene).
    nodes = {'left': None, 'right': None}

    renderer = None
    try:
        scene = create_scene()
        renderer = pyrender.OffscreenRenderer(viewport_width=width, viewport_height=height)

        for frame_idx in tqdm(range(num_frames), desc="Rendering frames"):
            for side, vertices, color in hands:
                if nodes[side] is not None:
                    scene.remove_node(nodes[side])
                    # Forget the handle so a hand without data for this frame
                    # is not removed a second time on the next iteration.
                    nodes[side] = None

                if vertices is None or frame_idx >= len(vertices):
                    continue

                mesh = trimesh.Trimesh(vertices=vertices[frame_idx], faces=faces)
                mesh.visual.vertex_colors = color
                nodes[side] = scene.add(pyrender.Mesh.from_trimesh(mesh, smooth=True))

            color_image, _ = renderer.render(scene)
            # pyrender returns RGB, OpenCV expects BGR.
            video_writer.write(cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR))
    finally:
        video_writer.release()
        if renderer is not None:
            renderer.delete()

    print(f"Video saved successfully to: {output_path}")
    print(f"Video properties: {width}x{height} @ {fps}fps, {num_frames} frames")

def view_interactively(faces, left_vertices, right_vertices, fps, num_frames):
    """Open an interactive pyrender viewer and loop the animation.

    The viewer runs in its own thread. This function replaces the hand meshes
    once per frame while holding the viewer's render lock, and returns when
    the viewer window is closed.

    Args:
        faces: Triangle index array used for both hand meshes.
        left_vertices: Per-frame vertex arrays of the left hand, or ``None``.
        right_vertices: Per-frame vertex arrays of the right hand, or ``None``.
        fps: Target playback rate in frames per second.
        num_frames: Number of frames after which playback wraps around.
    """
    # Computed first so an invalid fps fails before a viewer window opens.
    frame_time = 1.0 / fps

    scene = create_scene()

    def build_render_mesh(vertices, index, color):
        # Builds the renderable mesh for one hand at one frame.
        mesh = trimesh.Trimesh(vertices=vertices[index], faces=faces)
        mesh.visual.vertex_colors = color
        return pyrender.Mesh.from_trimesh(mesh, smooth=True)

    # One entry per hand that has data, seeded with the frame-0 mesh.
    hands = []
    for vertices, color in ((left_vertices, LEFT_HAND_COLOR), (right_vertices, RIGHT_HAND_COLOR)):
        if vertices is None or len(vertices) == 0:
            continue
        node = scene.add(build_render_mesh(vertices, 0, color))
        hands.append({'vertices': vertices, 'color': color, 'node': node})

    viewer = pyrender.Viewer(scene, use_raymond_lighting=True, run_in_thread=True)

    print("\nStarting interactive animation viewer...")
    print("Close the viewer window to exit.")
    print(f"Playing {num_frames} frames at {fps} FPS")

    frame_idx = 0

    while viewer.is_active:
        start_time = time.time()

        # The lock keeps the viewer thread from rendering a half-updated scene.
        with viewer.render_lock:
            try:
                for hand in hands:
                    if frame_idx >= len(hand['vertices']):
                        continue
                    # Build the new mesh before touching the scene, so a
                    # failure here leaves the previous mesh displayed.
                    new_mesh = build_render_mesh(hand['vertices'], frame_idx, hand['color'])
                    if hand['node'] is not None:
                        scene.remove_node(hand['node'])
                        hand['node'] = None
                    hand['node'] = scene.add(new_mesh)
            except Exception as e:
                # Best effort: report the failure and keep the viewer running.
                print(f"Error updating frame {frame_idx}: {e}")

        # Report the frame just drawn (1-based) before advancing.
        print(f"Frame: {frame_idx + 1}/{num_frames}", end='\r')  # Progress indicator
        frame_idx = (frame_idx + 1) % num_frames

        elapsed = time.time() - start_time
        sleep_time = frame_time - elapsed
        if sleep_time > 0:
            time.sleep(sleep_time)

def main():
    """Command-line entry point: load, normalize and display or export both hands.

    Steps:
        1. Parse and validate the command-line options.
        2. Load the per-frame parameters from ``<sequence_path>/mano/left``
           and ``<sequence_path>/mano/right``.
        3. Create a ``ManoLayer`` for every hand that has data.
        4. Pre-compute the vertices, then normalize each hand's position and
           size.
        5. Export a video (``--export_video``) or open the interactive viewer.

    Errors are reported with ``print`` and end the function early.
    """
    parser = argparse.ArgumentParser(description='Dual Hand MANO Animation Viewer (using manopth)')
    parser.add_argument('--sequence_path', type=str, default=DEFAULT_SEQUENCE_PATH, help='Path to the parent directory containing mano/left and mano/right subdirectories')
    parser.add_argument('--model_path', type=str, default=DEFAULT_MANO_PATH, help='Path to the directory containing MANO model files (e.g., MANO_RIGHT.pkl)')

    parser.add_argument('--export_video', action='store_true', help='Export animation to video instead of using the interactive viewer')
    parser.add_argument('--output', type=str, default=DEFAULT_OUTPUT_VIDEO, help='Output video path')
    parser.add_argument('--width', type=int, default=1920, help='Video width')
    parser.add_argument('--height', type=int, default=1080, help='Video height')
    parser.add_argument('--fps', type=int, default=30, help='Animation and video FPS')
    args = parser.parse_args()

    # Reject values that would otherwise fail later (division by zero for
    # fps, invalid frame size for the renderer and the video writer).
    for option in ('width', 'height', 'fps'):
        if getattr(args, option) <= 0:
            parser.error(f"--{option} must be a positive integer")

    # Load sequences for both hands
    left_mano_sequence = load_sequence(os.path.join(args.sequence_path, 'mano', 'left'))
    right_mano_sequence = load_sequence(os.path.join(args.sequence_path, 'mano', 'right'))

    if not left_mano_sequence and not right_mano_sequence:
        print("Error: No valid sequence data found for either hand. Exiting.")
        return

    # Only create a model for a hand that actually has frames.
    models = {}
    try:
        for side, sequence in (('left', left_mano_sequence), ('right', right_mano_sequence)):
            if sequence:
                models[side] = ManoLayer(mano_root=args.model_path, side=side, use_pca=False, ncomps=45, flat_hand_mean=True)
    except Exception as e:
        print(f"Error creating MANO models with manopth: {e}")
        return

    if not models:
        print("Error: No models could be created. Check paths and data.")
        return

    # NOTE(review): the faces of one model (left preferred) are used for both
    # hands; confirm that left and right MANO models share the same triangle
    # indexing and winding.
    mano_model = models.get('left', models.get('right'))
    if hasattr(mano_model, 'th_faces'):
        faces = mano_model.th_faces.cpu().numpy()
    elif hasattr(mano_model, 'faces'):
        faces = mano_model.faces.cpu().numpy()
    else:
        print("Warning: Could not find faces attribute in MANO model")
        try:
            faces = mano_model.smpl_layer.faces.cpu().numpy()
        except Exception as e:
            # Not a bare except: Ctrl-C and SystemExit must still propagate.
            print(f"Error: Unable to extract faces from MANO model ({e})")
            return

    # Pre-compute vertices for both hands
    left_vertices = precompute_all_vertices(models.get('left'), left_mano_sequence)
    right_vertices = precompute_all_vertices(models.get('right'), right_mano_sequence)
    debug_hand_positions(left_vertices, right_vertices)

    # Normalize hand positions and scales; the hands are placed side by side.
    if left_vertices is not None:
        left_vertices = normalize_hand_vertices(left_vertices, target_center=np.array([-0.15, 0, 0.1]))
    if right_vertices is not None:
        right_vertices = normalize_hand_vertices(right_vertices, target_center=np.array([0.15, 0, 0.1]))

    print("After normalization:")
    debug_hand_positions(left_vertices, right_vertices)

    # Render only as many frames as the shortest available sequence has.
    frame_counts = [len(v) for v in (left_vertices, right_vertices) if v is not None]
    num_frames = min(frame_counts, default=0)

    if num_frames == 0:
        print("No frames to render. Exiting.")
        return

    if args.export_video:
        export_to_video(faces, left_vertices, right_vertices, args.output, args.width, args.height, args.fps, num_frames)
    else:
        view_interactively(faces, left_vertices, right_vertices, args.fps, num_frames)

# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()