From 5d34f5284912bb6a58ecdbd0b4fbe0e0a24bd87b Mon Sep 17 00:00:00 2001 From: wjr21 Date: Fri, 28 Nov 2025 23:34:45 +0000 Subject: [PATCH 1/7] Actually run the cell tree construction in conversion... --- tools/convert_to_gridder_format.py | 256 ++++++++++++++++++++++++++--- 1 file changed, 236 insertions(+), 20 deletions(-) diff --git a/tools/convert_to_gridder_format.py b/tools/convert_to_gridder_format.py index 6e6dff7..41476ef 100755 --- a/tools/convert_to_gridder_format.py +++ b/tools/convert_to_gridder_format.py @@ -3,11 +3,35 @@ Convert HDF5 simulation snapshots to gridder-compatible format. This script converts HDF5 files with arbitrary key names to the format -expected by the parent_gridder code. It supports both serial and MPI -execution for processing large files efficiently. - -In MPI mode, each rank writes a separate file and a virtual HDF5 file -is created to provide a unified view of the data. +expected by the parent_gridder code. The conversion includes: + +1. Reading particle coordinates and masses from arbitrary HDF5 keys +2. Creating a spatial cell structure for efficient particle lookup +3. Sorting particles by cell index +4. Writing output in the standardized gridder format + +The gridder requires a hierarchical cell structure for spatial indexing. +This script creates a regular grid of cells (default: 16x16x16 = 4096 cells) +and assigns each particle to a cell based on its position. Particles are +then sorted by cell index before being written to the output file. 
+ +Output HDF5 Structure: + /Header # Simulation metadata (copied if --copy-header) + /PartType1 + /Coordinates # Particle positions (sorted by cell) + /Masses # Particle masses (sorted by cell) + /Cells + /Meta-data + dimension: [cdim, cdim, cdim] # Number of cells per dimension + size: [dx, dy, dz] # Physical size of each cell + /Counts + /PartType1 # Number of particles in each cell + /OffsetsInFile + /PartType1 # Starting index for particles in each cell + +Supports both serial and MPI execution for processing large files efficiently. +In MPI mode, each rank writes a separate file and a virtual HDF5 file is +created to provide a unified view of the data. """ import argparse @@ -31,18 +55,25 @@ def parse_args(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - # Serial conversion + # Serial conversion (BoxSize read from Header) python convert_to_gridder_format.py input.hdf5 output.hdf5 \\ - --coordinates-key Coordinates --masses-key Masses + --coordinates-key Coordinates --masses-key Masses \\ + --copy-header # MPI conversion (creates output_rank_*.hdf5 files + virtual file) mpirun -np 4 python convert_to_gridder_format.py input.hdf5 output.hdf5 \\ - --coordinates-key PartType1/Coordinates --masses-key PartType1/Masses + --coordinates-key PartType1/Coordinates --masses-key PartType1/Masses \\ + --copy-header - # With custom particle type prefix + # Specify BoxSize and cell dimension manually python convert_to_gridder_format.py input.hdf5 output.hdf5 \\ --coordinates-key MyCoords --masses-key MyMasses \\ - --particle-type PartType1 + --boxsize 100.0 100.0 100.0 --cdim 32 + + # With custom particle type prefix and finer cell grid + python convert_to_gridder_format.py input.hdf5 output.hdf5 \\ + --coordinates-key DarkMatter/Positions --masses-key DarkMatter/Masses \\ + --particle-type PartType1 --cdim 64 --copy-header """ ) @@ -86,6 +117,20 @@ def parse_args(): help="HDF5 key for header group in input file (default: Header)" ) + 
parser.add_argument( + "--cdim", + type=int, + default=16, + help="Number of cells per dimension for spatial indexing (default: 16)" + ) + + parser.add_argument( + "--boxsize", + type=float, + nargs=3, + help="Box size [X, Y, Z] in same units as coordinates. If not provided, will try to read from Header/BoxSize" + ) + return parser.parse_args() @@ -134,10 +179,130 @@ def get_particle_count(input_file, masses_key, rank, size): return total_particles, start_idx, count +def get_boxsize(input_file, args, rank=0): + """ + Get box size from command line args or Header in input file. + + Returns: + boxsize: numpy array [X, Y, Z] box dimensions + """ + if args.boxsize is not None: + boxsize = np.array(args.boxsize, dtype=np.float64) + if rank == 0: + print(f" Using provided BoxSize: {boxsize}") + return boxsize + + # Try to read from Header + with h5py.File(input_file, 'r') as f: + if args.header_key in f and 'BoxSize' in f[args.header_key].attrs: + boxsize = np.array(f[args.header_key].attrs['BoxSize'], dtype=np.float64) + if boxsize.shape == (): # Scalar -> cubic box + boxsize = np.array([boxsize, boxsize, boxsize], dtype=np.float64) + if rank == 0: + print(f" Read BoxSize from {args.header_key}/BoxSize: {boxsize}") + return boxsize + + raise ValueError( + "BoxSize not found in input file and not provided via --boxsize. " + "Please specify --boxsize X Y Z or ensure Header/BoxSize exists in input file." + ) + + +def create_cell_structure(coords, masses, boxsize, cdim): + """ + Create cell structure for spatial indexing. + + This function: + 1. Assigns each particle to a cell based on position + 2. Counts particles per cell + 3. Sorts particles by cell index + 4. 
Computes cumulative offsets + + Args: + coords: Particle coordinates (N, 3) + masses: Particle masses (N,) + boxsize: Box dimensions [X, Y, Z] + cdim: Number of cells per dimension + + Returns: + sorted_coords: Coordinates sorted by cell index + sorted_masses: Masses sorted by cell index + cell_counts: Number of particles in each cell + cell_offsets: Starting index for each cell + cell_size: Physical size of cells [X, Y, Z] + """ + npart = coords.shape[0] + ncells = cdim ** 3 + + # Calculate cell size + cell_size = boxsize / cdim + + # Assign particles to cells + # cell_id = k + j*cdim + i*cdim*cdim (row-major order) + i = np.floor(coords[:, 0] / cell_size[0]).astype(np.int32) + j = np.floor(coords[:, 1] / cell_size[1]).astype(np.int32) + k = np.floor(coords[:, 2] / cell_size[2]).astype(np.int32) + + # Clamp to cell bounds (handle particles exactly at box edge) + i = np.clip(i, 0, cdim - 1) + j = np.clip(j, 0, cdim - 1) + k = np.clip(k, 0, cdim - 1) + + # Compute cell index + cell_indices = k + j * cdim + i * cdim * cdim + + # Count particles per cell + cell_counts = np.bincount(cell_indices, minlength=ncells).astype(np.int32) + + # Get sorting indices + sort_idx = np.argsort(cell_indices) + + # Sort particles by cell + sorted_coords = coords[sort_idx] + sorted_masses = masses[sort_idx] + + # Compute cumulative offsets + cell_offsets = np.zeros(ncells, dtype=np.int64) + cell_offsets[1:] = np.cumsum(cell_counts[:-1]) + + return sorted_coords, sorted_masses, cell_counts, cell_offsets, cell_size + + +def write_cell_structure(f_out, cell_counts, cell_offsets, cdim, cell_size): + """ + Write cell structure to HDF5 file. 
+ + Args: + f_out: Output HDF5 file handle + cell_counts: Particle counts per cell + cell_offsets: Cumulative offsets per cell + cdim: Number of cells per dimension + cell_size: Physical size of cells [X, Y, Z] + """ + # Create Cells group + cells_group = f_out.create_group('Cells') + + # Create Counts subgroup + counts_group = cells_group.create_group('Counts') + counts_group.create_dataset('PartType1', data=cell_counts, compression='gzip', compression_opts=4) + + # Create OffsetsInFile subgroup + offsets_group = cells_group.create_group('OffsetsInFile') + offsets_group.create_dataset('PartType1', data=cell_offsets, compression='gzip', compression_opts=4) + + # Create Meta-data subgroup + metadata_group = cells_group.create_group('Meta-data') + metadata_group.attrs['dimension'] = np.array([cdim, cdim, cdim], dtype=np.int32) + metadata_group.attrs['size'] = cell_size + + def convert_file_serial(args): """Convert file in serial mode (single output file).""" print(f"Converting {args.input_file} -> {args.output_file}") + # Get box size + boxsize = get_boxsize(args.input_file, args) + with h5py.File(args.input_file, 'r') as f_in: # Check input keys exist if args.coordinates_key not in f_in: @@ -166,25 +331,38 @@ def convert_file_serial(args): print(f" Coordinates shape: {coords.shape}") print(f" Masses shape: {masses.shape}") + # Create cell structure + print(f" Creating cell structure (cdim={args.cdim})...") + sorted_coords, sorted_masses, cell_counts, cell_offsets, cell_size = \ + create_cell_structure(coords, masses, boxsize, args.cdim) + + ncells = args.cdim ** 3 + non_empty = np.count_nonzero(cell_counts) + print(f" Cells: {ncells} total, {non_empty} non-empty") + print(f" Cell size: {cell_size}") + # Create output file with h5py.File(args.output_file, 'w') as f_out: - # Create particle type group + # Create particle type group with sorted data pt_group = f_out.create_group(args.particle_type) - # Write coordinates and masses + # Write sorted coordinates and 
masses pt_group.create_dataset( 'Coordinates', - data=coords, + data=sorted_coords, compression='gzip', compression_opts=4 ) pt_group.create_dataset( 'Masses', - data=masses, + data=sorted_masses, compression='gzip', compression_opts=4 ) + # Write cell structure + write_cell_structure(f_out, cell_counts, cell_offsets, args.cdim, cell_size) + # Copy header if requested if args.copy_header and args.header_key in f_in: print(f" Copying header from {args.header_key}") @@ -194,7 +372,13 @@ def convert_file_serial(args): def convert_file_mpi(args, comm, rank, size): - """Convert file in MPI mode (one file per rank + virtual file).""" + """ + Convert file in MPI mode (one file per rank + virtual file). + + Note: In MPI mode, particles are sorted by cell within each rank's file, + but not globally across all ranks. This is acceptable for the gridder + which can handle per-rank cell structures. + """ # Generate output filenames base_name = args.output_file.replace('.hdf5', '') rank_file = f"{base_name}_rank_{rank}.hdf5" @@ -205,6 +389,9 @@ def convert_file_mpi(args, comm, rank, size): args.input_file, args.masses_key, rank, size ) + # Get box size (all ranks need this) + boxsize = get_boxsize(args.input_file, args, rank) + if rank == 0: print(f"Converting {args.input_file} -> {base_name}_rank_*.hdf5") print(f" Total particles: {total_particles}") @@ -232,23 +419,35 @@ def convert_file_mpi(args, comm, rank, size): f"coordinates shape {coords.shape}" ) + # Create cell structure for this rank's particles + print(f"Rank {rank}: Creating cell structure (cdim={args.cdim})...") + sorted_coords, sorted_masses, cell_counts, cell_offsets, cell_size = \ + create_cell_structure(coords, masses, boxsize, args.cdim) + + ncells = args.cdim ** 3 + non_empty = np.count_nonzero(cell_counts) + print(f"Rank {rank}: Cells: {ncells} total, {non_empty} non-empty in this rank") + # Write to rank-specific file with h5py.File(rank_file, 'w') as f_out: pt_group = f_out.create_group(args.particle_type) 
pt_group.create_dataset( 'Coordinates', - data=coords, + data=sorted_coords, compression='gzip', compression_opts=4 ) pt_group.create_dataset( 'Masses', - data=masses, + data=sorted_masses, compression='gzip', compression_opts=4 ) + # Write cell structure for this rank + write_cell_structure(f_out, cell_counts, cell_offsets, args.cdim, cell_size) + # Copy header to first rank's file if rank == 0 and args.copy_header and args.header_key in f_in: f_in.copy(args.header_key, f_out, 'Header') @@ -262,13 +461,13 @@ def convert_file_mpi(args, comm, rank, size): print(f"Rank 0: Creating virtual file {virtual_file}") create_virtual_file( base_name, size, total_particles, args.particle_type, - args.copy_header + args.copy_header, args.cdim, boxsize ) print(f"✓ Conversion complete: {virtual_file}") def create_virtual_file(base_name, nranks, total_particles, particle_type, - include_header): + include_header, cdim, boxsize): """ Create a virtual HDF5 file that combines all rank files. @@ -278,6 +477,8 @@ def create_virtual_file(base_name, nranks, total_particles, particle_type, total_particles: Total number of particles across all ranks particle_type: Particle type group name include_header: Whether to include header from rank 0 + cdim: Number of cells per dimension + boxsize: Box dimensions [X, Y, Z] """ virtual_file = f"{base_name}.hdf5" @@ -306,14 +507,25 @@ def create_virtual_file(base_name, nranks, total_particles, particle_type, dtype=dtype ) - # Get particle counts from each rank file + # Get particle counts from each rank file and merge cell structures particle_counts = [] + ncells = cdim ** 3 + merged_cell_counts = np.zeros(ncells, dtype=np.int32) + for i in range(nranks): rank_file = f"{base_name}_rank_{i}.hdf5" with h5py.File(rank_file, 'r') as f: npart = f[f'{particle_type}/Masses'].shape[0] particle_counts.append(npart) + # Merge cell counts from each rank + if 'Cells/Counts/PartType1' in f: + merged_cell_counts += f['Cells/Counts/PartType1'][:] + + # Recompute 
global offsets from merged counts + merged_cell_offsets = np.zeros(ncells, dtype=np.int64) + merged_cell_offsets[1:] = np.cumsum(merged_cell_counts[:-1]) + # Map each rank's data into the virtual datasets offset = 0 for i in range(nranks): @@ -346,6 +558,10 @@ def create_virtual_file(base_name, nranks, total_particles, particle_type, pt_group.create_virtual_dataset('Coordinates', coord_layout) pt_group.create_virtual_dataset('Masses', mass_layout) + # Write merged cell structure + cell_size = boxsize / cdim + write_cell_structure(f, merged_cell_counts, merged_cell_offsets, cdim, cell_size) + # Copy header from rank 0 file if requested if include_header: rank0_file = f"{base_name}_rank_0.hdf5" From 09089fbfdf92ddc200859285e2a7c631a9f0bc97 Mon Sep 17 00:00:00 2001 From: wjr21 Date: Fri, 28 Nov 2025 23:35:04 +0000 Subject: [PATCH 2/7] Document conversion --- README.md | 20 +++ docs/conversion.md | 308 ++++++++++++++++++++++++++++++++++++ docs/index.md | 2 + docs/quickstart.md | 1 + test_conversion_manual.hdf5 | Bin 0 -> 20480 bytes 5 files changed, 331 insertions(+) create mode 100644 docs/conversion.md create mode 100644 test_conversion_manual.hdf5 diff --git a/README.md b/README.md index b8fb642..328085e 100644 --- a/README.md +++ b/README.md @@ -203,6 +203,26 @@ The gridder produces HDF5 files containing: - Kernel radius information - Processing metadata and timestamps +## Converting Arbitrary Snapshots + +Use the conversion tool to process snapshots from any simulation code: + +```bash +python tools/convert_to_gridder_format.py input.hdf5 output.hdf5 \ + --coordinates-key PartType1/Coordinates \ + --masses-key PartType1/Masses \ + --copy-header +``` + +**Key features:** +- Handles arbitrary HDF5 dataset names +- Creates required cell structure automatically +- Supports both cubic and non-cubic simulation boxes +- MPI parallelization for large files +- Configurable cell grid resolution (default: 16×16×16) + +**See [Conversion Guide](docs/conversion.md) for detailed 
documentation.** + ## Generating Test Data Use the included Python script to create test snapshots: diff --git a/docs/conversion.md b/docs/conversion.md new file mode 100644 index 0000000..de00af3 --- /dev/null +++ b/docs/conversion.md @@ -0,0 +1,308 @@ +# Converting Arbitrary Snapshots + +The `convert_to_gridder_format.py` tool converts HDF5 simulation snapshots from arbitrary formats into the format required by the gridder. This allows you to process simulations from any code (not just SWIFT) with the FLARES-2 Gridder. + +## Overview + +The conversion process: + +1. Reads particle coordinates and masses from arbitrary HDF5 dataset keys +2. Creates a spatial cell structure for efficient particle lookup +3. Sorts particles by cell index +4. Writes output in the standardized gridder format + +The gridder requires a hierarchical cell structure for spatial indexing. The conversion script creates a regular grid of cells (default: 16×16×16 = 4,096 cells) and assigns each particle to a cell based on its position. 
+ +## Requirements + +- Python 3.7+ +- h5py +- numpy +- mpi4py (optional, for MPI mode) + +## Basic Usage + +```bash +python tools/convert_to_gridder_format.py input.hdf5 output.hdf5 \ + --coordinates-key PartType1/Coordinates \ + --masses-key PartType1/Masses \ + --copy-header +``` + +### Required Arguments + +- `input_file`: Path to input HDF5 snapshot +- `output_file`: Path to output HDF5 file +- `--coordinates-key`: HDF5 dataset path for particle coordinates +- `--masses-key`: HDF5 dataset path for particle masses + +### Optional Arguments + +- `--copy-header`: Copy Header group from input to output (recommended) +- `--header-key`: HDF5 key for header group (default: `Header`) +- `--particle-type`: Output particle type group name (default: `PartType1`) +- `--cdim`: Number of cells per dimension for spatial indexing (default: **16**) +- `--boxsize X Y Z`: Manually specify box size (if not in Header) + +## Examples + +### SWIFT-like Format + +If your snapshot already uses SWIFT-style keys: + +```bash +python tools/convert_to_gridder_format.py \ + swift_snapshot.hdf5 \ + gridder_snapshot.hdf5 \ + --coordinates-key PartType1/Coordinates \ + --masses-key PartType1/Masses \ + --copy-header +``` + +### Custom Format with Non-Standard Keys + +For simulations with different naming conventions: + +```bash +python tools/convert_to_gridder_format.py \ + gadget_snapshot.hdf5 \ + gridder_snapshot.hdf5 \ + --coordinates-key DarkMatter/Positions \ + --masses-key DarkMatter/Mass \ + --copy-header \ + --cdim 32 +``` + +### Without Header (Manual BoxSize) + +If your input file doesn't have a Header group: + +```bash +python tools/convert_to_gridder_format.py \ + custom_snapshot.hdf5 \ + gridder_snapshot.hdf5 \ + --coordinates-key Coordinates \ + --masses-key Masses \ + --boxsize 100.0 100.0 100.0 \ + --cdim 16 +``` + +### Non-Cubic Simulation Box + +For simulations with different box sizes in each dimension: + +```bash +python tools/convert_to_gridder_format.py \ + 
noncubic_snapshot.hdf5 \ + gridder_snapshot.hdf5 \ + --coordinates-key Coords \ + --masses-key Mass \ + --boxsize 100.0 200.0 150.0 \ + --copy-header +``` + +### Fine Cell Grid for Large Simulations + +For very large simulations, increase `cdim` for better spatial indexing: + +```bash +python tools/convert_to_gridder_format.py \ + large_simulation.hdf5 \ + gridder_snapshot.hdf5 \ + --coordinates-key PartType1/Coordinates \ + --masses-key PartType1/Masses \ + --cdim 64 \ + --copy-header +``` + +## MPI Mode + +For very large snapshots, use MPI to parallelize the conversion: + +```bash +mpirun -np 4 python tools/convert_to_gridder_format.py \ + huge_snapshot.hdf5 \ + gridder_snapshot.hdf5 \ + --coordinates-key PartType1/Coordinates \ + --masses-key PartType1/Masses \ + --copy-header \ + --cdim 32 +``` + +**MPI mode creates:** +- Per-rank files: `gridder_snapshot_rank_0.hdf5`, `gridder_snapshot_rank_1.hdf5`, ... +- Virtual file: `gridder_snapshot.hdf5` (combines all ranks) + +Use the virtual file (`gridder_snapshot.hdf5`) as input to the gridder. + +## Output HDF5 Structure + +The conversion script produces files with this structure: + +``` +/Header # Simulation metadata (if --copy-header used) + BoxSize: [X, Y, Z] # Box dimensions + NumPart_Total: [0, N, 0, 0, 0, 0] # Total particle counts + Redshift: float # Redshift value + +/PartType1 # Dark matter particles + /Coordinates # Particle positions (sorted by cell) + shape: (N, 3) + dtype: float64 + /Masses # Particle masses (sorted by cell) + shape: (N,) + dtype: float64 + +/Cells # Spatial indexing structure + /Meta-data + dimension: [cdim, cdim, cdim] # Number of cells per dimension + size: [dx, dy, dz] # Physical size of each cell + /Counts + /PartType1 # Number of particles in each cell + shape: (cdim³,) + dtype: int32 + /OffsetsInFile + /PartType1 # Starting index for particles in each cell + shape: (cdim³,) + dtype: int64 +``` + +## Choosing `cdim` + +The `cdim` parameter controls the cell grid resolution. 
Choose based on your simulation size: + +| Simulation Size | Recommended cdim | Total Cells | Use Case | +|----------------|------------------|-------------|----------| +| < 1M particles | 8-16 | 512-4,096 | Small test simulations | +| 1-10M particles | 16-32 | 4,096-32,768 | Medium simulations | +| 10-100M particles | 32-64 | 32,768-262,144 | Large simulations | +| > 100M particles | 64-128 | 262,144-2M | Very large simulations | + +**Guidelines:** +- Higher `cdim` → More cells → Better spatial locality → Faster gridder +- But: Very high `cdim` with sparse distributions may waste memory +- For uniform distributions: `cdim ≈ (nparticles^(1/3)) / 10` is a good starting point + +## Input File Requirements + +Your input HDF5 file must contain: + +**Required:** +- Particle coordinates as a (N, 3) array +- Particle masses as a (N,) array + +**Optional (but recommended):** +- `Header/BoxSize`: Box dimensions [X, Y, Z] (a scalar value is treated as a cubic box) + - If missing, use `--boxsize` argument +- `Header/NumPart_Total`: Total particle counts +- `Header/Redshift`: Redshift value + +**Not required:** +- Cell structure (created by conversion script) +- Velocities +- Particle IDs (not read or written by the conversion script) + +## Common Issues + +### Missing BoxSize + +**Error:** +``` +ValueError: BoxSize not found in input file and not provided via --boxsize +``` + +**Solution:** +Provide BoxSize manually: +```bash +--boxsize 100.0 100.0 100.0 +``` + +### Particles Outside Box + +Particles with coordinates outside `[0, BoxSize]` are assigned to the nearest edge cell (their cell index is clamped to `[0, cdim - 1]`); their coordinates are written unchanged. 
+ +### Memory Usage + +For very large simulations: +- Serial mode: Entire particle array loaded into memory +- MPI mode: Particles split across ranks, reducing per-rank memory + +## Verification + +After conversion, verify the output structure: + +```python +import h5py + +with h5py.File('gridder_snapshot.hdf5', 'r') as f: + # Check required groups + assert '/PartType1/Coordinates' in f + assert '/PartType1/Masses' in f + assert '/Cells/Counts/PartType1' in f + assert '/Cells/OffsetsInFile/PartType1' in f + + # Check cell structure + coords = f['/PartType1/Coordinates'][:] + cell_counts = f['/Cells/Counts/PartType1'][:] + + print(f"Total particles: {len(coords)}") + print(f"Particles in cells: {cell_counts.sum()}") + print(f"Non-empty cells: {(cell_counts > 0).sum()}") +``` + +## Full Workflow Example + +Complete example converting a Gadget snapshot: + +```bash +# 1. Convert Gadget snapshot to gridder format +python tools/convert_to_gridder_format.py \ + gadget_snapshot_099.hdf5 \ + gridder_input.hdf5 \ + --coordinates-key PartType1/Coordinates \ + --masses-key PartType1/Masses \ + --boxsize 100.0 100.0 100.0 \ + --cdim 32 \ + --copy-header + +# 2. Create parameter file (params.yml) +cat > params.yml << EOF +Kernels: + nkernels: 3 + kernel_radius_1: 1.0 + kernel_radius_2: 2.0 + kernel_radius_3: 5.0 + +Grid: + type: uniform + cdim: 50 + +Cosmology: + h: 0.7 + Omega_cdm: 0.25 + Omega_b: 0.05 + +Tree: + max_leaf_count: 200 + +Input: + filepath: gridder_input.hdf5 + +Output: + filepath: output/ + basename: gridded_output.hdf5 + write_masses: 1 +EOF + +# 3. Run gridder +./build/parent_gridder params.yml 8 + +# 4. 
Verify output +ls -lh output/gridded_output.hdf5 +``` + +## See Also + +- [Parameter Reference](parameters.md) - Gridder parameter file documentation +- [Quick Start](quickstart.md) - Getting started with the gridder +- [Installation Guide](installation.md) - Building the gridder diff --git a/docs/index.md b/docs/index.md index 6da3572..04d0ad4 100644 --- a/docs/index.md +++ b/docs/index.md @@ -41,12 +41,14 @@ mpirun -n 4 ./build_mpi/parent_gridder params.yml 1 - **[Getting Started](getting-started/installation.md)**: Installation, quick start, and configuration - **[Parameter Reference](getting-started/parameters.md)**: Detailed parameter file documentation - **[Performance](performance/openmp.md)**: OpenMP and MPI optimization guides +- **[Conversion Tool](conversion.md)**: Converting arbitrary simulation snapshots to gridder format ## Quick Links - [Installation Guide](getting-started/installation.md) - [Quick Start Tutorial](getting-started/quickstart.md) - [Parameter File Reference](getting-started/parameters.md) +- [Snapshot Conversion Guide](conversion.md) - [OpenMP Threading](performance/openmp.md) - [MPI Parallelization](performance/mpi.md) diff --git a/docs/quickstart.md b/docs/quickstart.md index c27e33b..538a036 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -22,6 +22,7 @@ Before starting, ensure you have: - Built the gridder (see [Installation](installation.md)) - Access to a SWIFT simulation snapshot (HDF5 format) + - **Note:** For non-SWIFT snapshots, use the [Conversion Tool](conversion.md) to create compatible HDF5 files - Basic understanding of YAML syntax ## Minimal Example diff --git a/test_conversion_manual.hdf5 b/test_conversion_manual.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..dacddcae3bc89d34208f7d13207ab69d98b49915 GIT binary patch literal 20480 zcmeHucTiN_llIJzGXn!NgnoL_raepoj#;hyhSh5Je1# zBtb+F2?|J15D-Cf&N=RVUv=xN_p4v+*8cJB*0&B-)Ayb^oO^HI=k)1*x=)ynmNp-^ 
zI5+OE4<{#%6({)j;qR}dUmu+D&z65b|JA|vm&E^)p}*uK!p{Rd?yo-_j`FLW_-p)s z_A}kNZ5s~j`Fs4|^4GuNTG)ObasF=jBmY-vKxdog-v~7S_0+Uqvg8-eek;T9gt6Tb zFINY5r=x#2={VUrIvx27SwBB&zdQaX<-fwO4$IF0Tz=v5Kf2y5GC1G#?cxJB`&{Qd3o`{2Jt{u5WW|AArs zjVoD>|CZbRAA8aL%cnZ7P4dCGlegw07ADBxJJ;OVPK9q%bNX9H$nL|2X&XurN zwWHmHeV{+ffMr45>$!}X5dZktmBp1bNNAQle9D>=3@kRiFwh)A8_uZIL|99}hpPuH zbN6pCxws`MbLZRjD^IW6fP#XS7>pOOC zNMoLGkfaDeT&$C;dO8~zpPi~PO#496v50TrGiXY}*sGdrLsH z+zuLq21ioWv?U?See67^92F+xIEE|grC|9^zK+)~x=?NY=`!1=yfA#jVEIzmDB8(R z>lfWhhb4WKaAlAJ(`fwu2On-IQuw0!a(6o_+nBQ0+rkOo3m!4@S-8RL+j!MVB`VY= z*gU!(CH{Avm_uZ1IV`YveYgHoqa?aR|TSpM<=A;|@ zsGpJX3ET8KFEP-bzxSN5oC`9x_+EIohXjK1d(Y)AXTY=(k4Df43(%HQ*X=wY2;zM# zvvq1x5S(^!SE(TZTJ6R&SKseOkT^<_ROEoN#h6!jKhB_)*Do6J-DJRBmCX@Xv z^V|sEZZU{cTvpR^kPbTK5rXAHH0U{J`mMWu0mU1v8|YFCHi2!FF<2<88i0~u9&Ulk04J-w=ei&Az z!{L~eahb9g$o+n@3*R9Uh(x(}>ZS@p%l-bFg@t&Kc62!LTwx4}O&Kv8 zjSMj1QdVBm$OTQ-AN&ryB>`s-D_gIWBos*}S~JLe5E#Tz4e?-tJWi@F8!=%rY|=7t z88#2nE=t$534@0C$>wBBpd zm?v6LyWP>5r+Vx_n-|%-b0q_$Vk$J$SF*q*`#TNoic{#i+-9DotHOY9uzPndg9%xN z1G#!uEWql&pOD-t4%V{u((}ipU}(UNRz+ZeFRZM`hiFt#n|}FujXNKZOL{+k(i4M^ z{)^J)d&IzrQ$*S9kO+L{8_Eo8??;!sTb}2o(LiQ_q8l+$g_Q_oWEHkT_Xzxk3!$eLscp zcFrN^_WA0y@e&a1slc*lm>=93lpOxo3^*Tp=5`})5^Y>zF~+6K1G|qXO17*RMYSOo zZioCCU~HJXX*Plm;i9^@2lrV)eWdy7CTDRtcHQfwf;kP4e$;Kvb)pdX%~18Ok_gne zOolP`vcUKD+mW*$22h!iQ~a9|IxySP#+d{LL`007oY*M_w4Tz_rqVnR)Qju*G{gh_ zuQqCya#2B^cbTOM)<3qD(Dx&k4uR%}$NE=G!$xJz!~36!!EC4d#GC$J^lT)pYsa@4 z#I-_x^yV}j9JhBle$k|W@qYSxyNFEm@?^P&_NHO9Qt!L)mT+-+8=IbP(wl^Wj!E24 z&7MKMQ3umF$3)=H+~z#|QwgZpCzqz|Z5}uf5m9Ia> z4wchW;*HXukVxhFj{8+&pf1)f?6rFyrN5JJk@>`cm3^sOsU#*Od8G467-059*7reu zEH;m-Pfb^syhHS7HE9c%7{C_(jxUBr1ZSo$=Sw)9JQ1SE&|wk9L2xR zIFbX7=5#BRSMWn$b#h}B8y#3p8n`c6Z~;x$Ypc7|912jG5qq#r6n^MSmP)>2K(AZk zUh_H$7^hpTI9M(U4ZHT0q*8{Ek>I|FC$}h&653%WT+Re56A`OA*GW`0eL^HPhXk32 zDh@3_PY0@jhM&(pDv(;9jU4`gjk7|^S4rlO-1f(4Jt8{xJv7) zL+s=NeCAOa>>jk z#nv(7XT--l`q1^GMW`^W1sw_H*A>zf0a@FCQ1Lhs2q(YTtP>yxti4H1@;p@N3v{ow z3}(XFXIJcd6G?z)xNK<~F@v}p 
zo&084S;0xxCyz8O0_TXmX2+r=!Pw}@CpQ;yV41P|%;UrkXP)b^^-!3wNnF=oFqjJZ zNeXRI@}i(~G;t%F4+ZWDZ+1v45d<0jr22DJEO3r-oolBrFI@O^OTye(3TTcmcYC`R zpk_jCz1fvf^wuo&p^#@2y7s201W#;0__k5~2?;uEaBpe!l92?9DG`nEDIs8Q2<5+O zG=wzjInT;G7KSCQoZ7v^LV)P{oA+kSq1tprOt2tAJIBdD+aCfT)cuBJbe{qT#}0Mw z+b;&)V$JO_i{H?k!isspX+Cf%d%V7aQw;J0vZIurXv{XE8d|mwmWUZ4ilmc&5Ez zA_3vI)K?Z2ccPHn4P$qLxgcs|+EhUY4b0~Pj^&k>qt#!R(@Aa$D1$s5B57a25IrD@t}|A`6Kp7AGDf&`(6J5X`3fDe)=T?@zgr6H^; zS@6+T9@u=dYqRfd24uXXyc1eQhtRdo;+?WYFcf)tw2xB?)JBw>gB7KqZ2AIoJYX29 zPMgUH@iAc4t&_zi>qx+{?}X{Gb2LyIok_eM%>^L~c`BqHI!N`0rq(kApd~YB&80Uv zXlT`)t&x}>AE~dgR#6>5?5>wZA|kk8j{ZW;1jmGpm1$)XSX}c!=r}8WSQPF@AK1g9 zEe5ykyg5XMiSQh;w~u>qK#gzMdtQBRc&aB@a=lX=*y%aXjVV$fD$qSum?RCPO_!rm zqNJfPW7F+ur2&-4)hyRkBn;Ap(f+Kv8mQ`->HW6Dte94KSnluROFMQZD!T{b+zQ((a@DTo^(HGUN1 z2g)|BG25(OG+o_Bm=}=-?$aTg1a^zUpp`}h=d>VPwXCTMr7< znlKz*@i^#}U@qG3*Z5W>PY8mq#BZ}=ZAI~1<||4FV<`W*7i&xTDAK->)zC8~2DqI4 zZ2@&ukRm9rPc{eKdhz$E#mISM-n2?x6z<#wvh?J zccd5xZg(MLns3qZ6{4_ayRzD6dnWkge~vxdkJ;H^kwR_-F(?}I@jU)n0@xqV(gGEQ zz**3*VYRpzXcqT%+uD%f+MBwX0X7CaaZ%elB+`UbwqMt!`LcsMr_3G+0uzq<&qpk; zpu^_}p|$-Rgh2GcQzzL93LLOY)htwI2cf1RQ%|u)R5CXlvF*GNFhJk*p%yQMJDd8( zo0EYsT~PJ71*CB;E5 z_q1u?Tx0S4frA7%cPFj+z&eYP3sZbY*E8Vta6OA7wr(v;|G;crMjs8evNxe~c26=_aW`QeMl z+GCumG|-AVcDdmV6AZdS-GqtLsLDH!t@9ljF7XZWJ@DZNt;a%ZC$aZpr0PyhtfN8k z1KG72g)}IoD=i(s@R4Mi?D6)7APC@?KDNzlp!SrdaXWV_@=ML`vDK%8{+wAX-xeC! 
zjx>*6SY!g}V9A%_K_-+f^tatvFACZo_pA>UNP$wQ$SR*fjO z{rDIaDHya{vu*kl1-QJ~zZc0#f)J52B8UqQtq$gwimE11*4yU!#oGnwNE#`Slb07_ zyA-SQZ<1kr*LiPQI{~*5iM!jr@qpP%%^0{y0`>hDEMm;1 zA+2auZOtnISXg&wVr44@9-Pv!G(>b5&Rs~3SYQBmqtXU?=oH#A96d-2Wx$l`SF>%; zg~8=g((bR1SfT2rdhK~DA*e9sGbp^pgo7!si(IgIKWanP_+r2fDPtXG>{z_|-T6S0 zqAClokU0cI#VL=i-EJ~ zU{8hE0@7=sifTRNfi;(uf+C0La6a%V&U!)!%C0pN=5mPL~}x>+XB)-8ql8d>x%QZQqZspp*|9hGR%Cxwzft zkPxV?8R^dm!a>AM-BoBA9}vzZu3BV}KqD^ZPQq;oI4o)Zuv4}kWi{=0nw*ga1FOSp zUQ)5R$ojbaOSxXO;ke^WmJha`9rJ8%I>3O`E8>rWBHB0^)vRWp^SE2e|W_i?!nU;IX>m&>1^nP<~TZI)K^#o!rJFd(U8Y zdrLHaU!V}MmhD(C-bn?W)G8hY-#QdnY#P(OniU+ccragvN`jncY>;;s9W+koWn4W@ z0xEvR)jAy!U|nXu@J?m~6&t;O64*h61nYSfoRJ9d9p}ye=!C_8`8H!>;cRf}LG_Jd zMSch*Y!V+#DMhL4T@EM~a>1x&dx%#r4o-Vpn8>W<1{dPvX)n1k#5~d18}Nw^KF_Ri z4sS4f7+N%kUnv1v{u+GL%hGVH$;pIwSQx&qwZ3?|vJWlLpI@0iDFSodrNwZ941@0m z8w{#2`x8`sYqb^;z8YWkNO2uU%bRT2a$d4S4`r#lpN9`#By;Jnt`&oX_iCi~pG6`1 z%$)N#KVHyx-=#H?%>)K5c@JYF1u}JoqxWO+pGsQLHG=LiIu&PxB)z8*`^T0B)u${F zro?J;I(-K5&*yWMV!TN#i$FnM1v@ZCC4200=THtQ#`Z9VGlM+A=)5OAD0MHHxim`x zQ%S!^$1wh8ug0Q5@dGB{9t?6(2S(7&$O1W$&jKLE(AQ0IV1k7sOQ6#ZI^cN@r@8E* zfs{%2mHKCB?nb=Ph0Vtz1Qbyf787;(5l43AUB*8A(ni6N``TA8VE$>ONenrIAM&=Dsm|q!z}c%pG$x$^ z-@axXT{BIAi|+TzmZgoNaNL)I;g1a1E&gyRTS5#9dxu7wK5)aXB(rl-I+IB0fumzp zfi$=-^9_k8r$7%eT_a()2yj&mwr01`p_gZ2+olLb8T2`9N z|APe-R5C9U3aIc}$O^{2zo1~;mD|Z5vAAB}C-rK86dakd*W?bv{Hx;}7r$8XfW+Q9 zn_71o%wM`%cjDy;>h(<+{W5^Xky}Z@Lr&u;rIGE1XNdrWvb~%%Y?lV#<=Jv~=BLoN ze$hzc9s=kPzP)d1lZ3Q80@virCE+E0k=Nn@=2z9%tm7<@fQ#wZE(Kuxa_gA|^cCX) ze?)7=HemKX;FzMY2LA$*U*Bk=wF>i72+Zq)m|v)-A~l@JiNzP^5Bm@M3c=CKTx#S| z1}Ht>M9>bKMmLD5ri(|&V9Od*Ww40^t6pxuq32D3?q+#YbH!rB9WaNqJ?U`vfTr+~ z1T3z=-OQPc#{5~MF|)KL%+Grr>9FP~1MqJ(A4uy-f?J9E$EE$#XimF%-)cTGsH&Nh z-)T|c(zLyV*pwhV@HlI$>`jHZAGbm$o-^Qct+rgxX$HivxovrqQwVZvGpE(1M$wN! 
zSAK_7CRA!n1;`fDpl>X1Tyb;^eZOMboQ|y*9J{t({O&}DML}k5C&ruBigS~a63I}^ z_EG<8p8%9l-JACRS*ulpAM*v_-Jaqq0>(!@CRV(v zS>T0tI~+nbY#@WzE3poHj4ut{&vE}l!CNHhe%0kJ#^(jSKka7WH;?k)%7{Kw!~EC% zGS4@ZPNB{i!>w~)Vp|@x}QxNVjSX6KBWsmkoc3h{23B-eArKMy+?!=c|Vg% z4nY`wSaUwpUKCol3QWe=P9xUg=NUy33&>}ua^jv%boeH0?=9KSfH&k=My-I!+Qk*7i>IefGQDh*2hPL`DqmV)@*ZRJQl~PabL6BCl1Ej znFcO!;TiAbL03NlRP&xYqu3%2B`WLk^3+Q)e3wS+J{m^Q_LXhfk~Aon z<(*KzJde&rm*h)Hii42Oy4*QF7WnF}D!aLQ2_zlz5E;}p**lOh2~jY<<+{Yo6RS_ul0oN*v^V6)BRqa5(@UZ&3G0Xmd~n>b;% z6%{6VG&NfGA@y}9Q%>K*ftOFUF4tToDsnt`@FvFssvlAHG2clBBHvkghekn&FVlYP z+d#)|S0HZ3UK;RL?c3*Oj`7yoFu3rR75om}%Mr8Xho(=fitfac;q?ilz!6V2xcK!| zkIobYD5t-ZY;6VMv}xtVNs=fOt~5F~WB>=lh)zQZUe= z!{Bn8M0Jz{=8@KPkli*AuOA=`RwtkOOSIGBJEiAh@S-%x2fL>z5r{Cob#e8Zf-*Fo zd0@pEe>V7Z*8is2V-dKul|`oNuY3zrQ8^r2za5`;nzI{BqUp8^1$C)3U_3I7W2P`6 zG$(KykwXLw`D;q&j?W?MvCsD#O4xvt>-Ia%@M83SZfDLqT?WudhBrm(Mv-mgslDd} z$RIeKd_S?D4N@}M?MqgQf_}c=u?T)~SR61=*nX1+hw20;g3e(0HmT^}u>3WO6VPt@ z(m8=B7gu^va14;X^?9v{FCE;5Ao_F%8Ln8Z?9*eTz{6yp+?iYA7!Q1$xlUjn9dWqH z{sFswP7GVcy0?OGA~b#1(?otKQjxrunNI>)-5u^-{NGX8D*1q^=vnl5v9IpfSt_*i z-tKGi??sQlH3)8}QGk~9Y&b8D1w{L;m5Y`MfZ%$YJ4YxSz?HeeM!}00S}(?=a=ep- zvistJeD67+UNWzE?x83MX3Sm6=N1N)jqjO@5#kW*Tzld&mdB`4aVrZmqyp|FpgAU~1=~+3)it%pi$hXSLV86ccM7dWB`tq{K>_dYvOwGQ}Om4>3ft-!8 z#<|=e!fqqF49nX+&}d8D%0Gf!-V6Ba@^m9*TS3M{ERTaT3d#RmB@8wd3s*OXVSc=$ zLGe9DB4jue=ox3QfYAHZAIj}$aCwpNj&)WVUO%j*z9^Ld#!)_l(D!UmFmY!4WDFS= ztz>H6wD3Z|h=I<2qXDG#rVBkVXTYWJWxL4j6UbrWmH2u*<|l29u@S2y0u1JO^W0rP z`@KI@2|g787MqSsC%x$~)UY~cPL~QEB^@<4=rmAhxI-Mlct)N_O1?cQ5|DQJ=&K)d zG&mj0>AH4?1>|xzDr8{!D}!@`k=Chr@M5E@etXM=?RoOvQ9l?Ucsg`f%Eo%6H9I^j zxG&ADSW;*2R#g~%XX&Ar1 z;grHTjDI^VN#?zV#pMTC(oCP%p~X+t>E-oga5js{x#mm&cdok!-Sn`$tG#m!KCT;S z=O&`1dI_lIc1?^|=7Ep7Y%#G5l7LIf*g8wR%-O#}oCPB9nRBr{9YJNgrVk`p^McWa zbC({@vIASyiav)l4p=GGzF#+m06xx&yU!0dAmvjJO$J+vkbYC_DP}$s%%#ppJ4q5D z>q_q_LLNW7+y3vi8UE7cOAPP`(ASptl7=fE!|$Gt?MDLkJ)`Vt zf*{+LJ}MGLhpN*!Sq_XR(JBmPU7acflCq=A%`pCF|5VfW^jEXU_C`{8Ni`Gh@URZv 
z4<&&0I*May0|RCe`Jle3@iiiz6C=ksVy=pLs1mcQ5%cQ&ruqoo%)$1DbD4x^$ zb!0pqwr-GVKK*$Cks|2*BZ|`CK8?@1MBoM7fO5Uh=~i^^2>03wJ0=uA|M+F@2_5V; z7k$W)43J~1yf)Cm1fr8leO~n(YUW(AkL#rX3}<>JTbfFN(T+J?!17Xo&7)yg?@lAb zOR^sYO=zHzr9K}%jtANW-;Ry~QK5E&S>x!5vt+A75gi=%(?f?OnQ$vvw1=!ghoT$R-C4@m zbvz!0JzXabenL+piaABW-&6DI_?JG!=eYI8#odAsN8poBs^kM>j)~s-)^I!9u%~hB4KeG4@yHm4VO(NKs5bPUA$#C8na$8pukRtS5gOh z46yo<_t&nqsB91h*WIDVG_kssj)Z|PR{mJM2LA3ay@m&TxU!O3r6hpk!cJ$(J&e!% zv|Az9k`44?3_GH(&|y$xk558}G{{YDn2p#i4lh#MPtFqPF#fQ!A;O#v9HGhF@n(`R zQ$UeColC>~mBi~3of437{M?2F6)bO^?jL*Y4Fx1sMuVy(q~O5ZJ>qfO8Pu`4tB?N< z9b!6@@@hRWp3r{RWOHUUn%83qRSc#>CE0c+ns;c18lOn6__LP47VK29la`ggJrnts(x|E=ia#+T&+=p#!DJTMxSH zlHslPHOuG8JkS(lQzTzYf!9Y@kK1TBq3#zmSz3qa;QU1S*j67Vd~&5mJ+x)QVEZNm zk9jJD7`2hjD#sC#z0c=1hI1TmAa{b#Clt8KljVp>8LDj0j0kJP>>v-NA}vA~h^kww zrz@pkj|u+E2!@}I!ylD-YKbuK{k*8TjSbQ*EZMh@VCz!F=?n@l5xgC?;Z*oB9?2?P zic@hGWh&4k>ao1FS>Gu$qdW;H+Uk7WG6|2>r}VseXxWPvjxQ%0t6}3xJZf33C=MjG zXWbUx*x-eaOPd=j6C$HqpR$XawYVM;7`%sV%&_a8*aL@_z!|Wyk6}c6pr!b zb+Zc$K82ghz(Q|ixsDyGGX?7bl5c;L2$jdBfazow!ZA~-tXf?0INQnqdhMk zo~$@3LKvt)JH)S^BoeXwOG2EJ+etoXDOPxJMwbpDyEAQ*2ZW%y*Xh)9D<u?kpZn@lv%s4=itJL^*nFNfz_z_pL7R&1s!mmET@IP!hG9$CE z9_b9cp*yMx!?aZ2mETJ!`uysesa+w}9dr?hd;jI8<$DNPh+BAWJ#PB+IUW!7g?r~n}YZ6eO zbzup|LIc7f*9>769Ne*SkrOSOMVE&-wu|KqBPZP53?6T+4oB`}_Kq!*5Xc@iB((lB z3Wy7*x?pt**5`vO{qD7(@YDWH&Di`N?v5Y3&i)`g{hPaK}P}$qS=qH6N;L{zo zVJu_@$qRGKhGO=yNk{KqU>F`Ud!I-g!t(mBEIil6jd8=n5|8}z%mt)CTBUjBx-{^$ zt?yZh`Gt+c6$&8(!chEDT)kga6x0>8cElB8e9EhxAKz*dAXRDK2lWB0o-#G3eb}D~ z(H1U+W-Q{6iPdUt8CXI-CgLGs>m;Dx^|h_Pco*{1(WOb;sX=X#`;VF5W`*Rb?>o0( zJeF>!Gc(c(n@^S`QB_eYJX*am$~c`05380TJ~krA2UtAJd%^?3Hc!)=pYcJd*jBDW z%n!Bwc+Bloay=56C^@?ptJj#y82)x&h78kpW$^A1Se&+H_HaCb3cYO>;O(j{;r?9^{J!<|9g<;(BW7-2AHN`jQKM6<0b&3#vG%HE0}jGB zo1RtZqrg{T#SIEu#lWIcpGG!Sg@1l zuUIYw)B3ew+)4lo(`P{q!ClC3#hpYQxjE$Fp`qsY0b38_U#Qkha6`V16}}a#)AZFw zA9Obnp>wW9B)NkHJ}SMraOpV(3^#2hI&YH%x|PyCuIyRVZhXD@S=B81?B+>BK6PmP zc1dc!7ZsE{9_Q}6AP#&lx-He_g~0a6R`QBg8gM_oJ;y=Cc!4!L%{dN`;pIA~(DN8B 
zQe>Uq#U01Nb?)a{>fIwqa;T}!=NcPaSL<6Asnv>RzOLX^Kq5d8rB}M;VD^8@+Lt_v z^~l)AAMS>Yp(@wTJHZ&wH2Nef>F`qqT=3yM+oHz>?Y8~5Mf6Aj0sLfxmnXIG@$yRqW|ziUGi_ZAnGhE!9WkhvgtE(x-e_|%A^B3%-u7))tUn}Fun)42#!nR_{kuHq4-z_m&86 z!d(uuSz`EEC85A?K95q`jBHP(VfJ_DsL%3uG$=Vgl;_^gfCM+u3~y{*RywU#NyO?K zYtOFm$AiBxKHAd3J*`far$!XyPw^(*K2n3+XbVE#cNnmz`vyKAvx~2baS$LN z0YV8bBkY=ZxMab|p0^N(BS(@e_na00{<2p2I9{y2FLip57mI&)w0&GOGZu%$RV){m z<}tnXA2}h=_#N#yyE2a+fz>h@TTmP6ucl@ zkB#lY>aJFshuF9iVcDVKq#c-FcXE2j@ZH8SR9dMKc)3^vT4v>{7fwD!(PoB`!Bbpd zy-KWsZo&s<`|tm_{Xzf+hdM7rRbhUJtY1K~hyYjy&bizjC4%LImrWNokFzs8E!KSF zgEi-6>`y8Zfb-1B;&tv_s4=E)yD^gj`T?p10omd(Hu3TOwPFhNT@I{K2p57QN8Puv z*z*G#wSA_gh2>}duYD7gziaXFc>LdT{HORo_DlR%_et2o-vi>}1b)ij-=V(`{zu5) z`uM2@9JU)$@bCLl-t_bK)PCvX=YBHWzuXV`KjLBj+{AyI2L8F9jPuu>eEqeL>-UZQ zg46HqyT6{CTT zeg3Z`FAo0x36JB#o{JN=)5-Cui>tHO@BaU4Ki6-)|L*rk{?WkyUJbOMpU?hB{?Wr< t8u){QKREb Date: Fri, 28 Nov 2025 23:35:12 +0000 Subject: [PATCH 3/7] Test the conversion --- .github/workflows/test.yml | 5 + tests/test_conversion.py | 508 +++++++++++++++++++++++++++++++++++++ 2 files changed, 513 insertions(+) create mode 100644 tests/test_conversion.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 03c6a7c..f9a708c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -42,6 +42,10 @@ jobs: run: | pytest tests/test_gridder.py -v --tb=short + - name: Run conversion tests + run: | + pytest tests/test_conversion.py -v --tb=short + - name: Run integration tests run: | bash tests/run_tests.sh --integration @@ -132,6 +136,7 @@ jobs: - name: Run tests with debug build run: | pytest tests/test_gridder.py -v --tb=short + pytest tests/test_conversion.py -v --tb=short - name: Upload test artifacts on failure if: failure() diff --git a/tests/test_conversion.py b/tests/test_conversion.py new file mode 100644 index 0000000..22f765c --- /dev/null +++ b/tests/test_conversion.py @@ -0,0 +1,508 @@ +#!/usr/bin/env python3 +""" +Test suite for convert_to_gridder_format.py + +Tests 
the conversion script's ability to: +1. Create proper cell structures +2. Handle various input formats +3. Produce files that the gridder can read successfully +""" + +import subprocess +import sys +import tempfile +from pathlib import Path + +import h5py +import numpy as np +import pytest + +# Get paths +PROJECT_ROOT = Path(__file__).parent.parent +CONVERSION_SCRIPT = PROJECT_ROOT / "tools" / "convert_to_gridder_format.py" +GRIDDER_EXE = PROJECT_ROOT / "build" / "parent_gridder" + + +@pytest.fixture(scope="session") +def build_gridder(): + """Ensure gridder is built before running tests.""" + if not GRIDDER_EXE.exists(): + print("Building gridder...") + subprocess.run( + ["cmake", "-B", str(PROJECT_ROOT / "build")], cwd=PROJECT_ROOT, check=True + ) + subprocess.run( + ["cmake", "--build", str(PROJECT_ROOT / "build")], + cwd=PROJECT_ROOT, + check=True, + ) + assert GRIDDER_EXE.exists(), f"Gridder executable not found at {GRIDDER_EXE}" + return GRIDDER_EXE + + +@pytest.fixture +def temp_dir(): + """Create a temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + +def create_arbitrary_snapshot( + filepath, + coords_key="Coords", + masses_key="Masses", + npart=1000, + boxsize=10.0, + include_header=True, +): + """ + Create a test snapshot with arbitrary key names. 
+ + Args: + filepath: Output file path + coords_key: Key name for coordinates + masses_key: Key name for masses + npart: Number of particles + boxsize: Box size + include_header: Whether to include Header group + """ + np.random.seed(42) + coords = np.random.uniform(0, boxsize, (npart, 3)) + masses = np.ones(npart) * 0.5 + + with h5py.File(filepath, "w") as f: + # Write particles with arbitrary keys + f.create_dataset(coords_key, data=coords) + f.create_dataset(masses_key, data=masses) + + # Add header if requested + if include_header: + header = f.create_group("Header") + header.attrs["BoxSize"] = np.array([boxsize, boxsize, boxsize]) + header.attrs["NumPart_Total"] = np.array( + [0, npart, 0, 0, 0, 0], dtype=np.uint64 + ) + header.attrs["Redshift"] = 0.0 + + +def create_uniform_snapshot(filepath, npart_per_dim=5, boxsize=10.0): + """ + Create a snapshot with particles on a uniform grid. + + Args: + filepath: Output file path + npart_per_dim: Number of particles per dimension + boxsize: Box size + """ + spacing = boxsize / npart_per_dim + coords = [] + for i in range(npart_per_dim): + for j in range(npart_per_dim): + for k in range(npart_per_dim): + x = (i + 0.5) * spacing + y = (j + 0.5) * spacing + z = (k + 0.5) * spacing + coords.append([x, y, z]) + + coords = np.array(coords) + npart = len(coords) + masses = np.ones(npart) + + with h5py.File(filepath, "w") as f: + f.create_dataset("DarkMatter/Positions", data=coords) + f.create_dataset("DarkMatter/Masses", data=masses) + + header = f.create_group("Header") + header.attrs["BoxSize"] = np.array([boxsize, boxsize, boxsize]) + header.attrs["NumPart_Total"] = np.array( + [0, npart, 0, 0, 0, 0], dtype=np.uint64 + ) + header.attrs["Redshift"] = 0.0 + + +def create_noncubic_snapshot(filepath, boxsize_xyz, npart=500): + """Create a snapshot with non-cubic box.""" + np.random.seed(123) + coords = np.random.uniform([0, 0, 0], boxsize_xyz, (npart, 3)) + masses = np.ones(npart) * 2.0 + + with h5py.File(filepath, "w") as f: 
+ f.create_dataset("PartType1/Coordinates", data=coords) + f.create_dataset("PartType1/Masses", data=masses) + + header = f.create_group("Header") + header.attrs["BoxSize"] = np.array(boxsize_xyz) + header.attrs["NumPart_Total"] = np.array( + [0, npart, 0, 0, 0, 0], dtype=np.uint64 + ) + header.attrs["Redshift"] = 0.0 + + +def verify_cell_structure(hdf_file, expected_npart, cdim): + """ + Verify that an HDF5 file has correct cell structure. + + Args: + hdf_file: Path to HDF5 file + expected_npart: Expected total number of particles + cdim: Expected cell dimension + + Returns: + True if structure is valid, raises AssertionError otherwise + """ + with h5py.File(hdf_file, "r") as f: + # Check required groups exist + assert "/PartType1" in f, "Missing PartType1 group" + assert "/Cells" in f, "Missing Cells group" + + # Check particle data + assert "/PartType1/Coordinates" in f, "Missing Coordinates dataset" + assert "/PartType1/Masses" in f, "Missing Masses dataset" + + coords = f["/PartType1/Coordinates"][:] + masses = f["/PartType1/Masses"][:] + + assert coords.shape == (expected_npart, 3), ( + f"Wrong coordinates shape: {coords.shape} vs expected ({expected_npart}, 3)" + ) + assert masses.shape == (expected_npart,), ( + f"Wrong masses shape: {masses.shape} vs expected ({expected_npart},)" + ) + + # Check cell metadata + assert "/Cells/Meta-data" in f, "Missing Cells/Meta-data group" + metadata = f["/Cells/Meta-data"] + + assert "dimension" in metadata.attrs, "Missing dimension attribute" + assert "size" in metadata.attrs, "Missing size attribute" + + dimension = metadata.attrs["dimension"] + assert np.array_equal(dimension, [cdim, cdim, cdim]), ( + f"Wrong dimension: {dimension} vs expected [{cdim}, {cdim}, {cdim}]" + ) + + # Check cell counts and offsets + assert "/Cells/Counts/PartType1" in f, "Missing Counts dataset" + assert "/Cells/OffsetsInFile/PartType1" in f, "Missing OffsetsInFile dataset" + + cell_counts = f["/Cells/Counts/PartType1"][:] + cell_offsets = 
f["/Cells/OffsetsInFile/PartType1"][:] + + ncells = cdim**3 + assert cell_counts.shape == (ncells,), ( + f"Wrong cell_counts shape: {cell_counts.shape} vs expected ({ncells},)" + ) + assert cell_offsets.shape == (ncells,), ( + f"Wrong cell_offsets shape: {cell_offsets.shape} vs expected ({ncells},)" + ) + + # Verify counts sum to total particles + assert np.sum(cell_counts) == expected_npart, ( + f"Cell counts sum to {np.sum(cell_counts)}, expected {expected_npart}" + ) + + # Verify offsets are cumulative + expected_offsets = np.zeros(ncells, dtype=np.int64) + expected_offsets[1:] = np.cumsum(cell_counts[:-1]) + assert np.array_equal(cell_offsets, expected_offsets), ( + "Cell offsets are not cumulative" + ) + + # Verify particles are sorted by cell + # Recompute cell assignments + cell_size = metadata.attrs["size"] + i = np.floor(coords[:, 0] / cell_size[0]).astype(np.int32) + j = np.floor(coords[:, 1] / cell_size[1]).astype(np.int32) + k = np.floor(coords[:, 2] / cell_size[2]).astype(np.int32) + i = np.clip(i, 0, cdim - 1) + j = np.clip(j, 0, cdim - 1) + k = np.clip(k, 0, cdim - 1) + cell_indices = k + j * cdim + i * cdim * cdim + + # Check that particles are sorted + for idx in range(1, len(cell_indices)): + assert cell_indices[idx] >= cell_indices[idx - 1], ( + f"Particles not sorted by cell at index {idx}" + ) + + return True + + +class TestConversion: + """Test conversion script functionality.""" + + def test_uniform_distribution(self, build_gridder, temp_dir): + """Test conversion with uniform particle distribution.""" + input_file = temp_dir / "uniform_input.hdf5" + output_file = temp_dir / "uniform_output.hdf5" + + # Create test input + npart_per_dim = 5 + npart = npart_per_dim**3 + create_uniform_snapshot(input_file, npart_per_dim=npart_per_dim) + + # Run conversion + cmd = [ + sys.executable, + str(CONVERSION_SCRIPT), + str(input_file), + str(output_file), + "--coordinates-key", + "DarkMatter/Positions", + "--masses-key", + "DarkMatter/Masses", + 
"--copy-header", + "--cdim", + "4", + ] + + result = subprocess.run(cmd, capture_output=True, text=True) + assert result.returncode == 0, f"Conversion failed: {result.stderr}" + + # Verify output structure + verify_cell_structure(output_file, npart, cdim=4) + + # Test with gridder + param_file = temp_dir / "test_params.yml" + param_content = f""" +Kernels: + nkernels: 1 + kernel_radius_1: 1.0 + +Grid: + type: uniform + cdim: 3 + +Cosmology: + h: 0.681 + Omega_cdm: 0.256011 + Omega_b: 0.048600 + +Tree: + max_leaf_count: 200 + +Input: + filepath: {output_file} + +Output: + filepath: {temp_dir}/ + basename: gridded_output.hdf5 + write_masses: 1 +""" + param_file.write_text(param_content) + + result = subprocess.run( + [str(build_gridder), str(param_file), "1"], capture_output=True, text=True + ) + + assert result.returncode == 0, ( + f"Gridder failed on converted file:\nstdout: {result.stdout}\nstderr: {result.stderr}" + ) + + def test_random_distribution(self, build_gridder, temp_dir): + """Test conversion with random particle distribution.""" + input_file = temp_dir / "random_input.hdf5" + output_file = temp_dir / "random_output.hdf5" + + # Create test input with arbitrary keys + npart = 1000 + create_arbitrary_snapshot( + input_file, + coords_key="MyCoordinates", + masses_key="MyMasses", + npart=npart, + boxsize=20.0, + ) + + # Run conversion + cmd = [ + sys.executable, + str(CONVERSION_SCRIPT), + str(input_file), + str(output_file), + "--coordinates-key", + "MyCoordinates", + "--masses-key", + "MyMasses", + "--copy-header", + "--cdim", + "10", + ] + + result = subprocess.run(cmd, capture_output=True, text=True) + assert result.returncode == 0, f"Conversion failed: {result.stderr}" + + # Verify output structure + verify_cell_structure(output_file, npart, cdim=10) + + def test_noncubic_box(self, build_gridder, temp_dir): + """Test conversion with non-cubic box.""" + input_file = temp_dir / "noncubic_input.hdf5" + output_file = temp_dir / "noncubic_output.hdf5" + + # 
Create test input with non-cubic box + npart = 500 + boxsize_xyz = [10.0, 20.0, 15.0] + create_noncubic_snapshot(input_file, boxsize_xyz, npart) + + # Run conversion + cmd = [ + sys.executable, + str(CONVERSION_SCRIPT), + str(input_file), + str(output_file), + "--coordinates-key", + "PartType1/Coordinates", + "--masses-key", + "PartType1/Masses", + "--copy-header", + "--cdim", + "8", + ] + + result = subprocess.run(cmd, capture_output=True, text=True) + assert result.returncode == 0, f"Conversion failed: {result.stderr}" + + # Verify output structure + verify_cell_structure(output_file, npart, cdim=8) + + # Verify cell sizes match non-cubic box + with h5py.File(output_file, "r") as f: + cell_size = f["/Cells/Meta-data"].attrs["size"] + expected_size = np.array(boxsize_xyz) / 8 + assert np.allclose(cell_size, expected_size), ( + f"Cell sizes incorrect: {cell_size} vs expected {expected_size}" + ) + + def test_boundary_particles(self, build_gridder, temp_dir): + """Test handling of particles at box boundaries.""" + input_file = temp_dir / "boundary_input.hdf5" + output_file = temp_dir / "boundary_output.hdf5" + + boxsize = 10.0 + + # Create particles at boundaries and random interior + np.random.seed(789) + boundary_coords = np.array( + [ + [0.0, 5.0, 5.0], # x=0 + [10.0, 5.0, 5.0], # x=boxsize + [5.0, 0.0, 5.0], # y=0 + [5.0, 10.0, 5.0], # y=boxsize + [5.0, 5.0, 0.0], # z=0 + [5.0, 5.0, 10.0], # z=boxsize + [0.0, 0.0, 0.0], # corner + [10.0, 10.0, 10.0], # opposite corner + ] + ) + interior_coords = np.random.uniform(0.1, 9.9, (100, 3)) + coords = np.vstack([boundary_coords, interior_coords]) + npart = len(coords) + masses = np.ones(npart) + + with h5py.File(input_file, "w") as f: + f.create_dataset("Coordinates", data=coords) + f.create_dataset("Masses", data=masses) + + header = f.create_group("Header") + header.attrs["BoxSize"] = np.array([boxsize, boxsize, boxsize]) + header.attrs["NumPart_Total"] = np.array( + [0, npart, 0, 0, 0, 0], dtype=np.uint64 + ) + 
header.attrs["Redshift"] = 0.0 + + # Run conversion + cmd = [ + sys.executable, + str(CONVERSION_SCRIPT), + str(input_file), + str(output_file), + "--coordinates-key", + "Coordinates", + "--masses-key", + "Masses", + "--copy-header", + "--cdim", + "5", + ] + + result = subprocess.run(cmd, capture_output=True, text=True) + assert result.returncode == 0, f"Conversion failed: {result.stderr}" + + # Verify output structure + verify_cell_structure(output_file, npart, cdim=5) + + def test_manual_boxsize(self, build_gridder, temp_dir): + """Test specifying BoxSize manually.""" + input_file = temp_dir / "no_header_input.hdf5" + output_file = temp_dir / "no_header_output.hdf5" + + # Create input without header + npart = 200 + boxsize = 50.0 + np.random.seed(456) + coords = np.random.uniform(0, boxsize, (npart, 3)) + masses = np.ones(npart) * 1.5 + + with h5py.File(input_file, "w") as f: + f.create_dataset("Positions", data=coords) + f.create_dataset("ParticleMasses", data=masses) + # No Header group! 
+ + # Run conversion with manual BoxSize + cmd = [ + sys.executable, + str(CONVERSION_SCRIPT), + str(input_file), + str(output_file), + "--coordinates-key", + "Positions", + "--masses-key", + "ParticleMasses", + "--boxsize", + "50.0", + "50.0", + "50.0", + "--cdim", + "8", + ] + + result = subprocess.run(cmd, capture_output=True, text=True) + assert result.returncode == 0, f"Conversion failed: {result.stderr}" + + # Verify output structure + verify_cell_structure(output_file, npart, cdim=8) + + def test_different_cdim_values(self, build_gridder, temp_dir): + """Test various cell dimension values.""" + for cdim in [4, 10, 16, 32]: + input_file = temp_dir / f"cdim{cdim}_input.hdf5" + output_file = temp_dir / f"cdim{cdim}_output.hdf5" + + npart = 500 + create_arbitrary_snapshot(input_file, npart=npart, boxsize=10.0) + + cmd = [ + sys.executable, + str(CONVERSION_SCRIPT), + str(input_file), + str(output_file), + "--coordinates-key", + "Coords", + "--masses-key", + "Masses", + "--copy-header", + "--cdim", + str(cdim), + ] + + result = subprocess.run(cmd, capture_output=True, text=True) + assert result.returncode == 0, ( + f"Conversion failed for cdim={cdim}: {result.stderr}" + ) + + verify_cell_structure(output_file, npart, cdim=cdim) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From 585d2dd2600d03b28b495c9cb0cd6420f50c5844 Mon Sep 17 00:00:00 2001 From: wjr21 Date: Fri, 28 Nov 2025 23:54:13 +0000 Subject: [PATCH 4/7] Refining the tests to ensure gridder runs with output --- tests/test_conversion.py | 131 +++++++++++++++++++++-------- tools/convert_to_gridder_format.py | 27 ++++-- 2 files changed, 119 insertions(+), 39 deletions(-) diff --git a/tests/test_conversion.py b/tests/test_conversion.py index 22f765c..95b2fb6 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -12,6 +12,7 @@ import sys import tempfile from pathlib import Path +import textwrap import h5py import numpy as np @@ -47,6 +48,55 @@ def temp_dir(): yield 
Path(tmpdir) +def run_gridder_on_converted( + build_gridder, temp_dir, input_file, basename="gridded_output.hdf5", grid_cdim=3 +): + """ + Run the C++ gridder against a converted snapshot to ensure it is readable. + """ + param_file = temp_dir / f"{basename}.yml" + param_content = textwrap.dedent( + f""" + Kernels: + nkernels: 1 + kernel_radius_1: 1.0 + + Grid: + type: uniform + cdim: {grid_cdim} + + Cosmology: + h: 0.681 + Omega_cdm: 0.256011 + Omega_b: 0.048600 + + Tree: + max_leaf_count: 200 + + Input: + filepath: {input_file} + + Output: + filepath: {temp_dir}/ + basename: {basename} + write_masses: 1 + """ + ).strip() + param_file.write_text(param_content) + + result = subprocess.run( + [str(build_gridder), str(param_file), "1"], capture_output=True, text=True + ) + assert result.returncode == 0, ( + f"Gridder failed on converted file {input_file}:\n" + f"stdout: {result.stdout}\nstderr: {result.stderr}" + ) + + output_path = temp_dir / basename + assert output_path.exists(), f"Gridder output missing at {output_path}" + return output_path + + def create_arbitrary_snapshot( filepath, coords_key="Coords", @@ -263,40 +313,12 @@ def test_uniform_distribution(self, build_gridder, temp_dir): verify_cell_structure(output_file, npart, cdim=4) # Test with gridder - param_file = temp_dir / "test_params.yml" - param_content = f""" -Kernels: - nkernels: 1 - kernel_radius_1: 1.0 - -Grid: - type: uniform - cdim: 3 - -Cosmology: - h: 0.681 - Omega_cdm: 0.256011 - Omega_b: 0.048600 - -Tree: - max_leaf_count: 200 - -Input: - filepath: {output_file} - -Output: - filepath: {temp_dir}/ - basename: gridded_output.hdf5 - write_masses: 1 -""" - param_file.write_text(param_content) - - result = subprocess.run( - [str(build_gridder), str(param_file), "1"], capture_output=True, text=True - ) - - assert result.returncode == 0, ( - f"Gridder failed on converted file:\nstdout: {result.stdout}\nstderr: {result.stderr}" + run_gridder_on_converted( + build_gridder, + temp_dir, + output_file, 
+ basename="uniform_gridded_output.hdf5", + grid_cdim=3, ) def test_random_distribution(self, build_gridder, temp_dir): @@ -335,6 +357,15 @@ def test_random_distribution(self, build_gridder, temp_dir): # Verify output structure verify_cell_structure(output_file, npart, cdim=10) + # Ensure gridder can read converted file + run_gridder_on_converted( + build_gridder, + temp_dir, + output_file, + basename="random_gridded_output.hdf5", + grid_cdim=4, + ) + def test_noncubic_box(self, build_gridder, temp_dir): """Test conversion with non-cubic box.""" input_file = temp_dir / "noncubic_input.hdf5" @@ -374,6 +405,14 @@ def test_noncubic_box(self, build_gridder, temp_dir): f"Cell sizes incorrect: {cell_size} vs expected {expected_size}" ) + run_gridder_on_converted( + build_gridder, + temp_dir, + output_file, + basename="noncubic_gridded_output.hdf5", + grid_cdim=4, + ) + def test_boundary_particles(self, build_gridder, temp_dir): """Test handling of particles at box boundaries.""" input_file = temp_dir / "boundary_input.hdf5" @@ -432,6 +471,14 @@ def test_boundary_particles(self, build_gridder, temp_dir): # Verify output structure verify_cell_structure(output_file, npart, cdim=5) + run_gridder_on_converted( + build_gridder, + temp_dir, + output_file, + basename="boundary_gridded_output.hdf5", + grid_cdim=3, + ) + def test_manual_boxsize(self, build_gridder, temp_dir): """Test specifying BoxSize manually.""" input_file = temp_dir / "no_header_input.hdf5" @@ -473,6 +520,14 @@ def test_manual_boxsize(self, build_gridder, temp_dir): # Verify output structure verify_cell_structure(output_file, npart, cdim=8) + run_gridder_on_converted( + build_gridder, + temp_dir, + output_file, + basename="manual_boxsize_gridded_output.hdf5", + grid_cdim=4, + ) + def test_different_cdim_values(self, build_gridder, temp_dir): """Test various cell dimension values.""" for cdim in [4, 10, 16, 32]: @@ -503,6 +558,14 @@ def test_different_cdim_values(self, build_gridder, temp_dir): 
verify_cell_structure(output_file, npart, cdim=cdim) + run_gridder_on_converted( + build_gridder, + temp_dir, + output_file, + basename=f"cdim{cdim}_gridded_output.hdf5", + grid_cdim=3, + ) + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/tools/convert_to_gridder_format.py b/tools/convert_to_gridder_format.py index 41476ef..6dfaeb4 100755 --- a/tools/convert_to_gridder_format.py +++ b/tools/convert_to_gridder_format.py @@ -134,6 +134,17 @@ def parse_args(): return parser.parse_args() +def write_minimal_header(h5file, boxsize, npart, redshift=0.0, header_name="Header"): + """Write a minimal Header group expected by the gridder.""" + hdr = h5file.require_group(header_name) + hdr.attrs["BoxSize"] = np.array(boxsize, dtype=np.float64) + + numpart = np.zeros(6, dtype=np.uint64) + numpart[1] = npart # PartType1 slot + hdr.attrs["NumPart_Total"] = numpart + hdr.attrs["Redshift"] = np.float64(redshift) + + def get_mpi_info(): """Get MPI rank and size, or return defaults if not using MPI.""" if HAS_MPI and MPI.COMM_WORLD.size > 1: @@ -363,10 +374,13 @@ def convert_file_serial(args): # Write cell structure write_cell_structure(f_out, cell_counts, cell_offsets, args.cdim, cell_size) - # Copy header if requested + # Copy header if requested, otherwise synthesize when boxsize is provided if args.copy_header and args.header_key in f_in: print(f" Copying header from {args.header_key}") f_in.copy(args.header_key, f_out, 'Header') + elif args.boxsize is not None: + print(" Writing minimal Header from provided BoxSize") + write_minimal_header(f_out, boxsize, npart) print(f"✓ Conversion complete: {args.output_file}") @@ -448,9 +462,12 @@ def convert_file_mpi(args, comm, rank, size): # Write cell structure for this rank write_cell_structure(f_out, cell_counts, cell_offsets, args.cdim, cell_size) - # Copy header to first rank's file - if rank == 0 and args.copy_header and args.header_key in f_in: - f_in.copy(args.header_key, f_out, 'Header') + # Copy header to 
first rank's file, or synthesize if BoxSize provided + if rank == 0: + if args.copy_header and args.header_key in f_in: + f_in.copy(args.header_key, f_out, 'Header') + elif args.boxsize is not None: + write_minimal_header(f_out, boxsize, total_particles) print(f"Rank {rank}: Wrote {count} particles to {rank_file}") @@ -461,7 +478,7 @@ def convert_file_mpi(args, comm, rank, size): print(f"Rank 0: Creating virtual file {virtual_file}") create_virtual_file( base_name, size, total_particles, args.particle_type, - args.copy_header, args.cdim, boxsize + args.copy_header or args.boxsize is not None, args.cdim, boxsize ) print(f"✓ Conversion complete: {virtual_file}") From 2ce040c363b1e49b6f44c9f539fdea45eb7612d4 Mon Sep 17 00:00:00 2001 From: wjr21 Date: Sat, 29 Nov 2025 00:00:06 +0000 Subject: [PATCH 5/7] More test fixes --- tools/convert_to_gridder_format.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/tools/convert_to_gridder_format.py b/tools/convert_to_gridder_format.py index 6dfaeb4..c15e00f 100755 --- a/tools/convert_to_gridder_format.py +++ b/tools/convert_to_gridder_format.py @@ -190,9 +190,10 @@ def get_particle_count(input_file, masses_key, rank, size): return total_particles, start_idx, count -def get_boxsize(input_file, args, rank=0): +def get_boxsize(input_file, args, rank=0, coords=None): """ - Get box size from command line args or Header in input file. + Get box size from command line args or Header in input file. If missing, + optionally infer from coordinates (max extent). 
Returns: boxsize: numpy array [X, Y, Z] box dimensions @@ -213,6 +214,20 @@ def get_boxsize(input_file, args, rank=0): print(f" Read BoxSize from {args.header_key}/BoxSize: {boxsize}") return boxsize + # If boxsize still unknown, optionally infer from coords + if coords is not None: + if coords.ndim != 2 or coords.shape[1] != 3: + raise ValueError("Coordinates must be shape (N, 3) to infer BoxSize") + inferred = np.max(coords, axis=0) + if np.any(inferred <= 0): + raise ValueError( + "Failed to infer BoxSize from coordinates (non-positive extents). " + "Provide --boxsize or add Header/BoxSize." + ) + if rank == 0: + print(f" Inferring BoxSize from coordinates: {inferred}") + return inferred + raise ValueError( "BoxSize not found in input file and not provided via --boxsize. " "Please specify --boxsize X Y Z or ensure Header/BoxSize exists in input file." @@ -311,9 +326,6 @@ def convert_file_serial(args): """Convert file in serial mode (single output file).""" print(f"Converting {args.input_file} -> {args.output_file}") - # Get box size - boxsize = get_boxsize(args.input_file, args) - with h5py.File(args.input_file, 'r') as f_in: # Check input keys exist if args.coordinates_key not in f_in: @@ -338,6 +350,9 @@ def convert_file_serial(args): f"Coordinates must be shape (N, 3), got {coords.shape}" ) + # Get box size now that shapes are known + boxsize = get_boxsize(args.input_file, args, coords=coords) + print(f" Found {npart} particles") print(f" Coordinates shape: {coords.shape}") print(f" Masses shape: {masses.shape}") From 9187f82037884e5590a8ac4d3030cf501dfdbbd5 Mon Sep 17 00:00:00 2001 From: wjr21 Date: Sat, 29 Nov 2025 00:05:44 +0000 Subject: [PATCH 6/7] Fixing debug errors in converison tests --- tools/convert_to_gridder_format.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/convert_to_gridder_format.py b/tools/convert_to_gridder_format.py index c15e00f..9ce9df8 100755 --- a/tools/convert_to_gridder_format.py +++ 
b/tools/convert_to_gridder_format.py @@ -260,6 +260,11 @@ def create_cell_structure(coords, masses, boxsize, cdim): npart = coords.shape[0] ncells = cdim ** 3 + # Clamp coordinates to stay strictly within [0, boxsize) to avoid boundary issues + eps = np.finfo(coords.dtype).eps + upper = np.maximum(boxsize - eps, 0.0) + coords = np.clip(coords, 0.0, upper) + # Calculate cell size cell_size = boxsize / cdim From 8b78cc366fc5aa91bd1053acf96a8d13ce27c20c Mon Sep 17 00:00:00 2001 From: wjr21 Date: Sat, 29 Nov 2025 00:08:49 +0000 Subject: [PATCH 7/7] More debug fixes --- tools/convert_to_gridder_format.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/convert_to_gridder_format.py b/tools/convert_to_gridder_format.py index 9ce9df8..40f299d 100755 --- a/tools/convert_to_gridder_format.py +++ b/tools/convert_to_gridder_format.py @@ -261,8 +261,7 @@ def create_cell_structure(coords, masses, boxsize, cdim): ncells = cdim ** 3 # Clamp coordinates to stay strictly within [0, boxsize) to avoid boundary issues - eps = np.finfo(coords.dtype).eps - upper = np.maximum(boxsize - eps, 0.0) + upper = np.nextafter(boxsize, np.full_like(boxsize, -np.inf)) coords = np.clip(coords, 0.0, upper) # Calculate cell size