csiborgtools/scripts/process_snapshot.py

# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
r"""
Script to process simulation files and create a single HDF5 file, in which
particles are sorted by the particle halo IDs.
"""
from argparse import ArgumentParser
from gc import collect

import h5py
import numpy
from mpi4py import MPI

import csiborgtools
from csiborgtools import fprint
from numba import jit
from taskmaster import work_delegation
from tqdm import trange, tqdm
from utils import get_nsims


@jit(nopython=True, boundscheck=False)
def minmax_halo(hid, halo_ids, start_loop=0):
    """
    Find the start and end index of a halo in a sorted array of halo IDs.
    This is much faster than using `numpy.where` and then `numpy.min` and
    `numpy.max`.
    """
    start = None
    end = None

    for i in range(start_loop, halo_ids.size):
        n = halo_ids[i]
        if n == hid:
            if start is None:
                start = i
            end = i
        elif n > hid:
            break
    return start, end


def process_snapshot(nsim, simname, halo_finder, verbose):
    """
    Read in the snapshot particles, sort them by their halo ID and dump
    into a HDF5 file. Stores the first and last index of each halo in the
    particle array for fast slicing of the array to acces particles of a single
    halo.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsnap = max(paths.get_snapshots(nsim, simname))

    if simname == "csiborg":
        partreader = csiborgtools.read.CSiBORGReader(paths)
        box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
    else:
        partreader = csiborgtools.read.QuijoteReader(paths)
        box = None

    desc = {"hid": f"Halo finder ID ({halo_finder})of the particle.",
            "pos": "DM particle positions in box units.",
            "vel": "DM particle velocity in km / s.",
            "mass": "DM particle mass in Msun / h.",
            "pid": "DM particle ID",
            }

    fname = paths.processed_output(nsim, simname, halo_finder)

    fprint(f"loading HIDs of IC {nsim}.", verbose)
    hids = partreader.read_halo_id(nsnap, nsim, halo_finder, verbose)
    collect()

    fprint(f"sorting HIDs of IC {nsim}.")
    sort_indxs = numpy.argsort(hids)

    with h5py.File(fname, "w") as f:
        group = f.create_group("snapshot_final")
        group.attrs["header"] = "Snapshot data at z = 0."

        fprint("dumping halo IDs.", verbose)
        dset = group.create_dataset("halo_ids", data=hids[sort_indxs])
        dset.attrs["header"] = desc["hid"]
        del hids
        collect()

        fprint("reading, sorting and dumping the snapshot particles.", verbose)
        for kind in ["pos", "vel", "mass", "pid"]:
            x = partreader.read_snapshot(nsnap, nsim, kind)[sort_indxs]

            if simname == "csiborg" and kind == "vel":
                x = box.box2vel(x) if simname == "csiborg" else x

            if simname == "csiborg" and kind == "mass":
                x = box.box2solarmass(x) if simname == "csiborg" else x

            dset = f["snapshot_final"].create_dataset(kind, data=x)
            dset.attrs["header"] = desc[kind]
            del x
            collect()

    del sort_indxs
    collect()

    fprint(f"creating a halo map for IC {nsim}.")
    with h5py.File(fname, "r") as f:
        part_hids = f["snapshot_final"]["halo_ids"][:]
    # We loop over the unique halo IDs and remove the 0 halo ID
    unique_halo_ids = numpy.unique(part_hids)
    unique_halo_ids = unique_halo_ids[unique_halo_ids != 0]
    halo_map = numpy.full((unique_halo_ids.size, 3), numpy.nan,
                          dtype=numpy.uint64)
    start_loop, niters = 0, unique_halo_ids.size
    for i in trange(niters, disable=not verbose):
        hid = unique_halo_ids[i]
        k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
        halo_map[i, :] = hid, k0, kf
        start_loop = kf

    # Dump the halo mapping.
    with h5py.File(fname, "r+") as f:
        dset = f["snapshot_final"].create_dataset("halo_map", data=halo_map)
        dset.attrs["header"] = """
        Halo to particle mapping. Columns are HID, start index, end index.
        """
        f.close()

    del part_hids
    collect()

    # Add the halo finder catalogue
    with h5py.File(fname, "r+") as f:
        group = f.create_group("halofinder_catalogue")
        group.attrs["header"] = f"Original {halo_finder} halo catalogue."
        cat = partreader.read_catalogue(nsnap, nsim, halo_finder)

        hid2pos = {hid: i for i, hid in enumerate(unique_halo_ids)}

        for key in cat.dtype.names:
            x = numpy.full(unique_halo_ids.size, numpy.nan,
                           dtype=cat[key].dtype)
            for i in range(len(cat)):
                j = hid2pos[cat["index"][i]]
                x[j] = cat[key][i]
            group.create_dataset(key, data=x)
        f.close()

    # Lastly create the halo catalogue
    with h5py.File(fname, "r+") as f:
        group = f.create_group("halo_catalogue")
        group.attrs["header"] = f"{halo_finder} halo catalogue."
        group.create_dataset("index", data=unique_halo_ids)
        f.close()


def add_initial_snapshot(nsim, simname, halo_finder, verbose):
    """
    Sort the initial snapshot particles according to their final snapshot and
    add them to the final snapshot's HDF5 file.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    fname = paths.processed_output(nsim, simname, halo_finder)

    if simname == "csiborg":
        partreader = csiborgtools.read.CSiBORGReader(paths)
    else:
        partreader = csiborgtools.read.QuijoteReader(paths)

    fprint(f"processing simulation `{nsim}`.", verbose)
    if simname == "csiborg":
        nsnap0 = 1
    elif simname == "quijote":
        nsnap0 = -1
    else:
        raise ValueError(f"Unknown simulation `{simname}`.")

    fprint("loading and sorting the initial PID.", verbose)
    sort_indxs = numpy.argsort(partreader.read_snapshot(nsnap0, nsim, "pid"))

    fprint("loading the final particles.", verbose)
    with h5py.File(fname, "r") as f:
        sort_indxs_final = f["snapshot_final/pid"][:]
        f.close()

    fprint("sorting the particles according to the final snapshot.", verbose)
    sort_indxs_final = numpy.argsort(numpy.argsort(sort_indxs_final))
    sort_indxs = sort_indxs[sort_indxs_final]

    del sort_indxs_final
    collect()

    fprint("loading and sorting the initial particle position.", verbose)
    pos = partreader.read_snapshot(nsnap0, nsim, "pos")[sort_indxs]

    del sort_indxs
    collect()

    # In Quijote some particles are position precisely at the edge of the
    # box. Move them to be just inside.
    if simname == "quijote":
        mask = pos >= 1
        if numpy.any(mask):
            spacing = numpy.spacing(pos[mask])
            assert numpy.max(spacing) <= 1e-5
            pos[mask] -= spacing

    fprint(f"dumping particles for `{nsim}` to `{fname}`.", verbose)
    with h5py.File(fname, "r+") as f:
        if "snapshot_initial" in f.keys():
            del f["snapshot_initial"]
        group = f.create_group("snapshot_initial")
        group.attrs["header"] = "Initial snapshot data."
        dset = group.create_dataset("pos", data=pos)
        dset.attrs["header"] = "DM particle positions in box units."

        f.close()


def calculate_initial(nsim, simname, halo_finder, verbose):
    """Calculate the Lagrangian patch centre of mass and size."""
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)

    fname = paths.processed_output(nsim, simname, halo_finder)
    fprint("loading the particle information.", verbose)
    f = h5py.File(fname, "r")
    pos = f["snapshot_initial/pos"]
    mass = f["snapshot_final/mass"]
    hid = f["halo_catalogue/index"][:]
    hid2map = csiborgtools.read.make_halomap_dict(
        f["snapshot_final/halo_map"][:])

    if simname == "csiborg":
        kwargs = {"box_size": 2048, "bckg_halfsize": 512}
    else:
        kwargs = {"box_size": 512, "bckg_halfsize": 256}
    overlapper = csiborgtools.match.ParticleOverlap(**kwargs)

    lagpatch_pos = numpy.full((len(hid), 3), numpy.nan, dtype=numpy.float32)
    lagpatch_size = numpy.full(len(hid), numpy.nan, dtype=numpy.float32)
    lagpatch_ncells = numpy.full(len(hid), numpy.nan, dtype=numpy.int32)

    for i in trange(len(hid), disable=not verbose):
        h = hid[i]
        # These are unasigned particles.
        if h == 0:
            continue

        parts_pos = csiborgtools.read.load_halo_particles(h, pos, hid2map)
        parts_mass = csiborgtools.read.load_halo_particles(h, mass, hid2map)

        # Skip if the halo has no particles or is too small.
        if parts_pos is None or parts_pos.size < 5:
            continue

        cm = csiborgtools.center_of_mass(parts_pos, parts_mass, boxsize=1.0)
        sep = csiborgtools.periodic_distance(parts_pos, cm, boxsize=1.0)
        delta = overlapper.make_delta(parts_pos, parts_mass, subbox=True)

        lagpatch_pos[i] = cm
        lagpatch_size[i] = numpy.percentile(sep, 99)
        lagpatch_ncells[i] = csiborgtools.delta2ncells(delta)

    f.close()
    collect()

    with h5py.File(fname, "r+") as f:
        grp = f["halo_catalogue"]
        dset = grp.create_dataset("lagpatch_pos", data=lagpatch_pos)
        dset.attrs["header"] = "Lagrangian patch centre of mass in box units."

        dset = grp.create_dataset("lagpatch_size", data=lagpatch_size)
        dset.attrs["header"] = "Lagrangian patch size in box units."

        dset = grp.create_dataset("lagpatch_ncells", data=lagpatch_ncells)
        dset.attrs["header"] = f"Lagrangian patch number of cells on a {kwargs['box_size']}^3 grid."  # noqa

        f.close()


def make_phew_halo_catalogue(nsim, verbose):
    """
    Process the PHEW halo catalogue for a CSiBORG simulation at all snapshots.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    snapshots = paths.get_snapshots(nsim, "csiborg")
    reader = csiborgtools.read.CSiBORGReader(paths)
    keys_write = ["index", "x", "y", "z", "mass_cl", "parent",
                  "ultimate_parent", "summed_mass"]

    # Create a HDF5 file to store all this.
    fname = paths.processed_phew(nsim)
    with h5py.File(fname, "w") as f:
        f.close()

    for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):
        try:
            data = reader.read_phew_clumps(nsnap, nsim, verbose=False)
        except FileExistsError:
            continue

        with h5py.File(fname, "r+") as f:
            if str(nsnap) in f:
                print(f"Group {nsnap} already exists. Deleting.", flush=True)
                del f[str(nsnap)]
            grp = f.create_group(str(nsnap))
            for key in keys_write:
                grp.create_dataset(key, data=data[key])

            grp.attrs["header"] = f"CSiBORG PHEW clumps at snapshot {nsnap}."
            f.close()

    # Now write the redshifts
    scale_factors = numpy.full(len(snapshots), numpy.nan, dtype=numpy.float32)
    for i, nsnap in enumerate(snapshots):
        box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
        scale_factors[i] = box._aexp

    redshifts = scale_factors[-1] / scale_factors - 1

    with h5py.File(fname, "r+") as f:
        grp = f.create_group("info")
        grp.create_dataset("redshift", data=redshifts)
        grp.create_dataset("snapshots", data=snapshots)
        grp.create_dataset("Om0", data=[box.Om0])
        grp.create_dataset("boxsize", data=[box.boxsize])
        f.close()


def make_merger_tree_file(nsim, verbose):
    """
    Process the `.dat` merger tree files and dump them into a HDF5 file.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    reader = csiborgtools.read.CSiBORGReader(paths)
    snaps = paths.get_snapshots(nsim, "csiborg")

    fname = paths.processed_merger_tree(nsim)
    with h5py.File(fname, "w") as f:
        f.close()

    for nsnap in tqdm(snaps, desc="Loading merger files",
                      disable=not verbose):
        try:
            data = reader.read_merger_tree(nsnap, nsim)
        except FileExistsError:
            continue

        with h5py.File(fname, "r+") as f:
            grp = f.create_group(str(nsnap))

            grp.create_dataset("clump",
                               data=data[:, 0].astype(numpy.int32))
            grp.create_dataset("progenitor",
                               data=data[:, 1].astype(numpy.int32))
            grp.create_dataset("progenitor_outputnr",
                               data=data[:, 2].astype(numpy.int32))
            grp.create_dataset("desc_mass",
                               data=data[:, 3].astype(numpy.float32))
            grp.create_dataset("desc_npart",
                               data=data[:, 4].astype(numpy.int32))
            grp.create_dataset("desc_pos",
                               data=data[:, 5:8].astype(numpy.float32))
            grp.create_dataset("desc_vel",
                               data=data[:, 8:11].astype(numpy.float32))
            f.close()


def append_merger_tree_mass_to_phew_catalogue(nsim, verbose):
    """
    Append mass of haloes from mergertree files to the PHEW catalogue. The
    difference between this and the PHEW value is that the latter is written
    before unbinding is performed.

    Note that currently only does this for the highest snapshot.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    snapshots = paths.get_snapshots(nsim, "csiborg")
    merger_reader = csiborgtools.read.MergerReader(nsim, paths)

    for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):
        # TODO do this for all later
        if nsnap < 930:
            continue
        try:
            phewcat = csiborgtools.read.CSiBORGPHEWCatalogue(nsnap, nsim,
                                                             paths)
        except ValueError:
            phewcat.close()
            continue

        mergertree_mass = merger_reader.match_mass_to_phewcat(phewcat)
        phewcat.close()

        fname = paths.processed_phew(nsim)
        with h5py.File(fname, "r+") as f:
            grp = f[str(nsnap)]
            grp.create_dataset("mergertree_mass_new", data=mergertree_mass)
            f.close()


def main(nsim, args):
    if args.make_final:
        process_snapshot(nsim, args.simname, args.halofinder, True)

    if args.make_initial:
        add_initial_snapshot(nsim, args.simname, args.halofinder, True)
        calculate_initial(nsim, args.simname, args.halofinder, True)

    if args.make_phew:
        make_phew_halo_catalogue(nsim, True)

    if args.make_merger:
        make_merger_tree_file(nsim, True)

    if args.append_merger_mass:
        append_merger_tree_mass_to_phew_catalogue(nsim, True)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--simname", type=str, default="csiborg",
                        choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="IC realisations. If `-1` processes all.")
    parser.add_argument("--halofinder", type=str, help="Halo finder")
    parser.add_argument("--make_final", action="store_true", default=False,
                        help="Process the final snapshot.")
    parser.add_argument("--make_initial", action="store_true", default=False,
                        help="Process the initial snapshot.")
    parser.add_argument("--make_phew", action="store_true", default=False,
                        help="Process the PHEW halo catalogue.")
    parser.add_argument("--make_merger", action="store_true", default=False,
                        help="Process the merger tree files.")
    parser.add_argument("--append_merger_mass", action="store_true",
                        default=False,
                        help="Append the merger tree mass to the PHEW cat.")

    args = parser.parse_args()
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsims = get_nsims(args, paths)

    def _main(nsim):
        main(nsim, args)

    work_delegation(_main, nsims, MPI.COMM_WORLD)
Add pynbody and other support (#92) * Simplify box units * Move old scripts * Add printing * Update readers * Disable boundscheck * Add new ordering * Clean up imports * Enforce dtype and add mass to quijote * Simplify print statements * Fix little typos * Fix key bug * Bug fixing * Delete boring comments * Improve ultimate clumps for PHEW * Delete boring comments * Add basic reading * Remove 0th index HID * Add flipping of X and Z * Updates to halo catalogues * Add ordered caching * Fix flipping * Add new flags * Fix PHEW empty clumps * Stop over-wrriting * Little improvements to angular neighbours * Add catalogue masking * Change if-else statements * Cache only filtered data * Add PHEW cats * Add comments * Sort imports * Get Quijote workign * Docs * Add HMF calculation * Move to old * Fix angular * Add great circle distance * Update imports * Update impotrts * Update docs * Remove unused import * Fix a quick bug * Update compatibility * Rename files * Renaming * Improve compatiblity * Rename snapsht * Fix snapshot bug * Update interface * Finish updating interface * Update all paths * Add old scripts * Add basic halo * Update imports * Improve snapshot processing * Update ordering * Fix how CM positions accessed * Add merger paths * Add imports * Add merger reading * Add making a merger tree * Add a basic merger tree reader * Add imports * Add main branch walking + comments + debuggin * Get tree running * Add working merger tree walking along main branch * Add units conversion for merger data * Add hid_to_array_index * Update merger tree * Add mergertree mass to PHEWcat * Edit comments * Add this to track changes... * Fix a little bug * Add mergertree mass * Add cache clearing * Improve summing substructure code * Littbe bug * Little updates to the merger tree reader * Update .giignore * Add box selection * Add optional deletingf of a group * add to keep track of changes * Update changes * Remove * Add manual tracker * Fix bug * Add m200c_to_r200c * Add manual halo tracking * Remove skipped snapshots * update cosmo params to match csiborg * remove old comments * Add SDSSxALFALFA * Fix bugs * Rename * Edit paths * Updates * Add comments * Add comment * Add hour conversion * Add imports * Add new observation class * Add selection * Add imports * Fix small bug * Add field copying for safety * Add matching to survey without masking * Add P(k) calculation * Add nb * Edit comment * Move files * Remove merger import * Edit setup.yp * Fix typo * Edit import warnigns * update nb * Update README * Update README * Update README * Add skeleton * Add skeleton 2023-12-07 14:23:32 +00:00			`# Copyright (C) 2022 Richard Stiskalek`
			`# This program is free software; you can redistribute it and/or modify it`
			`# under the terms of the GNU General Public License as published by the`
			`# Free Software Foundation; either version 3 of the License, or (at your`
			`# option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful, but`
			`# WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General`
			`# Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License along`
			`# with this program; if not, write to the Free Software Foundation, Inc.,`
			`# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.`
			`r"""`
			`Script to process simulation files and create a single HDF5 file, in which`
			`particles are sorted by the particle halo IDs.`
			`"""`
			`from argparse import ArgumentParser`
			`from gc import collect`

			`import h5py`
			`import numpy`
			`from mpi4py import MPI`

			`import csiborgtools`
			`from csiborgtools import fprint`
			`from numba import jit`
			`from taskmaster import work_delegation`
			`from tqdm import trange, tqdm`
			`from utils import get_nsims`


			`@jit(nopython=True, boundscheck=False)`
			`def minmax_halo(hid, halo_ids, start_loop=0):`
			`"""`
			`Find the start and end index of a halo in a sorted array of halo IDs.`
			This is much faster than using `numpy.where` and then `numpy.min` and
			`numpy.max`.
			`"""`
			`start = None`
			`end = None`

			`for i in range(start_loop, halo_ids.size):`
			`n = halo_ids[i]`
			`if n == hid:`
			`if start is None:`
			`start = i`
			`end = i`
			`elif n > hid:`
			`break`
			`return start, end`


			`def process_snapshot(nsim, simname, halo_finder, verbose):`
			`"""`
			`Read in the snapshot particles, sort them by their halo ID and dump`
			`into a HDF5 file. Stores the first and last index of each halo in the`
			`particle array for fast slicing of the array to acces particles of a single`
			`halo.`
			`"""`
			`paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)`
			`nsnap = max(paths.get_snapshots(nsim, simname))`

			`if simname == "csiborg":`
			`partreader = csiborgtools.read.CSiBORGReader(paths)`
			`box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)`
			`else:`
			`partreader = csiborgtools.read.QuijoteReader(paths)`
			`box = None`

			`desc = {"hid": f"Halo finder ID ({halo_finder})of the particle.",`
			`"pos": "DM particle positions in box units.",`
			`"vel": "DM particle velocity in km / s.",`
			`"mass": "DM particle mass in Msun / h.",`
			`"pid": "DM particle ID",`
			`}`

			`fname = paths.processed_output(nsim, simname, halo_finder)`

			`fprint(f"loading HIDs of IC {nsim}.", verbose)`
			`hids = partreader.read_halo_id(nsnap, nsim, halo_finder, verbose)`
			`collect()`

			`fprint(f"sorting HIDs of IC {nsim}.")`
			`sort_indxs = numpy.argsort(hids)`

			`with h5py.File(fname, "w") as f:`
			`group = f.create_group("snapshot_final")`
			`group.attrs["header"] = "Snapshot data at z = 0."`

			`fprint("dumping halo IDs.", verbose)`
			`dset = group.create_dataset("halo_ids", data=hids[sort_indxs])`
			`dset.attrs["header"] = desc["hid"]`
			`del hids`
			`collect()`

			`fprint("reading, sorting and dumping the snapshot particles.", verbose)`
			`for kind in ["pos", "vel", "mass", "pid"]:`
			`x = partreader.read_snapshot(nsnap, nsim, kind)[sort_indxs]`

			`if simname == "csiborg" and kind == "vel":`
			`x = box.box2vel(x) if simname == "csiborg" else x`

			`if simname == "csiborg" and kind == "mass":`
			`x = box.box2solarmass(x) if simname == "csiborg" else x`

			`dset = f["snapshot_final"].create_dataset(kind, data=x)`
			`dset.attrs["header"] = desc[kind]`
			`del x`
			`collect()`

			`del sort_indxs`
			`collect()`

			`fprint(f"creating a halo map for IC {nsim}.")`
			`with h5py.File(fname, "r") as f:`
			`part_hids = f["snapshot_final"]["halo_ids"][:]`
			`# We loop over the unique halo IDs and remove the 0 halo ID`
			`unique_halo_ids = numpy.unique(part_hids)`
			`unique_halo_ids = unique_halo_ids[unique_halo_ids != 0]`
			`halo_map = numpy.full((unique_halo_ids.size, 3), numpy.nan,`
			`dtype=numpy.uint64)`
			`start_loop, niters = 0, unique_halo_ids.size`
			`for i in trange(niters, disable=not verbose):`
			`hid = unique_halo_ids[i]`
			`k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)`
			`halo_map[i, :] = hid, k0, kf`
			`start_loop = kf`

			`# Dump the halo mapping.`
			`with h5py.File(fname, "r+") as f:`
			`dset = f["snapshot_final"].create_dataset("halo_map", data=halo_map)`
			`dset.attrs["header"] = """`
			`Halo to particle mapping. Columns are HID, start index, end index.`
			`"""`
			`f.close()`

			`del part_hids`
			`collect()`

			`# Add the halo finder catalogue`
			`with h5py.File(fname, "r+") as f:`
			`group = f.create_group("halofinder_catalogue")`
			`group.attrs["header"] = f"Original {halo_finder} halo catalogue."`
			`cat = partreader.read_catalogue(nsnap, nsim, halo_finder)`

			`hid2pos = {hid: i for i, hid in enumerate(unique_halo_ids)}`

			`for key in cat.dtype.names:`
			`x = numpy.full(unique_halo_ids.size, numpy.nan,`
			`dtype=cat[key].dtype)`
			`for i in range(len(cat)):`
			`j = hid2pos[cat["index"][i]]`
			`x[j] = cat[key][i]`
			`group.create_dataset(key, data=x)`
			`f.close()`

			`# Lastly create the halo catalogue`
			`with h5py.File(fname, "r+") as f:`
			`group = f.create_group("halo_catalogue")`
			`group.attrs["header"] = f"{halo_finder} halo catalogue."`
			`group.create_dataset("index", data=unique_halo_ids)`
			`f.close()`


			`def add_initial_snapshot(nsim, simname, halo_finder, verbose):`
			`"""`
			`Sort the initial snapshot particles according to their final snapshot and`
			`add them to the final snapshot's HDF5 file.`
			`"""`
			`paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)`
			`fname = paths.processed_output(nsim, simname, halo_finder)`

			`if simname == "csiborg":`
			`partreader = csiborgtools.read.CSiBORGReader(paths)`
			`else:`
			`partreader = csiborgtools.read.QuijoteReader(paths)`

			fprint(f"processing simulation `{nsim}`.", verbose)
			`if simname == "csiborg":`
			`nsnap0 = 1`
			`elif simname == "quijote":`
			`nsnap0 = -1`
			`else:`
			raise ValueError(f"Unknown simulation `{simname}`.")

			`fprint("loading and sorting the initial PID.", verbose)`
			`sort_indxs = numpy.argsort(partreader.read_snapshot(nsnap0, nsim, "pid"))`

			`fprint("loading the final particles.", verbose)`
			`with h5py.File(fname, "r") as f:`
			`sort_indxs_final = f["snapshot_final/pid"][:]`
			`f.close()`

			`fprint("sorting the particles according to the final snapshot.", verbose)`
			`sort_indxs_final = numpy.argsort(numpy.argsort(sort_indxs_final))`
			`sort_indxs = sort_indxs[sort_indxs_final]`

			`del sort_indxs_final`
			`collect()`

			`fprint("loading and sorting the initial particle position.", verbose)`
			`pos = partreader.read_snapshot(nsnap0, nsim, "pos")[sort_indxs]`

			`del sort_indxs`
			`collect()`

			`# In Quijote some particles are position precisely at the edge of the`
			`# box. Move them to be just inside.`
			`if simname == "quijote":`
			`mask = pos >= 1`
			`if numpy.any(mask):`
			`spacing = numpy.spacing(pos[mask])`
			`assert numpy.max(spacing) <= 1e-5`
			`pos[mask] -= spacing`

			fprint(f"dumping particles for `{nsim}` to `{fname}`.", verbose)
			`with h5py.File(fname, "r+") as f:`
			`if "snapshot_initial" in f.keys():`
			`del f["snapshot_initial"]`
			`group = f.create_group("snapshot_initial")`
			`group.attrs["header"] = "Initial snapshot data."`
			`dset = group.create_dataset("pos", data=pos)`
			`dset.attrs["header"] = "DM particle positions in box units."`

			`f.close()`


			`def calculate_initial(nsim, simname, halo_finder, verbose):`
			`"""Calculate the Lagrangian patch centre of mass and size."""`
			`paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)`

			`fname = paths.processed_output(nsim, simname, halo_finder)`
			`fprint("loading the particle information.", verbose)`
			`f = h5py.File(fname, "r")`
			`pos = f["snapshot_initial/pos"]`
			`mass = f["snapshot_final/mass"]`
			`hid = f["halo_catalogue/index"][:]`
			`hid2map = csiborgtools.read.make_halomap_dict(`
			`f["snapshot_final/halo_map"][:])`

			`if simname == "csiborg":`
			`kwargs = {"box_size": 2048, "bckg_halfsize": 512}`
			`else:`
			`kwargs = {"box_size": 512, "bckg_halfsize": 256}`
			`overlapper = csiborgtools.match.ParticleOverlap(**kwargs)`

			`lagpatch_pos = numpy.full((len(hid), 3), numpy.nan, dtype=numpy.float32)`
			`lagpatch_size = numpy.full(len(hid), numpy.nan, dtype=numpy.float32)`
			`lagpatch_ncells = numpy.full(len(hid), numpy.nan, dtype=numpy.int32)`

			`for i in trange(len(hid), disable=not verbose):`
			`h = hid[i]`
			`# These are unasigned particles.`
			`if h == 0:`
			`continue`

			`parts_pos = csiborgtools.read.load_halo_particles(h, pos, hid2map)`
			`parts_mass = csiborgtools.read.load_halo_particles(h, mass, hid2map)`

			`# Skip if the halo has no particles or is too small.`
			`if parts_pos is None or parts_pos.size < 5:`
			`continue`

			`cm = csiborgtools.center_of_mass(parts_pos, parts_mass, boxsize=1.0)`
			`sep = csiborgtools.periodic_distance(parts_pos, cm, boxsize=1.0)`
			`delta = overlapper.make_delta(parts_pos, parts_mass, subbox=True)`

			`lagpatch_pos[i] = cm`
			`lagpatch_size[i] = numpy.percentile(sep, 99)`
			`lagpatch_ncells[i] = csiborgtools.delta2ncells(delta)`

			`f.close()`
			`collect()`

			`with h5py.File(fname, "r+") as f:`
			`grp = f["halo_catalogue"]`
			`dset = grp.create_dataset("lagpatch_pos", data=lagpatch_pos)`
			`dset.attrs["header"] = "Lagrangian patch centre of mass in box units."`

			`dset = grp.create_dataset("lagpatch_size", data=lagpatch_size)`
			`dset.attrs["header"] = "Lagrangian patch size in box units."`

			`dset = grp.create_dataset("lagpatch_ncells", data=lagpatch_ncells)`
			`dset.attrs["header"] = f"Lagrangian patch number of cells on a {kwargs['box_size']}^3 grid." # noqa`

			`f.close()`


			`def make_phew_halo_catalogue(nsim, verbose):`
			`"""`
			`Process the PHEW halo catalogue for a CSiBORG simulation at all snapshots.`
			`"""`
			`paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)`
			`snapshots = paths.get_snapshots(nsim, "csiborg")`
			`reader = csiborgtools.read.CSiBORGReader(paths)`
			`keys_write = ["index", "x", "y", "z", "mass_cl", "parent",`
			`"ultimate_parent", "summed_mass"]`

			`# Create a HDF5 file to store all this.`
			`fname = paths.processed_phew(nsim)`
			`with h5py.File(fname, "w") as f:`
			`f.close()`

			`for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):`
			`try:`
			`data = reader.read_phew_clumps(nsnap, nsim, verbose=False)`
			`except FileExistsError:`
			`continue`

			`with h5py.File(fname, "r+") as f:`
			`if str(nsnap) in f:`
			`print(f"Group {nsnap} already exists. Deleting.", flush=True)`
			`del f[str(nsnap)]`
			`grp = f.create_group(str(nsnap))`
			`for key in keys_write:`
			`grp.create_dataset(key, data=data[key])`

			`grp.attrs["header"] = f"CSiBORG PHEW clumps at snapshot {nsnap}."`
			`f.close()`

			`# Now write the redshifts`
			`scale_factors = numpy.full(len(snapshots), numpy.nan, dtype=numpy.float32)`
			`for i, nsnap in enumerate(snapshots):`
			`box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)`
			`scale_factors[i] = box._aexp`

			`redshifts = scale_factors[-1] / scale_factors - 1`

			`with h5py.File(fname, "r+") as f:`
			`grp = f.create_group("info")`
			`grp.create_dataset("redshift", data=redshifts)`
			`grp.create_dataset("snapshots", data=snapshots)`
			`grp.create_dataset("Om0", data=[box.Om0])`
			`grp.create_dataset("boxsize", data=[box.boxsize])`
			`f.close()`


			`def make_merger_tree_file(nsim, verbose):`
			`"""`
			Process the `.dat` merger tree files and dump them into a HDF5 file.
			`"""`
			`paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)`
			`reader = csiborgtools.read.CSiBORGReader(paths)`
			`snaps = paths.get_snapshots(nsim, "csiborg")`

			`fname = paths.processed_merger_tree(nsim)`
			`with h5py.File(fname, "w") as f:`
			`f.close()`

			`for nsnap in tqdm(snaps, desc="Loading merger files",`
			`disable=not verbose):`
			`try:`
			`data = reader.read_merger_tree(nsnap, nsim)`
			`except FileExistsError:`
			`continue`

			`with h5py.File(fname, "r+") as f:`
			`grp = f.create_group(str(nsnap))`

			`grp.create_dataset("clump",`
			`data=data[:, 0].astype(numpy.int32))`
			`grp.create_dataset("progenitor",`
			`data=data[:, 1].astype(numpy.int32))`
			`grp.create_dataset("progenitor_outputnr",`
			`data=data[:, 2].astype(numpy.int32))`
			`grp.create_dataset("desc_mass",`
			`data=data[:, 3].astype(numpy.float32))`
			`grp.create_dataset("desc_npart",`
			`data=data[:, 4].astype(numpy.int32))`
			`grp.create_dataset("desc_pos",`
			`data=data[:, 5:8].astype(numpy.float32))`
			`grp.create_dataset("desc_vel",`
			`data=data[:, 8:11].astype(numpy.float32))`
			`f.close()`


			`def append_merger_tree_mass_to_phew_catalogue(nsim, verbose):`
			`"""`
			`Append mass of haloes from mergertree files to the PHEW catalogue. The`
			`difference between this and the PHEW value is that the latter is written`
			`before unbinding is performed.`

			`Note that currently only does this for the highest snapshot.`
			`"""`
			`paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)`
			`snapshots = paths.get_snapshots(nsim, "csiborg")`
			`merger_reader = csiborgtools.read.MergerReader(nsim, paths)`

			`for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):`
			`# TODO do this for all later`
			`if nsnap < 930:`
			`continue`
			`try:`
			`phewcat = csiborgtools.read.CSiBORGPHEWCatalogue(nsnap, nsim,`
			`paths)`
			`except ValueError:`
			`phewcat.close()`
			`continue`

			`mergertree_mass = merger_reader.match_mass_to_phewcat(phewcat)`
			`phewcat.close()`

			`fname = paths.processed_phew(nsim)`
			`with h5py.File(fname, "r+") as f:`
			`grp = f[str(nsnap)]`
			`grp.create_dataset("mergertree_mass_new", data=mergertree_mass)`
			`f.close()`


			`def main(nsim, args):`
			`if args.make_final:`
			`process_snapshot(nsim, args.simname, args.halofinder, True)`

			`if args.make_initial:`
			`add_initial_snapshot(nsim, args.simname, args.halofinder, True)`
			`calculate_initial(nsim, args.simname, args.halofinder, True)`

			`if args.make_phew:`
			`make_phew_halo_catalogue(nsim, True)`

			`if args.make_merger:`
			`make_merger_tree_file(nsim, True)`

			`if args.append_merger_mass:`
			`append_merger_tree_mass_to_phew_catalogue(nsim, True)`


			`if __name__ == "__main__":`
			`parser = ArgumentParser()`
			`parser.add_argument("--simname", type=str, default="csiborg",`
			`choices=["csiborg", "quijote"],`
			`help="Simulation name")`
			`parser.add_argument("--nsims", type=int, nargs="+", default=None,`
			help="IC realisations. If `-1` processes all.")
			`parser.add_argument("--halofinder", type=str, help="Halo finder")`
			`parser.add_argument("--make_final", action="store_true", default=False,`
			`help="Process the final snapshot.")`
			`parser.add_argument("--make_initial", action="store_true", default=False,`
			`help="Process the initial snapshot.")`
			`parser.add_argument("--make_phew", action="store_true", default=False,`
			`help="Process the PHEW halo catalogue.")`
			`parser.add_argument("--make_merger", action="store_true", default=False,`
			`help="Process the merger tree files.")`
			`parser.add_argument("--append_merger_mass", action="store_true",`
			`default=False,`
			`help="Append the merger tree mass to the PHEW cat.")`

			`args = parser.parse_args()`
			`paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)`
			`nsims = get_nsims(args, paths)`

			`def _main(nsim):`
			`main(nsim, args)`

			`work_delegation(_main, nsims, MPI.COMM_WORLD)`