CSiBORG FoF switch (#75)

* Add moving FoF membership files * add FoF membership path * Add notes where its PHEW * Add FoF catalogue path * Correct typo * Add more functionalities * Make work with halo IDs from FoF * Edit print statement * Fix copy bug * copy * Add FoF catalogue reading * Clean up script * Fix typo * Little edits * Fix naming convention * Rename key * Remove loading substructure particles * Rename CSiBORG Cat * Rename clumps cat * Rename cat * Remove misplaced import * Switch to halos * rm import * structfit of only halos * Add FoF halo reading * Add a short comment * Fix __getitem__ to work with int * Fix problems * Improve __getitem__ * Add more conversion * Fix indexing * Fix __getitem__ assertion * Fix numbers * Rename * Fix verbosity flags * Add full Quijote HMF option * Add plot of Quijote only * Add quijote full paths * Fix the fit_init script * Renam arg * Update .gitignore * add default argument name * Change default verbosity flag * Modernise script structure * Fix dictionary * Fix reading to include m200c * Modernise script * Add args
2025-06-28 10:51:10 +00:00 · 2023-07-24 14:10:21 +02:00 · 2023-07-24 14:10:21 +02:00 · eb8d070fff
commit eb8d070fff
parent fcd1a6b321
19 changed files with 659 additions and 466 deletions
--- a/scripts/fit_halos.py
+++ b/scripts/fit_halos.py
@ -13,15 +13,17 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-A script to fit halos (concentration, ...). The particle array of each CSiBORG
-realisation must have been split in advance by `runsplit_halos`.
+A script to fit FoF halos (concentration, ...). The particle array of each
+CSiBORG realisation must have been processed in advance by `pre_dumppart.py`.
 """
 from argparse import ArgumentParser
 from datetime import datetime

 import numpy
 from mpi4py import MPI
-from tqdm import tqdm
+from tqdm import trange
+
+from utils import get_nsims

 try:
    import csiborgtools
@ -38,18 +40,13 @@ nproc = comm.Get_size()
 verbose = nproc == 1

 parser = ArgumentParser()
-parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
-parser.add_argument("--ics", type=int, nargs="+", default=None,
+parser.add_argument("--nsims", type=int, nargs="+", default=None,
                    help="IC realisations. If `-1` processes all simulations.")
 args = parser.parse_args()
 paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 partreader = csiborgtools.read.ParticleReader(paths)
 nfwpost = csiborgtools.fits.NFWPosterior()
-
-if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics("csiborg")
-else:
-    ics = args.ics
+nsims = get_nsims(args, paths)

 cols_collect = [
    ("index", numpy.int32),
@ -67,13 +64,12 @@ cols_collect = [
    ("lambda200c", numpy.float32),
    ("r200m", numpy.float32),
    ("m200m", numpy.float32),
+    ("r500m", numpy.float32),
+    ("m500m", numpy.float32),
    ]


-def fit_clump(particles, clump_info, box):
-    """
-    Fit an object. Can be eithe a clump or a parent halo.
-    """
+def fit_halo(particles, clump_info, box):
    obj = csiborgtools.fits.Clump(particles, clump_info, box)

    out = {}
@ -82,16 +78,15 @@ def fit_clump(particles, clump_info, box):
    for i, v in enumerate(["vx", "vy", "vz"]):
        out[v] = numpy.average(obj.vel[:, i], weights=obj["M"])
    # Overdensity masses
-    out["r200c"], out["m200c"] = obj.spherical_overdensity_mass(200,
-                                                                kind="crit")
-    out["r500c"], out["m500c"] = obj.spherical_overdensity_mass(500,
-                                                                kind="crit")
-    out["r200m"], out["m200m"] = obj.spherical_overdensity_mass(200,
-                                                                kind="matter")
+    for n in [200, 500]:
+        out[f"r{n}c"], out[f"m{n}c"] = obj.spherical_overdensity_mass(
+            n, kind="crit", npart_min=10)
+        out[f"r{n}m"], out[f"m{n}m"] = obj.spherical_overdensity_mass(
+            n, kind="matter", npart_min=10)
    # NFW fit
    if out["npart"] > 10 and numpy.isfinite(out["r200c"]):
        Rs, rho0 = nfwpost.fit(obj)
-        out["conc"] = Rs / out["r200c"]
+        out["conc"] = out["r200c"] / Rs
        out["rho0"] = rho0
    # Spin within R200c
    if numpy.isfinite(out["r200c"]):
@ -100,8 +95,8 @@ def fit_clump(particles, clump_info, box):


 # We MPI loop over all simulations.
-jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
-for nsim in [ics[i] for i in jobs]:
+jobs = csiborgtools.fits.split_jobs(len(nsims), nproc)[rank]
+for nsim in [nsims[i] for i in jobs]:
    print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
          flush=True)
    nsnap = max(paths.get_snapshots(nsim))
@ -110,49 +105,30 @@ for nsim in [ics[i] for i in jobs]:
    # Particle archive
    f = csiborgtools.read.read_h5(paths.particles(nsim))
    particles = f["particles"]
-    clump_map = f["clumpmap"]
-    clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
-    clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
-                                                   load_fitted=False)
-    # We check whether we fit halos or clumps, will be indexing over different
-    # iterators.
-    if args.kind == "halos":
-        ismain = clumps_cat.ismain
-    else:
-        ismain = numpy.ones(len(clumps_cat), dtype=bool)
-
+    halo_map = f["halomap"]
+    hid2map = {clid: i for i, clid in enumerate(halo_map[:, 0])}
+    cat = csiborgtools.read.CSiBORGHaloCatalogue(
+        nsim, paths, with_lagpatch=False, load_initial=False, rawdata=True,
+        load_fitted=False)
    # Even if we are calculating parent halo this index runs over all clumps.
-    out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
-    indxs = clumps_cat["index"]
-    for i, clid in enumerate(tqdm(indxs)) if verbose else enumerate(indxs):
-        clid = clumps_cat["index"][i]
-        out["index"][i] = clid
-        # If we are fitting halos and this clump is not a main, then continue.
-        if args.kind == "halos" and not ismain[i]:
-            continue
-
-        if args.kind == "halos":
-            part = csiborgtools.read.load_parent_particles(
-                clid, particles, clump_map, clid2map, clumps_cat)
-        else:
-            part = csiborgtools.read.load_clump_particles(clid, particles,
-                                                          clump_map, clid2map)
+    out = csiborgtools.read.cols_to_structured(len(cat), cols_collect)
+    indxs = cat["index"]
+    for i in trange(len(cat)) if verbose else range(len(cat)):
+        hid = cat["index"][i]
+        out["index"][i] = hid

+        part = csiborgtools.read.load_halo_particles(hid, particles, halo_map,
+                                                     hid2map)
        # We fit the particles if there are any. If not we assign the index,
        # otherwise it would be NaN converted to integers (-2147483648) and
        # yield an error further down.
        if part is None:
            continue

-        _out = fit_clump(part, clumps_cat[i], box)
+        _out = fit_halo(part, cat[i], box)
        for key in _out.keys():
            out[key][i] = _out[key]

-    # Finally, we save the results. If we were analysing main halos, then
-    # remove array indices that do not correspond to parent halos.
-    if args.kind == "halos":
-        out = out[ismain]
-
-    fout = paths.structfit(nsnap, nsim, args.kind)
+    fout = paths.structfit(nsnap, nsim)
    print(f"Saving to `{fout}`.", flush=True)
    numpy.save(fout, out)
--- a/scripts/fit_hmf.py
+++ b/scripts/fit_hmf.py
@ -58,7 +58,9 @@ def get_counts(nsim, bins, paths, parser_args):
    bounds = {"dist": (0, parser_args.Rmax)}

    if simname == "csiborg":
-        cat = csiborgtools.read.HaloCatalogue(nsim, paths, bounds=bounds)
+        cat = csiborgtools.read.CSiBORGHaloCatalogue(
+            nsim, paths, bounds=bounds, with_lagpatch=False,
+            load_initial=False)
        logmass = numpy.log10(cat["totpartmass"])
        counts = csiborgtools.fits.number_counts(logmass, bins)
    elif simname == "quijote":
@ -71,6 +73,12 @@ def get_counts(nsim, bins, paths, parser_args):
            cat = cat0.pick_fiducial_observer(nobs, rmax=parser_args.Rmax)
            logmass = numpy.log10(cat["group_mass"])
            counts[nobs, :] = csiborgtools.fits.number_counts(logmass, bins)
+    elif simname == "quijote_full":
+        cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
+        logmass = numpy.log10(cat["group_mass"])
+        counts = csiborgtools.fits.number_counts(logmass, bins)
+    else:
+        raise ValueError(f"Unknown simulation name `{simname}`.")

    fout = paths.halo_counts(simname, nsim)
    if parser_args.verbose:
@ -80,12 +88,15 @@ def get_counts(nsim, bins, paths, parser_args):

 if __name__ == "__main__":
    parser = ArgumentParser()
-    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
+    parser.add_argument("--simname", type=str,
+                        choices=["csiborg", "quijote", "quijote_full"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
    parser.add_argument("--Rmax", type=float, default=155/0.705,
                        help="High-resolution region radius")
+    parser.add_argument("--bw", type=float, default=0.2,
+                        help="Bin width in dex")
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                        default=False)

@ -96,7 +107,7 @@ if __name__ == "__main__":
    verbose = nproc == 1
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsims = get_nsims(parser_args, paths)
-    bins = numpy.arange(11., 16., 0.2, dtype=numpy.float32)
+    bins = numpy.arange(11., 16., parser_args.bw, dtype=numpy.float32)

    def do_work(nsim):
        get_counts(nsim, bins, paths, parser_args)
--- a/scripts/fit_init.py
+++ b/scripts/fit_init.py
@ -24,6 +24,8 @@ import numpy
 from mpi4py import MPI
 from tqdm import tqdm

+from utils import get_nsims
+
 try:
    import csiborgtools
 except ModuleNotFoundError:
@ -41,16 +43,16 @@ verbose = nproc == 1

 # Argument parser
 parser = ArgumentParser()
-parser.add_argument("--ics", type=int, nargs="+", default=None,
+parser.add_argument("--simname", type=str, default="csiborg",
+                    choices=["csiborg", "quijote"],
+                    help="Simulation name")
+parser.add_argument("--nsims", type=int, nargs="+", default=None,
                    help="IC realisations. If `-1` processes all simulations.")
 args = parser.parse_args()
 paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 partreader = csiborgtools.read.ParticleReader(paths)

-if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics("csiborg")
-else:
-    ics = args.ics
+nsims = get_nsims(args, paths)

 cols_collect = [("index", numpy.int32),
                ("x", numpy.float32),
@ -61,8 +63,8 @@ cols_collect = [("index", numpy.int32),


 # MPI loop over simulations
-jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
-for nsim in [ics[i] for i in jobs]:
+jobs = csiborgtools.fits.split_jobs(len(nsims), nproc)[rank]
+for nsim in [nsims[i] for i in jobs]:
    nsnap = max(paths.get_snapshots(nsim))
    overlapper = csiborgtools.match.ParticleOverlap()
    print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
@ -70,22 +72,18 @@ for nsim in [ics[i] for i in jobs]:

    parts = csiborgtools.read.read_h5(paths.initmatch(nsim, "particles"))
    parts = parts['particles']
-    clump_map = csiborgtools.read.read_h5(paths.particles(nsim))
-    clump_map = clump_map["clumpmap"]
-    clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
-                                                   load_fitted=False)
-    clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
-    ismain = clumps_cat.ismain
+    halo_map = csiborgtools.read.read_h5(paths.particles(nsim))
+    halo_map = halo_map["halomap"]
+    cat = csiborgtools.read.CSiBORGHaloCatalogue(
+        nsim, paths, rawdata=True, load_fitted=False, load_initial=False)
+    hid2map = {hid: i for i, hid in enumerate(halo_map[:, 0])}

-    out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
-    indxs = clumps_cat["index"]
-    for i, hid in enumerate(tqdm(indxs) if verbose else indxs):
+    out = csiborgtools.read.cols_to_structured(len(cat), cols_collect)
+    for i, hid in enumerate(tqdm(cat["index"]) if verbose else cat["index"]):
        out["index"][i] = hid
-        if not ismain[i]:
-            continue
+        part = csiborgtools.read.load_halo_particles(hid, parts, halo_map,
+                                                     hid2map)

-        part = csiborgtools.read.load_parent_particles(hid, parts, clump_map,
-                                                       clid2map, clumps_cat)
        # Skip if the halo is too small.
        if part is None or part.size < 100:
            continue
@ -101,7 +99,6 @@ for nsim in [ics[i] for i in jobs]:
        delta = overlapper.make_delta(part[:, :3], part[:, 3], subbox=True)
        out["lagpatch_ncells"][i] = csiborgtools.fits.delta2ncells(delta)

-    out = out[ismain]
    # Now save it
    fout = paths.initmatch(nsim, "fit")
    print(f"{datetime.now()}: dumping fits to .. `{fout}`.",
--- a/scripts/match_singlematch.py
+++ b/scripts/match_singlematch.py
@ -30,7 +30,7 @@ except ModuleNotFoundError:


 def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
-    from csiborgtools.read import HaloCatalogue, read_h5
+    from csiborgtools.read import CSiBORGHaloCatalogue, read_h5

    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    smooth_kwargs = {"sigma": sigma, "mode": "constant", "cval": 0.0}
@ -40,10 +40,10 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
    # Load the raw catalogues (i.e. no selection) including the initial CM
    # positions and the particle archives.
    bounds = {"totpartmass": (1e12, None)}
-    cat0 = HaloCatalogue(nsim0, paths, load_initial=True, bounds=bounds,
-                         with_lagpatch=True, load_clumps_cat=True)
-    catx = HaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
-                         with_lagpatch=True, load_clumps_cat=True)
+    cat0 = CSiBORGHaloCatalogue(nsim0, paths, load_initial=True, bounds=bounds,
+                                with_lagpatch=True, load_clumps_cat=True)
+    catx = CSiBORGHaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
+                                with_lagpatch=True, load_clumps_cat=True)

    clumpmap0 = read_h5(paths.particles(nsim0))["clumpmap"]
    parts0 = read_h5(paths.initmatch(nsim0, "particles"))["particles"]
--- a/scripts/mv_fofmembership.py
+++ b/scripts/mv_fofmembership.py
@ -0,0 +1,151 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Short script to copy the CSiBORG FoF membership files and halo catalogues
+calculated by Julien and to convert the membership to a NumPy array. It also
+sorts the FoF halo IDs to match the particle ordering of the PHEW output.
+"""
+from argparse import ArgumentParser
+from datetime import datetime
+from gc import collect
+from os.path import join
+from shutil import copy
+
+import numpy
+from mpi4py import MPI
+from taskmaster import work_delegation
+from tqdm import trange
+
+from utils import get_nsims
+
+try:
+    import csiborgtools
+except ModuleNotFoundError:
+    import sys
+    sys.path.append("../")
+    import csiborgtools
+
+
+def copy_membership(nsim, verbose=True):
+    """
+    Copy the FoF particle halo membership to the CSiBORG directory and write it
+    as a NumPy array instead of a text file.
+
+    Parameters
+    ----------
+    nsim : int
+        IC realisation index.
+    verbose : bool, optional
+        Verbosity flag.
+    """
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    fpath = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
+                 f"sim_{nsim}/particle_membership_{nsim}_FOF.txt")
+    if verbose:
+        print(f"Loading from ... `{fpath}`.")
+    data = numpy.genfromtxt(fpath, dtype=int)
+
+    fout = paths.fof_membership(nsim)
+    if verbose:
+        print(f"Saving to ... `{fout}`.")
+    numpy.save(fout, data)
+
+
+def copy_catalogue(nsim, verbose=True):
+    """
+    Move the FoF catalogue to the CSiBORG directory.
+
+    Parameters
+    ----------
+    nsim : int
+        IC realisation index.
+    verbose : bool, optional
+        Verbosity flag.
+    """
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    source = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
+                  f"sim_{nsim}/halo_catalog_{nsim}_FOF.txt")
+    dest = paths.fof_cat(nsim)
+    if verbose:
+        print("Copying`{}` to `{}`.".format(source, dest))
+    copy(source, dest)
+
+
+def sort_fofid(nsim, verbose=True):
+    """
+    Read the FoF particle halo membership and sort the halo IDs to the ordering
+    of particles in the PHEW clump IDs.
+
+    Parameters
+    ----------
+    nsim : int
+        IC realisation index.
+    verbose : bool, optional
+        Verbosity flag.
+    """
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    nsnap = max(paths.get_snapshots(nsim))
+    fpath = paths.fof_membership(nsim)
+    if verbose:
+        print(f"{datetime.now()}: loading from ... `{fpath}`.")
+    # Columns are halo ID, particle ID.
+    fof = numpy.load(fpath)
+
+    reader = csiborgtools.read.ParticleReader(paths)
+    pars_extract = ["x"]  # Dummy variable
+    __, pids = reader.read_particle(nsnap, nsim, pars_extract,
+                                    return_structured=False, verbose=verbose)
+    del __
+    collect()
+
+    # Map the particle IDs in pids to their corresponding PHEW array index
+    if verbose:
+        print(f"{datetime.now()}: mapping particle IDs to their indices.")
+    pids_idx = {pid: i for i, pid in enumerate(pids)}
+
+    if verbose:
+        print(f"{datetime.now()}: mapping FoF HIDs to their array indices.")
+    # Unassigned particle IDs are assigned a halo ID of 0. Same as PHEW.
+    fof_hids = numpy.zeros(pids.size, dtype=numpy.int32)
+    for i in trange(fof.shape[0]) if verbose else range(fof.shape[0]):
+        hid, pid = fof[i]
+        fof_hids[pids_idx[pid]] = hid
+
+    fout = paths.fof_membership(nsim, sorted=True)
+    if verbose:
+        print(f"Saving the sorted data to ... `{fout}`")
+    numpy.save(fout, fof_hids)
+
+
+def main(nsim, verbose=True):
+    copy_membership(nsim, verbose=verbose)
+    copy_catalogue(nsim, verbose=verbose)
+    sort_fofid(nsim, verbose=verbose)
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("--simname", type=str, default="csiborg",
+                        choices=["csiborg", "quijote"],
+                        help="Simulation name")
+    parser.add_argument("--nsims", type=int, nargs="+", default=None,
+                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
+    args = parser.parse_args()
+
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    nsims = get_nsims(args, paths)
+    comm = MPI.COMM_WORLD
+
+    work_delegation(main, nsims, comm)
--- a/scripts/pre_dumppart.py
+++ b/scripts/pre_dumppart.py
@ -12,12 +12,12 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-Script to load in the simulation particles, load them by their clump ID and
-dump into a HDF5 file. Stores the first and last index of each clump in the
+Script to load in the simulation particles, sort them by their FoF halo ID and
+dump into a HDF5 file. Stores the first and last index of each halo in the
 particle array. This can be used for fast slicing of the array to acces
 particles of a single clump.
 """
-
+from argparse import ArgumentParser
 from datetime import datetime
 from gc import collect

@ -25,8 +25,11 @@ import h5py
 import numba
 import numpy
 from mpi4py import MPI
+from taskmaster import work_delegation
 from tqdm import trange

+from utils import get_nsims
+
 try:
    import csiborgtools
 except ModuleNotFoundError:
@ -35,80 +38,79 @@ except ModuleNotFoundError:
    sys.path.append("../")
    import csiborgtools

-from argparse import ArgumentParser
-
-# We set up the MPI
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-nproc = comm.Get_size()
-
-# And next parse all the arguments and set up CSiBORG objects
-parser = ArgumentParser()
-parser.add_argument("--ics", type=int, nargs="+", default=None,
-                    help="IC realisations. If `-1` processes all simulations.")
-args = parser.parse_args()
-
-verbose = nproc == 1
-paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-partreader = csiborgtools.read.ParticleReader(paths)
-# Keep "ID" as the last column!
-pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
-
-if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics("csiborg")
-else:
-    ics = args.ics
-

@numba.jit(nopython=True)
-def minmax_clump(clid, clump_ids, start_loop=0):
+def minmax_halo(hid, halo_ids, start_loop=0):
    """
-    Find the start and end index of a clump in a sorted array of clump IDs.
+    Find the start and end index of a halo in a sorted array of halo IDs.
    This is much faster than using `numpy.where` and then `numpy.min` and
    `numpy.max`.
    """
    start = None
    end = None

-    for i in range(start_loop, clump_ids.size):
-        n = clump_ids[i]
-        if n == clid:
+    for i in range(start_loop, halo_ids.size):
+        n = halo_ids[i]
+        if n == hid:
            if start is None:
                start = i
            end = i
-        elif n > clid:
+        elif n > hid:
            break
    return start, end


-# MPI loop over individual simulations. We read in the particles from RAMSES
-# files and dump them to a HDF5 file.
-jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
-for i in jobs:
-    nsim = ics[i]
+def main(nsim, simname, verbose):
+    """
+    Read in the snapshot particles, sort them by their FoF halo ID and dump
+    into a HDF5 file. Stores the first and last index of each halo in the
+    particle array for fast slicing of the array to access particles of a
+    single halo.
+
+    Parameters
+    ----------
+    nsim : int
+        IC realisation index.
+    simname : str
+        Simulation name.
+    verbose : bool
+        Verbosity flag.
+
+    Returns
+    -------
+    None
+    """
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    partreader = csiborgtools.read.ParticleReader(paths)
+
+    if simname == "quijote":
+        raise NotImplementedError("Not implemented for Quijote yet.")
+
+    # Keep "ID" as the last column!
+    pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
    nsnap = max(paths.get_snapshots(nsim))
    fname = paths.particles(nsim)
-    # We first read in the clump IDs of the particles and infer the sorting.
-    # Right away we dump the clump IDs to a HDF5 file and clear up memory.
-    print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
-          flush=True)
-    part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
-    sort_indxs = numpy.argsort(part_cids).astype(numpy.int32)
-    part_cids = part_cids[sort_indxs]
+    # We first read in the halo IDs of the particles and infer the sorting.
+    # Right away we dump the halo IDs to a HDF5 file and clear up memory.
+    if verbose:
+        print(f"{datetime.now()}: loading particles {nsim}.", flush=True)
+    part_hids = partreader.read_fof_hids(nsim)
+    sort_indxs = numpy.argsort(part_hids).astype(numpy.int32)
+    part_hids = part_hids[sort_indxs]
    with h5py.File(fname, "w") as f:
-        f.create_dataset("clump_ids", data=part_cids)
+        f.create_dataset("halo_ids", data=part_hids)
        f.close()
-    del part_cids
+    del part_hids
    collect()

-    # Next we read in the particles and sort them by their clump ID.
+    # Next we read in the particles and sort them by their halo ID.
    # We cannot directly read this as an unstructured array because the float32
-    # precision is insufficient to capture the clump IDs.
+    # precision is insufficient to capture the halo IDs.
    parts, pids = partreader.read_particle(
        nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
    # Now we in two steps save the particles and particle IDs.
-    print(f"{datetime.now()}: rank {rank} dumping particles from {nsim}.",
-          flush=True)
+    if verbose:
+        print(f"{datetime.now()}: dumping particles from {nsim}.", flush=True)
    parts = parts[sort_indxs]
    pids = pids[sort_indxs]
    del sort_indxs
@ -126,29 +128,48 @@ for i in jobs:
    del parts
    collect()

-    print(f"{datetime.now()}: rank {rank} creating clump mapping for {nsim}.",
-          flush=True)
+    if verbose:
+        print(f"{datetime.now()}: creating halo map for {nsim}.", flush=True)
    # Load clump IDs back to memory
    with h5py.File(fname, "r") as f:
-        part_cids = f["clump_ids"][:]
+        part_hids = f["halo_ids"][:]
    # We loop over the unique clump IDs.
-    unique_clump_ids = numpy.unique(part_cids)
-    clump_map = numpy.full((unique_clump_ids.size, 3), numpy.nan,
-                           dtype=numpy.int32)
+    unique_halo_ids = numpy.unique(part_hids)
+    halo_map = numpy.full((unique_halo_ids.size, 3), numpy.nan,
+                          dtype=numpy.int32)
    start_loop = 0
-    niters = unique_clump_ids.size
+    niters = unique_halo_ids.size
    for i in trange(niters) if verbose else range(niters):
-        clid = unique_clump_ids[i]
-        k0, kf = minmax_clump(clid, part_cids, start_loop=start_loop)
-        clump_map[i, 0] = clid
-        clump_map[i, 1] = k0
-        clump_map[i, 2] = kf
+        hid = unique_halo_ids[i]
+        k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
+        halo_map[i, 0] = hid
+        halo_map[i, 1] = k0
+        halo_map[i, 2] = kf
        start_loop = kf

    # We save the mapping to a HDF5 file
    with h5py.File(paths.particles(nsim), "r+") as f:
-        f.create_dataset("clumpmap", data=clump_map)
+        f.create_dataset("halomap", data=halo_map)
        f.close()

-    del part_cids
+    del part_hids
    collect()
+
+
+if __name__ == "__main__":
+    # Parse the arguments and set up the CSiBORG objects.
+    parser = ArgumentParser()
+    parser.add_argument("--simname", type=str, default="csiborg",
+                        choices=["csiborg", "quijote"],
+                        help="Simulation name")
+    parser.add_argument("--nsims", type=int, nargs="+", default=None,
+                        help="IC realisations. If `-1` processes all .")
+    args = parser.parse_args()
+
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    nsims = get_nsims(args, paths)
+
+    def _main(nsim, verbose=MPI.COMM_WORLD.size == 1):
+        main(nsim, args.simname, verbose=verbose)  # verbose iff 1 MPI rank
+
+    work_delegation(_main, nsims, MPI.COMM_WORLD)
--- a/scripts/pre_sortinit.py
+++ b/scripts/pre_sortinit.py
@ -14,7 +14,7 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 Script to sort the initial snapshot particles according to their final
-snapshot ordering, which is sorted by the clump IDs.
+snapshot ordering, which is sorted by the halo IDs.
 """
 from argparse import ArgumentParser
 from datetime import datetime
@ -23,6 +23,9 @@ from gc import collect
 import h5py
 import numpy
 from mpi4py import MPI
+from taskmaster import work_delegation
+
+from utils import get_nsims

 try:
    import csiborgtools
@ -33,42 +36,37 @@ except ModuleNotFoundError:
    import csiborgtools


-# Get MPI things
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-nproc = comm.Get_size()
-verbose = nproc == 1
+def _main(nsim, simname, verbose):
+    """
+    Sort the initial snapshot particles according to their final snapshot
+    ordering and dump them into a HDF5 file.

-# Argument parser
-parser = ArgumentParser()
-parser.add_argument("--ics", type=int, nargs="+", default=None,
-                    help="IC realisations. If `-1` processes all simulations.")
-args = parser.parse_args()
-paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-partreader = csiborgtools.read.ParticleReader(paths)
-# NOTE: ID has to be the last column.
-pars_extract = ["x", "y", "z", "M", "ID"]
+    Parameters
+    ----------
+    nsim : int
+        IC realisation index.
+    simname : str
+        Simulation name.
+    verbose : bool
+        Verbosity flag.
+    """
+    if simname == "quijote":
+        raise NotImplementedError("Quijote not implemented yet.")

-if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics("csiborg")
-else:
-    ics = args.ics
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    partreader = csiborgtools.read.ParticleReader(paths)

-# We loop over simulations. Each simulation is then procesed with MPI, rank 0
-# loads the data and broadcasts it to other ranks.
-jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
-for i in jobs:
-    nsim = ics[i]
-    nsnap = max(paths.get_snapshots(nsim))
-
-    print(f"{datetime.now()}: reading and processing simulation {nsim}.",
-          flush=True)
+    if verbose:
+        print(f"{datetime.now()}: reading and processing simulation {nsim}.",
+              flush=True)
    # We first load the particle IDs in the final snapshot.
    pidf = csiborgtools.read.read_h5(paths.particles(nsim))
    pidf = pidf["particle_ids"]
    # Then we load the particles in the initil snapshot and make sure that
-    # their particle IDs are sorted as in the final snapshot.
-    # Again, because of precision this must be read as structured.
+    # their particle IDs are sorted as in the final snapshot. Again, because of
+    # precision this must be read as structured.
+    # NOTE: ID has to be the last column.
+    pars_extract = ["x", "y", "z", "M", "ID"]
    part0, pid0 = partreader.read_particle(
        1, nsim, pars_extract, return_structured=False, verbose=verbose)
    # First enforce them to already be sorted and then apply reverse
@ -77,6 +75,26 @@ for i in jobs:
    del pid0
    collect()
    part0 = part0[numpy.argsort(numpy.argsort(pidf))]
-    print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
+    if verbose:
+        print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
    with h5py.File(paths.initmatch(nsim, "particles"), "w") as f:
        f.create_dataset("particles", data=part0)
+
+
+if __name__ == "__main__":
+    # Argument parser
+    parser = ArgumentParser()
+    parser.add_argument("--simname", type=str, default="csiborg",
+                        choices=["csiborg", "quijote"],
+                        help="Simulation name")
+    parser.add_argument("--nsims", type=int, nargs="+", default=None,
+                        help="IC realisations. If `-1` processes all.")
+    args = parser.parse_args()
+
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    nsims = get_nsims(args, paths)
+
+    def main(nsim):
+        _main(nsim, args.simname, MPI.COMM_WORLD.size == 1)
+
+    work_delegation(main, nsims, MPI.COMM_WORLD)
--- a/scripts/utils.py
+++ b/scripts/utils.py
@ -81,7 +81,7 @@ def read_single_catalogue(args, config, nsim, run, rmax, paths, nobs=None):

    Returns
    -------
-    cat : csiborgtools.read.HaloCatalogue or csiborgtools.read.QuijoteHaloCatalogue  # noqa
+    cat : csiborgtools.read.CSiBORGHaloCatalogue or csiborgtools.read.QuijoteHaloCatalogue  # noqa
        Halo catalogue with selection criteria applied.
    """
    selection = config.get(run, None)
@ -89,7 +89,7 @@ def read_single_catalogue(args, config, nsim, run, rmax, paths, nobs=None):
        raise KeyError(f"No configuration for run {run}.")
    # We first read the full catalogue without applying any bounds.
    if args.simname == "csiborg":
-        cat = csiborgtools.read.HaloCatalogue(nsim, paths)
+        cat = csiborgtools.read.CSiBORGHaloCatalogue(nsim, paths)
    else:
        cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
        if nobs is not None: