Mirror of https://github.com/Richard-Sti/csiborgtools_public.git, synced 2025-05-12 13:41:13 +00:00
Add pynbody and other support (#92)
* Simplify box units
* Move old scripts
* Add printing
* Update readers
* Disable boundscheck
* Add new ordering
* Clean up imports
* Enforce dtype and add mass to quijote
* Simplify print statements
* Fix little typos
* Fix key bug
* Bug fixing
* Delete boring comments
* Improve ultimate clumps for PHEW
* Delete boring comments
* Add basic reading
* Remove 0th index HID
* Add flipping of X and Z
* Updates to halo catalogues
* Add ordered caching
* Fix flipping
* Add new flags
* Fix PHEW empty clumps
* Stop over-writing
* Little improvements to angular neighbours
* Add catalogue masking
* Change if-else statements
* Cache only filtered data
* Add PHEW cats
* Add comments
* Sort imports
* Get Quijote working
* Docs
* Add HMF calculation
* Move to old
* Fix angular
* Add great circle distance
* Update imports
* Update imports
* Update docs
* Remove unused import
* Fix a quick bug
* Update compatibility
* Rename files
* Renaming
* Improve compatibility
* Rename snapshot
* Fix snapshot bug
* Update interface
* Finish updating interface
* Update all paths
* Add old scripts
* Add basic halo
* Update imports
* Improve snapshot processing
* Update ordering
* Fix how CM positions accessed
* Add merger paths
* Add imports
* Add merger reading
* Add making a merger tree
* Add a basic merger tree reader
* Add imports
* Add main branch walking + comments + debugging
* Get tree running
* Add working merger tree walking along main branch
* Add units conversion for merger data
* Add hid_to_array_index
* Update merger tree
* Add mergertree mass to PHEWcat
* Edit comments
* Add this to track changes...
* Fix a little bug
* Add mergertree mass
* Add cache clearing
* Improve summing substructure code
* Little bug
* Little updates to the merger tree reader
* Update .gitignore
* Add box selection
* Add optional deleting of a group
* Add to keep track of changes
* Update changes
* Remove
* Add manual tracker
* Fix bug
* Add m200c_to_r200c
* Add manual halo tracking
* Remove skipped snapshots
* Update cosmo params to match csiborg
* Remove old comments
* Add SDSSxALFALFA
* Fix bugs
* Rename
* Edit paths
* Updates
* Add comments
* Add comment
* Add hour conversion
* Add imports
* Add new observation class
* Add selection
* Add imports
* Fix small bug
* Add field copying for safety
* Add matching to survey without masking
* Add P(k) calculation
* Add nb
* Edit comment
* Move files
* Remove merger import
* Edit setup.py
* Fix typo
* Edit import warnings
* Update nb
* Update README
* Update README
* Update README
* Add skeleton
* Add skeleton
This commit is contained in:
parent 5500fbd2b9 · commit e972f8e3f2

53 changed files with 4627 additions and 1774 deletions
159  old/cluster_crosspk.py  Normal file
@@ -0,0 +1,159 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
MPI script to calculate the matter cross power spectrum between CSiBORG
IC realisations. Units are Mpc/h.
"""
raise NotImplementedError("This script is currently not working.")

from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from itertools import combinations
from os import remove
from os.path import join

import joblib
import numpy
import Pk_library as PKL
from mpi4py import MPI

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools


dumpdir = "/mnt/extraspace/rstiskalek/csiborg/"
parser = ArgumentParser()
parser.add_argument("--grid", type=int)
parser.add_argument("--halfwidth", type=float, default=0.5)
args = parser.parse_args()

# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
MAS = "CIC"  # mass assignment scheme

paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
box = csiborgtools.read.CSiBORGBox(paths)
reader = csiborgtools.read.CSiBORGReader(paths)
ics = paths.get_ics("csiborg")
nsims = len(ics)

# File paths
ftemp = join(dumpdir, "temp_crosspk",
             "out_{}_{}" + "_{}".format(args.halfwidth))
fout = join(dumpdir, "crosspk",
            "out_{}_{}" + "_{}.p".format(args.halfwidth))


jobs = csiborgtools.utils.split_jobs(nsims, nproc)[rank]
for n in jobs:
    print(f"Rank {rank} at {datetime.now()}: saving {n}th delta.", flush=True)
    nsim = ics[n]
    particles = reader.read_snapshot(max(paths.get_snapshots(nsim, "csiborg")),
                                     nsim, ["x", "y", "z", "M"], verbose=False)
    # Halfwidth -- particle selection
    if args.halfwidth < 0.5:
        particles = csiborgtools.read.halfwidth_select(
            args.halfwidth, particles)
        length = box.box2mpc(2 * args.halfwidth) * box.h  # Mpc/h
    else:
        length = box.box2mpc(1) * box.h  # Mpc/h

    # Calculate the overdensity field
    field = csiborgtools.field.DensityField(particles, length, box, MAS)
    delta = field.overdensity_field(args.grid, verbose=False)
    aexp = box._aexp

    # Try to clean up memory
    del field, particles, box, reader
    collect()

    # Dump the results
    with open(ftemp.format(nsim, "delta") + ".npy", "wb") as f:
        numpy.save(f, delta)
    joblib.dump([aexp, length], ftemp.format(nsim, "lengths") + ".p")

    # Try to clean up memory
    del delta
    collect()


comm.Barrier()

# Get off-diagonal elements and append the diagonal
combs = [c for c in combinations(range(nsims), 2)]
for i in range(nsims):
    combs.append((i, i))
prev_delta = [-1, None, None, None]  # i, delta, aexp, length

jobs = csiborgtools.utils.split_jobs(len(combs), nproc)[rank]
for n in jobs:
    i, j = combs[n]
    print("Rank {}@{}: combination {}.".format(rank, datetime.now(), (i, j)))

    # If i same as last time then don't have to load it
    if prev_delta[0] == i:
        delta_i = prev_delta[1]
        aexp_i = prev_delta[2]
        length_i = prev_delta[3]
    else:
        with open(ftemp.format(ics[i], "delta") + ".npy", "rb") as f:
            delta_i = numpy.load(f)
        aexp_i, length_i = joblib.load(ftemp.format(ics[i], "lengths") + ".p")
        # Store in prev_delta
        prev_delta[0] = i
        prev_delta[1] = delta_i
        prev_delta[2] = aexp_i
        prev_delta[3] = length_i

    # Get jth delta
    with open(ftemp.format(ics[j], "delta") + ".npy", "rb") as f:
        delta_j = numpy.load(f)
    aexp_j, length_j = joblib.load(ftemp.format(ics[j], "lengths") + ".p")

    # Verify the difference between the scale factors! Say more than 1%
    daexp = abs((aexp_i - aexp_j) / aexp_i)
    if daexp > 0.01:
        raise ValueError(
            "Boxes {} and {} final snapshot scale factors disagree by "
            "`{}` percent!".format(ics[i], ics[j], daexp * 100))
    # Check how well the boxsizes agree
    dlength = abs((length_i - length_j) / length_i)
    if dlength > 0.001:
        raise ValueError("Boxes {} and {} box sizes disagree by `{}` percent!"
                         .format(ics[i], ics[j], dlength * 100))

    # Calculate the cross power spectrum
    Pk = PKL.XPk([delta_i, delta_j], length_i, axis=1, MAS=[MAS, MAS],
                 threads=1)
    joblib.dump(Pk, fout.format(ics[i], ics[j]))

    del delta_i, delta_j, Pk
    collect()


# Clean up the temp files
comm.Barrier()
if rank == 0:
    print("Cleaning up the temporary files...")
    for ic in ics:
        remove(ftemp.format(ic, "delta") + ".npy")
        remove(ftemp.format(ic, "lengths") + ".p")

print("All finished!")
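As an aside, here is a minimal, self-contained sketch of the pairing-and-caching pattern the loop above implements: enumerate all off-diagonal pairs plus the diagonal, and avoid reloading the i-th field while consecutive pairs share it. Names are illustrative, not part of the script.

from itertools import combinations

nsims = 4
pairs = list(combinations(range(nsims), 2)) + [(i, i) for i in range(nsims)]

cached_i, field_i = None, None
for i, j in pairs:
    if cached_i != i:            # reload only when the i-index changes
        field_i = f"delta_{i}"   # stands in for numpy.load(...)
        cached_i = i
    field_j = f"delta_{j}"       # the j-th field is always (re)loaded
    print(f"XPk({field_i}, {field_j})")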
14  old/cluster_crosspk.sh  Normal file
@@ -0,0 +1,14 @@
nthreads=20
memory=40
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="cluster_crosspk.py"
grid=1024
halfwidth=0.13

cm="addqueue -q $queue -n $nthreads -m $memory $env $file --grid $grid --halfwidth $halfwidth"

echo "Submitting:"
echo $cm
echo
$cm
155  old/cluster_knn_auto.py  Normal file
@@ -0,0 +1,155 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the kNN-CDF for a set of halo catalogues.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool

import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from taskmaster import work_delegation

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools

from utils import open_catalogues


def do_auto(args, config, cats, nsim, paths):
    """
    Calculate the kNN-CDF single catalogue auto-correlation.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments.
    config : dict
        Configuration dictionary.
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    nsim : int
        Simulation index.
    paths : csiborgtools.paths.Paths
        Paths object.

    Returns
    -------
    None
    """
    cat = cats[nsim]
    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
    knncdf = csiborgtools.clustering.kNN_1DCDF()
    knn = cat.knn(in_initial=False, subtract_observer=False, periodic=True)
    rs, cdf = knncdf(
        knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])
    totvol = (4 / 3) * numpy.pi * args.Rmax ** 3
    fout = paths.knnauto(args.simname, args.run, nsim)
    if args.verbose:
        print(f"Saving output to `{fout}`.")
    joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout)


def do_cross_rand(args, config, cats, nsim, paths):
    """
    Calculate the kNN-CDF cross catalogue random correlation.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments.
    config : dict
        Configuration dictionary.
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    nsim : int
        Simulation index.
    paths : csiborgtools.paths.Paths
        Paths object.

    Returns
    -------
    None
    """
    cat = cats[nsim]
    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
    knn1 = cat.knn(in_initial=False, subtract_observer=False, periodic=True)

    knn2 = NearestNeighbors()
    pos2 = rvs_gen(len(cat))
    knn2.fit(pos2)

    knncdf = csiborgtools.clustering.kNN_1DCDF()
    rs, cdf0, cdf1, joint_cdf = knncdf.joint(
        knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])
    corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)

    fout = paths.knnauto(args.simname, args.run, nsim)
    if args.verbose:
        print(f"Saving output to `{fout}`.", flush=True)
    joblib.dump({"rs": rs, "corr": corr}, fout)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--run", type=str, help="Run name.")
    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
    parser.add_argument("--Rmax", type=float, default=155,
                        help="High-resolution region radius")  # noqa
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                        default=False)
    args = parser.parse_args()

    with open("./cluster_knn_auto.yml", "r") as file:
        config = yaml.safe_load(file)
    comm = MPI.COMM_WORLD
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    cats = open_catalogues(args, config, paths, comm)

    if args.verbose and comm.Get_rank() == 0:
        print(f"{datetime.now()}: starting to calculate the kNN statistic.")

    def do_work(nsim):
        if "random" in args.run:
            do_cross_rand(args, config, cats, nsim, paths)
        else:
            do_auto(args, config, cats, nsim, paths)

    nsims = list(cats.keys())
    work_delegation(do_work, nsims, comm, master_verbose=args.verbose)

    comm.Barrier()
    if comm.Get_rank() == 0:
        print(f"{datetime.now()}: all finished. Quitting.")
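For orientation, a conceptual sketch of the kNN-CDF statistic that `do_auto` delegates to `csiborgtools.clustering.kNN_1DCDF`: draw random query points, record the distance to the k-th nearest halo from each, and take the empirical CDF of those distances. This uses plain scikit-learn on mock data, not the csiborgtools implementation.

import numpy
from sklearn.neighbors import NearestNeighbors

rng = numpy.random.default_rng(42)
halos = rng.uniform(0, 100, size=(1000, 3))    # mock halo positions
queries = rng.uniform(0, 100, size=(5000, 3))  # random query points

k = 8
dist, _ = NearestNeighbors().fit(halos).kneighbors(queries, n_neighbors=k)
dk = dist[:, -1]  # distance to the k-th nearest halo from each query

rs = numpy.linspace(0.5, 25.0, 50)
cdf = numpy.array([(dk <= r).mean() for r in rs])  # empirical kNN-CDF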
27  old/cluster_knn_auto.sh  Normal file
@@ -0,0 +1,27 @@
nthreads=4
memory=4
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="cluster_knn_auto.py"
Rmax=219.8581560283688
verbose="true"



simname="quijote"
nsims="0 1 2"
# simname="csiborg"
# nsims="7444 7900 9052"

run="mass003"

pythoncm="$env $file --run $run --simname $simname --nsims $nsims --Rmax $Rmax --verbose $verbose"

echo $pythoncm
$pythoncm

# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm
158  old/cluster_knn_auto.yml  Normal file
@@ -0,0 +1,158 @@
rmin: 0.1
rmax: 100
nneighbours: 8
nsamples: 1.e+7
batch_size: 1.e+6
neval: 10000
seed: 42
nbins_marks: 10


################################################################################
#                                totpartmass                                   #
################################################################################


"mass001":
    primary:
        name:
            - totpartmass
            - group_mass
        min: 1.e+12
        max: 1.e+13

"mass002":
    primary:
        name:
            - totpartmass
            - group_mass
        min: 1.e+13
        max: 1.e+14

"mass003":
    primary:
        name:
            - totpartmass
            - group_mass
        min: 1.e+14

"mass003_poisson":
    poisson: true
    primary:
        name:
            - totpartmass
            - group_mass
        min: 1.e+14


################################################################################
#                        totpartmass + lambda200c                              #
################################################################################


"mass001_spinlow":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        max: 0.5

"mass001_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        min: 0.5

"mass001_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5

"mass002_spinlow":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        max: 0.5

"mass002_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        min: 0.5

"mass002_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5

"mass003_spinlow":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        max: 0.5

"mass003_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        min: 0.5

"mass003_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5


################################################################################
#                            Cross with random                                 #
################################################################################

"mass001_random":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
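A sketch of how a `primary` selection block such as `mass001` above might be applied to a catalogue; it mirrors the `read_single` helper in `old/cluster_knn_cross.py` below. The `cat` dict here is a stand-in for a real halo catalogue.

import numpy

selection = {"primary": {"name": "totpartmass", "min": 1e12, "max": 1e13}}
cat = {"totpartmass": numpy.array([5e11, 3e12, 8e13])}  # stand-in catalogue

psel = selection["primary"]
mask = numpy.ones(len(cat[psel["name"]]), dtype=bool)
if psel.get("min") is not None:
    mask &= cat[psel["name"]] >= psel["min"]
if psel.get("max") is not None:
    mask &= cat[psel["name"]] < psel["max"]
print(mask)  # [False  True False]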
144  old/cluster_knn_cross.py  Normal file
@@ -0,0 +1,144 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the kNN-CDF for a set of CSiBORG halo catalogues.

TODO:
    - [ ] Add support for new catalogue readers. Currently will not work.
    - [ ] Update catalogue readers.
    - [ ] Update paths.
    - [ ] Update to cross-correlate different mass populations from different
          simulations.
"""
raise NotImplementedError("This script is currently not working.")
from argparse import ArgumentParser
from datetime import datetime
from itertools import combinations
from warnings import warn

import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from taskmaster import master_process, worker_process

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


###############################################################################
#                          MPI and arguments                                  #
###############################################################################
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
args = parser.parse_args()
with open("../scripts/knn_cross.yml", "r") as file:
    config = yaml.safe_load(file)

Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
ics = paths.get_ics("csiborg")
knncdf = csiborgtools.clustering.kNN_1DCDF()

###############################################################################
#                                 Analysis                                    #
###############################################################################


def read_single(selection, cat):
    mmask = numpy.ones(len(cat), dtype=bool)
    pos = cat.positions(False)
    # Primary selection
    psel = selection["primary"]
    pmin, pmax = psel.get("min", None), psel.get("max", None)
    if pmin is not None:
        mmask &= cat[psel["name"]] >= pmin
    if pmax is not None:
        mmask &= cat[psel["name"]] < pmax
    return pos[mmask, ...]


def do_cross(run, ics):
    _config = config.get(run, None)
    if _config is None:
        warn("No configuration for run {}.".format(run), stacklevel=1)
        return
    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
    knn1, knn2 = NearestNeighbors(), NearestNeighbors()

    cat1 = csiborgtools.read.ClumpsCatalogue(ics[0], paths, max_dist=Rmax)
    pos1 = read_single(_config, cat1)
    knn1.fit(pos1)

    cat2 = csiborgtools.read.ClumpsCatalogue(ics[1], paths, max_dist=Rmax)
    pos2 = read_single(_config, cat2)
    knn2.fit(pos2)

    rs, cdf0, cdf1, joint_cdf = knncdf.joint(
        knn1,
        knn2,
        rvs_gen=rvs_gen,
        nneighbours=int(config["nneighbours"]),
        rmin=config["rmin"],
        rmax=config["rmax"],
        nsamples=int(config["nsamples"]),
        neval=int(config["neval"]),
        batch_size=int(config["batch_size"]),
        random_state=config["seed"],
    )

    corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
    fout = paths.knncross(args.simname, run, ics)
    joblib.dump({"rs": rs, "corr": corr}, fout)


def do_runs(nsims):
    for run in args.runs:
        do_cross(run, nsims)


###############################################################################
#                     Crosscorrelation calculation                            #
###############################################################################


if nproc > 1:
    if rank == 0:
        tasks = list(combinations(ics, 2))
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_runs, comm, verbose=False)
else:
    tasks = list(combinations(ics, 2))
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_runs(task)
comm.Barrier()


if rank == 0:
    print("{}: all finished.".format(datetime.now()))
    quit()  # Force quit the script
18  old/cluster_knn_cross.sh  Normal file
@@ -0,0 +1,18 @@
nthreads=151
memory=4
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="knn_cross.py"

runs="mass001"

pythoncm="$env $file --runs $runs"

echo $pythoncm
$pythoncm

# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm
29  old/cluster_knn_cross.yml  Normal file
@@ -0,0 +1,29 @@
rmin: 0.1
rmax: 100
nneighbours: 64
nsamples: 1.e+7
batch_size: 1.e+6
neval: 10000
seed: 42


################################################################################
#                                totpartmass                                   #
################################################################################

"mass001":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13

"mass002":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14

"mass003":
    primary:
        name: totpartmass
        min: 1.e+14
82  old/cluster_tpcf_auto.py  Normal file
@@ -0,0 +1,82 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the auto-2PCF of CSiBORG catalogues.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool

import joblib
import numpy
import yaml
from mpi4py import MPI

from taskmaster import work_delegation
from utils import open_catalogues

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


def do_auto(args, config, cats, nsim, paths):
    cat = cats[nsim]
    tpcf = csiborgtools.clustering.Mock2PCF()
    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
    bins = numpy.logspace(
        numpy.log10(config["rpmin"]), numpy.log10(config["rpmax"]),
        config["nrpbins"] + 1,)

    pos = cat.position(in_initial=False, cartesian=True)
    nrandom = int(config["randmult"] * pos.shape[0])
    rp, wp = tpcf(pos, rvs_gen, nrandom, bins)

    fout = paths.knnauto(args.simname, args.run, nsim)
    joblib.dump({"rp": rp, "wp": wp}, fout)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--run", type=str, help="Run name.")
    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
    parser.add_argument("--Rmax", type=float, default=155,
                        help="High-resolution region radius.")
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                        default=False, help="Verbosity flag.")
    args = parser.parse_args()

    with open("./cluster_tpcf_auto.yml", "r") as file:
        config = yaml.safe_load(file)

    comm = MPI.COMM_WORLD
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    cats = open_catalogues(args, config, paths, comm)

    if args.verbose and comm.Get_rank() == 0:
        print(f"{datetime.now()}: starting to calculate the 2PCF statistic.")

    def do_work(nsim):
        return do_auto(args, config, cats, nsim, paths)

    nsims = list(cats.keys())
    work_delegation(do_work, nsims, comm)
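For reference, a stand-in sketch of the natural 2PCF estimator, xi(r) = (N_R / N_D)^2 DD/RR - 1, which is conceptually what a mock 2PCF with a random catalogue computes; this is not the `Mock2PCF` implementation.

import numpy
from scipy.spatial import cKDTree

rng = numpy.random.default_rng(42)
data = rng.uniform(0, 100, size=(2000, 3))   # mock halo positions
rand = rng.uniform(0, 100, size=(20000, 3))  # random catalogue

bins = numpy.logspace(numpy.log10(0.5), numpy.log10(40), 21)

# Cumulative pair counts at each bin edge, differenced into per-bin counts.
dd = numpy.diff(cKDTree(data).count_neighbors(cKDTree(data), bins))
rr = numpy.diff(cKDTree(rand).count_neighbors(cKDTree(rand), bins))

xi = (len(rand) / len(data)) ** 2 * dd / rr - 1  # up to finite-N corrections
rp = numpy.sqrt(bins[:-1] * bins[1:])            # geometric bin centres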
26  old/cluster_tpcf_auto.sh  Normal file
@@ -0,0 +1,26 @@
nthreads=26
memory=7
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="cluster_tpcf_auto.py"
Rmax=219.8581560283688
verbose="true"

# simname="quijote"
# nsims="0 1 2"
simname="csiborg"
nsims="7444 7900 9052"


run="mass003"

pythoncm="$env $file --run $run --simname $simname --nsims $nsims --Rmax $Rmax --verbose $verbose"

echo $pythoncm
$pythoncm

# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm
136  old/cluster_tpcf_auto.yml  Normal file
@@ -0,0 +1,136 @@
rpmin: 0.5
rpmax: 40
nrpbins: 20
randmult: 100
seed: 42
nbins_marks: 10


################################################################################
#                                totpartmass                                   #
################################################################################


"mass001":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13

"mass002":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14

"mass003":
    primary:
        name: totpartmass
        min: 1.e+14


################################################################################
#                        totpartmass + lambda200c                              #
################################################################################


"mass001_spinlow":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        marked: true
        max: 0.5

"mass001_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        marked: true
        min: 0.5

"mass001_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5

"mass002_spinlow":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        marked: true
        max: 0.5

"mass002_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        marked: true
        min: 0.5

"mass002_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5

"mass003_spinlow":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        marked: true
        max: 0.5

"mass003_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        marked: true
        min: 0.5

"mass003_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5


################################################################################
#                            Cross with random                                 #
################################################################################

"mass001_random":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
108  old/fit_hmf.py  Normal file
@@ -0,0 +1,108 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the HMF for CSiBORG and Quijote haloes.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool

import numpy
from mpi4py import MPI

from taskmaster import work_delegation
from utils import get_nsims

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


def get_counts(nsim, bins, paths, parser_args):
    """
    Calculate and save the number of haloes in each mass bin.
    """
    simname = parser_args.simname
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    bounds = {"dist": (0, parser_args.Rmax)}

    if simname == "csiborg":
        cat = csiborgtools.read.CSiBORGHaloCatalogue(
            nsim, paths, bounds=bounds, load_fitted=False, load_initial=False)
        logmass = numpy.log10(cat["fof_totpartmass"])
        counts = csiborgtools.number_counts(logmass, bins)
    elif simname == "quijote":
        cat0 = csiborgtools.read.QuijoteHaloCatalogue(
            nsim, paths, nsnap=4, load_fitted=False, load_initial=False)
        nmax = int(cat0.box.boxsize // (2 * parser_args.Rmax))**3
        counts = numpy.full((nmax, len(bins) - 1), numpy.nan,
                            dtype=numpy.float32)

        for nobs in range(nmax):
            cat = cat0.pick_fiducial_observer(nobs, rmax=parser_args.Rmax)
            logmass = numpy.log10(cat["group_mass"])
            counts[nobs, :] = csiborgtools.number_counts(logmass, bins)
    elif simname == "quijote_full":
        cat = csiborgtools.read.QuijoteHaloCatalogue(
            nsim, paths, nsnap=4, load_fitted=False, load_initial=False,
            load_backup=parser_args.from_quijote_backup)
        logmass = numpy.log10(cat["group_mass"])
        counts = csiborgtools.number_counts(logmass, bins)
    else:
        raise ValueError(f"Unknown simulation name `{simname}`.")

    fout = paths.halo_counts(simname, nsim, parser_args.from_quijote_backup)
    if parser_args.verbose:
        print(f"{datetime.now()}: saving halo counts to `{fout}`.")
    numpy.savez(fout, counts=counts, bins=bins, rmax=parser_args.Rmax)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--simname", type=str,
                        choices=["csiborg", "quijote", "quijote_full"],
                        help="Simulation name.")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="Indices of simulations to cross. If `-1`, processes all simulations.")  # noqa
    parser.add_argument(
        "--Rmax", type=float, default=155,
        help="High-res region radius in Mpc / h. Ignored for `quijote_full`.")
    parser.add_argument("--from_quijote_backup",
                        type=lambda x: bool(strtobool(x)), default=False,
                        help="Flag to indicate Quijote backup data.")
    parser.add_argument("--lims", type=float, nargs="+", default=[11., 16.],
                        help="Mass limits in Msun / h.")
    parser.add_argument("--bw", type=float, default=0.2,
                        help="Bin width in dex.")
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                        default=False, help="Verbosity flag.")
    parser_args = parser.parse_args()

    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsims = get_nsims(parser_args, paths)

    if len(parser_args.lims) != 2:
        raise ValueError("Mass limits must be a pair of floats.")

    bins = numpy.arange(*parser_args.lims, parser_args.bw, dtype=numpy.float32)

    def do_work(nsim):
        get_counts(nsim, bins, paths, parser_args)

    work_delegation(do_work, nsims, MPI.COMM_WORLD)
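A sketch of turning the saved counts into an HMF estimate, dn/dlog10(M) = N_bin / (V * bin width); the numbers below are illustrative mock data, not script output.

import numpy

bins = numpy.arange(11.0, 16.0, 0.2)  # log10(M / (Msun / h)) bin edges
rng = numpy.random.default_rng(42)
logmass = rng.normal(13.0, 0.5, size=10000)  # mock halo log-masses
counts, _ = numpy.histogram(logmass, bins=bins)

Rmax = 155.0  # Mpc / h, radius of the spherical volume
volume = (4 / 3) * numpy.pi * Rmax**3
hmf = counts / (volume * numpy.diff(bins))  # per (Mpc/h)^3 per dex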
24  old/fit_hmf.sh  Executable file
@@ -0,0 +1,24 @@
nthreads=11
memory=2
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="fit_hmf.py"

simname="quijote_full"
nsims="-1"
verbose=True
lower_lim=12.0
upper_lim=16.0
Rmax=155
from_quijote_backup="true"
bw=0.2

pythoncm="$env $file --simname $simname --nsims $nsims --Rmax $Rmax --lims $lower_lim $upper_lim --bw $bw --from_quijote_backup $from_quijote_backup --verbose $verbose"

$pythoncm

# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm
109  old/fit_profiles.py  Normal file
@@ -0,0 +1,109 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the particles' separation from the CM and save it.
Currently MPI is not supported.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect

import numpy
from mpi4py import MPI
from tqdm import trange

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools

parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
                    help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()

# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

if nproc > 1:
    raise NotImplementedError("MPI is not implemented yet.")

paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cols_collect = [("r", numpy.float32), ("M", numpy.float32)]
if args.ics is None or args.ics == -1:
    nsims = paths.get_ics("csiborg")
else:
    nsims = args.ics


# We loop over simulations. Here later optionally add MPI.
for i, nsim in enumerate(nsims):
    if rank == 0:
        now = datetime.now()
        print(f"{now}: calculating {i}th simulation `{nsim}`.", flush=True)
    nsnap = max(paths.get_snapshots(nsim, "csiborg"))
    box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)

    f = csiborgtools.read.read_h5(paths.particles(nsim, "csiborg"))
    particles = f["particles"]
    clump_map = f["clumpmap"]
    clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
    clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
                                                   load_fitted=False)
    ismain = clumps_cat.ismain
    ntasks = len(clumps_cat)

    # We loop over halos and add their particle positions to this dictionary,
    # which we will later save as an archive.
    out = {}
    for j in trange(ntasks) if nproc == 1 else range(ntasks):
        # If we are fitting halos and this clump is not a main, then continue.
        if not ismain[j]:
            continue

        clumpid = clumps_cat["index"][j]
        parts = csiborgtools.read.load_parent_particles(
            clumpid, particles, clump_map, clid2map, clumps_cat)
        # If we have no particles, then do not save anything.
        if parts is None:
            continue
        obj = csiborgtools.fits.Clump(parts, clumps_cat[j], box)
        r200m, m200m = obj.spherical_overdensity_mass(200, npart_min=10,
                                                      kind="matter")
        r = obj.r()
        mask = r <= r200m

        _out = csiborgtools.read.cols_to_structured(numpy.sum(mask),
                                                    cols_collect)

        _out["r"] = r[mask]
        _out["M"] = obj["M"][mask]
        out[str(clumpid)] = _out

    # Finished, so we save everything.
    fout = paths.radpos_path(nsnap, nsim)
    now = datetime.now()
    print(f"{now}: saving radial profiles for simulation {nsim} to `{fout}`",
          flush=True)
    numpy.savez(fout, **out)

    # Clean up the memory just to be sure.
    del out
    collect()
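A simplified sketch of the spherical-overdensity measurement that `spherical_overdensity_mass` performs above: sort particles by radius, accumulate mass, and find where the mean enclosed density first drops below `delta` times a background density. This is an assumed, minimal version, not the csiborgtools code.

import numpy

def so_mass(r, m, rho_bg, delta=200, npart_min=10):
    # Sort particles by radius and accumulate the enclosed mass.
    order = numpy.argsort(r)
    r, cmass = r[order], numpy.cumsum(m[order])
    # Mean enclosed density at each particle's radius (assumes r > 0).
    rho_enc = cmass / ((4 / 3) * numpy.pi * r**3)
    # The first crossing below the target overdensity defines r_delta, m_delta.
    below = numpy.where(rho_enc < delta * rho_bg)[0]
    if len(below) == 0 or below[0] < npart_min:
        return numpy.nan, numpy.nan
    k = below[0]
    return r[k], cmass[k]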
686  old/merger.py  Normal file
@@ -0,0 +1,686 @@
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Support for reading the PHEW/ACACIA CSiBORG merger trees. However, note that
the merger trees are very unreliable.
"""
from abc import ABC
from datetime import datetime
from gc import collect

import numpy
from h5py import File
from tqdm import tqdm, trange
from treelib import Tree

from ..utils import periodic_distance
from .paths import Paths

###############################################################################
#                          Utility functions.                                 #
###############################################################################


def clump_identifier(clump, nsnap):
    """
    Generate a unique identifier for a clump at a given snapshot.

    Parameters
    ----------
    clump : int
        Clump ID.
    nsnap : int
        Snapshot index.

    Returns
    -------
    str
    """
    return f"{str(clump).rjust(9, 'x')}__{str(nsnap).rjust(4, 'x')}"


def extract_identifier(identifier):
    """
    Extract the clump ID and snapshot index from an identifier generated by
    `clump_identifier`.

    Parameters
    ----------
    identifier : str
        Identifier.

    Returns
    -------
    clump, nsnap : int
        Clump ID and snapshot index.
    """
    clump, nsnap = identifier.split('__')
    return int(clump.lstrip('x')), int(nsnap.lstrip('x'))
||||
|
||||
|
||||
###############################################################################
|
||||
# Merger tree reader class. #
|
||||
###############################################################################
|
||||
|
||||
|
||||
class BaseMergerReader(ABC):
|
||||
"""
|
||||
Base class for the CSiBORG merger tree reader.
|
||||
"""
|
||||
_paths = None
|
||||
_nsim = None
|
||||
_min_snap = None
|
||||
_cache = {}
|
||||
|
||||
@property
|
||||
def paths(self):
|
||||
"""Paths manager."""
|
||||
if self._paths is None:
|
||||
raise ValueError("`paths` is not set.")
|
||||
return self._paths
|
||||
|
||||
@paths.setter
|
||||
def paths(self, paths):
|
||||
assert isinstance(paths, Paths)
|
||||
self._paths = paths
|
||||
|
||||
@property
|
||||
def nsim(self):
|
||||
"""Simulation index."""
|
||||
if self._nsim is None:
|
||||
raise ValueError("`nsim` is not set.")
|
||||
return self._nsim
|
||||
|
||||
@nsim.setter
|
||||
def nsim(self, nsim):
|
||||
assert isinstance(nsim, (int, numpy.integer))
|
||||
self._nsim = nsim
|
||||
|
||||
@property
|
||||
def min_snap(self):
|
||||
"""Minimum snapshot index to read."""
|
||||
return self._min_snap
|
||||
|
||||
@min_snap.setter
|
||||
def min_snap(self, min_snap):
|
||||
if min_snap is not None:
|
||||
assert isinstance(min_snap, (int, numpy.integer))
|
||||
self._min_snap = int(min_snap)
|
||||
|
||||
def cache_length(self):
|
||||
"""Length of the cache."""
|
||||
return len(self._cache)
|
||||
|
||||
def cache_clear(self):
|
||||
"""Clear the cache."""
|
||||
self._cache = {}
|
||||
collect()
|
||||
|
||||
def __getitem__(self, key):
|
||||
try:
|
||||
return self._cache[key]
|
||||
except KeyError:
|
||||
fname = self.paths.processed_merger_tree(self.nsim)
|
||||
|
||||
nsnap, kind = key.split("__")
|
||||
|
||||
with File(fname, "r") as f:
|
||||
if kind == "clump_to_array":
|
||||
cl = self[f"{nsnap}__clump"]
|
||||
|
||||
x = {}
|
||||
for i, c in enumerate(cl):
|
||||
if c in x:
|
||||
x[c] += (i,)
|
||||
else:
|
||||
x[c] = (i,)
|
||||
else:
|
||||
x = f[f"{str(nsnap)}/{kind}"][:]
|
||||
|
||||
# Cache it
|
||||
self._cache[key] = x
|
||||
|
||||
return x
|
||||
|
||||
|
||||
class MergerReader(BaseMergerReader):
|
||||
"""
|
||||
Merger tree reader.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
nsim : int
|
||||
Simulation index.
|
||||
paths : Paths
|
||||
Paths manager.
|
||||
min_snap : int
|
||||
Minimum snapshot index. Trees below this snapshot will not be read.
|
||||
"""
|
||||
def __init__(self, nsim, paths, min_snap=None):
|
||||
self.nsim = nsim
|
||||
self.paths = paths
|
||||
self.min_snap = min_snap
|
||||
|
||||
def get_info(self, current_clump, current_snap, is_main=None):
|
||||
"""
|
||||
Make a list of information about a clump at a given snapshot.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
current_clump : int
|
||||
Clump ID.
|
||||
current_snap : int
|
||||
Snapshot index.
|
||||
is_main : bool
|
||||
Whether this is the main progenitor.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list
|
||||
"""
|
||||
if current_clump < 0:
|
||||
raise ValueError("Clump ID must be positive.")
|
||||
|
||||
if is_main is not None and not isinstance(is_main, bool):
|
||||
raise ValueError("`is_main` must be a boolean.")
|
||||
|
||||
k = self[f"{current_snap}__clump_to_array"][current_clump][0]
|
||||
|
||||
out = [self[f"{current_snap}__desc_mass"][k],
|
||||
*self[f"{current_snap}__desc_pos"][k][::-1]] # TODO REMOVE LATER
|
||||
|
||||
if is_main is not None:
|
||||
return [is_main,] + out
|
||||
|
||||
return out
|
||||
|
||||
def get_mass(self, clump, snap):
|
||||
"""
|
||||
Get the mass of a clump at a given snapshot.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
clump : int
|
||||
Clump ID.
|
||||
snap : int
|
||||
Snapshot index.
|
||||
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
"""
|
||||
if clump < 0:
|
||||
raise ValueError("Clump ID must be positive.")
|
||||
k = self[f"{snap}__clump_to_array"][clump][0]
|
||||
return self[f"{snap}__desc_mass"][k]
|
||||
|
||||
def get_pos(self, clump, snap):
|
||||
if clump < 0:
|
||||
raise ValueError("Clump ID must be positive.")
|
||||
k = self[f"{snap}__clump_to_array"][clump][0]
|
||||
return self[f"{snap}__desc_pos"][k]
|
||||
|
||||
def find_main_progenitor(self, clump, nsnap):
|
||||
"""
|
||||
Find the main progenitor of a clump at a given snapshot. Cases are:
|
||||
- `clump > 0`, `progenitor > 0`: main progenitor is in the adjacent
|
||||
snapshot,
|
||||
- `clump > 0`, `progenitor < 0`: main progenitor is not in the
|
||||
adjacent snapshot.
|
||||
- `clump < 0`, `progenitor = 0`: no progenitor, newly formed clump.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
clump : int
|
||||
Clump ID.
|
||||
nsnap : int
|
||||
Snapshot index.
|
||||
|
||||
Returns
|
||||
-------
|
||||
progenitor : int
|
||||
Main progenitor clump ID.
|
||||
progenitor_snap : int
|
||||
Main progenitor snapshot index.
|
||||
"""
|
||||
if not clump > 0:
|
||||
raise ValueError("Clump ID must be positive.")
|
||||
|
||||
cl2array = self[f"{nsnap}__clump_to_array"]
|
||||
if clump in cl2array:
|
||||
k = cl2array[clump]
|
||||
else:
|
||||
raise ValueError("Clump ID not found.")
|
||||
|
||||
if len(k) > 1:
|
||||
raise ValueError("Found more than one main progenitor.")
|
||||
k = k[0]
|
||||
|
||||
progenitor = abs(self[f"{nsnap}__progenitor"][k])
|
||||
progenitor_snap = self[f"{nsnap}__progenitor_outputnr"][k]
|
||||
|
||||
if (self.min_snap is not None) and (nsnap < self.min_snap):
|
||||
return 0, numpy.nan
|
||||
|
||||
return progenitor, progenitor_snap
|
||||
|
||||
def find_minor_progenitors(self, clump, nsnap):
|
||||
"""
|
||||
Find the minor progenitors of a clump at a given snapshot. This means
|
||||
that `clump < 0`, `progenitor > 0`, i.e. this clump also has another
|
||||
main progenitor.
|
||||
|
||||
If there are no minor progenitors, return `None` for both lists.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
clump : int
|
||||
Clump ID.
|
||||
nsnap : int
|
||||
Snapshot index.
|
||||
|
||||
Returns
|
||||
-------
|
||||
prog : list
|
||||
List of minor progenitor clump IDs.
|
||||
prog_snap : list
|
||||
List of minor progenitor snapshot indices.
|
||||
"""
|
||||
if not clump > 0:
|
||||
raise ValueError("Clump ID must be positive.")
|
||||
|
||||
try:
|
||||
ks = self[f"{nsnap}__clump_to_array"][-clump]
|
||||
except KeyError:
|
||||
return None, None
|
||||
|
||||
prog = [self[f"{nsnap}__progenitor"][k] for k in ks]
|
||||
prog_nsnap = [self[f"{nsnap}__progenitor_outputnr"][k] for k in ks]
|
||||
|
||||
if (self.min_snap is not None) and (nsnap < self.min_snap):
|
||||
return None, None
|
||||
|
||||
return prog, prog_nsnap
|
||||
|
||||
def find_progenitors(self, clump, nsnap):
|
||||
"""
|
||||
Find all progenitors of a clump at a given snapshot. The main
|
||||
progenitor is the first element of the list.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
clump : int
|
||||
Clump ID.
|
||||
nsnap : int
|
||||
Snapshot index.
|
||||
|
||||
Returns
|
||||
-------
|
||||
prog : list
|
||||
List of progenitor clump IDs.
|
||||
prog_nsnap : list
|
||||
List of progenitor snapshot indices.
|
||||
"""
|
||||
main_prog, main_prog_nsnap = self.find_main_progenitor(clump, nsnap)
|
||||
min_prog, min_prog_nsnap = self.find_minor_progenitors(clump, nsnap)
|
||||
|
||||
# Check that if the main progenitor is not in the adjacent snapshot,
|
||||
# then the minor progenitor are also in that snapshot (if any).
|
||||
if (min_prog is not None) and (main_prog_nsnap != nsnap - 1) and not all(prog_nsnap == mprog for mprog in min_prog_nsnap): # noqa
|
||||
raise ValueError(f"For clump {clump} at snapshot {nsnap} we have "
|
||||
f"main progenitor at {main_prog_nsnap} and "
|
||||
"minor progenitors at {min_prog_nsnap}.")
|
||||
|
||||
if min_prog is None:
|
||||
prog = [main_prog,]
|
||||
prog_nsnap = [main_prog_nsnap,]
|
||||
else:
|
||||
prog = [main_prog,] + min_prog
|
||||
prog_nsnap = [main_prog_nsnap,] + min_prog_nsnap
|
||||
|
||||
if prog[0] == 0 and len(prog) > 1:
|
||||
raise ValueError("No main progenitor but minor progenitors "
|
||||
"found for clump {clump} at snapshot {nsnap}.")
|
||||
|
||||
return prog, prog_nsnap
|
||||
|
||||
def tree_mass_at_snapshot(self, clump, nsnap, target_snap):
|
||||
"""
|
||||
Calculate the total mass of nodes in a tree at a given snapshot.
|
||||
"""
|
||||
# If clump is 0 (i.e., we've reached the end of the tree), return 0
|
||||
if clump == 0:
|
||||
return 0
|
||||
|
||||
# Find the progenitors for the given clump and nsnap
|
||||
prog, prog_nsnap = self.find_progenitors(clump, nsnap)
|
||||
|
||||
if prog[0] == 0:
|
||||
print(prog)
|
||||
return 0
|
||||
|
||||
# Sum the mass of the current clump's progenitors
|
||||
tot = 0
|
||||
for p, psnap in zip(prog, prog_nsnap):
|
||||
if psnap == target_snap:
|
||||
tot += self.get_mass(p, psnap)
|
||||
|
||||
# Recursively sum the mass of each progenitor's progenitors
|
||||
for p, psnap in zip(prog, prog_nsnap):
|
||||
# print("P ", p, psnap)
|
||||
tot += self.mass_all_progenitor2(p, psnap, target_snap)
|
||||
|
||||
return tot
|
||||
|
||||
def is_jumper(self, clump, nsnap, nsnap_descendant):
|
||||
pass
|
||||
|
||||
def make_tree(self, current_clump, current_nsnap,
|
||||
above_clump=None, above_nsnap=None,
|
||||
tree=None, is_main=None, verbose=False):
|
||||
"""
|
||||
Make a merger tree for a clump at a given snapshot.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
current_clump : int
|
||||
Clump ID of the descendant clump.
|
||||
current_nsnap : int
|
||||
Snapshot index of the descendent clump.
|
||||
above_clump : int, optional
|
||||
Clump ID of a clump above the current clump in the tree.
|
||||
above_nsnap : int, optional
|
||||
Snapshot index of a clump above the current clump in the tree.
|
||||
tree : treelib.Tree, optional
|
||||
Tree to add to.
|
||||
is_main : bool, optional
|
||||
Whether this is the main progenitor.
|
||||
verbose : bool, optional
|
||||
Verbosity flag.
|
||||
|
||||
Returns
|
||||
-------
|
||||
treelib.Tree
|
||||
Tree with the current clump as the root.
|
||||
"""
|
||||
if verbose:
|
||||
print(f"{datetime.now()}: Node of a clump {current_clump} at "
|
||||
f"snapshot {current_nsnap}.", flush=True)
|
||||
|
||||
# Terminate if we are at the end of the tree
|
||||
if current_clump == 0:
|
||||
return
|
||||
|
||||
# Create the root node or add a new node
|
||||
if tree is None:
|
||||
tree = Tree()
|
||||
tree.create_node(
|
||||
"root",
|
||||
identifier=clump_identifier(current_clump, current_nsnap),
|
||||
data=self.get_info(current_clump, current_nsnap, True),
|
||||
)
|
||||
else:
|
||||
tree.create_node(
|
||||
identifier=clump_identifier(current_clump, current_nsnap),
|
||||
parent=clump_identifier(above_clump, above_nsnap),
|
||||
data=self.get_info(current_clump, current_nsnap, is_main),
|
||||
)
|
||||
|
||||
# This returns a list of progenitors and their snapshots. The first
|
||||
# element is the main progenitor.
|
||||
prog, prog_nsnap = self.find_progenitors(current_clump, current_nsnap)
|
||||
|
||||
for i, (p, psnap) in enumerate(zip(prog, prog_nsnap)):
|
||||
self.make_tree(p, psnap, current_clump, current_nsnap, tree,
|
||||
is_main=i == 0, verbose=verbose)
|
||||
|
||||
return tree

    def walk_main_progenitor(self, main_clump, main_nsnap, verbose=False):
        """
        Walk the main progenitor branch of a clump.

        Each snapshot contains information about the clump at that snapshot.

        Parameters
        ----------
        main_clump : int
            Clump ID.
        main_nsnap : int
            Snapshot index.
        verbose : bool, optional
            Verbosity flag.

        Returns
        -------
        structured array
        """
        out = []

        pbar = tqdm(disable=not verbose)
        while True:
            prog, prog_nsnap = self.find_progenitors(main_clump, main_nsnap)

            # Unpack the main and minor progenitors.
            mainprog, mainprog_nsnap = prog[0], prog_nsnap[0]
            if len(prog) > 1:
                minprog, minprog_nsnap = prog[1:], prog_nsnap[1:]
            else:
                minprog, minprog_nsnap = None, None

            # If there is no progenitor, set the main progenitor mass to NaN.
            if mainprog == 0:
                mainprog_mass = numpy.nan
            else:
                mainprog_mass = self.get_mass(mainprog, mainprog_nsnap)

            totprog_mass = mainprog_mass

            # Unpack the masses of the minor progenitors.
            if minprog is not None:
                minprog_masses = [self.get_mass(c, n)
                                  for c, n in zip(minprog, minprog_nsnap)]

                max_minprog_mass = max(minprog_masses)
                minprog_totmass = sum(minprog_masses)
                totprog_mass += minprog_totmass
            else:
                minprog_totmass = numpy.nan
                max_minprog_mass = numpy.nan

            out += [
                [main_nsnap,]
                + self.get_info(main_clump, main_nsnap)
                + [mainprog_nsnap, totprog_mass, mainprog_mass,
                   minprog_totmass, max_minprog_mass / mainprog_mass]
                ]

            pbar.update(1)
            pbar.set_description(f"Clump {main_clump} ({main_nsnap})")

            if mainprog == 0:
                pbar.close()
                break

            main_clump = mainprog
            main_nsnap = mainprog_nsnap

        # Convert the output to a structured array. Integers are stored as
        # floats to avoid errors when converting NaNs to integers.
        out = numpy.vstack(out)
        dtype = [("desc_snapshot_index", numpy.float32),
                 ("desc_mass", numpy.float32),
                 ("desc_x", numpy.float32),
                 ("desc_y", numpy.float32),
                 ("desc_z", numpy.float32),
                 ("prog_snapshot_index", numpy.float32),
                 ("prog_totmass", numpy.float32),
                 ("mainprog_mass", numpy.float32),
                 ("minprog_totmass", numpy.float32),
                 ("merger_ratio", numpy.float32),
                 ]

        return numpy.array([tuple(row) for row in out], dtype=dtype)
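
    # A minimal usage sketch (hypothetical reader instance `mtree`; the IDs
    # are illustrative only):
    #
    #     hist = mtree.walk_main_progenitor(main_clump=1, main_nsnap=951,
    #                                       verbose=True)
    #     ratio = hist["merger_ratio"]  # largest minor-to-main progenitor
    #                                   # mass ratio at each step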

    def match_mass_to_phewcat(self, phewcat):
        """
        For each clump in the PHEW catalogue, find the corresponding clump
        mass in the merger tree file. If no match is found, returns NaN.
        These are not equal because the PHEW catalogue mass is the mass
        without unbinding.

        Parameters
        ----------
        phewcat : csiborgtools.read.CSiBORGPHEWReader
            PHEW catalogue reader.

        Returns
        -------
        mass : 1-dimensional array
        """
        if phewcat.nsim != self.nsim:
            raise ValueError("Simulation indices do not match.")

        nsnap = phewcat.nsnap
        indxs = phewcat["index"]
        mergertree_mass = numpy.full(len(indxs), numpy.nan,
                                     dtype=numpy.float32)

        for i, ind in enumerate(indxs):
            try:
                mergertree_mass[i] = self.get_mass(ind, nsnap)
            except KeyError:
                continue

        return mergertree_mass

    def match_pos_to_phewcat(self, phewcat):
        """
        For each clump in the PHEW catalogue, find the corresponding clump
        position in the merger tree file. If no match is found, returns NaN.

        Parameters
        ----------
        phewcat : csiborgtools.read.CSiBORGPHEWReader
            PHEW catalogue reader.

        Returns
        -------
        pos : 2-dimensional array
        """
        if phewcat.nsim != self.nsim:
            raise ValueError("Simulation indices do not match.")

        nsnap = phewcat.nsnap
        indxs = phewcat["index"]
        mergertree_pos = numpy.full((len(indxs), 3), numpy.nan,
                                    dtype=numpy.float32)

        for i, ind in enumerate(indxs):
            try:
                mergertree_pos[i] = self.get_pos(ind, nsnap)
            except KeyError:
                continue

        return mergertree_pos[:, ::-1]  # TODO later remove


###############################################################################
#                          Manual halo tracking.                             #
###############################################################################


def track_halo_manually(cats, hid, maxdist=0.15, max_dlogm=0.35):
    """
    Manually track a halo without using the merger tree. Searches for a
    nearby halo of similar mass in adjacent snapshots. Supports only main
    haloes and can only work for the most massive haloes in a simulation;
    even then significant care should be taken.

    Selects the most massive halo within the search radius to be the match.

    In case a progenitor is not found in the adjacent snapshot, the search
    continues in the next snapshot. Occasionally some haloes disappear.

    Parameters
    ----------
    cats : dict
        Dictionary of halo catalogues; keys are snapshot indices.
    hid : int
        Halo ID.
    maxdist : float, optional
        Maximum comoving distance for a halo to move between adjacent
        snapshots.
    max_dlogm : float, optional
        Maximum |log mass ratio| for a halo to be considered a progenitor.

    Returns
    -------
    hist : structured array
        History of the halo.
    """
    nsnap0 = max(cats.keys())
    k = cats[nsnap0]["hid_to_array_index"][hid]
    pos = cats[nsnap0]["cartesian_pos"][k]
    mass = cats[nsnap0]["summed_mass"][k]

    if not cats[nsnap0]["is_main"][k]:
        raise ValueError("Only main haloes are supported.")

    if not mass > 1e13:
        raise ValueError("Only the most massive haloes are supported.")

    if not cats[nsnap0]["dist"][k] < 155.5:
        raise ValueError("Only high-resolution region haloes are supported.")

    dtype = [("snapshot_index", numpy.float32),
             ("x", numpy.float32),
             ("y", numpy.float32),
             ("z", numpy.float32),
             ("mass", numpy.float32),
             ("desc_dist", numpy.float32),
             ]
    hist = numpy.full(len(cats), numpy.nan, dtype=dtype)
    hist["snapshot_index"][0] = nsnap0
    hist["x"][0], hist["y"][0], hist["z"][0] = pos
    hist["mass"][0] = mass

    for n in trange(1, len(cats), desc="Tracking halo"):
        nsnap = nsnap0 - n
        hist["snapshot_index"][n] = nsnap

        # Find indices of all main haloes within a box of width `2 * maxdist`
        # centred on the previous position.
        indxs = cats[nsnap].select_in_box(pos, 2 * maxdist)

        if len(indxs) == 0:
            continue

        nearby_pos = cats[nsnap]["cartesian_pos"][indxs]
        nearby_mass = cats[nsnap]["summed_mass"][indxs]

        # Distance from the previous position and |log mass ratio|.
        dist = periodic_distance(nearby_pos, pos, cats[nsnap].box.boxsize)
        dlogm = numpy.abs(numpy.log10(nearby_mass / mass))
        k = numpy.argmin(dlogm)

        if (dlogm[k] < max_dlogm) & (dist[k] < maxdist):
            hist["x"][n], hist["y"][n], hist["z"][n] = nearby_pos[k]
            hist["mass"][n] = nearby_mass[k]
            hist["desc_dist"][n] = dist[k]

            pos = nearby_pos[k]
            mass = nearby_mass[k]

    return hist
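
# A minimal usage sketch (hypothetical dictionary of halo catalogues `cats`
# keyed by snapshot index; the halo ID is illustrative only):
#
#     hist = track_halo_manually(cats, hid=10500)
#     print(hist["mass"])  # mass history, NaN where no match was found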
142
old/mv_fofmembership.py
Normal file
@@ -0,0 +1,142 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Short script to move and change the format of the CSiBORG FoF membership
files calculated by Julien. Additionally, also orders the particles in the
same way as the PHEW halo finder output.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from os.path import join
from shutil import copy

import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange

from utils import get_nsims

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools


def copy_membership(nsim, verbose=True):
    """
    Copy the FoF particle halo membership to the CSiBORG directory and write
    it as a NumPy array instead of a text file.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    fpath = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
                 f"sim_{nsim}/particle_membership_{nsim}_FOF.txt")
    if verbose:
        print(f"Loading from ... `{fpath}`.")
    data = numpy.genfromtxt(fpath, dtype=int)

    fout = paths.fof_membership(nsim, "csiborg")
    if verbose:
        print(f"Saving to ... `{fout}`.")
    numpy.save(fout, data)


def copy_catalogue(nsim, verbose=True):
    """
    Move the FoF catalogue to the CSiBORG directory.

    Parameters
    ----------
    nsim : int
        IC realisation index.
    verbose : bool, optional
        Verbosity flag.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    source = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
                  f"sim_{nsim}/halo_catalog_{nsim}_FOF.txt")
    dest = paths.fof_cat(nsim, "csiborg")
    if verbose:
        print(f"Copying `{source}` to `{dest}`.")
    copy(source, dest)


def sort_fofid(nsim, verbose=True):
    """
    Read the FoF particle halo membership and sort the halo IDs to the
    ordering of particles in the PHEW clump IDs.

    Parameters
    ----------
    nsim : int
        IC realisation index.
    verbose : bool, optional
        Verbosity flag.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsnap = max(paths.get_snapshots(nsim, "csiborg"))
    fpath = paths.fof_membership(nsim, "csiborg")
    if verbose:
        print(f"{datetime.now()}: loading from ... `{fpath}`.")
    # Columns are halo ID, particle ID.
    fof = numpy.load(fpath)

    reader = csiborgtools.read.CSiBORGReader(paths)
    pars_extract = ["x"]  # Dummy variable
    __, pids = reader.read_snapshot(nsnap, nsim, pars_extract,
                                    return_structured=False, verbose=verbose)
    del __
    collect()

    # Map the particle IDs in `pids` to their corresponding PHEW array index.
    if verbose:
        print(f"{datetime.now()}: mapping particle IDs to their indices.")
    pids_idx = {pid: i for i, pid in enumerate(pids)}

    if verbose:
        print(f"{datetime.now()}: mapping FoF HIDs to their array indices.")
    # Unassigned particle IDs are assigned a halo ID of 0, same as PHEW.
    fof_hids = numpy.zeros(pids.size, dtype=numpy.int32)
    for i in trange(fof.shape[0]) if verbose else range(fof.shape[0]):
        hid, pid = fof[i]
        fof_hids[pids_idx[pid]] = hid

    fout = paths.fof_membership(nsim, "csiborg", sorted=True)
    if verbose:
        print(f"Saving the sorted data to ... `{fout}`.")
    numpy.save(fout, fof_hids)
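
# A small illustration of the reordering in `sort_fofid` (illustrative
# values): if the snapshot stores pids = [12, 7, 42], then
# pids_idx = {12: 0, 7: 1, 42: 2}, and a FoF row (hid=3, pid=42) sets
# fof_hids[2] = 3, so the halo IDs end up in snapshot particle order.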


def main(nsim, verbose=True):
    copy_membership(nsim, verbose=verbose)
    copy_catalogue(nsim, verbose=verbose)
    sort_fofid(nsim, verbose=verbose)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--simname", type=str, default="csiborg",
                        choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
    args = parser.parse_args()

    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsims = get_nsims(args, paths)
    work_delegation(main, nsims, MPI.COMM_WORLD)
17
old/mv_fofmembership.sh
Normal file
@@ -0,0 +1,17 @@
nthreads=1
memory=100
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="mv_fofmembership.py"
nsims="5511"

pythoncm="$env $file --nsims $nsims"

# echo $pythoncm
# $pythoncm

cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
echo "Submitting:"
echo $cm
echo
$cm
185
old/pre_dumppart.py
Normal file
@@ -0,0 +1,185 @@
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to load in the simulation particles, sort them by their halo ID and
dump into an HDF5 file. Stores the first and last index of each halo in the
particle array. This can be used for fast slicing of the array to access
particles of a single clump.

Ensures the following units:
    - Positions in box units.
    - Velocities in :math:`\mathrm{km} / \mathrm{s}`.
    - Masses in :math:`M_\odot / h`.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect

import h5py
import numba
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange

from utils import get_nsims

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


@numba.jit(nopython=True)
def minmax_halo(hid, halo_ids, start_loop=0):
    """
    Find the start and end index of a halo in a sorted array of halo IDs.
    This is much faster than using `numpy.where` and then `numpy.min` and
    `numpy.max`.
    """
    start = None
    end = None

    for i in range(start_loop, halo_ids.size):
        n = halo_ids[i]
        if n == hid:
            if start is None:
                start = i
            end = i
        elif n > hid:
            break
    return start, end
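
# A small illustration of `minmax_halo` (illustrative values): for the sorted
# array halo_ids = [0, 0, 1, 1, 1, 2], minmax_halo(1, halo_ids) returns
# (2, 4), i.e. the slice halo_ids[2:5] covers halo 1. Passing `start_loop`
# skips the part of the array that has already been processed.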


###############################################################################
#                           Sorting and dumping                               #
###############################################################################


def main(nsim, simname, verbose):
    """
    Read in the snapshot particles, sort them by their FoF halo ID and dump
    into an HDF5 file. Stores the first and last index of each halo in the
    particle array for fast slicing of the array to access particles of a
    single halo.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    if simname == "csiborg":
        partreader = csiborgtools.read.CSiBORGReader(paths)
    else:
        partreader = csiborgtools.read.QuijoteReader(paths)

    nsnap = max(paths.get_snapshots(nsim, simname))
    fname = paths.particles(nsim, simname)
    # We first read in the halo IDs of the particles and infer the sorting.
    # Right away we dump the halo IDs to a HDF5 file and clear up memory.
    if verbose:
        print(f"{datetime.now()}: loading PIDs of IC {nsim}.", flush=True)
    part_hids = partreader.read_fof_hids(
        nsnap=nsnap, nsim=nsim, verbose=verbose)
    if verbose:
        print(f"{datetime.now()}: sorting PIDs of IC {nsim}.", flush=True)
    sort_indxs = numpy.argsort(part_hids).astype(numpy.int32)
    part_hids = part_hids[sort_indxs]
    with h5py.File(fname, "w") as f:
        f.create_dataset("halo_ids", data=part_hids)
    del part_hids
    collect()

    # Next we read in the particles and sort them by their halo ID.
    # We cannot directly read this as an unstructured array because the
    # float32 precision is insufficient to capture the halo IDs.
    if simname == "csiborg":
        pars_extract = ["x", "y", "z", "vx", "vy", "vz", "M", "ID"]
    else:
        pars_extract = None
    parts, pids = partreader.read_snapshot(
        nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)

    # In the case of CSiBORG, we need to convert the masses and velocities
    # from box units.
    if simname == "csiborg":
        box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
        parts[:, [3, 4, 5]] = box.box2vel(parts[:, [3, 4, 5]])
        parts[:, 6] = box.box2solarmass(parts[:, 6])

    # Now we save the particles and particle IDs in two steps.
    if verbose:
        print(f"{datetime.now()}: dumping particles from {nsim}.", flush=True)
    parts = parts[sort_indxs]
    pids = pids[sort_indxs]
    del sort_indxs
    collect()

    with h5py.File(fname, "r+") as f:
        f.create_dataset("particle_ids", data=pids)
    del pids
    collect()

    with h5py.File(fname, "r+") as f:
        f.create_dataset("particles", data=parts)
    del parts
    collect()

    if verbose:
        print(f"{datetime.now()}: creating a halo map for {nsim}.",
              flush=True)
    # Load the halo IDs back to memory.
    with h5py.File(fname, "r") as f:
        part_hids = f["halo_ids"][:]
    # We loop over the unique halo IDs. Fill the map with -1 rather than NaN,
    # which is not representable as an integer; every row is overwritten in
    # the loop below.
    unique_halo_ids = numpy.unique(part_hids)
    halo_map = numpy.full((unique_halo_ids.size, 3), -1,
                          dtype=numpy.int32)
    start_loop = 0
    niters = unique_halo_ids.size
    for i in trange(niters) if verbose else range(niters):
        hid = unique_halo_ids[i]
        k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
        halo_map[i, 0] = hid
        halo_map[i, 1] = k0
        halo_map[i, 2] = kf
        start_loop = kf

    # We save the mapping to the HDF5 file.
    with h5py.File(fname, "r+") as f:
        f.create_dataset("halomap", data=halo_map)

    del part_hids
    collect()


if __name__ == "__main__":
    # Parse all the arguments and set up CSiBORG objects.
    parser = ArgumentParser()
    parser.add_argument("--simname", type=str, default="csiborg",
                        choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="IC realisations. If `-1` processes all.")
    args = parser.parse_args()

    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsims = get_nsims(args, paths)

    def _main(nsim):
        main(nsim, args.simname, verbose=MPI.COMM_WORLD.Get_size() == 1)

    work_delegation(_main, nsims, MPI.COMM_WORLD)
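
# A minimal sketch of consuming the dumped file (hypothetical `fname` path):
#
#     with h5py.File(fname, "r") as f:
#         hid, k0, kf = f["halomap"][10]  # an illustrative row
#         parts = f["particles"][k0:kf + 1]  # particles of halo `hid`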
18
old/pre_dumppart.sh
Normal file
@@ -0,0 +1,18 @@
nthreads=1
memory=40
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="pre_dumppart.py"
simname="csiborg"
nsims="5511"

pythoncm="$env $file --nsims $nsims --simname $simname"

# echo $pythoncm
# $pythoncm

cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
echo "Submitting:"
echo $cm
echo
$cm
64
old/pre_mmain.py
Normal file
@@ -0,0 +1,64 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to generate the mmain files, i.e. sums up the substructure of children.
"""
from datetime import datetime

import numpy
from mpi4py import MPI
from taskmaster import master_process, worker_process

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools

# Get MPI things.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
mmain_reader = csiborgtools.read.MmainReader(paths)


def do_mmain(nsim):
    nsnap = max(paths.get_snapshots(nsim, "csiborg"))
    # NOTE: currently works for the highest snapshot anyway.
    mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
    numpy.savez(paths.mmain(nsnap, nsim),
                mmain=mmain, ultimate_parent=ultimate_parent)


###############################################################################
#                           MPI task delegation                               #
###############################################################################


if nproc > 1:
    if rank == 0:
        tasks = list(paths.get_ics("csiborg"))
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_mmain, comm, verbose=False)
else:
    tasks = paths.get_ics("csiborg")
    for task in tasks:
        print(f"{datetime.now()}: completing task `{task}`.", flush=True)
        do_mmain(task)

comm.Barrier()
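
# A minimal sketch of reading one of the produced files back (the snapshot
# and IC indices are illustrative only):
#
#     data = numpy.load(paths.mmain(nsnap, nsim))
#     mmain, ultimate_parent = data["mmain"], data["ultimate_parent"]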
14
old/pre_mmain.sh
Normal file
@@ -0,0 +1,14 @@
nthreads=102
memory=5
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="pre_mmain.py"

# pythoncm="$env $file"
# $pythoncm


cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
echo "Submitting:"
echo $cm
$cm
99
old/sort_halomaker.py
Normal file
@@ -0,0 +1,99 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to sort the HaloMaker `particle_membership` file to match the ordering
of particles in the simulation snapshot.
"""
from argparse import ArgumentParser
from datetime import datetime
from glob import iglob

import h5py
import numpy
import pynbody
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange

import csiborgtools


def sort_particle_membership(nsim, nsnap, method):
    """
    Read the HaloMaker particle halo membership and sort the halo IDs to
    match the ordering of particles in the simulation snapshot.

    Parameters
    ----------
    nsim : int
        IC realisation index.
    nsnap : int
        Snapshot index.
    method : str
        HaloMaker method, e.g. `FOF`.
    """
    print(f"{datetime.now()}: starting simulation {nsim}, snapshot {nsnap} and method {method}.")  # noqa
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)

    fpath = next(iglob(f"/mnt/extraspace/rstiskalek/CSiBORG/halo_maker/ramses_{nsim}/output_{str(nsnap).zfill(5)}/**/*particle_membership*", recursive=True), None)  # noqa
    print(f"{datetime.now()}: loading particle membership `{fpath}`.")
    # Columns are halo ID, particle ID.
    membership = numpy.genfromtxt(fpath, dtype=int)

    print(f"{datetime.now()}: loading particle IDs from the snapshot.")
    sim = pynbody.load(paths.snapshot(nsnap, nsim, "csiborg"))
    pids = numpy.asanyarray(sim["iord"])

    print(f"{datetime.now()}: mapping particle IDs to their indices.")
    pids_idx = {pid: i for i, pid in enumerate(pids)}

    print(f"{datetime.now()}: mapping HIDs to their array indices.")
    # Unassigned particle IDs are assigned a halo ID of 0.
    hids = numpy.zeros(pids.size, dtype=numpy.int32)
    for i in trange(membership.shape[0]):
        hid, pid = membership[i]
        hids[pids_idx[pid]] = hid

    fout = fpath + "_sorted.hdf5"
    print(f"{datetime.now()}: saving the sorted data to ... `{fout}`.")

    with h5py.File(fout, 'w') as hdf:
        dset = hdf.create_dataset('hids', data=hids)
        dset.attrs['header'] = """
        This dataset represents (sub)halo indices for each particle.
        - The particles are ordered as they appear in the simulation snapshot.
        - Unassigned particles are given an index of 0.
        """


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--method", type=str, required=True,
                        help="HaloMaker method")
    parser.add_argument("--nsim", type=int, required=False, default=None,
                        help="IC index. If not set process all.")
    args = parser.parse_args()
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)

    if args.nsim is None:
        ics = paths.get_ics("csiborg")
    else:
        ics = [args.nsim]

    snaps = numpy.array([max(paths.get_snapshots(nsim, "csiborg"))
                         for nsim in ics])

    def main(n):
        sort_particle_membership(ics[n], snaps[n], args.method)

    work_delegation(main, list(range(len(ics))), MPI.COMM_WORLD)
19
old/sort_halomaker.sh
Executable file
@@ -0,0 +1,19 @@
nthreads=1
memory=64
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="sort_halomaker.py"

method="FOF"
nsim="7444"

pythoncm="$env $file --method $method --nsim $nsim"

# echo $pythoncm
# $pythoncm

cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
echo "Submitting:"
echo $cm
echo
$cm