Add pynbody and other support (#92)

* Simplify box units

* Move old scripts

* Add printing

* Update readers

* Disable boundscheck

* Add new ordering

* Clean up imports

* Enforce dtype and add mass to quijote

* Simplify print statements

* Fix little typos

* Fix key bug

* Bug fixing

* Delete boring comments

* Improve ultimate clumps for PHEW

* Delete boring comments

* Add basic reading

* Remove 0th index HID

* Add flipping of X and Z

* Updates to halo catalogues

* Add ordered caching

* Fix flipping

* Add new flags

* Fix PHEW empty clumps

* Stop over-writing

* Little improvements to angular neighbours

* Add catalogue masking

* Change if-else statements

* Cache only filtered data

* Add PHEW cats

* Add comments

* Sort imports

* Get Quijote working

* Docs

* Add HMF calculation

* Move to old

* Fix angular

* Add great circle distance

* Update imports

* Update imports

* Update docs

* Remove unused import

* Fix a quick bug

* Update compatibility

* Rename files

* Renaming

* Improve compatibility

* Rename snapshot

* Fix snapshot bug

* Update interface

* Finish updating interface

* Update all paths

* Add old scripts

* Add basic halo

* Update imports

* Improve snapshot processing

* Update ordering

* Fix how CM positions are accessed

* Add merger paths

* Add imports

* Add merger reading

* Add making a merger tree

* Add a basic merger tree reader

* Add imports

* Add main branch walking + comments + debugging

* Get tree running

* Add working merger tree walking along main branch

* Add units conversion for merger data

* Add hid_to_array_index

* Update merger tree

* Add mergertree mass to PHEWcat

* Edit comments

* Add this to track changes...

* Fix a little bug

* Add mergertree mass

* Add cache clearing

* Improve summing substructure code

* Little bug

* Little updates to the merger tree reader

* Update .gitignore

* Add box selection

* Add optional deleting of a group

* Add to keep track of changes

* Update changes

* Remove

* Add manual tracker

* Fix bug

* Add m200c_to_r200c

* Add manual halo tracking

* Remove skipped snapshots

* Update cosmo params to match CSiBORG

* Remove old comments

* Add SDSSxALFALFA

* Fix bugs

* Rename

* Edit paths

* Updates

* Add comments

* Add comment

* Add hour conversion

* Add imports

* Add new observation class

* Add selection

* Add imports

* Fix small bug

* Add field copying for safety

* Add matching to survey without masking

* Add P(k) calculation

* Add nb

* Edit comment

* Move files

* Remove merger import

* Edit setup.py

* Fix typo

* Edit import warnings

* Update nb

* Update README

* Update README

* Update README

* Add skeleton

* Add skeleton
Richard Stiskalek 2023-12-07 14:23:32 +00:00 committed by GitHub
parent 5500fbd2b9
commit e972f8e3f2
53 changed files with 4627 additions and 1774 deletions


@@ -1,159 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
MPI script to calculate the matter cross power spectrum between CSiBORG
IC realisations. Units are Mpc/h.
"""
raise NotImplementedError("This script is currently not working.")
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from itertools import combinations
from os import remove
from os.path import join
import joblib
import numpy
import Pk_library as PKL
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/"
parser = ArgumentParser()
parser.add_argument("--grid", type=int)
parser.add_argument("--halfwidth", type=float, default=0.5)
args = parser.parse_args()
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
MAS = "CIC" # mass asignment scheme
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
box = csiborgtools.read.CSiBORGBox(paths)
reader = csiborgtools.read.CSiBORGReader(paths)
ics = paths.get_ics("csiborg")
nsims = len(ics)
# File paths
ftemp = join(dumpdir, "temp_crosspk",
"out_{}_{}" + "_{}".format(args.halfwidth))
fout = join(dumpdir, "crosspk",
"out_{}_{}" + "_{}.p".format(args.halfwidth))
jobs = csiborgtools.utils.split_jobs(nsims, nproc)[rank]
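# split_jobs is assumed to partition the nsims simulation indices roughly
# evenly across the nproc MPI ranks; each rank then loops over its own chunk.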
for n in jobs:
print(f"Rank {rank} at {datetime.now()}: saving {n}th delta.", flush=True)
nsim = ics[n]
particles = reader.read_particle(max(paths.get_snapshots(nsim, "csiborg")),
nsim, ["x", "y", "z", "M"], verbose=False)
# Halfwidth -- particle selection
if args.halfwidth < 0.5:
particles = csiborgtools.read.halfwidth_select(
args.halfwidth, particles)
length = box.box2mpc(2 * args.halfwidth) * box.h # Mpc/h
else:
length = box.box2mpc(1) * box.h # Mpc/h
# Calculate the overdensity field
field = csiborgtools.field.DensityField(particles, length, box, MAS)
delta = field.overdensity_field(args.grid, verbose=False)
aexp = box._aexp
# Try to clean up memory
del field, particles, box, reader
collect()
# Dump the results
with open(ftemp.format(nsim, "delta") + ".npy", "wb") as f:
numpy.save(f, delta)
joblib.dump([aexp, length], ftemp.format(nsim, "lengths") + ".p")
# Try to clean up memory
del delta
collect()
comm.Barrier()
# Get the off-diagonal elements and append the diagonal
combs = [c for c in combinations(range(nsims), 2)]
for i in range(nsims):
combs.append((i, i))
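# e.g. for nsims = 3: [(0, 1), (0, 2), (1, 2), (0, 0), (1, 1), (2, 2)],
# i.e. every unique cross pair followed by each auto pair exactly once.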
prev_delta = [-1, None, None, None] # i, delta, aexp, length
jobs = csiborgtools.utils.split_jobs(len(combs), nproc)[rank]
for n in jobs:
i, j = combs[n]
print("Rank {}@{}: combination {}.".format(rank, datetime.now(), (i, j)))
# If i is the same as last time, reuse the cached delta instead of reloading
if prev_delta[0] == i:
delta_i = prev_delta[1]
aexp_i = prev_delta[2]
length_i = prev_delta[3]
else:
with open(ftemp.format(ics[i], "delta") + ".npy", "rb") as f:
delta_i = numpy.load(f)
aexp_i, length_i = joblib.load(ftemp.format(ics[i], "lengths") + ".p")
# Store in prev_delta
prev_delta[0] = i
prev_delta[1] = delta_i
prev_delta[2] = aexp_i
prev_delta[3] = length_i
# Get jth delta
with open(ftemp.format(ics[j], "delta") + ".npy", "rb") as f:
delta_j = numpy.load(f)
aexp_j, length_j = joblib.load(ftemp.format(ics[j], "lengths") + ".p")
# Verify the difference between the scale factors! Say more than 1%
daexp = abs((aexp_i - aexp_j) / aexp_i)
if daexp > 0.01:
raise ValueError(
"Boxes {} and {} final snapshot scale factors disagree by "
"`{}` percent!".format(ics[i], ics[j], daexp * 100))
# Check how well the boxsizes agree
dlength = abs((length_i - length_j) / length_i)
if dlength > 0.001:
raise ValueError("Boxes {} and {} box sizes disagree by `{}` percent!"
.format(ics[i], ics[j], dlength * 100))
# Calculate the cross power spectrum
Pk = PKL.XPk([delta_i, delta_j], length_i, axis=1, MAS=[MAS, MAS],
threads=1)
joblib.dump(Pk, fout.format(ics[i], ics[j]))
del delta_i, delta_j, Pk
collect()
# Clean up the temp files
comm.Barrier()
if rank == 0:
print("Cleaning up the temporary files...")
for ic in ics:
remove(ftemp.format(ic, "delta") + ".npy")
remove(ftemp.format(ic, "lengths") + ".p")
print("All finished!")


@@ -1,155 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the KNN-CDF for a set of halo catalogues.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from taskmaster import work_delegation
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
from utils import open_catalogues
def do_auto(args, config, cats, nsim, paths):
"""
Calculate the kNN-CDF single catalogue auto-correlation.
Parameters
----------
args : argparse.Namespace
Command line arguments.
config : dict
Configuration dictionary.
cats : dict
Dictionary of halo catalogues. Keys are simulation indices, values are
the catalogues.
nsim : int
Simulation index.
paths : csiborgtools.paths.Paths
Paths object.
Returns
-------
None
"""
cat = cats[nsim]
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
knncdf = csiborgtools.clustering.kNN_1DCDF()
knn = cat.knn(in_initial=False, subtract_observer=False, periodic=True)
rs, cdf = knncdf(
knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
totvol = (4 / 3) * numpy.pi * args.Rmax ** 3
fout = paths.knnauto(args.simname, args.run, nsim)
if args.verbose:
print(f"Saving output to `{fout}`.")
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout)
def do_cross_rand(args, config, cats, nsim, paths):
"""
Calculate the kNN-CDF cross catalogue random correlation.
Parameters
----------
args : argparse.Namespace
Command line arguments.
config : dict
Configuration dictionary.
cats : dict
Dictionary of halo catalogues. Keys are simulation indices, values are
the catalogues.
nsim : int
Simulation index.
paths : csiborgtools.paths.Paths
Paths object.
Returns
-------
None
"""
cat = cats[nsim]
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
knn1 = cat.knn(in_initial=False, subtract_observer=False, periodic=True)
knn2 = NearestNeighbors()
pos2 = rvs_gen(len(cat))
knn2.fit(pos2)
knncdf = csiborgtools.clustering.kNN_1DCDF()
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
fout = paths.knnauto(args.simname, args.run, nsim)
if args.verbose:
print(f"Saving output to `{fout}`.", flush=True)
joblib.dump({"rs": rs, "corr": corr}, fout)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--run", type=str, help="Run name.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
parser.add_argument("--Rmax", type=float, default=155,
help="High-resolution region radius") # noqa
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
args = parser.parse_args()
with open("./cluster_knn_auto.yml", "r") as file:
config = yaml.safe_load(file)
comm = MPI.COMM_WORLD
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cats = open_catalogues(args, config, paths, comm)
if args.verbose and comm.Get_rank() == 0:
print(f"{datetime.now()}: starting to calculate the kNN statistic.")
def do_work(nsim):
if "random" in args.run:
do_cross_rand(args, config, cats, nsim, paths)
else:
do_auto(args, config, cats, nsim, paths)
nsims = list(cats.keys())
work_delegation(do_work, nsims, comm, master_verbose=args.verbose)
comm.Barrier()
if comm.Get_rank() == 0:
print(f"{datetime.now()}: all finished. Quitting.")


@@ -1,158 +0,0 @@
rmin: 0.1
rmax: 100
nneighbours: 8
nsamples: 1.e+7
batch_size: 1.e+6
neval: 10000
seed: 42
nbins_marks: 10
################################################################################
# totpartmass #
################################################################################
"mass001":
primary:
name:
- totpartmass
- group_mass
min: 1.e+12
max: 1.e+13
"mass002":
primary:
name:
- totpartmass
- group_mass
min: 1.e+13
max: 1.e+14
"mass003":
primary:
name:
- totpartmass
- group_mass
min: 1.e+14
"mass003_poisson":
poisson: true
primary:
name:
- totpartmass
- group_mass
min: 1.e+14
################################################################################
# totpartmass + lambda200c #
################################################################################
"mass001_spinlow":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
toperm: false
marked: true
max: 0.5
"mass001_spinhigh":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
toperm: false
marked: true
min: 0.5
"mass001_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
"mass002_spinlow":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
toperm: false
marked: true
max: 0.5
"mass002_spinhigh":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
toperm: false
marked: true
min: 0.5
"mass002_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
"mass003_spinlow":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
toperm: false
marked: true
max: 0.5
"mass003_spinhigh":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
toperm: false
marked: true
min: 0.5
"mass003_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
################################################################################
# Cross with random #
################################################################################
"mass001_random":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13


@@ -1,144 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.
TODO:
- [ ] Add support for new catalogue readers. Currently will not work.
- [ ] Update catalogue readers.
- [ ] Update paths.
- [ ] Update to cross-correlate different mass populations from different
simulations.
"""
raise NotImplementedError("This script is currently not working.")
from argparse import ArgumentParser
from datetime import datetime
from itertools import combinations
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from taskmaster import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
###############################################################################
# MPI and arguments #
###############################################################################
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
args = parser.parse_args()
with open("../scripts/knn_cross.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
ics = paths.get_ics("csiborg")
knncdf = csiborgtools.clustering.kNN_1DCDF()
###############################################################################
# Analysis #
###############################################################################
def read_single(selection, cat):
mmask = numpy.ones(len(cat), dtype=bool)
pos = cat.positions(False)
# Primary selection
psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None:
mmask &= cat[psel["name"]] >= pmin
if pmax is not None:
mmask &= cat[psel["name"]] < pmax
return pos[mmask, ...]
def do_cross(run, ics):
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run), stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
knn1, knn2 = NearestNeighbors(), NearestNeighbors()
cat1 = csiborgtools.read.ClumpsCatalogue(ics[0], paths, max_dist=Rmax)
pos1 = read_single(_config, cat1)
knn1.fit(pos1)
cat2 = csiborgtools.read.ClumpsCatalogue(ics[1], paths, max_dist=Rmax)
pos2 = read_single(_config, cat2)
knn2.fit(pos2)
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1,
knn2,
rvs_gen=rvs_gen,
nneighbours=int(config["nneighbours"]),
rmin=config["rmin"],
rmax=config["rmax"],
nsamples=int(config["nsamples"]),
neval=int(config["neval"]),
batch_size=int(config["batch_size"]),
random_state=config["seed"],
)
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
fout = paths.knncross(args.simname, run, ics)
joblib.dump({"rs": rs, "corr": corr}, fout)
def do_runs(nsims):
for run in args.runs:
do_cross(run, nsims)
###############################################################################
# Crosscorrelation calculation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = list(combinations(ics, 2))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_runs, comm, verbose=False)
else:
tasks = list(combinations(ics, 2))
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_runs(task)
comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script


@@ -1,29 +0,0 @@
rmin: 0.1
rmax: 100
nneighbours: 64
nsamples: 1.e+7
batch_size: 1.e+6
neval: 10000
seed: 42
################################################################################
# totpartmass #
################################################################################
"mass001":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
"mass002":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
"mass003":
primary:
name: totpartmass
min: 1.e+14


@@ -1,82 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the auto-2PCF of CSiBORG catalogues.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
import joblib
import numpy
import yaml
from mpi4py import MPI
from taskmaster import work_delegation
from utils import open_catalogues
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def do_auto(args, config, cats, nsim, paths):
cat = cats[nsim]
tpcf = csiborgtools.clustering.Mock2PCF()
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
bins = numpy.logspace(
numpy.log10(config["rpmin"]), numpy.log10(config["rpmax"]),
config["nrpbins"] + 1,)
pos = cat.position(in_initial=False, cartesian=True)
nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
fout = paths.knnauto(args.simname, args.run, nsim)
joblib.dump({"rp": rp, "wp": wp}, fout)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--run", type=str, help="Run name.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
parser.add_argument("--Rmax", type=float, default=155,
help="High-resolution region radius.")
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False, help="Verbosity flag.")
args = parser.parse_args()
with open("./cluster_tpcf_auto.yml", "r") as file:
config = yaml.safe_load(file)
comm = MPI.COMM_WORLD
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cats = open_catalogues(args, config, paths, comm)
if args.verbose and comm.Get_rank() == 0:
print(f"{datetime.now()}: starting to calculate the 2PCF statistic.")
def do_work(nsim):
return do_auto(args, config, cats, nsim, paths)
nsims = list(cats.keys())
work_delegation(do_work, nsims, comm)


@@ -1,136 +0,0 @@
rpmin: 0.5
rpmax: 40
nrpbins: 20
randmult: 100
seed: 42
nbins_marks: 10
################################################################################
# totpartmass #
################################################################################
"mass001":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
"mass002":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
"mass003":
primary:
name: totpartmass
min: 1.e+14
################################################################################
# totpartmass + lambda200c #
################################################################################
"mass001_spinlow":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
marked: true
max: 0.5
"mass001_spinhigh":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
marked: true
min: 0.5
"mass001_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
"mass002_spinlow":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
marked: true
max: 0.5
"mass002_spinhigh":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
marked: true
min: 0.5
"mass002_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
"mass003_spinlow":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
marked: true
max: 0.5
"mass003_spinhigh":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
marked: true
min: 0.5
"mass003_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
################################################################################
# Cross with random #
################################################################################
"mass001_random":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13


@@ -61,13 +61,13 @@ def positions_to_ascii(positions, output_filename, boxsize=None,
out_file.write(chunk_str + "\n")
def extract_positions(nsim, paths, kind):
def extract_positions(nsim, simname, paths, kind):
"""
Extract either the particle or halo positions.
"""
if kind == "particles":
fname = paths.particles(nsim, args.simname)
return h5py.File(fname, 'r')["particles"]
fname = paths.processed_output(nsim, simname, "FOF")
return h5py.File(fname, 'r')["snapshot_final/pos"][:]
if kind == "particles_rsp":
raise NotImplementedError("RSP of particles is not implemented yet.")
@@ -75,23 +75,23 @@ def extract_positions(nsim, paths, kind):
fpath = paths.observer_peculiar_velocity("PCS", 512, nsim)
vpec_observer = numpy.load(fpath)["observer_vp"][0, :]
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, bounds={"dist": (0, 155.5)}, load_fitted=True,
load_initial=False, observer_velocity=vpec_observer, )
nsim, paths, "halo_catalogue", "FOF", bounds={"dist": (0, 155.5)},
observer_velocity=vpec_observer)
if kind == "halos":
return cat.position()
return cat["cartesian_pos"]
if kind == "halos_rsp":
return cat.redshift_space_position()
return cat["cartesian_redshift_pos"]
raise ValueError(f"Unknown kind `{kind}`. Allowed values are: "
"`particles`, `particles_rsp`, `halos`, `halos_rsp`.")
def main(nsim, paths, kind):
boxsize = 677.7 if "particles" in kind else None
pos = extract_positions(nsim, paths, kind)
output_filename = paths.ascii_positions(nsim, kind)
def main(args, paths):
boxsize = 677.7 if "particles" in args.kind else None
pos = extract_positions(args.nsim, args.simname, paths, args.kind)
output_filename = paths.ascii_positions(args.nsim, args.kind)
positions_to_ascii(pos, output_filename, boxsize=boxsize)


@@ -28,6 +28,16 @@ from taskmaster import work_delegation
import csiborgtools
from utils import get_nsims
###############################################################################
# Cosmotool SPH density & velocity field #
###############################################################################
def cosmotool_sph(nsim, parser_args):
pass
###############################################################################
# Density field #
###############################################################################
@@ -40,13 +50,15 @@ def density_field(nsim, parser_args, to_save=True):
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
fname = paths.processed_output(nsim, "csiborg", "halo_catalogue")
if not parser_args.in_rsp:
parts = csiborgtools.read.read_h5(paths.particles(nsim, "csiborg"))
parts = parts["particles"]
snap = csiborgtools.read.read_h5(fname)["snapshot_final"]
pos = snap["pos"]
mass = snap["mass"]
gen = csiborgtools.field.DensityField(box, parser_args.MAS)
field = gen(parts, parser_args.grid, verbose=parser_args.verbose)
field = gen(pos, mass, parser_args.grid, verbose=parser_args.verbose)
else:
field = numpy.load(paths.field(
"density", parser_args.MAS, parser_args.grid, nsim, False))
@@ -83,12 +95,15 @@ def velocity_field(nsim, parser_args, to_save=True):
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
fname = paths.processed_output(nsim, "csiborg", "halo_catalogue")
parts = csiborgtools.read.read_h5(paths.particles(nsim, "csiborg"))
parts = parts["particles"]
snap = csiborgtools.read.read_h5(fname)["snapshot_final"]
pos = snap["pos"]
vel = snap["vel"]
mass = snap["mass"]
gen = csiborgtools.field.VelocityField(box, parser_args.MAS)
field = gen(parts, parser_args.grid, verbose=parser_args.verbose)
field = gen(pos, vel, mass, parser_args.grid, verbose=parser_args.verbose)
if to_save:
fout = paths.field("velocity", parser_args.MAS, parser_args.grid,
@@ -247,6 +262,7 @@ if __name__ == "__main__":
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
help="Verbosity flag for reading in particles.")
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "csiborg2"],
help="Verbosity flag for reading in particles.")
parser_args = parser.parse_args()
comm = MPI.COMM_WORLD


@@ -53,12 +53,20 @@ def open_galaxy_positions(survey_name, comm):
if rank == 0:
if survey_name == "SDSS":
survey = csiborgtools.read.SDSS(
h=1, sel_steps=lambda cls: steps(cls, survey_name))
survey = csiborgtools.SDSS()()
pos = numpy.vstack([survey["DIST_UNCORRECTED"],
survey["RA"],
survey["DEC"]],
).T
pos = pos.astype(numpy.float32)
indxs = survey["INDEX"]
if survey_name == "SDSSxALFALFA":
survey = csiborgtools.SDSSxALFALFA()()
pos = numpy.vstack([survey["DIST_UNCORRECTED"],
survey["RA_1"],
survey["DEC_1"]],
).T
pos = pos.astype(numpy.float32)
indxs = survey["INDEX"]
elif survey_name == "GW170817":
samples = File("/mnt/extraspace/rstiskalek/GWLSS/H1L1V1-EXTRACT_POSTERIOR_GW170817-1187008600-400.hdf", 'r')["samples"] # noqa
@@ -110,7 +118,7 @@ def evaluate_field(field, pos, nrand, smooth_scales=None, seed=42,
field_smoothed = csiborgtools.field.smoothen_field(
field, scale * MPC2BOX, boxsize=1, make_copy=True)
else:
field_smoothed = field
field_smoothed = numpy.copy(field)
val[:, i] = csiborgtools.field.evaluate_sky(
field_smoothed, pos=pos, mpc2box=MPC2BOX)
@@ -164,7 +172,7 @@ if __name__ == "__main__":
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
parser.add_argument("--survey", type=str, required=True,
choices=["SDSS", "GW170817"],
choices=["SDSS", "SDSSxALFALFA", "GW170817"],
help="Galaxy survey")
parser.add_argument("--smooth_scales", type=float, nargs="+", default=None,
help="Smoothing scales in Mpc / h.")
@@ -189,12 +197,6 @@ if __name__ == "__main__":
pos, indxs = open_galaxy_positions(args.survey, MPI.COMM_WORLD)
if MPI.COMM_WORLD.Get_rank() == 0 and args.survey != "GW170817":
fout = f"/mnt/extraspace/rstiskalek/CSiBORG/ascii_positions/{args.survey}_positions.npz" # noqa
pos = csiborgtools.utils.radec_to_cartesian(pos) + 677.7 / 2
print(f"Saving to ... `{fout}`.")
numpy.savez(fout, pos=pos, indxs=indxs)
def _main(nsim):
main(nsim, args, pos, indxs, paths,
verbose=MPI.COMM_WORLD.Get_size() == 1)


@@ -1,108 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the HMF for CSIBORG and Quijote haloes.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def get_counts(nsim, bins, paths, parser_args):
"""
Calculate and save the number of haloes in each mass bin.
"""
simname = parser_args.simname
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
bounds = {"dist": (0, parser_args.Rmax)}
if simname == "csiborg":
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, bounds=bounds, load_fitted=False, load_initial=False)
logmass = numpy.log10(cat["fof_totpartmass"])
counts = csiborgtools.number_counts(logmass, bins)
elif simname == "quijote":
cat0 = csiborgtools.read.QuijoteHaloCatalogue(
nsim, paths, nsnap=4, load_fitted=False, load_initial=False)
nmax = int(cat0.box.boxsize // (2 * parser_args.Rmax))**3
counts = numpy.full((nmax, len(bins) - 1), numpy.nan,
dtype=numpy.float32)
for nobs in range(nmax):
cat = cat0.pick_fiducial_observer(nobs, rmax=parser_args.Rmax)
logmass = numpy.log10(cat["group_mass"])
counts[nobs, :] = csiborgtools.number_counts(logmass, bins)
elif simname == "quijote_full":
cat = csiborgtools.read.QuijoteHaloCatalogue(
nsim, paths, nsnap=4, load_fitted=False, load_initial=False,
load_backup=parser_args.from_quijote_backup)
logmass = numpy.log10(cat["group_mass"])
counts = csiborgtools.number_counts(logmass, bins)
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
fout = paths.halo_counts(simname, nsim, parser_args.from_quijote_backup)
if parser_args.verbose:
print(f"{datetime.now()}: saving halo counts to `{fout}`.")
numpy.savez(fout, counts=counts, bins=bins, rmax=parser_args.Rmax)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str,
choices=["csiborg", "quijote", "quijote_full"],
help="Simulation name.")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` all .")
parser.add_argument(
"--Rmax", type=float, default=155,
help="High-res region radius in Mpc / h. Ignored for `quijote_full`.")
parser.add_argument("--from_quijote_backup",
type=lambda x: bool(strtobool(x)), default=False,
help="Flag to indicate Quijote backup data.")
parser.add_argument("--lims", type=float, nargs="+", default=[11., 16.],
help="Mass limits in Msun / h.")
parser.add_argument("--bw", type=float, default=0.2,
help="Bin width in dex.")
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False, help="Verbosity flag.")
parser_args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(parser_args, paths)
if len(parser_args.lims) != 2:
raise ValueError("Mass limits must be a pair of floats.")
bins = numpy.arange(*parser_args.lims, parser_args.bw, dtype=numpy.float32)
def do_work(nsim):
get_counts(nsim, bins, paths, parser_args)
work_delegation(do_work, nsims, MPI.COMM_WORLD)


@@ -1,118 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass, Lagrangian patch size in the
initial snapshot.
The initial snapshot particles are read from the sorted files.
"""
from argparse import ArgumentParser
from datetime import datetime
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import tqdm
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def _main(nsim, simname, verbose):
"""
Calculate the Lagrangian halo centre of mass and Lagrangian patch size in
the initial snapshot.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cols = [("index", numpy.int32),
("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("lagpatch_size", numpy.float32),
("lagpatch_ncells", numpy.int32),]
fname = paths.initmatch(nsim, simname, "particles")
parts = csiborgtools.read.read_h5(fname)
parts = parts['particles']
halo_map = csiborgtools.read.read_h5(paths.particles(nsim, simname))
halo_map = halo_map["halomap"]
if simname == "csiborg":
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, bounds=None, load_fitted=False, load_initial=False)
else:
cat = csiborgtools.read.QuijoteHaloCatalogue(
nsim, paths, nsnap=4, load_fitted=False, load_initial=False)
hid2map = {hid: i for i, hid in enumerate(halo_map[:, 0])}
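# Map halo ID -> row index in halo_map, so the per-halo particle lookup
# below is a dictionary access rather than a search.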
# Initialise the overlapper.
if simname == "csiborg":
kwargs = {"box_size": 2048, "bckg_halfsize": 512}
else:
kwargs = {"box_size": 512, "bckg_halfsize": 256}
overlapper = csiborgtools.match.ParticleOverlap(**kwargs)
out = csiborgtools.read.cols_to_structured(len(cat), cols)
for i, hid in enumerate(tqdm(cat["index"]) if verbose else cat["index"]):
out["index"][i] = hid
part = csiborgtools.read.load_halo_particles(hid, parts, halo_map,
hid2map)
# Skip if the halo has no particles or is too small.
if part is None or part.size < 40:
continue
pos, mass = part[:, :3], part[:, 3]
# Calculate the centre of mass and the Lagrangian patch size.
cm = csiborgtools.center_of_mass(pos, mass, boxsize=1.0)
distances = csiborgtools.periodic_distance(pos, cm, boxsize=1.0)
out["x"][i], out["y"][i], out["z"][i] = cm
out["lagpatch_size"][i] = numpy.percentile(distances, 99)
# Calculate the number of cells with > 0 density.
delta = overlapper.make_delta(pos, mass, subbox=True)
out["lagpatch_ncells"][i] = csiborgtools.delta2ncells(delta)
# Now save it
fout = paths.initmatch(nsim, simname, "fit")
if verbose:
print(f"{datetime.now()}: dumping fits to .. `{fout}`.", flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def main(nsim):
_main(nsim, args.simname, MPI.COMM_WORLD.Get_size() == 1)
work_delegation(main, nsims, MPI.COMM_WORLD)


@@ -69,7 +69,7 @@ def pair_match_max(nsim0, nsimx, simname, min_logmass, mult, verbose):
raise ValueError(f"Unknown simulation `{simname}`.")
reader = csiborgtools.summary.PairOverlap(cat0, catx, paths, min_logmass,
maxdist=maxdist)
maxdist=maxdist)
out = csiborgtools.match.matching_max(
cat0, catx, mass_kind, mult=mult, periodic=periodic,
overlap=reader.overlap(from_smoothed=True),
@@ -106,54 +106,36 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
smooth_kwargs = {"sigma": sigma, "mode": "constant", "cval": 0}
bounds = {"lagpatch_size": (0, None)}
if simname == "csiborg":
overlapper_kwargs = {"box_size": 2048, "bckg_halfsize": 512}
mass_kind = "fof_totpartmass"
bounds = {"dist": (0, 155), mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.CSiBORGHaloCatalogue(
nsim0, paths, bounds=bounds, load_fitted=False,
with_lagpatch=True)
catx = csiborgtools.read.CSiBORGHaloCatalogue(
nsimx, paths, bounds=bounds, load_fitted=False,
with_lagpatch=True)
bounds |= {"dist": (0, 155), mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.CSiBORGCatalogue(
nsim0, paths, "halo_catalogue", "FOF", mass_kind, bounds)
catx = csiborgtools.read.CSiBORGCatalogue(
nsimx, paths, "halo_catalogue", "FOF", mass_kind, bounds)
elif simname == "quijote":
overlapper_kwargs = {"box_size": 512, "bckg_halfsize": 256}
mass_kind = "group_mass"
bounds = {mass_kind: (10**min_logmass, None)}
bounds |= {mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.QuijoteHaloCatalogue(
nsim0, paths, 4, bounds=bounds, load_fitted=False,
with_lagpatch=True)
catx = csiborgtools.read.QuijoteHaloCatalogue(
nsimx, paths, 4, bounds=bounds, load_fitted=False,
with_lagpatch=True)
cat0 = csiborgtools.read.QuijoteCatalogue(
nsim0, paths, "halo_catalogue", "FOF", mass_kind, bounds=bounds)
catx = csiborgtools.read.QuijoteCatalogue(
nsimx, paths, "halo_catalogue", "FOF", mass_kind, bounds=bounds)
else:
raise ValueError(f"Unknown simulation name: `{simname}`.")
halomap0 = csiborgtools.read.read_h5(
paths.particles(nsim0, simname))["halomap"]
parts0 = csiborgtools.read.read_h5(
paths.initmatch(nsim0, simname, "particles"))["particles"]
hid2map0 = {hid: i for i, hid in enumerate(halomap0[:, 0])}
halomapx = csiborgtools.read.read_h5(
paths.particles(nsimx, simname))["halomap"]
partsx = csiborgtools.read.read_h5(
paths.initmatch(nsimx, simname, "particles"))["particles"]
hid2mapx = {hid: i for i, hid in enumerate(halomapx[:, 0])}
overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs)
delta_bckg = overlapper.make_bckg_delta(parts0, halomap0, hid2map0, cat0,
delta_bckg = overlapper.make_bckg_delta(cat0, verbose=verbose)
delta_bckg = overlapper.make_bckg_delta(catx, delta=delta_bckg,
verbose=verbose)
delta_bckg = overlapper.make_bckg_delta(partsx, halomapx, hid2mapx, catx,
delta=delta_bckg, verbose=verbose)
matcher = csiborgtools.match.RealisationsMatcher(
mass_kind=mass_kind, **overlapper_kwargs)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx,
halomap0, halomapx, delta_bckg,
matcher = csiborgtools.match.RealisationsMatcher(mass_kind=mass_kind,
**overlapper_kwargs)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, delta_bckg,
verbose=verbose)
# We want to store the halo IDs of the matches, not their array positions
@@ -177,8 +159,7 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
halomap0, halomapx, delta_bckg,
smoothed_overlap = matcher.smoothed_cross(cat0, catx, delta_bckg,
match_indxs, smooth_kwargs,
verbose=verbose)


@@ -0,0 +1,979 @@
# Copyright (C) 2023 Mladen Ivkovic, Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import copy
import os
from os.path import exists, join
from os import makedirs
from sys import argv
from datetime import datetime
import numpy as np
from joblib import dump, load
from tqdm import trange
errmsg = """
------------------------------------
mergertree-extract.py
------------------------------------
---------------
Usage
---------------
This script extracts the masses of clumps and haloes written by the mergertree
patch.
It needs output_XXXXX/mergertree_XXXXX.txtYYYYY and
output_XXXXX/clump_XXXXX.txtYYYYY files to work.
You need to run it from the directory where the output_XXXXX directories are
in.
There are three working modes defined:
1) do for one clump only.
You need to provide the clump ID you want it done for.
You can provide a starting directory, but by default the script will
search for the directory where z = 0.
run with `python3 mergertree-extract.py <clumpid> [--options] `
this creates the file mergertree_XXXXX_halo-<halo-ID>.txt. Its contents are
discussed below.
2) do for one halo.
You need to provide the halo ID you want it done for, and the flag
-c or --children.
The script will by itself find all the child clumps and walk through
their main branches as well, and write them down.
run with `python3 mergertree-extract.py <haloid> -c [--options]`
or `python3 mergertree-extract.py <haloid> --children [--options]`
this creates the following files:
- halo_hierarchy_XXXXX-<halo-ID>.txt
contains the halo ID, how many children it has, and the children
IDs
- mergertree_XXXXX_halo-<halo-ID>.txt
mergertree data for halo that you chose.
- mergertree_XXXXX_subhalo-<child-ID>.txt
mergertree data for subhalos of the halo you chose. One file will
be created for each subhalo.
The contents of the mergertree_XXXXX* files are discussed below.
3) do for all haloes
The script will simply walk over all haloes in the z = 0 directory. Note:
Haloes, not clumps!
run with `python3 mergertree-extract.py -a [--options]`
or `python3 mergertree-extract.py --all [--options]`
This will create the same type of files as in mode (2), just for all
haloes.
If only an integer is given as cmdline arg, mode (1) [one clump only] will be
run. If no cmd line argument is given, mode (3) [--all] will be run.
---------------
Output
---------------
the mergertree_XXXXX* files have 6 columns:
snapshot The snapshot this data is taken from
redshift The redshift of that snapshot
clump_ID The clump ID of the clump at that snapshot
mass The mass of the clump at that snapshot, based on what's in
the output_XXXXX/mergertree_XXXXX.txtYYYYY files, not the
output_XXXXX/clump_XXXXX.txtYYYYY files.
mass_from_mergers how much mass has been merged into this clump in this
snapshot, i.e. the sum of all the clump masses that have
been found to merge with this clump at this snapshot. This
does not include the mass of clumps which only seem to
merge with this clump, but re-emerge later.
mass_from_jumpers The mass of all clumps that seem to merge with this clump,
but re-emerge at a later time.
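For illustration, a single row of such a file might look like (hypothetical
values): 67 0.0000 1234 1.5e+12 3.2e+10 1.1e+09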
----------------
Options
----------------
List of all flags:
Running modes
-a, --all: make trees for all clumps in output where z = 0
-c, --children: make trees for a halo and all its subhaloes. You need to
specify which halo via its halo ID.
-h, --help: print this help and exit.
Options:
--start-at=INT don't start at z = 0 snapshot, but with the specified
directory output_00INT.
--prefix=some/path/ path where you want your output written to.
-v, --verbose: be more verbose about what you're doing
-----------------
Requirements
-----------------
It needs output_XXXXX/mergertree_XXXXX.txtYYYYY and
output_XXXXX/clump_XXXXX.txtYYYYY files to work, which are created using the
mergertree patch in ramses.
Also needs numpy.
"""
###############################################################################
# Clump data #
###############################################################################
class ClumpData:
"""
Data from clump_XXXXX.txt
Parameters
----------
par : params object
"""
def __init__(self, par):
self.clumpids = np.zeros(1) # clump ID
self.parent = np.zeros(1) # parent ID
self.level = np.zeros(1) # clump level
def read_clumpdata(self, par):
"""Reads in the clump data for the z = 0 directory."""
if par.verbose:
print("Reading clump data.")
out = par.z0
raw_data = [None for i in range(par.ncpu)]
dirnrstr = str(par.outputnrs[out]).zfill(5)
dirname = 'output_' + dirnrstr
i = 0
for cpu in range(1):
fname = join(par.workdir, dirname, 'clump_' + dirnrstr + '.dat')
new_data = np.loadtxt(fname, dtype='int', skiprows=1,
usecols=[0, 1, 2])
if new_data.ndim == 2:
raw_data[i] = new_data
i += 1
elif new_data.shape[0] == 3: # if only 1 row is present in file
raw_data[i] = np.atleast_2d(new_data)
i += 1
fulldata = np.concatenate(raw_data[:i], axis=0)
self.clumpids = fulldata[:, 0]
self.level = fulldata[:, 1]
self.parent = fulldata[:, 2]
def cleanup_clumpdata(self, par, mtd):
"""
The particle unbinding can remove entire clumps from the catalogue.
If the corresponding option isn't set in the namelist, the clumpfinder
output is still written without the unbinding applied. If that is the case, the
clumpfinder catalogue will contain clumps which the mergertree data
doesn't have, leading to problems. So remove those here.
"""
for i, c in enumerate(self.clumpids):
if c not in mtd.descendants[par.z0]:
self.clumpids[i] = 0
self.level[i] = 0
self.parent[i] = -1 # don't make it the same as clumpid
def find_children(self, clumpid):
"""Find the children for given clump ID."""
children = []
last_added = [clumpid]
loopcounter = 0
while True:
loopcounter += 1
this_level_parents = copy.copy(last_added)
children += this_level_parents
last_added = []
for i, cid in enumerate(self.clumpids):
if self.parent[i] in this_level_parents and cid != clumpid:
last_added.append(cid)
if len(last_added) == 0:
break
if loopcounter == 100:
print("Finished 100 iterations, we shouldn't be this deep")
break
return children[1:] # don't return top level parent
def write_children(self, par, clumpid, children):
"""Write the children to file."""
hfile = join(par.outdir, f"{par.halofilename}-{str(clumpid)}.txt")
with open(hfile, 'w') as f:
f.write("# {0:>18} {1:>18} {2:>18}\n".format("halo", "nr_of_children", "children")) # noqa
nc = len(children)
dumpstring = " {0:18d} {1:18d}".format(clumpid, nc)
dumpstring = "".join([dumpstring] + [" {0:18d}".format(c) for c in children] + ['\n']) # noqa
f.write(dumpstring)
###############################################################################
# Constants object #
###############################################################################
class Constants:
"""
Class holding constants.
"""
def __init__(self):
self.Mpc = 3.086e24 # cm
self.M_Sol = 1.98855e33 # g
self.Gyr = (24 * 3600 * 365 * 1e9) # s
self.G = 4.492e-15 # Mpc^3/(M_sol Gyr^2)
self.H0 = 100 # km/s/Mpc
self.omega_m = 0.307000011205673
self.omega_l = 0.693000018596649
self.omega_k = 0.0
self.omega_b = 0.0
###############################################################################
# Params object #
###############################################################################
class Params:
"""
Global parameters to be stored
"""
def __init__(self):
# self.workdir = f"/mnt/extraspace/hdesmond/ramses_out_{self.nsim}"
# self.outdir = f"/mnt/extraspace/rstiskalek/CSiBORG/cleaned_mtree/ramses_out_{self.nsim}" # noqa
# if not exists(self.outdir):
# makedirs(self.outdir)
self.lastdir = "" # last output_XXXXX directory
self.lastdirnr = -1 # XXXX from lastdir
self.ncpu = 1 # Number of CPUs used
self.noutput = 1 # how many output_XXXXX dirs exist
self.nout = 1 # how many outputs we're gonna deal with. (Some might not have merger tree data) # noqa
self.outputnrs = None # numpy array of output numbers
self.output_lowest = 0 # lowest snapshot number that we're dealing with (>= 1) # noqa
self.z0 = 0 # index of z=0 snapshot (or whichever you want to start with) # noqa
# NOTE: params.nout will be defined such that you can easily loop
self.verbose = False # verbosity
self.start_at = 0 # output dir to start with, if given
self.output_prefix = "" # user given prefix for output files
self.outputfilename = "" # output filename. Stores prefix/mergertree_XXXXX part of name only # noqa
self.halofilename = "" # output filename for halo hierarchy. Stores prefix/halo_hierarchy_XXXXX part of filename only # noqa
self.one_halo_only = False # do the tree for one clump only
self.halo_and_children = False # do the tree for one halo, including subhaloes # noqa
self.do_all = False # do for all clumps at z=0 output
self.clumpid = 0 # which clump ID to work for.
self.nsim = None
# Dictionary of accepted keyword command line arguments
self.accepted_flags = {
'-a': self.set_do_all,
'--all': self.set_do_all,
'-r': self.set_halo_and_children,
'--recursive': self.set_halo_and_children,
'-c': self.set_halo_and_children,
'--children': self.set_halo_and_children,
'-h': self.get_help,
'--help': self.get_help,
'-v': self.set_verbose,
'--verbose': self.set_verbose,
}
self.accepted_flags_with_args = {
"--nsim": self.set_nsim,
'--start-at': self.set_startnr,
'--prefix': self.set_prefix,
}
# -----------------------------
# Setter methods
# -----------------------------
def set_do_all(self):
self.do_all = True
return
def set_halo_and_children(self):
self.halo_and_children = True
return
def get_help(self):
print(errmsg)
quit()
return
def set_verbose(self):
self.verbose = True
return
def set_startnr(self, arg):
flag, startnr = arg.split("=")
try:
self.start_at = int(startnr)
except ValueError:
print("given value for --start-at=INT isn't an integer?")
def set_prefix(self, arg):
flag, prefix = arg.split("=")
# try:
self.output_prefix = prefix
try:
os.makedirs(self.output_prefix)
except FileExistsError:
pass
return
def set_nsim(self, arg):
flag, nsim = arg.split("=")
try:
self.nsim = int(nsim)
except ValueError:
print("given value for --nsim=INT isn't an integer?")
def read_cmdlineargs(self):
"""
Reads in the command line arguments and store them in the
global_params object.
"""
nargs = len(argv)
i = 1 # first cmdlinearg is filename of this file, so skip it
while i < nargs:
arg = argv[i]
arg = arg.strip()
if arg in self.accepted_flags.keys():
self.accepted_flags[arg]()
else:
for key in self.accepted_flags_with_args.keys():
if arg.startswith(key):
self.accepted_flags_with_args[key](arg)
break
else:
try:
self.clumpid = int(arg)
except ValueError:
print(f"I didn't recognize the argument '{arg}'. Use "
"mergertre-extract.py -h or --help to print "
"help message.")
quit()
i += 1
if self.nsim is None:
raise ValueError("nsim not set. Use --nsim=INT to set it.")
@property
def workdir(self):
return f"/mnt/extraspace/hdesmond/ramses_out_{self.nsim}"
@property
def outdir(self):
fname = f"/mnt/extraspace/rstiskalek/CSiBORG/cleaned_mtree/ramses_out_{self.nsim}" # noqa
if not exists(fname):
makedirs(fname)
return fname
def get_output_info(self):
"""
Read in the output info based on the files in the current working
directory. Reads in last directory, ncpu, noutputs. Doesn't read
infofiles.
"""
# self.workdir = os.getcwd()
filelist = os.listdir(self.workdir)
outputlist = []
for filename in filelist:
if filename.startswith('output_'):
outputlist.append(filename)
if len(outputlist) < 1:
print("I didn't find any output_XXXXX directories in current "
"working directory. Are you in the correct workdir? "
"Use mergertree-extract.py -h or --help to print help "
"message.")
quit()
outputlist.sort()
self.lastdir = outputlist[-1]
self.lastdirnr = int(self.lastdir[-5:])
self.noutput = len(outputlist)
if (self.start_at > 0):
# check that directory exists
startnrstr = str(self.start_at).zfill(5)
if 'output_' + startnrstr not in outputlist:
print("Didn't find specified starting directory "
f"output_{startnrstr} use mergertree-extract.py -h or "
"--help to print help message.")
quit()
# read ncpu from infofile in last output directory
infofile = join(self.workdir, self.lastdir,
f"info_{self.lastdir[-5:]}.txt")
with open(infofile, 'r') as f:
ncpuline = f.readline()
line = ncpuline.split()
self.ncpu = int(line[-1])
def setup_and_checks(self, sd):
"""
Do checks and additional setups once you have all the cmd line args and
output infos
Parameters
----------
sd: snapshotdata object
"""
# set running mode
if not self.do_all:
if self.clumpid <= 0:
print("No or wrong clump id given. Setting the --all mode.")
self.set_do_all()
else:
if not self.halo_and_children:
self.one_halo_only = True
# generate list of outputdirnumbers
startnr = self.lastdirnr
self.outputnrs = np.array(range(startnr, startnr - self.noutput, -1))
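# descending output numbers, e.g. [67, 66, ..., 1] if the last directory
# is output_00067, so index 0 corresponds to the final snapshot.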
# find starting output directory
self.z0 = np.argmin(np.absolute(sd.redshift))
if self.start_at > 0:
# replace z0 dir with starting dir
self.z0 = self.lastdirnr - self.start_at
# generate output filename
dirnrstr = str(self.outputnrs[self.z0]).zfill(5)
fname = "mergertree_" + dirnrstr
self.outputfilename = join(self.output_prefix, fname)
# generate halo output filename
fname = "halo_hierarchy_" + dirnrstr
self.halofilename = join(self.output_prefix, fname)
# rename output_prefix to something if it wasn't set
if self.output_prefix == "":
self.output_prefix = os.path.relpath(self.workdir)
# find self.nout; i.e. how many outputs we are actually going to have
for out in range(self.noutput - 1, -1, -1):
dirnrstr = str(self.outputnrs[out]).zfill(5)
mtreefile = join(self.workdir,
f"output_{dirnrstr}",
f"mergertree_{dirnrstr}.dat")
if os.path.exists(mtreefile):
print("Loading mergertree data from ", mtreefile)
# if there is a file, this is lowest snapshot number directory
# that we'll be dealing with, and hence will have the highest
# index number in the arrays I'm using
# NOTE: params.nout will be defined such that you can easily
# loop for out in range(p.z0, p.nout)
self.nout = out + 1
break
def print_params(self):
"""Prints out the parameters that are set."""
if self.do_all:
print("Working mode: all clumps")
else:
if self.halo_and_children:
print("Working mode: halo", self.clumpid, "and its children") # noqa
else:
print("Working mode: clump ", self.clumpid)
print("workdir: ", self.workdir)
print("snapshot of tree root: ", self.outputnrs[self.z0])
print("p.one_halo_only ", p.one_halo_only)
print("p.do_all ", p.do_all)
print("p.halo_and_children ", p.halo_and_children)
print("p.one_halo_only ", p.one_halo_only)
###############################################################################
# Merger tree data #
###############################################################################
class MTreeData:
"""
Merger tree data lists
Parameters
----------
par : params object
"""
def __init__(self, par):
self.progenitors = [np.zeros(1) for i in range(par.noutput)] # progenitor IDs # noqa
self.descendants = [np.zeros(1) for i in range(par.noutput)] # descendant IDs # noqa
self.progenitor_outputnrs = [np.zeros(1) for i in range(par.noutput)] # snapshot number of progenitor # noqa
self.mass = [np.zeros(1) for i in range(par.noutput)] # descendant mass # noqa
        self.mass_to_remove = [np.zeros(1) for i in range(par.noutput)]  # mass to remove from descendant (jumpers)  # noqa
def read_mergertree_data(self, par, sd):
"""Reads in mergertree data."""
if par.verbose:
print("Reading in mergertree data")
# Preparation
# define new datatype for mergertree output
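        # (one row per line of the mergertree_XXXXX.dat files read below)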
mtree = np.dtype([('clump', 'i4'),
('prog', 'i4'),
('prog_outnr', 'i4'),
('mass', 'f8'),
('npart', 'f8'),
('x', 'f8'),
('y', 'f8'),
('z', 'f8'),
('vx', 'f8'),
('vy', 'f8'),
('vz', 'f8')
])
# ---------------------------
# Loop over directories
# ---------------------------
startnr = par.lastdirnr
# READ THE ONES BEFORE z0 TOO!
for output in trange(par.nout, desc="Reading merger"):
dirnr = str(startnr - output).zfill(5)
srcdir = 'output_' + dirnr
fnames = [srcdir + '/' + "mergertree_" + dirnr + '.dat']
fnames[0] = join(par.workdir, fnames[0])
datalist = [np.zeros((1, 3)) for i in range(par.ncpu)]
i = 0
nofile = 0
for f in fnames:
if os.path.exists(f):
datalist[i] = np.atleast_1d(np.genfromtxt(f, dtype=mtree,
skip_header=1))
i += 1
else:
nofile += 1
            if nofile == par.ncpu:
print("Didn't find any mergertree data in", srcdir)
# ---------------------------------
# Sort out data
# ---------------------------------
if i > 0:
fulldata = np.concatenate(datalist[:i], axis=0)
self.descendants[output] = fulldata[:]['clump']
self.progenitors[output] = fulldata[:]['prog']
self.progenitor_outputnrs[output] = fulldata[:]['prog_outnr']
self.mass[output] = fulldata[:]['mass']
# self.npart[output] = fulldata[:]['npart']
# self.x[output] = fulldata[:]['x']
# self.y[output] = fulldata[:]['y']
# self.z[output] = fulldata[:]['z']
# self.vx[output] = fulldata[:]['vx']
# self.vy[output] = fulldata[:]['vy']
# self.vz[output] = fulldata[:]['vz']
# --------------------------------------
# Transform units to physical units
# --------------------------------------
for i in range(len(self.descendants)):
self.mass[i] *= sd.unit_m[i]
# self.x[i] *= sd.unit_l[i] # only transform later when needed; Need to check for periodicity first! # noqa
# self.y[i] *= sd.unit_l[i]
# self.z[i] *= sd.unit_l[i]
# self.vx[i] *= sd.unit_l[i]/sd.unit_t[i]
# self.vy[i] *= sd.unit_l[i]/sd.unit_t[i]
# self.vz[i] *= sd.unit_l[i]/sd.unit_t[i]
def clean_up_jumpers(self, par):
"""
Remove jumpers from the merger list. Take note of how much mass should
be removed from the descendant because the jumper is to be removed.
"""
# First initialize mass_to_remove arrays
self.mass_to_remove = [np.zeros(self.descendants[out].shape)
for out in range(par.noutput)]
nreplaced = 0
for out in trange(par.nout + par.z0 - 1, desc="Cleaning jumpers"):
for i, pr in enumerate(self.progenitors[out]):
if pr < 0:
                    # Subtract 1 from snapind: progenitor_outputnrs gives the
                    # snapshot number where the jumper was last a descendant,
                    # so the merging event must be overwritten one snapshot
                    # later, where the clump is the progenitor.
                    snapind = get_snap_ind(par, self.progenitor_outputnrs[out][i]) - 1  # noqa
# NOTE bottleneck
jumpind = self.progenitors[snapind] == -pr
# NOTE bottleneck
                    # find the index of the descendant into which this clump
                    # appears to merge
mergerind = self.descendants[snapind] == - self.descendants[snapind][jumpind] # noqa
# overwrite merging event so it won't count
self.descendants[snapind][jumpind] = 0
# find mass of jumper in previous snapshot
jumpmassind = self.descendants[snapind + 1] == -pr
                    # record how much mass may later need to be removed from
                    # the descendant
self.mass_to_remove[snapind][mergerind] += self.mass[snapind + 1][jumpmassind] # noqa
nreplaced += 1
print("Cleaned out", nreplaced, "jumpers")
def get_tree(self, par, tree, sd, clumpid):
"""Follow the main branch down."""
if par.verbose:
print("Computing tree for clump", clumpid)
dind = self.descendants[par.z0] == clumpid
        desc_snap_ind = par.z0
        desc = self.descendants[par.z0][dind]
        prog = self.progenitors[par.z0][dind]
def get_prog_indices(prog, desc_snap_ind):
"""
Compute snapshot index at which given progenitor has been a
descendant and its index in the array
prog: progenitor ID
desc_snap_ind: snapshot index of descendant of given prog
returns:
p_snap_ind: snapshot index of the progenitor
pind: progenitor index (np.array mask) of progenitor in
array where it is descendant
"""
if prog > 0: # if progenitor isn't jumper
# find progenitor's index in previous snapshot
p_snap_ind = desc_snap_ind + 1
pind = self.descendants[p_snap_ind] == prog
elif prog < 0:
p_snap_ind = get_snap_ind(
par, self.progenitor_outputnrs[desc_snap_ind][dind])
pind = self.descendants[p_snap_ind] == -prog
return p_snap_ind, pind
while True:
# first calculate merger mass
mergers = self.descendants[desc_snap_ind] == -desc
mergermass = 0.0
if mergers.any():
for m in self.progenitors[desc_snap_ind][mergers]:
# find mass of merger. That's been written down at the
# place where merger was descendant.
m_snap_ind, mergerind = get_prog_indices(m, desc_snap_ind)
mergermass += self.mass[m_snap_ind][mergerind]
# add the descendant to the tree
tree.add_snap(par.outputnrs[desc_snap_ind],
sd.redshift[desc_snap_ind], desc,
self.mass[desc_snap_ind][dind], mergermass,
self.mass_to_remove[desc_snap_ind][dind])
# now descend down the main branch
if prog != 0:
p_snap_ind, pind = get_prog_indices(prog, desc_snap_ind)
else:
# stop at progenitor = 0
break
# prepare for next round
desc_snap_ind = p_snap_ind
dind = pind
desc = abs(prog)
prog = self.progenitors[p_snap_ind][pind]
###############################################################################
# Snapshot data #
###############################################################################
class SnapshotData:
"""Snapshot specific data"""
def __init__(self, par):
# read in
self.aexp = np.zeros(par.noutput)
self.unit_l = np.zeros(par.noutput)
self.unit_m = np.zeros(par.noutput)
self.unit_t = np.zeros(par.noutput)
self.unit_dens = np.zeros(par.noutput)
# to be computed
self.redshift = np.zeros(par.noutput) # z
def read_infofiles(self, par, const):
"""Read the info_XXXXX.txt files."""
if par.verbose:
print("Reading info files.")
startnr = par.lastdirnr
        for output in range(par.noutput):
# Start with last directory (e.g. output_00060),
# work your way to first directory (e.g. output_00001)
# p.z0 isn't decided yet, so just read in everything here.
dirnr = str(startnr - output).zfill(5)
srcdir = 'output_' + dirnr
try:
# ------------------------------------------------------
# get time, redshift, and units even for output_00001
# ------------------------------------------------------
fileloc = srcdir + '/info_' + dirnr + '.txt'
fileloc = join(par.workdir, fileloc)
infofile = open(fileloc)
for i in range(9):
infofile.readline() # skip first 9 lines
# get expansion factor
aline = infofile.readline()
astring, equal, aval = aline.partition("=")
afloat = float(aval)
                self.aexp[output] = afloat
for i in range(5):
infofile.readline() # skip 5 lines
# get unit_l
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
unitfloat = float(unitval)
                self.unit_l[output] = unitfloat
# get unit_dens
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
unitfloat = float(unitval)
                self.unit_dens[output] = unitfloat
# get unit_t
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
unitfloat = float(unitval)
                self.unit_t[output] = unitfloat
infofile.close()
except IOError: # If file doesn't exist
print("Didn't find any info data in ", srcdir)
break
self.unit_m = self.unit_dens * self.unit_l ** 3 / const.M_Sol
self.unit_l /= const.Mpc
self.unit_t /= const.Gyr
self.redshift = 1. / self.aexp - 1
###############################################################################
# Tree object #
###############################################################################
class Tree:
"""
Holds tree result data. It's not really a tree, it's just the values along
the main branch, but let's call it a tree anyway. Sue me.
Parameters
----------
nelements : int
Estimate for how many snapshots you need to allocate space for.
"""
def __init__(self, nelements):
self.n = 0 # number of elements in tree # noqa
self.snapshotnr = -np.ones(nelements, dtype=int) # snapshot number of array values # noqa
self.redshift = -np.ones(nelements, dtype=float) # redshift at that snapshot # noqa
self.clumpids = -np.ones(nelements, dtype=int) # clump id of halo in that snapshot # noqa
self.mass = np.zeros(nelements, dtype=float) # mass at that snapshot # noqa
self.mergermass = np.zeros(nelements, dtype=float) # sum of mass of swallowed up clumps # noqa
        self.mass_to_remove = np.zeros(nelements, dtype=float)  # sum of mass of removed jumpers  # noqa
def add_snap(self, nr, z, ID, m, mm, mdel):
"""Add new result."""
n = self.n
self.snapshotnr[n] = nr
self.redshift[n] = z
self.clumpids[n] = ID
self.mass[n] = m
self.mergermass[n] = mm
self.mass_to_remove[n] = mdel
self.n += 1
def write_tree(self, par, case='halo'):
"""Write the results to file."""
resfile = join(
par.outdir,
f"{par.outputfilename}_{case}-{str(self.clumpids[0])}.txt")
with open(resfile, 'w') as f:
f.write('# {0:>12} {1:>12} {2:>16} {3:>18} {4:>18} {5:>18}\n'.format( # noqa
"snapshot", "redshift", "clump_ID", "mass[M_sol]",
"mass_from_mergers", "mass_from_jumpers"))
for i in range(self.n):
f.write(' {0:12d} {1:12.4f} {2:16d} {3:18.6e} {4:18.6e} {5:18.6e}\n'.format( # noqa
self.snapshotnr[i], self.redshift[i], self.clumpids[i],
self.mass[i], self.mergermass[i], self.mass_to_remove[i]))
return
def get_snap_ind(p, snap):
"""
Computes the snapshot index in mtreedata/halodata/snapshotdata arrays for a
given snapshot number snap
"""
return (p.noutput - snap).item()
if __name__ == '__main__':
p = Params()
c = Constants()
# Read cmdlineargs, available output, get global parameters
p.read_cmdlineargs()
p.get_output_info()
sd = SnapshotData(p)
sd.read_infofiles(p, c)
# finish setup
p.setup_and_checks(sd)
p.print_params()
# now read in mergertree data
fname = join(p.outdir, "mtreedata.p")
if exists(fname):
print(f"{datetime.now()}: loading mergertree data from `{fname}`.",
flush=True)
mtd = load(fname)
print(f"{datetime.now()}: finished loading mergertree data from `{fname}`.", # noqa
flush=True)
else:
print("Generating mergertree data.", flush=True)
mtd = MTreeData(p)
mtd.read_mergertree_data(p, sd)
# clean up jumpers
mtd.clean_up_jumpers(p)
print("Saving mergertree data.", flush=True)
dump(mtd, fname)
# read in clump data if required
if p.do_all or p.halo_and_children:
cd = ClumpData(p)
cd.read_clumpdata(p)
# clean up halo catalogue
cd.cleanup_clumpdata(p, mtd)
# find children, and write them down
if p.verbose:
print("Searching for child clumps.")
if p.halo_and_children:
children = cd.find_children(p.clumpid)
cd.write_children(p, p.clumpid, children)
if p.do_all:
is_halo = cd.clumpids == cd.parent
childlist = [None for c in cd.clumpids[is_halo]]
for i, halo in enumerate(cd.clumpids[is_halo]):
children = cd.find_children(halo)
cd.write_children(p, halo, children)
childlist[i] = children
# finally, get the bloody tree
if p.one_halo_only:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, p.clumpid)
newtree.write_tree(p, 'halo')
if p.halo_and_children:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, p.clumpid)
newtree.write_tree(p, 'halo')
for c in children:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, c)
newtree.write_tree(p, 'subhalo')
if p.do_all:
for i, halo in enumerate(cd.clumpids[is_halo]):
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, halo)
newtree.write_tree(p, 'halo')
for c in childlist[i]:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, c)
newtree.write_tree(p, 'subhalo')
print('Finished.')

@@ -1,142 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Short script to move and change the format of the CSiBORG FoF membership
files calculated by Julien. It also orders the particles in the same way as
the PHEW halo finder output.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from os.path import join
from shutil import copy
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def copy_membership(nsim, verbose=True):
"""
Copy the FoF particle halo membership to the CSiBORG directory and write it
as a NumPy array instead of a text file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fpath = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
f"sim_{nsim}/particle_membership_{nsim}_FOF.txt")
if verbose:
print(f"Loading from ... `{fpath}`.")
data = numpy.genfromtxt(fpath, dtype=int)
fout = paths.fof_membership(nsim, "csiborg")
if verbose:
print(f"Saving to ... `{fout}`.")
numpy.save(fout, data)
def copy_catalogue(nsim, verbose=True):
"""
Move the FoF catalogue to the CSiBORG directory.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
source = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
f"sim_{nsim}/halo_catalog_{nsim}_FOF.txt")
dest = paths.fof_cat(nsim, "csiborg")
if verbose:
print("Copying`{}` to `{}`.".format(source, dest))
copy(source, dest)
def sort_fofid(nsim, verbose=True):
"""
Read the FoF particle halo membership and sort the halo IDs to the ordering
of particles in the PHEW clump IDs.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
fpath = paths.fof_membership(nsim, "csiborg")
if verbose:
print(f"{datetime.now()}: loading from ... `{fpath}`.")
# Columns are halo ID, particle ID.
fof = numpy.load(fpath)
reader = csiborgtools.read.CSiBORGReader(paths)
pars_extract = ["x"] # Dummy variable
__, pids = reader.read_particle(nsnap, nsim, pars_extract,
return_structured=False, verbose=verbose)
del __
collect()
# Map the particle IDs in pids to their corresponding PHEW array index
if verbose:
print(f"{datetime.now()}: mapping particle IDs to their indices.")
pids_idx = {pid: i for i, pid in enumerate(pids)}
if verbose:
print(f"{datetime.now()}: mapping FoF HIDs to their array indices.")
# Unassigned particle IDs are assigned a halo ID of 0. Same as PHEW.
fof_hids = numpy.zeros(pids.size, dtype=numpy.int32)
for i in trange(fof.shape[0]) if verbose else range(fof.shape[0]):
hid, pid = fof[i]
fof_hids[pids_idx[pid]] = hid
fout = paths.fof_membership(nsim, "csiborg", sorted=True)
if verbose:
print(f"Saving the sorted data to ... `{fout}`")
numpy.save(fout, fof_hids)
def main(nsim, verbose=True):
copy_membership(nsim, verbose=verbose)
copy_catalogue(nsim, verbose=verbose)
sort_fofid(nsim, verbose=verbose)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
work_delegation(main, nsims, MPI.COMM_WORLD)

@@ -1,109 +0,0 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate each particle's separation from the CM and save it.
Currently MPI is not supported.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
import numpy
from mpi4py import MPI
from tqdm import trange
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisatiosn. If `-1` processes all simulations.")
args = parser.parse_args()
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
if nproc > 1:
    raise NotImplementedError("MPI is not implemented yet.")
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cols_collect = [("r", numpy.float32), ("M", numpy.float32)]
if args.ics is None or -1 in args.ics:
nsims = paths.get_ics("csiborg")
else:
nsims = args.ics
# Loop over the simulations. MPI support could optionally be added here later.
for i, nsim in enumerate(nsims):
if rank == 0:
now = datetime.now()
print(f"{now}: calculating {i}th simulation `{nsim}`.", flush=True)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
f = csiborgtools.read.read_h5(paths.particles(nsim, "csiborg"))
particles = f["particles"]
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
ismain = clumps_cat.ismain
ntasks = len(clumps_cat)
    # We loop over halos and add their particle positions to this dictionary,
# which we will later save as an archive.
out = {}
for j in trange(ntasks) if nproc == 1 else range(ntasks):
        # If this clump is not a main clump, skip it.
if not ismain[j]:
continue
clumpid = clumps_cat["index"][j]
parts = csiborgtools.read.load_parent_particles(
clumpid, particles, clump_map, clid2map, clumps_cat)
# If we have no particles, then do not save anything.
if parts is None:
continue
obj = csiborgtools.fits.Clump(parts, clumps_cat[j], box)
r200m, m200m = obj.spherical_overdensity_mass(200, npart_min=10,
kind="matter")
r = obj.r()
mask = r <= r200m
_out = csiborgtools.read.cols_to_structured(numpy.sum(mask),
cols_collect)
_out["r"] = r[mask]
_out["M"] = obj["M"][mask]
out[str(clumpid)] = _out
# Finished, so we save everything.
fout = paths.radpos_path(nsnap, nsim)
now = datetime.now()
print(f"{now}: saving radial profiles for simulation {nsim} to `{fout}`",
flush=True)
numpy.savez(fout, **out)
# Clean up the memory just to be sure.
del out
collect()

@@ -1,64 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to generate the mmain files, i.e. to sum up the substructure of children.
"""
from datetime import datetime
import numpy
from mpi4py import MPI
from taskmaster import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
mmain_reader = csiborgtools.read.MmainReader(paths)
def do_mmain(nsim):
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
# NOTE: currently works for highest snapshot anyway
mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
numpy.savez(paths.mmain(nsnap, nsim),
mmain=mmain, ultimate_parent=ultimate_parent)
###############################################################################
# MPI task delegation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = list(paths.get_ics("csiborg"))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_mmain, comm, verbose=False)
else:
tasks = paths.get_ics("csiborg")
for task in tasks:
print(f"{datetime.now()}: completing task `{task}`.", flush=True)
do_mmain(task)
comm.Barrier()

@@ -1,14 +0,0 @@
nthreads=102
memory=5
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="pre_mmain.py"
# pythoncm="$env $file"
# $pythoncm
cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
echo "Submitting:"
echo $cm
$cm

@@ -1,185 +0,0 @@
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to load in the simulation particles, sort them by their FoF halo ID and
dump into a HDF5 file. Stores the first and last index of each halo in the
particle array. This can be used for fast slicing of the array to access
particles of a single clump.
Ensures the following units:
- Positions in box units.
- Velocities in :math:`\mathrm{km} / \mathrm{s}`.
- Masses in :math:`M_\odot / h`.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
import h5py
import numba
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
@numba.jit(nopython=True)
def minmax_halo(hid, halo_ids, start_loop=0):
"""
Find the start and end index of a halo in a sorted array of halo IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, halo_ids.size):
n = halo_ids[i]
if n == hid:
if start is None:
start = i
end = i
elif n > hid:
break
return start, end
###############################################################################
# Sorting and dumping #
###############################################################################
def main(nsim, simname, verbose):
"""
Read in the snapshot particles, sort them by their FoF halo ID and dump
into a HDF5 file. Stores the first and last index of each halo in the
    particle array for fast slicing of the array to access particles of a single
halo.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
nsnap = max(paths.get_snapshots(nsim, simname))
fname = paths.particles(nsim, simname)
# We first read in the halo IDs of the particles and infer the sorting.
# Right away we dump the halo IDs to a HDF5 file and clear up memory.
if verbose:
print(f"{datetime.now()}: loading PIDs of IC {nsim}.", flush=True)
part_hids = partreader.read_fof_hids(
nsnap=nsnap, nsim=nsim, verbose=verbose)
if verbose:
print(f"{datetime.now()}: sorting PIDs of IC {nsim}.", flush=True)
sort_indxs = numpy.argsort(part_hids).astype(numpy.int32)
part_hids = part_hids[sort_indxs]
with h5py.File(fname, "w") as f:
f.create_dataset("halo_ids", data=part_hids)
f.close()
del part_hids
collect()
# Next we read in the particles and sort them by their halo ID.
# We cannot directly read this as an unstructured array because the float32
# precision is insufficient to capture the halo IDs.
if simname == "csiborg":
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
else:
pars_extract = None
parts, pids = partreader.read_particle(
nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
# In case of CSiBORG, we need to convert the mass and velocities from
# box units.
if simname == "csiborg":
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
parts[:, [3, 4, 5]] = box.box2vel(parts[:, [3, 4, 5]])
parts[:, 6] = box.box2solarmass(parts[:, 6])
    # Now we save the particles and particle IDs in two steps.
if verbose:
print(f"{datetime.now()}: dumping particles from {nsim}.", flush=True)
parts = parts[sort_indxs]
pids = pids[sort_indxs]
del sort_indxs
collect()
with h5py.File(fname, "r+") as f:
f.create_dataset("particle_ids", data=pids)
f.close()
del pids
collect()
with h5py.File(fname, "r+") as f:
f.create_dataset("particles", data=parts)
f.close()
del parts
collect()
if verbose:
print(f"{datetime.now()}: creating a halo map for {nsim}.", flush=True)
# Load clump IDs back to memory
with h5py.File(fname, "r") as f:
part_hids = f["halo_ids"][:]
# We loop over the unique halo IDs.
unique_halo_ids = numpy.unique(part_hids)
    # Fill with -1 (every row is overwritten below); NaN cannot be stored in
    # an integer array.
    halo_map = numpy.full((unique_halo_ids.size, 3), -1,
                          dtype=numpy.int32)
start_loop = 0
niters = unique_halo_ids.size
for i in trange(niters) if verbose else range(niters):
hid = unique_halo_ids[i]
k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
halo_map[i, 0] = hid
halo_map[i, 1] = k0
halo_map[i, 2] = kf
start_loop = kf
# We save the mapping to a HDF5 file
with h5py.File(fname, "r+") as f:
f.create_dataset("halomap", data=halo_map)
f.close()
del part_hids
collect()
if __name__ == "__main__":
# And next parse all the arguments and set up CSiBORG objects
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all .")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def _main(nsim):
main(nsim, args.simname, verbose=MPI.COMM_WORLD.Get_size() == 1)
work_delegation(_main, nsims, MPI.COMM_WORLD)

scripts/process_snapshot.py (new file, 457 lines)
@@ -0,0 +1,457 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to process simulation files and create a single HDF5 file, in which
particles are sorted by the particle halo IDs.
"""
from argparse import ArgumentParser
from gc import collect
import h5py
import numpy
from mpi4py import MPI
import csiborgtools
from csiborgtools import fprint
from numba import jit
from taskmaster import work_delegation
from tqdm import trange, tqdm
from utils import get_nsims
@jit(nopython=True, boundscheck=False)
def minmax_halo(hid, halo_ids, start_loop=0):
"""
Find the start and end index of a halo in a sorted array of halo IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, halo_ids.size):
n = halo_ids[i]
if n == hid:
if start is None:
start = i
end = i
elif n > hid:
break
return start, end
def process_snapshot(nsim, simname, halo_finder, verbose):
"""
Read in the snapshot particles, sort them by their halo ID and dump
into a HDF5 file. Stores the first and last index of each halo in the
    particle array for fast slicing of the array to access particles of a single
halo.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, simname))
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
box = None
desc = {"hid": f"Halo finder ID ({halo_finder})of the particle.",
"pos": "DM particle positions in box units.",
"vel": "DM particle velocity in km / s.",
"mass": "DM particle mass in Msun / h.",
"pid": "DM particle ID",
}
fname = paths.processed_output(nsim, simname, halo_finder)
fprint(f"loading HIDs of IC {nsim}.", verbose)
hids = partreader.read_halo_id(nsnap, nsim, halo_finder, verbose)
collect()
fprint(f"sorting HIDs of IC {nsim}.")
sort_indxs = numpy.argsort(hids)
with h5py.File(fname, "w") as f:
group = f.create_group("snapshot_final")
group.attrs["header"] = "Snapshot data at z = 0."
fprint("dumping halo IDs.", verbose)
dset = group.create_dataset("halo_ids", data=hids[sort_indxs])
dset.attrs["header"] = desc["hid"]
del hids
collect()
fprint("reading, sorting and dumping the snapshot particles.", verbose)
for kind in ["pos", "vel", "mass", "pid"]:
x = partreader.read_snapshot(nsnap, nsim, kind)[sort_indxs]
if simname == "csiborg" and kind == "vel":
x = box.box2vel(x) if simname == "csiborg" else x
if simname == "csiborg" and kind == "mass":
x = box.box2solarmass(x) if simname == "csiborg" else x
dset = f["snapshot_final"].create_dataset(kind, data=x)
dset.attrs["header"] = desc[kind]
del x
collect()
del sort_indxs
collect()
fprint(f"creating a halo map for IC {nsim}.")
with h5py.File(fname, "r") as f:
part_hids = f["snapshot_final"]["halo_ids"][:]
# We loop over the unique halo IDs and remove the 0 halo ID
unique_halo_ids = numpy.unique(part_hids)
unique_halo_ids = unique_halo_ids[unique_halo_ids != 0]
    # Fill with zeros (every row is overwritten below); NaN cannot be cast to
    # an unsigned integer dtype.
    halo_map = numpy.full((unique_halo_ids.size, 3), 0,
                          dtype=numpy.uint64)
start_loop, niters = 0, unique_halo_ids.size
for i in trange(niters, disable=not verbose):
hid = unique_halo_ids[i]
k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
halo_map[i, :] = hid, k0, kf
start_loop = kf
# Dump the halo mapping.
with h5py.File(fname, "r+") as f:
dset = f["snapshot_final"].create_dataset("halo_map", data=halo_map)
dset.attrs["header"] = """
Halo to particle mapping. Columns are HID, start index, end index.
"""
f.close()
del part_hids
collect()
# Add the halo finder catalogue
with h5py.File(fname, "r+") as f:
group = f.create_group("halofinder_catalogue")
group.attrs["header"] = f"Original {halo_finder} halo catalogue."
cat = partreader.read_catalogue(nsnap, nsim, halo_finder)
hid2pos = {hid: i for i, hid in enumerate(unique_halo_ids)}
for key in cat.dtype.names:
x = numpy.full(unique_halo_ids.size, numpy.nan,
dtype=cat[key].dtype)
for i in range(len(cat)):
j = hid2pos[cat["index"][i]]
x[j] = cat[key][i]
group.create_dataset(key, data=x)
f.close()
# Lastly create the halo catalogue
with h5py.File(fname, "r+") as f:
group = f.create_group("halo_catalogue")
group.attrs["header"] = f"{halo_finder} halo catalogue."
group.create_dataset("index", data=unique_halo_ids)
f.close()
def add_initial_snapshot(nsim, simname, halo_finder, verbose):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fname = paths.processed_output(nsim, simname, halo_finder)
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
fprint(f"processing simulation `{nsim}`.", verbose)
if simname == "csiborg":
nsnap0 = 1
elif simname == "quijote":
nsnap0 = -1
else:
raise ValueError(f"Unknown simulation `{simname}`.")
fprint("loading and sorting the initial PID.", verbose)
sort_indxs = numpy.argsort(partreader.read_snapshot(nsnap0, nsim, "pid"))
fprint("loading the final particles.", verbose)
with h5py.File(fname, "r") as f:
sort_indxs_final = f["snapshot_final/pid"][:]
f.close()
fprint("sorting the particles according to the final snapshot.", verbose)
sort_indxs_final = numpy.argsort(numpy.argsort(sort_indxs_final))
sort_indxs = sort_indxs[sort_indxs_final]
del sort_indxs_final
collect()
fprint("loading and sorting the initial particle position.", verbose)
pos = partreader.read_snapshot(nsnap0, nsim, "pos")[sort_indxs]
del sort_indxs
collect()
    # In Quijote some particles are positioned precisely at the edge of the
    # box. Move them to be just inside.
if simname == "quijote":
mask = pos >= 1
if numpy.any(mask):
spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-5
pos[mask] -= spacing
fprint(f"dumping particles for `{nsim}` to `{fname}`.", verbose)
with h5py.File(fname, "r+") as f:
if "snapshot_initial" in f.keys():
del f["snapshot_initial"]
group = f.create_group("snapshot_initial")
group.attrs["header"] = "Initial snapshot data."
dset = group.create_dataset("pos", data=pos)
dset.attrs["header"] = "DM particle positions in box units."
f.close()
def calculate_initial(nsim, simname, halo_finder, verbose):
"""Calculate the Lagrangian patch centre of mass and size."""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fname = paths.processed_output(nsim, simname, halo_finder)
fprint("loading the particle information.", verbose)
f = h5py.File(fname, "r")
pos = f["snapshot_initial/pos"]
mass = f["snapshot_final/mass"]
hid = f["halo_catalogue/index"][:]
hid2map = csiborgtools.read.make_halomap_dict(
f["snapshot_final/halo_map"][:])
if simname == "csiborg":
kwargs = {"box_size": 2048, "bckg_halfsize": 512}
else:
kwargs = {"box_size": 512, "bckg_halfsize": 256}
overlapper = csiborgtools.match.ParticleOverlap(**kwargs)
lagpatch_pos = numpy.full((len(hid), 3), numpy.nan, dtype=numpy.float32)
lagpatch_size = numpy.full(len(hid), numpy.nan, dtype=numpy.float32)
lagpatch_ncells = numpy.full(len(hid), numpy.nan, dtype=numpy.int32)
for i in trange(len(hid), disable=not verbose):
h = hid[i]
        # These are unassigned particles.
if h == 0:
continue
parts_pos = csiborgtools.read.load_halo_particles(h, pos, hid2map)
parts_mass = csiborgtools.read.load_halo_particles(h, mass, hid2map)
# Skip if the halo has no particles or is too small.
if parts_pos is None or parts_pos.size < 5:
continue
cm = csiborgtools.center_of_mass(parts_pos, parts_mass, boxsize=1.0)
sep = csiborgtools.periodic_distance(parts_pos, cm, boxsize=1.0)
delta = overlapper.make_delta(parts_pos, parts_mass, subbox=True)
lagpatch_pos[i] = cm
lagpatch_size[i] = numpy.percentile(sep, 99)
lagpatch_ncells[i] = csiborgtools.delta2ncells(delta)
f.close()
collect()
with h5py.File(fname, "r+") as f:
grp = f["halo_catalogue"]
dset = grp.create_dataset("lagpatch_pos", data=lagpatch_pos)
dset.attrs["header"] = "Lagrangian patch centre of mass in box units."
dset = grp.create_dataset("lagpatch_size", data=lagpatch_size)
dset.attrs["header"] = "Lagrangian patch size in box units."
dset = grp.create_dataset("lagpatch_ncells", data=lagpatch_ncells)
dset.attrs["header"] = f"Lagrangian patch number of cells on a {kwargs['box_size']}^3 grid." # noqa
f.close()
def make_phew_halo_catalogue(nsim, verbose):
"""
Process the PHEW halo catalogue for a CSiBORG simulation at all snapshots.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
snapshots = paths.get_snapshots(nsim, "csiborg")
reader = csiborgtools.read.CSiBORGReader(paths)
keys_write = ["index", "x", "y", "z", "mass_cl", "parent",
"ultimate_parent", "summed_mass"]
# Create a HDF5 file to store all this.
fname = paths.processed_phew(nsim)
with h5py.File(fname, "w") as f:
f.close()
for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):
try:
data = reader.read_phew_clumps(nsnap, nsim, verbose=False)
except FileExistsError:
continue
with h5py.File(fname, "r+") as f:
if str(nsnap) in f:
print(f"Group {nsnap} already exists. Deleting.", flush=True)
del f[str(nsnap)]
grp = f.create_group(str(nsnap))
for key in keys_write:
grp.create_dataset(key, data=data[key])
grp.attrs["header"] = f"CSiBORG PHEW clumps at snapshot {nsnap}."
f.close()
# Now write the redshifts
scale_factors = numpy.full(len(snapshots), numpy.nan, dtype=numpy.float32)
for i, nsnap in enumerate(snapshots):
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
scale_factors[i] = box._aexp
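    # Redshift relative to the final snapshot, assuming it corresponds to z = 0.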
redshifts = scale_factors[-1] / scale_factors - 1
with h5py.File(fname, "r+") as f:
grp = f.create_group("info")
grp.create_dataset("redshift", data=redshifts)
grp.create_dataset("snapshots", data=snapshots)
grp.create_dataset("Om0", data=[box.Om0])
grp.create_dataset("boxsize", data=[box.boxsize])
f.close()
def make_merger_tree_file(nsim, verbose):
"""
Process the `.dat` merger tree files and dump them into a HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
reader = csiborgtools.read.CSiBORGReader(paths)
snaps = paths.get_snapshots(nsim, "csiborg")
fname = paths.processed_merger_tree(nsim)
with h5py.File(fname, "w") as f:
f.close()
for nsnap in tqdm(snaps, desc="Loading merger files",
disable=not verbose):
try:
data = reader.read_merger_tree(nsnap, nsim)
except FileExistsError:
continue
with h5py.File(fname, "r+") as f:
grp = f.create_group(str(nsnap))
grp.create_dataset("clump",
data=data[:, 0].astype(numpy.int32))
grp.create_dataset("progenitor",
data=data[:, 1].astype(numpy.int32))
grp.create_dataset("progenitor_outputnr",
data=data[:, 2].astype(numpy.int32))
grp.create_dataset("desc_mass",
data=data[:, 3].astype(numpy.float32))
grp.create_dataset("desc_npart",
data=data[:, 4].astype(numpy.int32))
grp.create_dataset("desc_pos",
data=data[:, 5:8].astype(numpy.float32))
grp.create_dataset("desc_vel",
data=data[:, 8:11].astype(numpy.float32))
f.close()
def append_merger_tree_mass_to_phew_catalogue(nsim, verbose):
"""
Append mass of haloes from mergertree files to the PHEW catalogue. The
difference between this and the PHEW value is that the latter is written
before unbinding is performed.
    Note that currently this is done only for the highest snapshots.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
snapshots = paths.get_snapshots(nsim, "csiborg")
merger_reader = csiborgtools.read.MergerReader(nsim, paths)
for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):
# TODO do this for all later
if nsnap < 930:
continue
try:
phewcat = csiborgtools.read.CSiBORGPHEWCatalogue(nsnap, nsim,
paths)
except ValueError:
            continue
mergertree_mass = merger_reader.match_mass_to_phewcat(phewcat)
phewcat.close()
fname = paths.processed_phew(nsim)
with h5py.File(fname, "r+") as f:
grp = f[str(nsnap)]
grp.create_dataset("mergertree_mass_new", data=mergertree_mass)
f.close()
def main(nsim, args):
if args.make_final:
process_snapshot(nsim, args.simname, args.halofinder, True)
if args.make_initial:
add_initial_snapshot(nsim, args.simname, args.halofinder, True)
calculate_initial(nsim, args.simname, args.halofinder, True)
if args.make_phew:
make_phew_halo_catalogue(nsim, True)
if args.make_merger:
make_merger_tree_file(nsim, True)
if args.append_merger_mass:
append_merger_tree_mass_to_phew_catalogue(nsim, True)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
parser.add_argument("--halofinder", type=str, help="Halo finder")
parser.add_argument("--make_final", action="store_true", default=False,
help="Process the final snapshot.")
parser.add_argument("--make_initial", action="store_true", default=False,
help="Process the initial snapshot.")
parser.add_argument("--make_phew", action="store_true", default=False,
help="Process the PHEW halo catalogue.")
parser.add_argument("--make_merger", action="store_true", default=False,
help="Process the merger tree files.")
parser.add_argument("--append_merger_mass", action="store_true",
default=False,
help="Append the merger tree mass to the PHEW cat.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def _main(nsim):
main(nsim, args)
work_delegation(_main, nsims, MPI.COMM_WORLD)

@@ -1,100 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to sort the HaloMaker's `particle_membership` file to match the ordering
of particles in the simulation snapshot.
"""
from argparse import ArgumentParser
from datetime import datetime
from glob import iglob
import h5py
import numpy
import pynbody
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange
import csiborgtools
def sort_particle_membership(nsim, nsnap, method):
"""
Read the FoF particle halo membership and sort the halo IDs to the ordering
of particles in the PHEW clump IDs.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
"""
print(f"{datetime.now()}: starting simulation {nsim}, snapshot {nsnap} and method {method}.") # noqa
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fpath = next(iglob(f"/mnt/extraspace/rstiskalek/CSiBORG/halo_maker/ramses_{nsim}/output_{str(nsnap).zfill(5)}/**/*particle_membership*", recursive=True), None) # noqa
print(f"{datetime.now()}: loading particle membership `{fpath}`.")
# Columns are halo ID, particle ID
membership = numpy.genfromtxt(fpath, dtype=int)
print(f"{datetime.now()}: loading particle IDs from the snapshot.")
sim = pynbody.load(paths.snapshot(nsnap, nsim, "csiborg"))
pids = numpy.asanyarray(sim["iord"])
print(f"{datetime.now()}: mapping particle IDs to their indices.")
pids_idx = {pid: i for i, pid in enumerate(pids)}
print(f"{datetime.now()}: mapping HIDs to their array indices.")
# Unassigned particle IDs are assigned a halo ID of 0.
hids = numpy.zeros(pids.size, dtype=numpy.int32)
for i in trange(membership.shape[0]):
hid, pid = membership[i]
hids[pids_idx[pid]] = hid
fout = fpath + "_sorted.hdf5"
print(f"{datetime.now()}: saving the sorted data to ... `{fout}`")
header = """
This dataset represents halo indices for each particle.
- The particles are ordered as they appear in the simulation snapshot.
- Unassigned particles are given a halo index of 0.
"""
with h5py.File(fout, 'w') as hdf:
dset = hdf.create_dataset('hids_dataset', data=hids)
dset.attrs['header'] = header
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--method", type=str, required=True,
help="HaloMaker method")
parser.add_argument("--nsim", type=int, required=False, default=None,
help="IC index. If not set process all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if args.nsim is None:
ics = paths.get_ics("csiborg")
else:
ics = [args.nsim]
snaps = numpy.array([max(paths.get_snapshots(nsim, "csiborg"))
for nsim in ics])
def main(n):
sort_particle_membership(ics[n], snaps[n], args.method)
work_delegation(main, list(range(len(ics))), MPI.COMM_WORLD)

@@ -1,114 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to sort the initial snapshot particles according to their final
snapshot ordering, which is sorted by the halo IDs.
Ensures the following units:
- Positions in box units.
- Masses in :math:`M_\odot / h`.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
import h5py
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
import csiborgtools
from utils import get_nsims
def _main(nsim, simname, verbose):
"""
Sort the initial snapshot particles according to their final snapshot
ordering and dump them into a HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
print(f"{datetime.now()}: processing simulation `{nsim}`.", flush=True)
# We first load the particle IDs in the final snapshot.
pidf = csiborgtools.read.read_h5(paths.particles(nsim, simname))
pidf = pidf["particle_ids"]
    # Then we load the particles in the initial snapshot and make sure that
# their particle IDs are sorted as in the final snapshot. Again, because of
# precision this must be read as structured.
if simname == "csiborg":
pars_extract = ["x", "y", "z", "M", "ID"]
# CSiBORG's initial snapshot ID
nsnap = 1
else:
pars_extract = None
# Use this to point the reader to the ICs snapshot
nsnap = -1
part0, pid0 = partreader.read_particle(
nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
# In CSiBORG we need to convert particle masses from box units.
if simname == "csiborg":
box = csiborgtools.read.CSiBORGBox(
max(paths.get_snapshots(nsim, simname)), nsim, paths)
part0[:, 3] = box.box2solarmass(part0[:, 3])
# Quijote's initial snapshot information also contains velocities but we
# don't need those.
if simname == "quijote":
part0 = part0[:, [0, 1, 2, 6]]
    # In Quijote some particles are positioned precisely at the edge of the
    # box. Move them to be just inside.
pos = part0[:, :3]
mask = pos >= 1
if numpy.any(mask):
spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-5
pos[mask] -= spacing
    # First sort by the initial PIDs, then apply the inverse permutation of
    # the final snapshot's PID ordering.
part0 = part0[numpy.argsort(pid0)]
del pid0
collect()
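    # argsort(argsort(pidf)) is the rank of each final-snapshot PID, i.e. the
    # inverse permutation, so part0 ends up in the final snapshot's ordering.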
part0 = part0[numpy.argsort(numpy.argsort(pidf))]
fout = paths.initmatch(nsim, simname, "particles")
if verbose:
print(f"{datetime.now()}: dumping particles for `{nsim}` to `{fout}`",
flush=True)
with h5py.File(fout, "w") as f:
f.create_dataset("particles", data=part0)
if __name__ == "__main__":
# Argument parser
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def main(nsim):
_main(nsim, args.simname, MPI.COMM_WORLD.Get_size() == 1)
work_delegation(main, nsims, MPI.COMM_WORLD)