Add pynbody and other support (#92)

* Simplify box units

* Move old scripts

* Add printing

* Update readers

* Disable boundscheck

* Add new ordering

* Clean up imports

* Enforce dtype and add mass to quijote

* Simplify print statements

* Fix little typos

* Fix key bug

* Bug fixing

* Delete boring comments

* Improve ultimate clumps for PHEW

* Delete boring comments

* Add basic reading

* Remove 0th index HID

* Add flipping of X and Z

* Updates to halo catalogues

* Add ordered caching

* Fix flipping

* Add new flags

* Fix PHEW empty clumps

* Stop over-writing

* Little improvements to angular neighbours

* Add catalogue masking

* Change if-else statements

* Cache only filtered data

* Add PHEW cats

* Add comments

* Sort imports

* Get Quijote working

* Docs

* Add HMF calculation

* Move to old

* Fix angular

* Add great circle distance

* Update imports

* Update imports

* Update docs

* Remove unused import

* Fix a quick bug

* Update compatibility

* Rename files

* Renaming

* Improve compatibility

* Rename snapshot

* Fix snapshot bug

* Update interface

* Finish updating interface

* Update all paths

* Add old scripts

* Add basic halo

* Update imports

* Improve snapshot processing

* Update ordering

* Fix how CM positions are accessed

* Add merger paths

* Add imports

* Add merger reading

* Add making a merger tree

* Add a basic merger tree reader

* Add imports

* Add main branch walking + comments + debugging

* Get tree running

* Add working merger tree walking along main branch

* Add units conversion for merger data

* Add hid_to_array_index

* Update merger tree

* Add mergertree mass to PHEWcat

* Edit comments

* Add this to track changes...

* Fix a little bug

* Add mergertree mass

* Add cache clearing

* Improve summing substructure code

* Little bug

* Little updates to the merger tree reader

* Update .gitignore

* Add box selection

* Add optional deleting of a group

* add to keep track of changes

* Update changes

* Remove

* Add manual tracker

* Fix bug

* Add m200c_to_r200c

* Add manual halo tracking

* Remove skipped snapshots

* update cosmo params to match csiborg

* remove old comments

* Add SDSSxALFALFA

* Fix bugs

* Rename

* Edit paths

* Updates

* Add comments

* Add comment

* Add hour conversion

* Add imports

* Add new observation class

* Add selection

* Add imports

* Fix small bug

* Add field copying for safety

* Add matching to survey without masking

* Add P(k) calculation

* Add nb

* Edit comment

* Move files

* Remove merger import

* Edit setup.py

* Fix typo

* Edit import warnings

* update nb

* Update README

* Update README

* Update README

* Add skeleton

* Add skeleton
Richard Stiskalek, 2023-12-07 14:23:32 +00:00 (committed by GitHub)
parent 5500fbd2b9
commit e972f8e3f2
53 changed files with 4627 additions and 1774 deletions

.gitignore

@@ -24,3 +24,5 @@ scripts_plots/submit.sh
 scripts_plots/*.out
 scripts_plots/*.sh
 notebooks/test.ipynb
+scripts/mgtree.py
+scripts/makemerger.py

README.md

@@ -1,3 +1,22 @@
 # CSiBORG Tools
-A compendium of tools for analysing the suite of Constrained Simulations in BORG (CSiBORG) simulations.
+Tools for analysing the suite of Constrained Simulations in BORG (CSiBORG) simulations. The interface is designed to work with the following suites of simulations:
+- CSiBORG1 dark matter-only RAMSES simulations (full support),
+- CSiBORG2 dark matter-only Gadget4 simulations (planned full support),
+- Quijote dark matter-only Gadget2 simulations (partial support).
+
+However, with little effort it can support other simulations as well.
+
+## TODO
+- [ ] Add full support for the CSiBORG2 suite of simulations.
+- [ ] Add SPH field calculation from cosmotools.
+
+## Adding a new simulation suite
+- box units
+- paths
+- readsim
+- halo_cat
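For orientation, a hypothetical sketch of the first item, `box units`: a new suite subclasses `BaseBox` from `csiborgtools/read/box_units.py` (its diff appears further below) and supplies the unit conversions. The class name and numerical values here are placeholders, not part of the package.

import numpy
from csiborgtools.read.box_units import BaseBox


class MySuiteBox(BaseBox):
    """Unit conversions for a hypothetical `mysuite` simulation."""

    @property
    def boxsize(self):
        return 1000.0  # cMpc / h, placeholder value

    def box2mpc(self, length):
        return length * self.boxsize

    def mpc2box(self, length):
        return length / self.boxsize

    def box2solarmass(self, mass):
        return mass * 1e15  # placeholder total-mass normalisation

    def solarmass2box(self, mass):
        return mass / 1e15

    def m200c_to_r200c(self, m200c):
        # Invert M200c = (4/3) * pi * R^3 * 200 * rho_crit; requires
        # `self._cosmo` to have been set by the reader.
        rho_crit = self.cosmo.critical_density0.to_value("Msun / Mpc3")
        return (3 * m200c / (4 * numpy.pi * 200 * rho_crit))**(1 / 3)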

csiborgtools/__init__.py

@@ -12,12 +12,12 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-from csiborgtools import clustering, field, match, read, summary  # noqa
-from .utils import (center_of_mass, delta2ncells, number_counts,  # noqa
-                    periodic_distance, periodic_distance_two_points,  # noqa
-                    binned_statistic, cosine_similarity)  # noqa
+from csiborgtools import clustering, field, halo, match, read, summary  # noqa
+from .utils import (center_of_mass, delta2ncells, number_counts,  # noqa
+                    periodic_distance, periodic_distance_two_points,  # noqa
+                    binned_statistic, cosine_similarity, fprint,  # noqa
+                    hms_to_degrees, dms_to_degrees, great_circle_distance)  # noqa

 # Arguments to csiborgtools.read.Paths.
 paths_glamdring = {"srcdir": "/mnt/extraspace/hdesmond/",
@@ -46,5 +46,34 @@ class SDSS:
         (lambda x: cls[x] < 155, ("DIST", ))
         ]

-    def __call__(self):
-        return read.SDSS(h=1, sel_steps=self.steps)
+    def __call__(self, fpath=None, apply_selection=True):
+        if fpath is None:
+            fpath = "/mnt/extraspace/rstiskalek/catalogs/nsa_v1_0_1.fits"
+        sel_steps = self.steps if apply_selection else None
+        return read.SDSS(fpath, h=1, sel_steps=sel_steps)
+
+
+class SDSSxALFALFA:
+    @staticmethod
+    def steps(cls):
+        return [(lambda x: cls[x], ("IN_DR7_LSS",)),
+                (lambda x: cls[x] < 17.6, ("ELPETRO_APPMAG_r", )),
+                (lambda x: cls[x] < 155, ("DIST", ))
+                ]
+
+    def __call__(self, fpath=None, apply_selection=True):
+        if fpath is None:
+            fpath = "/mnt/extraspace/rstiskalek/catalogs/5asfullmatch.fits"
+        sel_steps = self.steps if apply_selection else None
+        return read.SDSS(fpath, h=1, sel_steps=sel_steps)
+
+
+###############################################################################
+#                                 Clusters                                    #
+###############################################################################
+
+clusters = {"Virgo": read.ObservedCluster(RA=hms_to_degrees(12, 27),
+                                          dec=dms_to_degrees(12, 43),
+                                          dist=16.5 * 0.7,
+                                          name="Virgo"),
+            }
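A short usage sketch of the new survey callables and the cluster registry defined above. The default catalogue paths are specific to the host machine, so `fpath` may need to be passed explicitly elsewhere.

import csiborgtools

survey = csiborgtools.SDSSxALFALFA()(apply_selection=True)
virgo = csiborgtools.clusters["Virgo"]
print(virgo.spherical_pos)  # [dist in Mpc / h, RA in deg, dec in deg]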

csiborgtools/field/__init__.py

@@ -15,12 +15,11 @@
 from warnings import warn

 try:
     import MAS_library as MASL  # noqa
-    from .density import (DensityField, PotentialField,  # noqa
-                          TidalTensorField, VelocityField)
+    from .density import (DensityField, PotentialField, TidalTensorField,  # noqa
+                          VelocityField, power_spectrum)  # noqa
     from .interp import (evaluate_cartesian, evaluate_sky, field2rsp,  # noqa
                          fill_outside, make_sky, observer_vobs)  # noqa
     from .utils import nside2radec, smoothen_field  # noqa
 except ImportError:
-    warn("MAS_library not found, `DensityField` will not be available", UserWarning)  # noqa
+    warn("MAS_library not found, `DensityField` and related Pylians-based routines will not be available")  # noqa

csiborgtools/field/density.py

@@ -18,6 +18,7 @@ Density field and cross-correlation calculations.
 from abc import ABC

 import MAS_library as MASL
+import Pk_library as PKL
 import numpy
 from numba import jit
 from tqdm import trange
@@ -33,13 +34,7 @@ class BaseField(ABC):
     @property
     def box(self):
-        """
-        Simulation box information and transformations.
-
-        Returns
-        -------
-        :py:class:`csiborgtools.units.CSiBORGBox`
-        """
+        """Simulation box information and transformations."""
         return self._box

     @box.setter
@@ -52,13 +47,7 @@ class BaseField(ABC):
     @property
     def MAS(self):
-        """
-        Mass-assignment scheme.
-
-        Returns
-        -------
-        str
-        """
+        """Mass-assignment scheme."""
         if self._MAS is None:
             raise ValueError("`MAS` is not set.")
         return self._MAS
@@ -103,7 +92,6 @@ class DensityField(BaseField):
         Calculate the overdensity field from the density field.
         Defined as :math:`\rho/ <\rho> - 1`. Overwrites the input array.
-
         Parameters
         ----------
         delta : 3-dimensional array of shape `(grid, grid, grid)`
@@ -117,7 +105,7 @@ class DensityField(BaseField):
         delta -= 1
         return delta

-    def __call__(self, parts, grid, flip_xz=True, nbatch=30, verbose=True):
+    def __call__(self, pos, mass, grid, nbatch=30, verbose=True):
         """
         Calculate the density field using a Pylians routine [1, 2].
         Iteratively loads the particles into memory, flips their `x` and `z`
@@ -126,13 +114,12 @@ class DensityField(BaseField):
         Parameters
         ----------
-        parts : 2-dimensional array of shape `(n_parts, 7)`
-            Particle positions, velocities and masses.
-            Columns are: `x`, `y`, `z`, `vx`, `vy`, `vz`, `M`.
+        pos : 2-dimensional array of shape `(n_parts, 3)`
+            Particle positions.
+        mass : 1-dimensional array of shape `(n_parts,)`
+            Particle masses.
         grid : int
             Grid size.
-        flip_xz : bool, optional
-            Whether to flip the `x` and `z` coordinates.
         nbatch : int, optional
             Number of batches to split the particle loading into.
         verbose : bool, optional
@@ -150,24 +137,20 @@ class DensityField(BaseField):
         """
         rho = numpy.zeros((grid, grid, grid), dtype=numpy.float32)

-        nparts = parts.shape[0]
+        nparts = pos.shape[0]
         batch_size = nparts // nbatch
         start = 0

         for __ in trange(nbatch + 1, disable=not verbose,
                          desc="Loading particles for the density field"):
             end = min(start + batch_size, nparts)
-            pos = parts[start:end]
-            pos, vel, mass = pos[:, :3], pos[:, 3:6], pos[:, 6]
-
-            pos = force_single_precision(pos)
-            vel = force_single_precision(vel)
-            mass = force_single_precision(mass)
-            if flip_xz:
-                pos[:, [0, 2]] = pos[:, [2, 0]]
-                vel[:, [0, 2]] = vel[:, [2, 0]]
+            batch_pos = pos[start:end]
+            batch_mass = mass[start:end]
+
+            batch_pos = force_single_precision(batch_pos)
+            batch_mass = force_single_precision(batch_mass)

-            MASL.MA(pos, rho, 1., self.MAS, W=mass, verbose=False)
+            MASL.MA(batch_pos, rho, 1., self.MAS, W=batch_mass, verbose=False)
             if end == nparts:
                 break
             start = end
@@ -178,8 +161,105 @@ class DensityField(BaseField):

         return rho

+
+# class SPHDensityVelocity(BaseField):
+#     r"""
+#     Density field calculation. Based primarily on routines of Pylians [1].
+#
+#     Parameters
+#     ----------
+#     box : :py:class:`csiborgtools.read.CSiBORGBox`
+#         The simulation box information and transformations.
+#     MAS : str
+#         Mass assignment scheme. Options are: 'NGP' (nearest grid
+#         point), 'CIC' (cloud-in-cell), 'TSC' (triangular-shape cloud), 'PCS'
+#         (piecewise cubic spline).
+#     paths : :py:class:`csiborgtools.read.Paths`
+#         The simulation paths.
+#
+#     References
+#     ----------
+#     [1] https://pylians3.readthedocs.io/
+#     """
+#
+#     def __init__(self, box, MAS):
+#         self.box = box
+#         self.MAS = MAS
+#
+#     def overdensity_field(self, delta):
+#         r"""
+#         Calculate the overdensity field from the density field.
+#         Defined as :math:`\rho/ <\rho> - 1`. Overwrites the input array.
+#
+#         Parameters
+#         ----------
+#         delta : 3-dimensional array of shape `(grid, grid, grid)`
+#             The density field.
+#
+#         Returns
+#         -------
+#         3-dimensional array of shape `(grid, grid, grid)`.
+#         """
+#         delta /= delta.mean()
+#         delta -= 1
+#         return delta
+#
+#     def __call__(self, pos, mass, grid, nbatch=30, verbose=True):
+#         """
+#         Calculate the density field using a Pylians routine [1, 2].
+#         Iteratively loads the particles into memory, flips their `x` and `z`
+#         coordinates. Particles are assumed to be in box units, with positions
+#         in [0, 1].
+#
+#         Parameters
+#         ----------
+#         pos : 2-dimensional array of shape `(n_parts, 3)`
+#             Particle positions.
+#         mass : 1-dimensional array of shape `(n_parts,)`
+#             Particle masses.
+#         grid : int
+#             Grid size.
+#         nbatch : int, optional
+#             Number of batches to split the particle loading into.
+#         verbose : bool, optional
+#             Verbosity flag.
+#
+#         Returns
+#         -------
+#         3-dimensional array of shape `(grid, grid, grid)`.
+#
+#         References
+#         ----------
+#         [1] https://pylians3.readthedocs.io/
+#         [2] https://github.com/franciscovillaescusa/Pylians3/blob/master
+#             /library/MAS_library/MAS_library.pyx
+#         """
+#         rho = numpy.zeros((grid, grid, grid), dtype=numpy.float32)
+#
+#         nparts = pos.shape[0]
+#         batch_size = nparts // nbatch
+#         start = 0
+#
+#         for __ in trange(nbatch + 1, disable=not verbose,
+#                          desc="Loading particles for the density field"):
+#             end = min(start + batch_size, nparts)
+#             batch_pos = pos[start:end]
+#             batch_mass = mass[start:end]
+#
+#             batch_pos = force_single_precision(batch_pos)
+#             batch_mass = force_single_precision(batch_mass)
+#
+#             MASL.MA(batch_pos, rho, 1., self.MAS, W=batch_mass, verbose=False)
+#             if end == nparts:
+#                 break
+#             start = end
+#
+#         # Divide by the cell volume in (kpc / h)^3
+#         rho /= (self.box.boxsize / grid * 1e3)**3
+#
+#         return rho
+

 ###############################################################################
-#                         Density field calculation                           #
+#                        Velocity field calculation                           #
 ###############################################################################
@@ -242,7 +322,7 @@ class VelocityField(BaseField):
                   / numpy.sqrt(px**2 + py**2 + pz**2))
         return radvel

-    def __call__(self, parts, grid, flip_xz=True, nbatch=30,
+    def __call__(self, pos, vel, mass, grid, flip_xz=True, nbatch=30,
                  verbose=True):
         """
         Calculate the velocity field using a Pylians routine [1, 2].
@@ -251,9 +331,12 @@ class VelocityField(BaseField):
         Parameters
         ----------
-        parts : 2-dimensional array of shape `(n_parts, 7)`
-            Particle positions, velocities and masses.
-            Columns are: `x`, `y`, `z`, `vx`, `vy`, `vz`, `M`.
+        pos : 2-dimensional array of shape `(n_parts, 3)`
+            Particle positions.
+        vel : 2-dimensional array of shape `(n_parts, 3)`
+            Particle velocities.
+        mass : 1-dimensional array of shape `(n_parts,)`
+            Particle masses.
         grid : int
             Grid size.
         flip_xz : bool, optional
@@ -273,26 +356,26 @@ class VelocityField(BaseField):
         [2] https://github.com/franciscovillaescusa/Pylians3/blob/master
             /library/MAS_library/MAS_library.pyx
         """
-        rho_velx = numpy.zeros((grid, grid, grid), dtype=numpy.float32)
-        rho_vely = numpy.zeros((grid, grid, grid), dtype=numpy.float32)
-        rho_velz = numpy.zeros((grid, grid, grid), dtype=numpy.float32)
-        rho_vel = [rho_velx, rho_vely, rho_velz]
+        rho_vel = [numpy.zeros((grid, grid, grid), dtype=numpy.float32),
+                   numpy.zeros((grid, grid, grid), dtype=numpy.float32),
+                   numpy.zeros((grid, grid, grid), dtype=numpy.float32),
+                   ]
         cellcounts = numpy.zeros((grid, grid, grid), dtype=numpy.float32)

-        nparts = parts.shape[0]
+        nparts = pos.shape[0]
         batch_size = nparts // nbatch
         start = 0
         for __ in trange(nbatch + 1) if verbose else range(nbatch + 1):
             end = min(start + batch_size, nparts)
-            pos = parts[start:end]
-            pos, vel, mass = pos[:, :3], pos[:, 3:6], pos[:, 6]
-
-            pos = force_single_precision(pos)
-            vel = force_single_precision(vel)
-            mass = force_single_precision(mass)
-            if flip_xz:
-                pos[:, [0, 2]] = pos[:, [2, 0]]
-                vel[:, [0, 2]] = vel[:, [2, 0]]
+
+            batch_pos = pos[start:end]
+            batch_vel = vel[start:end]
+            batch_mass = mass[start:end]
+
+            batch_pos = force_single_precision(batch_pos)
+            batch_vel = force_single_precision(batch_vel)
+            batch_mass = force_single_precision(batch_mass)

             vel *= mass.reshape(-1, 1)
             for i in range(3):
@@ -308,7 +391,7 @@ class VelocityField(BaseField):
         for i in range(3):
             divide_nonzero(rho_vel[i], cellcounts)

-        return numpy.stack([rho_velx, rho_vely, rho_velz])
+        return numpy.stack(rho_vel)
@@ -505,3 +588,35 @@ def eigenvalues_to_environment(eigvals, th):
     else:
         env[i, j, k] = 3
     return env
+
+
+###############################################################################
+#                       Power spectrum calculation                            #
+###############################################################################
+
+
+def power_spectrum(delta, boxsize, MAS, threads=1, verbose=True):
+    """
+    Calculate the monopole power spectrum of the density field.
+
+    Parameters
+    ----------
+    delta : 3-dimensional array of shape `(grid, grid, grid)`
+        The over-density field.
+    boxsize : float
+        The simulation box size in `Mpc / h`.
+    MAS : str
+        Mass assignment scheme used to calculate the density field.
+    threads : int, optional
+        Number of threads to use.
+    verbose : bool, optional
+        Verbosity flag.
+
+    Returns
+    -------
+    k, Pk : 1-dimensional arrays of shape `(grid,)`
+        The wavenumbers and the power spectrum.
+    """
+    axis = 2  # Axis along which to compute the quadrupole and hexadecapole
+    Pk = PKL.Pk(delta, boxsize, axis, MAS, threads, verbose)
+    return Pk.k3D, Pk.Pk[:, 0]
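A sketch of how the new `power_spectrum` helper chains onto `DensityField`. It requires the Pylians `MAS_library`/`Pk_library`; the particle arrays, grid, and box size below are stand-ins, and `box` is a `CSiBORGBox`-like object whose construction is omitted since its arguments are machine-specific.

import numpy
import csiborgtools

# Stand-in particle data in box units, positions in [0, 1].
pos = numpy.random.uniform(0, 1, (10_000, 3))
mass = numpy.ones(len(pos))

density_gen = csiborgtools.field.DensityField(box, MAS="PCS")
rho = density_gen(pos, mass, grid=256, verbose=False)
delta = density_gen.overdensity_field(rho)

k, Pk = csiborgtools.field.power_spectrum(delta, boxsize=677.7, MAS="PCS")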

csiborgtools/field/interp.py

@@ -98,9 +98,12 @@ def evaluate_sky(*fields, pos, mpc2box, smooth_scales=None, verbose=False):
     -------
     (list of) 1-dimensional array of shape `(n_samples, len(smooth_scales))`
     """
+    # Make a copy of the positions to avoid modifying the input.
+    pos = numpy.copy(pos)
+
     pos = force_single_precision(pos)
     pos[:, 0] *= mpc2box

     cart_pos = radec_to_cartesian(pos) + 0.5

     if smooth_scales is not None:

csiborgtools/halo/__init__.py

@@ -0,0 +1,16 @@
+# Copyright (C) 2023 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+from .prop import density_profile  # noqa

csiborgtools/halo/prop.py (new file)

@@ -0,0 +1,46 @@
+# Copyright (C) 2023 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+import numpy
+from scipy.stats import binned_statistic
+
+from ..utils import periodic_distance
+
+
+def density_profile(pos, mass, center, nbins, boxsize):
+    """
+    Calculate a density profile.
+    """
+    raise NotImplementedError("Not implemented yet..")
+
+    rdist = periodic_distance(pos, center, boxsize)
+
+    rmin, rmax = numpy.min(rdist), numpy.max(rdist)
+    bin_edges = numpy.logspace(numpy.log10(rmin), numpy.log10(rmax), nbins)
+
+    rho, __, __ = binned_statistic(rdist, mass, statistic='sum',
+                                   bins=bin_edges)
+    rho /= 4. / 3 * numpy.pi * (bin_edges[1:]**3 - bin_edges[:-1]**3)
+
+    print(bin_edges)
+
+    r = 0.5 * (bin_edges[1:] + bin_edges[:-1])
+    # r = numpy.sqrt(bin_edges[:1] * bin_edges[:-1])
+
+    return r, rho

csiborgtools/match/__init__.py

@@ -12,7 +12,5 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-from .match import (ParticleOverlap, RealisationsMatcher,  # noqa
-                    calculate_overlap, calculate_overlap_indxs, pos2cell,  # noqa
-                    find_neighbour, get_halo_cell_limits,  # noqa
-                    matching_max)  # noqa
+from .match import (ParticleOverlap, RealisationsMatcher, calculate_overlap,  # noqa
+                    pos2cell, find_neighbour, matching_max)  # noqa

csiborgtools/match/match.py

@@ -13,7 +13,8 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 """
-Support for matching halos between CSiBORG IC realisations.
+Support for matching halos between CSiBORG IC realisations based on their
+Lagrangian patch overlap.
 """
 from abc import ABC
 from datetime import datetime
@@ -21,30 +22,19 @@ from functools import lru_cache
 from math import ceil

 import numpy
-from scipy.ndimage import gaussian_filter
 from numba import jit
+from scipy.ndimage import gaussian_filter
 from tqdm import tqdm, trange

-from ..read import load_halo_particles
-

 class BaseMatcher(ABC):
-    """
-    Base class for `RealisationsMatcher` and `ParticleOverlap`.
-    """
+    """Base class for `RealisationsMatcher` and `ParticleOverlap`."""
     _box_size = None
     _bckg_halfsize = None

     @property
     def box_size(self):
-        """
-        Number of cells in the box.
-
-        Returns
-        -------
-        box_size : int
-        """
+        """Number of cells in the box."""
         if self._box_size is None:
             raise RuntimeError("`box_size` has not been set.")
         return self._box_size
@@ -64,10 +54,6 @@ class BaseMatcher(ABC):
         grid distance from the center of the box to each side over which to
         evaluate the background density field. Must be less than or equal to
         half the box size.
-
-        Returns
-        -------
-        bckg_halfsize : int
         """
         if self._bckg_halfsize is None:
             raise RuntimeError("`bckg_halfsize` has not been set.")
@@ -130,10 +116,6 @@ class RealisationsMatcher(BaseMatcher):
         """
         Multiplier of the sum of the initial Lagrangian patch sizes of a halo
         pair. Determines the range within which neighbors are returned.
-
-        Returns
-        -------
-        nmult : float
         """
         return self._nmult
@@ -148,10 +130,6 @@ class RealisationsMatcher(BaseMatcher):
         """
         Tolerance on the absolute logarithmic mass difference of potential
         matches.
-
-        Returns
-        -------
-        float
         """
         return self._dlogmass
@@ -166,10 +144,6 @@ class RealisationsMatcher(BaseMatcher):
         """
         Mass kind whose similarity is to be checked. Must be a valid key in the
         halo catalogue.
-
-        Returns
-        -------
-        str
         """
         return self._mass_kind
@@ -181,17 +155,10 @@ class RealisationsMatcher(BaseMatcher):
     @property
     def overlapper(self):
-        """
-        The overlapper object.
-
-        Returns
-        -------
-        :py:class:`csiborgtools.match.ParticleOverlap`
-        """
+        """The overlapper object."""
         return self._overlapper

-    def cross(self, cat0, catx, particles0, particlesx, halo_map0, halo_mapx,
-              delta_bckg, cache_size=10000, verbose=True):
+    def cross(self, cat0, catx, delta_bckg, cache_size=10000, verbose=True):
         r"""
         Find all neighbours whose CM separation is less than `nmult` times the
         sum of their initial Lagrangian patch sizes and calculate their
@@ -204,16 +171,6 @@ class RealisationsMatcher(BaseMatcher):
             Halo catalogue of the reference simulation.
         catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
             Halo catalogue of the cross simulation.
-        particles0 : 2-dimensional array
-            Particles archive file of the reference simulation. The columns
-            must be `x`, `y`, `z` and `M`.
-        particlesx : 2-dimensional array
-            Particles archive file of the cross simulation. The columns must be
-            `x`, `y`, `z` and `M`.
-        halo_map0 : 2-dimensional array
-            Halo map of the reference simulation.
-        halo_mapx : 2-dimensional array
-            Halo map of the cross simulation.
         delta_bckg : 3-dimensional array
             Summed background density field of the reference and cross
             simulations calculated with particles assigned to halos at the
@@ -250,14 +207,11 @@ class RealisationsMatcher(BaseMatcher):
                 aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
                 match_indxs[i] = match_indxs[i][aratio < self.dlogmass]

-        hid2map0 = {hid: i for i, hid in enumerate(halo_map0[:, 0])}
-        hid2mapx = {hid: i for i, hid in enumerate(halo_mapx[:, 0])}
-
         # We will cache the halos from the cross simulation to speed up the I/O
         @lru_cache(maxsize=cache_size)
         def load_cached_halox(hid):
-            return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx,
-                                       nshift=0, ncells=self.box_size)
+            return load_processed_halo(hid, catx, nshift=0,
+                                       ncells=self.box_size)

         iterator = tqdm(
             cat0["index"],
@@ -273,8 +227,7 @@ class RealisationsMatcher(BaseMatcher):
             # Next, we find this halo's particles, total mass, minimum and
             # maximum cells and convert positions to cells.
             pos0, mass0, totmass0, mins0, maxs0 = load_processed_halo(
-                k0, particles0, halo_map0, hid2map0, nshift=0,
-                ncells=self.box_size)
+                k0, cat0, nshift=0, ncells=self.box_size)

             # We now loop over matches of this halo and calculate their
             # overlap, storing them in `_cross`.
@@ -298,9 +251,8 @@ class RealisationsMatcher(BaseMatcher):
         cross = numpy.asanyarray(cross, dtype=object)
         return match_indxs, cross

-    def smoothed_cross(self, cat0, catx, particles0, particlesx, halo_map0,
-                       halo_mapx, delta_bckg, match_indxs, smooth_kwargs,
-                       cache_size=10000, verbose=True):
+    def smoothed_cross(self, cat0, catx, delta_bckg, match_indxs,
+                       smooth_kwargs, cache_size=10000, verbose=True):
         r"""
         Calculate the smoothed overlaps for pairs previously identified via
         `self.cross(...)` to have a non-zero NGP overlap.
@@ -311,16 +263,6 @@ class RealisationsMatcher(BaseMatcher):
             Halo catalogue of the reference simulation.
         catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
             Halo catalogue of the cross simulation.
-        particles0 : 2-dimensional array
-            Particles archive file of the reference simulation. The columns
-            must be `x`, `y`, `z` and `M`.
-        particlesx : 2-dimensional array
-            Particles archive file of the cross simulation. The columns must be
-            `x`, `y`, `z` and `M`.
-        halo_map0 : 2-dimensional array
-            Halo map of the reference simulation.
-        halo_mapx : 2-dimensional array
-            Halo map of the cross simulation.
         delta_bckg : 3-dimensional array
             Smoothed summed background density field of the reference and cross
             simulations calculated with particles assigned to halos at the
@@ -339,13 +281,11 @@ class RealisationsMatcher(BaseMatcher):
         overlaps : 1-dimensional array of arrays
         """
         nshift = read_nshift(smooth_kwargs)
-        hid2map0 = {hid: i for i, hid in enumerate(halo_map0[:, 0])}
-        hid2mapx = {hid: i for i, hid in enumerate(halo_mapx[:, 0])}

         @lru_cache(maxsize=cache_size)
         def load_cached_halox(hid):
-            return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx,
-                                       nshift=nshift, ncells=self.box_size)
+            return load_processed_halo(hid, catx, nshift=nshift,
+                                       ncells=self.box_size)

         iterator = tqdm(
             cat0["index"],
@@ -355,8 +295,7 @@ class RealisationsMatcher(BaseMatcher):
         cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
         for i, k0 in enumerate(iterator):
             pos0, mass0, __, mins0, maxs0 = load_processed_halo(
-                k0, particles0, halo_map0, hid2map0, nshift=nshift,
-                ncells=self.box_size)
+                k0, cat0, nshift=nshift, ncells=self.box_size)

             # Now loop over the matches and calculate the smoothed overlap.
             _cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32)
@@ -396,8 +335,7 @@ class ParticleOverlap(BaseMatcher):
         self.box_size = box_size
         self.bckg_halfsize = bckg_halfsize

-    def make_bckg_delta(self, particles, halo_map, hid2map, halo_cat,
-                        delta=None, verbose=False):
+    def make_bckg_delta(self, cat, delta=None, verbose=False):
         """
         Calculate a NGP density field of particles belonging to halos of a
         halo catalogue `halo_cat`. Particles are only counted within the
@@ -406,15 +344,8 @@ class ParticleOverlap(BaseMatcher):
         Parameters
         ----------
-        particles : 2-dimensional array
-            Particles archive file. The columns must be `x`, `y`, `z` and `M`.
-        halo_map : 2-dimensional array
-            Array containing start and end indices in the particle array
-            corresponding to each halo.
-        hid2map : dict
-            Dictionary mapping halo IDs to `halo_map` array positions.
-        halo_cat : instance of :py:class:`csiborgtools.read.BaseCatalogue`
-            Halo catalogue.
+        cat : instance of :py:class:`csiborgtools.read.BaseCatalogue`
+            Halo catalogue of the reference simulation.
         delta : 3-dimensional array, optional
             Array to store the density field. If `None` a new array is
             created.
@@ -436,16 +367,17 @@ class ParticleOverlap(BaseMatcher):
                 & (delta.dtype == numpy.float32))

         iterator = tqdm(
-            halo_cat["index"],
+            cat["index"],
             desc=f"{datetime.now()} Calculating the background field",
             disable=not verbose
         )
         for hid in iterator:
-            pos = load_halo_particles(hid, particles, halo_map, hid2map)
+            pos = cat.halo_particles(hid, "pos", in_initial=True)
             if pos is None:
                 continue

-            pos, mass = pos[:, :3], pos[:, 3]
+            mass = cat.halo_particles(hid, "mass", in_initial=True)
+
             pos = pos2cell(pos, self.box_size)

             # We mask out particles outside the cubical high-resolution region
@@ -874,7 +806,7 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
     return intersect / (mass1 + mass2 - intersect)


-def load_processed_halo(hid, particles, halo_map, hid2map, ncells, nshift):
+def load_processed_halo(hid, cat, ncells, nshift):
     """
     Load a processed halo from the `.h5` file. This is to be wrapped by a
     cacher.
@@ -883,14 +815,8 @@ def load_processed_halo(hid, particles, halo_map, hid2map, ncells, nshift):
     ----------
     hid : int
         Halo ID.
-    particles : 2-dimensional array
-        Array of particles in box units. The columns must be `x`, `y`, `z`
-        and `M`.
-    halo_map : 2-dimensional array
-        Array containing start and end indices in the particle array
-        corresponding to each halo.
-    hid2map : dict
-        Dictionary mapping halo IDs to `halo_map` array positions.
+    cat : instance of :py:class:`csiborgtools.read.BaseCatalogue`
+        Halo catalogue.
     ncells : int
         Number of cells in the box density field.
     nshift : int
@@ -909,8 +835,8 @@ def load_processed_halo(hid, particles, halo_map, hid2map, ncells, nshift):
     maxs : len-3 tuple
         Maximum cell indices of the halo.
     """
-    pos = load_halo_particles(hid, particles, halo_map, hid2map)
-    pos, mass = pos[:, :3], pos[:, 3]
+    pos = cat.halo_particles(hid, "pos", in_initial=True)
+    mass = cat.halo_particles(hid, "mass", in_initial=True)
+
     pos = pos2cell(pos, ncells)
     mins, maxs = get_halo_cell_limits(pos, ncells=ncells, nshift=nshift)
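A condensed sketch of the catalogue-based matching flow after this refactor: the particle arrays, halo maps, and `hid2map` dictionaries are gone, and everything is read through the catalogue objects. The constructor arguments shown are assumptions based on the properties visible in the diff; `cat0` and `catx` stand for two `csiborgtools.read.CSiBORGCatalogue` instances whose construction is omitted.

import csiborgtools

overlapper = csiborgtools.match.ParticleOverlap(box_size=2048,
                                                bckg_halfsize=512)

# The background field accumulates particles from both catalogues.
delta_bckg = overlapper.make_bckg_delta(cat0, verbose=True)
delta_bckg = overlapper.make_bckg_delta(catx, delta=delta_bckg, verbose=True)

matcher = csiborgtools.match.RealisationsMatcher(box_size=2048,
                                                 bckg_halfsize=512)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, delta_bckg)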

csiborgtools/read/__init__.py

@@ -12,9 +12,12 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 from .box_units import CSiBORGBox, QuijoteBox  # noqa
-from .halo_cat import CSiBORGHaloCatalogue, QuijoteHaloCatalogue, fiducial_observers  # noqa
-from .obs import SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, TwoMPPGroups  # noqa
+from .halo_cat import (CSiBORGCatalogue, QuijoteCatalogue,  # noqa
+                       CSiBORGPHEWCatalogue, fiducial_observers)  # noqa
+from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies,  # noqa
+                  TwoMPPGroups, ObservedCluster, match_array_to_no_masking)  # noqa
 from .paths import Paths  # noqa
-from .readsim import MmainReader, CSiBORGReader, QuijoteReader, halfwidth_mask, load_halo_particles  # noqa
+from .readsim import (CSiBORGReader, QuijoteReader, load_halo_particles,  # noqa
+                      make_halomap_dict)  # noqa
 from .utils import cols_to_structured, read_h5  # noqa

csiborgtools/read/box_units.py

@@ -17,6 +17,7 @@ Simulation box unit transformations.
 """
 from abc import ABC, abstractmethod, abstractproperty

+import numpy
 from astropy import constants, units
 from astropy.cosmology import LambdaCDM
@@ -28,80 +29,39 @@ from .readsim import CSiBORGReader, QuijoteReader

 class BaseBox(ABC):
-    """
-    Base class for box units.
-    """
     _name = "box_units"
     _cosmo = None

     @property
     def cosmo(self):
-        """
-        The box cosmology.
-
-        Returns
-        -------
-        cosmo : `astropy.cosmology.LambdaCDM`
-        """
         if self._cosmo is None:
             raise ValueError("Cosmology not set.")
         return self._cosmo

     @property
     def H0(self):
-        r"""
-        The Hubble parameter at the time of the snapshot in units of
-        :math:`\mathrm{km} \mathrm{s}^{-1} \mathrm{Mpc}^{-1}`.
-
-        Returns
-        -------
-        H0 : float
-        """
+        r"""Present Hubble parameter in :math:`\mathrm{km} \mathrm{s}^{-1} \mathrm{Mpc}^{-1}`."""
         return self.cosmo.H0.value

     @property
     def rho_crit0(self):
-        r"""
-        Present-day critical density in :math:`M_\odot h^2 / \mathrm{cMpc}^3`.
-
-        Returns
-        -------
-        rho_crit0 : float
-        """
+        """Present-day critical density in M_sun h^2 / cMpc^3."""
         rho_crit0 = self.cosmo.critical_density0
         return rho_crit0.to_value(units.solMass / units.Mpc**3)

     @property
     def h(self):
-        r"""
-        The little 'h' parameter at the time of the snapshot.
-
-        Returns
-        -------
-        h : float
-        """
+        """The little 'h' parameter at the time of the snapshot."""
         return self._h

     @property
     def Om0(self):
-        r"""
-        The matter density parameter.
-
-        Returns
-        -------
-        Om0 : float
-        """
+        """The present time matter density parameter."""
         return self.cosmo.Om0

     @abstractproperty
     def boxsize(self):
-        """
-        Box size in cMpc.
-
-        Returns
-        -------
-        boxsize : float
-        """
+        """Box size in cMpc."""
         pass

     @abstractmethod
@@ -116,8 +76,7 @@ class BaseBox(ABC):

         Returns
         -------
-        length : float
-            Length in box units.
+        float
         """
         pass
@@ -133,8 +92,7 @@ class BaseBox(ABC):

         Returns
         -------
-        length : float
-            Length in :math:`\mathrm{cMpc} / h`.
+        float
         """
         pass
@@ -150,8 +108,7 @@ class BaseBox(ABC):

         Returns
         -------
-        mass : float
-            Mass in box units.
+        float
         """
         pass
@@ -167,8 +124,23 @@ class BaseBox(ABC):

         Returns
         -------
-        mass : float
-            Mass in :math:`M_\odot / h`.
+        float
         """
         pass

+    @abstractmethod
+    def m200c_to_r200c(self, m200c):
+        """
+        Convert M200c to R200c in units of cMpc / h.
+
+        Parameters
+        ----------
+        m200c : float
+            M200c in units of M_sun / h.
+
+        Returns
+        -------
+        float
+        """
+        pass
+
@@ -248,6 +220,12 @@ class CSiBORGBox(BaseBox):
     def boxsize(self):
         return self.box2mpc(1.)

+    def m200c_to_r200c(self, m200c):
+        rho_crit = self.cosmo.critical_density(1 / self._aexp - 1)
+        rho_crit = rho_crit.to_value(units.solMass / units.Mpc**3)
+        r200c = (3 * m200c / (4 * numpy.pi * 200 * rho_crit))**(1 / 3)
+        return r200c / self._aexp
+

 ###############################################################################
 #                      Quijote fiducial cosmology box                         #
 ###############################################################################
@@ -256,7 +234,7 @@ class CSiBORGBox(BaseBox):

 class QuijoteBox(BaseBox):
     """
-    Quijote fiducial cosmology box.
+    Quijote cosmology box.

     Parameters
     ----------
@@ -289,33 +267,10 @@ class QuijoteBox(BaseBox):
         return length / self.boxsize

     def solarmass2box(self, mass):
-        r"""
-        Convert mass from :math:`M_\odot / h` to box units.
-
-        Parameters
-        ----------
-        mass : float
-            Mass in :math:`M_\odot`.
-
-        Returns
-        -------
-        mass : float
-            Mass in box units.
-        """
         return mass / self._info["TotMass"]

     def box2solarmass(self, mass):
-        r"""
-        Convert mass from box units to :math:`M_\odot / h`.
-
-        Parameters
-        ----------
-        mass : float
-            Mass in box units.
-
-        Returns
-        -------
-        mass : float
-            Mass in :math:`M_\odot / h`.
-        """
         return mass * self._info["TotMass"]
+
+    def m200c_to_r200c(self, m200c):
+        raise ValueError("Not implemented for Quijote boxes.")
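The `m200c_to_r200c` implementation above inverts the spherical-overdensity definition :math:`M_{200c} = \frac{4}{3} \pi R_{200c}^3 \cdot 200 \rho_c(z)`, then converts the physical radius to comoving via the division by `self._aexp`. A standalone check at :math:`z = 0`; the cosmology values here are illustrative, not the CSiBORG ones.

import numpy
from astropy import units
from astropy.cosmology import LambdaCDM

cosmo = LambdaCDM(H0=70.5, Om0=0.307, Ode0=0.693)
rho_crit = cosmo.critical_density(0).to_value(units.solMass / units.Mpc**3)

m200c = 1e14  # M_sun / h
r200c = (3 * m200c / (4 * numpy.pi * 200 * rho_crit))**(1 / 3)
print(r200c)  # roughly 0.95 Mpc / h for these numbers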

File diff suppressed because it is too large.

csiborgtools/read/obs.py

@@ -383,6 +383,9 @@ class FitsSurvey(ABC):
             return out
         return out[self.selection_mask]

+    def __len__(self):
+        return self.size
+

 ###############################################################################
 #                             Planck clusters                                 #
@@ -560,8 +563,7 @@ class SDSS(FitsSurvey):
     Parameters
     ----------
     fpath : str, optional
-        Path to the FITS file. By default
-        `/mnt/extraspace/rstiskalek/catalogs/nsa_v1_0_1.fits`.
+        Path to the FITS file.
     h : float, optional
         Little h. By default `h = 1`. The catalogue assumes this value.
         The routine properties should take care of little h conversion.
@@ -581,9 +583,7 @@ class SDSS(FitsSurvey):
     """
     name = "SDSS"

-    def __init__(self, fpath=None, h=1, Om0=0.3175, sel_steps=None):
-        if fpath is None:
-            fpath = "/mnt/extraspace/rstiskalek/catalogs/nsa_v1_0_1.fits"
+    def __init__(self, fpath, h=1, Om0=0.3175, sel_steps=None):
         self._file = fits.open(fpath, memmap=False)

         self.h = h
@@ -719,3 +719,114 @@ class SDSS(FitsSurvey):
         Get `IN_DR7_LSS` and turn to a boolean array.
         """
         return self.get_fitsitem("IN_DR7_LSS").astype(bool)
+
+
+###############################################################################
+#                        Individual observations                              #
+###############################################################################
+
+
+class BaseSingleObservation(ABC):
+    """
+    Base class to hold information about a single object.
+    """
+    def __init__(self):
+        self._spherical_pos = None
+        self._name = None
+
+    @property
+    def spherical_pos(self):
+        """
+        Spherical position of the observation in dist/RA/dec in Mpc / h and
+        degrees, respectively.
+
+        Returns
+        -------
+        1-dimensional array of shape (3,)
+        """
+        if self._spherical_pos is None:
+            raise ValueError("`spherical_pos` is not set!")
+        return self._spherical_pos
+
+    @spherical_pos.setter
+    def spherical_pos(self, pos):
+        if isinstance(pos, (list, tuple)):
+            pos = numpy.array(pos)
+        if not pos.shape == (3,):
+            raise ValueError("`spherical_pos` must be of shape (3,).")
+        self._spherical_pos = pos
+
+    @property
+    def name(self):
+        """
+        Observed object name.
+
+        Returns
+        -------
+        str
+        """
+        if self._name is None:
+            raise ValueError("`name` is not set!")
+        return self._name
+
+    @name.setter
+    def name(self, name):
+        if not isinstance(name, str):
+            raise ValueError("`name` must be a string.")
+        self._name = name
+
+
+class ObservedCluster(BaseSingleObservation):
+    """
+    Class to hold information about an observed cluster.
+
+    Parameters
+    ----------
+    RA : float
+        Right ascension in degrees.
+    dec : float
+        Declination in degrees.
+    dist : float
+        Distance in Mpc / h.
+    name : str
+        Cluster name.
+    """
+    def __init__(self, RA, dec, dist, name):
+        super().__init__()
+        self.name = name
+        self.spherical_pos = [dist, RA, dec]
+
+
+###############################################################################
+#                           Utility functions                                 #
+###############################################################################
+
+
+def match_array_to_no_masking(arr, surv):
+    """
+    Match an array to a survey without masking.
+
+    Parameters
+    ----------
+    arr : n-dimensional array
+        Array to match.
+    surv : survey class
+        Survey class.
+
+    Returns
+    -------
+    out : n-dimensional array
+    """
+    dtype = arr.dtype
+    if arr.ndim > 1:
+        shape = arr.shape
+        out = numpy.full((surv.selection_mask.size, *shape[1:]), numpy.nan,
+                         dtype=dtype)
+    else:
+        out = numpy.full(surv.selection_mask.size, numpy.nan, dtype=dtype)
+
+    for i, indx in enumerate(surv["INDEX"]):
+        out[indx] = arr[i]
+
+    return out
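What `match_array_to_no_masking` is for, in one sketch: it scatters values computed on the masked survey back onto the parent catalogue's row indexing, leaving NaN wherever the selection removed a row. The `logmass` values below are made up, and the default SDSS catalogue path is machine-specific.

import numpy
import csiborgtools

survey = csiborgtools.SDSS()()  # default catalogue path is machine-specific
# One made-up value per galaxy that survived the selection; len() works
# thanks to the new FitsSurvey.__len__.
logmass = numpy.random.normal(10.5, 0.5, size=len(survey))

full = csiborgtools.read.match_array_to_no_masking(logmass, survey)
# `full` has one entry per row of the parent FITS file; rows cut by the
# selection stay NaN.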

csiborgtools/read/paths.py

@ -13,7 +13,7 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""CSiBORG paths manager.""" """CSiBORG paths manager."""
from glob import glob from glob import glob, iglob
from os import makedirs from os import makedirs
from os.path import isdir, join from os.path import isdir, join
from warnings import warn from warnings import warn
@ -61,13 +61,7 @@ class Paths:
@property @property
def srcdir(self): def srcdir(self):
""" """Path to the folder where CSiBORG simulations are stored."""
Path to the folder where CSiBORG simulations are stored.
Returns
-------
str
"""
if self._srcdir is None: if self._srcdir is None:
raise ValueError("`srcdir` is not set!") raise ValueError("`srcdir` is not set!")
return self._srcdir return self._srcdir
@ -81,13 +75,7 @@ class Paths:
@property @property
def borg_dir(self): def borg_dir(self):
""" """Path to the folder where BORG MCMC chains are stored."""
Path to the folder where BORG MCMC chains are stored.
Returns
-------
str
"""
if self._borg_dir is None: if self._borg_dir is None:
raise ValueError("`borg_dir` is not set!") raise ValueError("`borg_dir` is not set!")
return self._borg_dir return self._borg_dir
@ -101,13 +89,7 @@ class Paths:
@property @property
def quijote_dir(self): def quijote_dir(self):
""" """Path to the folder where Quijote simulations are stored."""
Path to the folder where Quijote simulations are stored.
Returns
-------
str
"""
if self._quijote_dir is None: if self._quijote_dir is None:
raise ValueError("`quijote_dir` is not set!") raise ValueError("`quijote_dir` is not set!")
return self._quijote_dir return self._quijote_dir
@ -121,13 +103,7 @@ class Paths:
@property @property
def postdir(self): def postdir(self):
""" """Path to the folder where post-processed files are stored."""
Path to the folder where post-processed files are stored.
Returns
-------
str
"""
if self._postdir is None: if self._postdir is None:
raise ValueError("`postdir` is not set!") raise ValueError("`postdir` is not set!")
return self._postdir return self._postdir
@ -139,19 +115,6 @@ class Paths:
check_directory(path) check_directory(path)
self._postdir = path self._postdir = path
@property
def temp_dumpdir(self):
"""
Path to a temporary dumping folder.
Returns
-------
str
"""
fpath = join(self.postdir, "temp")
try_create_directory(fpath)
return fpath
@staticmethod @staticmethod
def quijote_fiducial_nsim(nsim, nobs=None): def quijote_fiducial_nsim(nsim, nobs=None):
""" """
@ -167,7 +130,7 @@ class Paths:
Returns Returns
------- -------
id : str str
""" """
if nobs is None: if nobs is None:
assert isinstance(nsim, str) assert isinstance(nsim, str)
@ -190,36 +153,14 @@ class Paths:
""" """
return join(self.borg_dir, "mcmc", f"mcmc_{nsim}.h5") return join(self.borg_dir, "mcmc", f"mcmc_{nsim}.h5")
def fof_membership(self, nsim, simname, sorted=False): def fof_cat(self, nsnap, nsim, simname, from_quijote_backup=False):
"""
Path to the file containing the FoF particle membership.
Parameters
----------
nsim : int
IC realisation index.
simname : str
Simulation name. Must be one of `csiborg` or `quijote`.
sorted : bool, optional
Whether to return path to the file that is sorted in the same
order as the PHEW output.
"""
assert simname in ["csiborg", "quijote"]
if simname == "quijote":
raise RuntimeError("Quijote FoF membership is in the FoF cats.")
fdir = join(self.postdir, "FoF_membership", )
try_create_directory(fdir)
fout = join(fdir, f"fof_membership_{nsim}.npy")
if sorted:
fout = fout.replace(".npy", "_sorted.npy")
return fout
def fof_cat(self, nsim, simname, from_quijote_backup=False):
r""" r"""
Path to the :math:`z = 0` FoF halo catalogue. Path to the :math:`z = 0` FoF halo catalogue.
Parameters Parameters
---------- ----------
nsnap : int
Snapshot index.
nsim : int nsim : int
IC realisation index. IC realisation index.
simname : str simname : str
@ -228,15 +169,15 @@ class Paths:
Whether to return the path to the Quijote FoF catalogue from the Whether to return the path to the Quijote FoF catalogue from the
backup. backup.
Returns Returns
------- -------
str str
""" """
if simname == "csiborg": if simname == "csiborg":
fdir = join(self.postdir, "FoF_membership", ) fdir = join(self.postdir, "halo_maker", f"ramses_{nsim}",
f"output_{str(nsnap).zfill(5)}", "FOF")
try_create_directory(fdir) try_create_directory(fdir)
return join(fdir, f"halo_catalog_{nsim}_FOF.txt") return join(fdir, "fort.132")
elif simname == "quijote": elif simname == "quijote":
if from_quijote_backup: if from_quijote_backup:
return join(self.quijote_dir, "halos_backup", str(nsim)) return join(self.quijote_dir, "halos_backup", str(nsim))
@ -245,57 +186,6 @@ class Paths:
else: else:
raise ValueError(f"Unknown simulation name `{simname}`.") raise ValueError(f"Unknown simulation name `{simname}`.")
def mmain(self, nsnap, nsim):
"""
Path to the `mmain` CSiBORG files of summed substructure.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
str
"""
fdir = join(self.postdir, "mmain")
try_create_directory(fdir)
return join(
fdir, f"mmain_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz")
def initmatch(self, nsim, simname, kind):
"""
Path to the `initmatch` files where the halo match between the
initial and final snapshot of a CSiBORG realisaiton is stored.
Parameters
----------
nsim : int
IC realisation index.
simname : str
Simulation name. Must be one of `csiborg` or `quijote`.
kind : str
Type of match. Must be one of `particles` or `fit`.
Returns
-------
str
"""
assert kind in ["particles", "fit"]
ftype = "npy" if kind == "fit" else "h5"
if simname == "csiborg":
fdir = join(self.postdir, "initmatch")
elif simname == "quijote":
fdir = join(self.quijote_dir, "initmatch")
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
try_create_directory(fdir)
return join(fdir, f"{kind}_{str(nsim).zfill(5)}.{ftype}")
def get_ics(self, simname, from_quijote_backup=False): def get_ics(self, simname, from_quijote_backup=False):
""" """
Get available IC realisation IDs for either the CSiBORG or Quijote Get available IC realisation IDs for either the CSiBORG or Quijote
@ -411,7 +301,7 @@ class Paths:
Returns Returns
------- -------
snapstr str
""" """
simpath = self.snapshots(nsim, simname, tonew=nsnap == 1) simpath = self.snapshots(nsim, simname, tonew=nsnap == 1)
if simname == "csiborg": if simname == "csiborg":
@ -422,7 +312,27 @@ class Paths:
nsnap = str(nsnap).zfill(3) nsnap = str(nsnap).zfill(3)
return join(simpath, f"snapdir_{nsnap}", f"snap_{nsnap}") return join(simpath, f"snapdir_{nsnap}", f"snap_{nsnap}")
def particles(self, nsim, simname): def merger_tree_file(self, nsnap, nsim):
"""
Path to the CSiBORG on-the-fly generated merger tree file.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
str
"""
nsim = str(nsim)
nsnap = str(nsnap).zfill(5)
return join(self.srcdir, f"ramses_out_{nsim}",
f"output_{nsnap}", f"mergertree_{nsnap}.dat")
def processed_output(self, nsim, simname, halo_finder):
""" """
Path to the files containing all particles of a CSiBORG realisation at Path to the files containing all particles of a CSiBORG realisation at
:math:`z = 0`. :math:`z = 0`.
@ -433,22 +343,80 @@ class Paths:
IC realisation index. IC realisation index.
simname : str simname : str
Simulation name. Must be one of `csiborg` or `quijote`. Simulation name. Must be one of `csiborg` or `quijote`.
halo_finder : str
Halo finder name.
Returns Returns
------- -------
str str
""" """
if simname == "csiborg": if simname == "csiborg":
fdir = join(self.postdir, "particles") fdir = join(self.postdir, "processed_output")
elif simname == "quijote": elif simname == "quijote":
fdir = join(self.quijote_dir, "Particles_fiducial") fdir = join(self.quijote_dir, "Particles_fiducial")
else: else:
raise ValueError(f"Unknown simulation name `{simname}`.") raise ValueError(f"Unknown simulation name `{simname}`.")
try_create_directory(fdir) try_create_directory(fdir)
fname = f"parts_{str(nsim).zfill(5)}.h5" fname = f"parts_{halo_finder}_{str(nsim).zfill(5)}.hdf5"
return join(fdir, fname) return join(fdir, fname)
def processed_phew(self, nsim):
"""
Path to the files containing PHEW CSiBORG catalogues.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
str
"""
fdir = join(self.postdir, "processed_output")
try_create_directory(fdir)
return join(fdir, f"phew_{str(nsim).zfill(5)}.hdf5")
def processed_merger_tree(self, nsim):
"""
Path to the files containing the processed original merger tree files.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
str
"""
fdir = join(self.postdir, "processed_output")
try_create_directory(fdir)
return join(fdir, f"merger_{str(nsim).zfill(5)}.hdf5")
def halomaker_particle_membership(self, nsnap, nsim, halo_finder):
"""
Path to the HaloMaker particle membership file (CSiBORG only).
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
halo_finder : str
Halo finder name.
Returns
-------
str
"""
fdir = join(self.postdir, "halo_maker", f"ramses_{nsim}",
f"output_{str(nsnap).zfill(5)}", halo_finder)
fpath = join(fdir, "*particle_membership*")
return next(iglob(fpath, recursive=True), None)
def ascii_positions(self, nsim, kind): def ascii_positions(self, nsim, kind):
""" """
Path to ASCII files containing the positions of particles or halos. Path to ASCII files containing the positions of particles or halos.
@ -469,35 +437,6 @@ class Paths:
return join(fdir, fname) return join(fdir, fname)
def structfit(self, nsnap, nsim, simname):
"""
Path to the halo catalogue from `fit_halos.py`.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
simname : str
Simulation name. Must be one of `csiborg` or `quijote`.
Returns
-------
str
"""
if simname == "csiborg":
fdir = join(self.postdir, "structfit")
elif simname == "quijote":
fdir = join(self.quijote_dir, "structfit")
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
try_create_directory(fdir)
fname = f"out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
return join(fdir, fname)
def overlap(self, simname, nsim0, nsimx, min_logmass, smoothed): def overlap(self, simname, nsim0, nsimx, min_logmass, smoothed):
""" """
Path to the overlap files between two CSiBORG simulations. Path to the overlap files between two CSiBORG simulations.
@ -688,31 +627,6 @@ class Paths:
fname = f"obs_vp_{MAS}_{str(nsim).zfill(5)}_{grid}.npz" fname = f"obs_vp_{MAS}_{str(nsim).zfill(5)}_{grid}.npz"
return join(fdir, fname) return join(fdir, fname)
def halo_counts(self, simname, nsim, from_quijote_backup=False):
"""
Path to the files containing the binned halo counts.
Parameters
----------
simname : str
Simulation name. Must be `csiborg`, `quijote` or `quijote_full`.
nsim : int
IC realisation index.
from_quijote_backup : bool, optional
Whether to return the path to the Quijote halo counts from the
backup catalogues.
Returns
-------
str
"""
fdir = join(self.postdir, "HMF")
try_create_directory(fdir)
fname = f"halo_counts_{simname}_{str(nsim).zfill(5)}.npz"
if from_quijote_backup:
fname = fname.replace("halo_counts", "halo_counts_backup")
return join(fdir, fname)
    def cross_nearest(self, simname, run, kind, nsim=None, nobs=None):
        """
        Path to the files containing distance from a halo in a reference
File diff suppressed because it is too large
@ -15,6 +15,7 @@
"""Collection of stand-off utility functions used in the scripts.""" """Collection of stand-off utility functions used in the scripts."""
import numpy import numpy
from numba import jit from numba import jit
from datetime import datetime
############################################################################### ###############################################################################
# Positions # # Positions #
@ -87,7 +88,7 @@ def periodic_distance_two_points(p1, p2, boxsize):
return dist**0.5 return dist**0.5
@jit(nopython=True) @jit(nopython=True, boundscheck=False)
def periodic_wrap_grid(pos, boxsize=1): def periodic_wrap_grid(pos, boxsize=1):
"""Wrap positions in a periodic box.""" """Wrap positions in a periodic box."""
for n in range(pos.shape[0]): for n in range(pos.shape[0]):
@ -139,17 +140,34 @@ def radec_to_cartesian(X):
""" """
dist, ra, dec = X[:, 0], X[:, 1], X[:, 2] dist, ra, dec = X[:, 0], X[:, 1], X[:, 2]
ra *= numpy.pi / 180 cdec = numpy.cos(dec * numpy.pi / 180)
dec *= numpy.pi / 180
cdec = numpy.cos(dec)
return numpy.vstack([ return numpy.vstack([
dist * cdec * numpy.cos(ra), dist * cdec * numpy.cos(ra * numpy.pi / 180),
dist * cdec * numpy.sin(ra), dist * cdec * numpy.sin(ra * numpy.pi / 180),
dist * numpy.sin(dec) dist * numpy.sin(dec * numpy.pi / 180)
]).T ]).T
@jit(nopython=True, fastmath=True, boundscheck=False)
def great_circle_distance(x1, x2):
"""
Great circle distance between two points on a sphere, defined by RA and
dec, both in degrees.
"""
ra1, dec1 = x1
ra2, dec2 = x2
ra1 *= numpy.pi / 180
dec1 *= numpy.pi / 180
ra2 *= numpy.pi / 180
dec2 *= numpy.pi / 180
return 180 / numpy.pi * numpy.arccos(
numpy.sin(dec1) * numpy.sin(dec2)
+ numpy.cos(dec1) * numpy.cos(dec2) * numpy.cos(ra1 - ra2)
)
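# A quick sanity check of the arccos form above, assuming (RA, dec) pairs in
# degrees packed into length-2 numpy arrays (plain lists would not compile
# under numba). Note the arccos form loses precision at very small
# separations:
#
# >>> great_circle_distance(numpy.array([0., 0.]), numpy.array([90., 0.]))
# 90.0  # a quarter of the equator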
def cosine_similarity(x, y):
    r"""
    Calculate the cosine similarity between two Cartesian vectors. Defined
@ -179,6 +197,36 @@ def cosine_similarity(x, y):
    return out[0] if out.size == 1 else out
def hms_to_degrees(hours, minutes=None, seconds=None):
"""
Convert hours, minutes and seconds to degrees.
Parameters
----------
hours, minutes, seconds : float
Returns
-------
float
"""
return hours * 15 + (minutes or 0) / 60 * 15 + (seconds or 0) / 3600 * 15
def dms_to_degrees(degrees, arcminutes=None, arcseconds=None):
"""
Convert degrees, arcminutes and arcseconds to decimal degrees.
Parameters
----------
degrees, arcminutes, arcseconds : float
Returns
-------
float
"""
return degrees + (arcminutes or 0) / 60 + (arcseconds or 0) / 3600
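# For example, converting a J2000-style coordinate (values illustrative). For
# negative declinations the arcminute and arcsecond terms above still add
# positively, so the caller has to handle the overall sign:
#
# >>> hms_to_degrees(13, 29, 53)   # approx. 202.47083 deg
# >>> dms_to_degrees(47, 11, 43)   # approx. 47.19528 deg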
def real2redshift(pos, vel, observer_location, observer_velocity, box,
                  periodic_wrap=True, make_copy=True):
    r"""
@ -262,3 +310,9 @@ def binned_statistic(x, y, left_edges, bin_width, statistic):
        if numpy.any(mask):
            out[i] = statistic(y[mask])
    return out
def fprint(msg, verbose=True):
"""Print and flush a message with a timestamp."""
if verbose:
print(f"{datetime.now()}: {msg}", flush=True)
File diff suppressed because one or more lines are too long
@ -66,7 +66,7 @@ jobs = csiborgtools.utils.split_jobs(nsims, nproc)[rank]
for n in jobs:
    print(f"Rank {rank} at {datetime.now()}: saving {n}th delta.", flush=True)
    nsim = ics[n]
    particles = reader.read_snapshot(max(paths.get_snapshots(nsim, "csiborg")),
                                     nsim, ["x", "y", "z", "M"], verbose=False)
    # Halfwidth -- particle selection
    if args.halfwidth < 0.5:
old/cluster_crosspk.sh
@ -0,0 +1,14 @@
nthreads=20
memory=40
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="cluster_crosspk.py"
grid=1024
halfwidth=0.13
cm="addqueue -q $queue -n $nthreads -m $memory $env $file --grid $grid --halfwidth $halfwidth"
echo "Submitting:"
echo $cm
echo
$cm
old/cluster_knn_auto.sh
@ -0,0 +1,27 @@
nthreads=4
memory=4
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="cluster_knn_auto.py"
Rmax=219.8581560283688
verbose="true"
simname="quijote"
nsims="0 1 2"
# simname="csiborg"
# nsims="7444 7900 9052"
run="mass003"
pythoncm="$env $file --run $run --simname $simname --nsims $nsims --Rmax $Rmax --verbose $verbose"
echo $pythoncm
$pythoncm
# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm
old/cluster_knn_cross.sh
@ -0,0 +1,18 @@
nthreads=151
memory=4
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="knn_cross.py"
runs="mass001"
pythoncm="$env $file --runs $runs"
echo $pythoncm
$pythoncm
# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm
old/cluster_tpcf_auto.sh
@ -0,0 +1,26 @@
nthreads=26
memory=7
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="cluster_tpcf_auto.py"
Rmax=219.8581560283688
verbose="true"
# simname="quijote"
# nsims="0 1 2"
simname="csiborg"
nsims="7444 7900 9052"
run="mass003"
pythoncm="$env $file --run $run --simname $simname --nsims $nsims --Rmax $Rmax --verbose $verbose"
echo $pythoncm
$pythoncm
# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm
old/fit_hmf.sh
@ -0,0 +1,24 @@
nthreads=11
memory=2
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="fit_hmf.py"
simname="quijote_full"
nsims="-1"
verbose=True
lower_lim=12.0
upper_lim=16.0
Rmax=155
from_quijote_backup="true"
bw=0.2
pythoncm="$env $file --simname $simname --nsims $nsims --Rmax $Rmax --lims $lower_lim $upper_lim --bw $bw --from_quijote_backup $from_quijote_backup --verbose $verbose"
$pythoncm
# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm
old/merger.py
@ -0,0 +1,686 @@
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Support for reading the PHEW/ACACIA CSiBORG merger trees. However, note that
the merger trees are very unreliable.
"""
from abc import ABC
from datetime import datetime
from gc import collect
import numpy
from h5py import File
from tqdm import tqdm, trange
from treelib import Tree
from ..utils import periodic_distance
from .paths import Paths
###############################################################################
# Utility functions. #
###############################################################################
def clump_identifier(clump, nsnap):
"""
Generate a unique identifier for a clump at a given snapshot.
Parameters
----------
clump : int
Clump ID.
nsnap : int
Snapshot index.
Returns
-------
str
"""
return f"{str(clump).rjust(9, 'x')}__{str(nsnap).rjust(4, 'x')}"
def extract_identifier(identifier):
"""
    Extract the clump ID and snapshot index from an identifier generated by
`clump_identifier`.
Parameters
----------
identifier : str
Identifier.
Returns
-------
clump, nsnap : int
Clump ID and snapshot index.
"""
clump, nsnap = identifier.split('__')
return int(clump.lstrip('x')), int(nsnap.lstrip('x'))
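# Round trip of the two helpers above; the 'x' padding keeps identifiers at a
# fixed width for clump IDs below 10^9:
#
# >>> clump_identifier(7, 942)
# 'xxxxxxxx7__x942'
# >>> extract_identifier('xxxxxxxx7__x942')
# (7, 942)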
###############################################################################
# Merger tree reader class. #
###############################################################################
class BaseMergerReader(ABC):
"""
Base class for the CSiBORG merger tree reader.
"""
_paths = None
_nsim = None
_min_snap = None
_cache = {}
@property
def paths(self):
"""Paths manager."""
if self._paths is None:
raise ValueError("`paths` is not set.")
return self._paths
@paths.setter
def paths(self, paths):
assert isinstance(paths, Paths)
self._paths = paths
@property
def nsim(self):
"""Simulation index."""
if self._nsim is None:
raise ValueError("`nsim` is not set.")
return self._nsim
@nsim.setter
def nsim(self, nsim):
assert isinstance(nsim, (int, numpy.integer))
self._nsim = nsim
@property
def min_snap(self):
"""Minimum snapshot index to read."""
return self._min_snap
@min_snap.setter
def min_snap(self, min_snap):
if min_snap is not None:
assert isinstance(min_snap, (int, numpy.integer))
self._min_snap = int(min_snap)
def cache_length(self):
"""Length of the cache."""
return len(self._cache)
def cache_clear(self):
"""Clear the cache."""
self._cache = {}
collect()
def __getitem__(self, key):
try:
return self._cache[key]
except KeyError:
fname = self.paths.processed_merger_tree(self.nsim)
nsnap, kind = key.split("__")
with File(fname, "r") as f:
if kind == "clump_to_array":
cl = self[f"{nsnap}__clump"]
x = {}
for i, c in enumerate(cl):
if c in x:
x[c] += (i,)
else:
x[c] = (i,)
else:
x = f[f"{str(nsnap)}/{kind}"][:]
# Cache it
self._cache[key] = x
return x
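    # The cache keys follow the f"{nsnap}__{kind}" convention parsed above; a
    # hedged sketch (dataset names assume the processed merger-tree HDF5
    # layout written by this commit):
    #
    # >>> reader = MergerReader(7444, paths)
    # >>> reader["941__desc_mass"]        # read from HDF5, then cached
    # >>> reader["941__clump_to_array"]   # built on the fly: clump ID -> row indices
    # >>> reader.cache_clear()            # drop everything and garbage collect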
class MergerReader(BaseMergerReader):
"""
Merger tree reader.
Parameters
----------
nsim : int
Simulation index.
paths : Paths
Paths manager.
min_snap : int
Minimum snapshot index. Trees below this snapshot will not be read.
"""
def __init__(self, nsim, paths, min_snap=None):
self.nsim = nsim
self.paths = paths
self.min_snap = min_snap
def get_info(self, current_clump, current_snap, is_main=None):
"""
Make a list of information about a clump at a given snapshot.
Parameters
----------
current_clump : int
Clump ID.
current_snap : int
Snapshot index.
is_main : bool
Whether this is the main progenitor.
Returns
-------
list
"""
if current_clump < 0:
raise ValueError("Clump ID must be positive.")
if is_main is not None and not isinstance(is_main, bool):
raise ValueError("`is_main` must be a boolean.")
k = self[f"{current_snap}__clump_to_array"][current_clump][0]
out = [self[f"{current_snap}__desc_mass"][k],
*self[f"{current_snap}__desc_pos"][k][::-1]] # TODO REMOVE LATER
if is_main is not None:
return [is_main,] + out
return out
def get_mass(self, clump, snap):
"""
Get the mass of a clump at a given snapshot.
Parameters
----------
clump : int
Clump ID.
snap : int
Snapshot index.
Returns
-------
float
"""
if clump < 0:
raise ValueError("Clump ID must be positive.")
k = self[f"{snap}__clump_to_array"][clump][0]
return self[f"{snap}__desc_mass"][k]
def get_pos(self, clump, snap):
if clump < 0:
raise ValueError("Clump ID must be positive.")
k = self[f"{snap}__clump_to_array"][clump][0]
return self[f"{snap}__desc_pos"][k]
def find_main_progenitor(self, clump, nsnap):
"""
Find the main progenitor of a clump at a given snapshot. Cases are:
- `clump > 0`, `progenitor > 0`: main progenitor is in the adjacent
snapshot,
- `clump > 0`, `progenitor < 0`: main progenitor is not in the
adjacent snapshot.
- `clump < 0`, `progenitor = 0`: no progenitor, newly formed clump.
Parameters
----------
clump : int
Clump ID.
nsnap : int
Snapshot index.
Returns
-------
progenitor : int
Main progenitor clump ID.
progenitor_snap : int
Main progenitor snapshot index.
"""
if not clump > 0:
raise ValueError("Clump ID must be positive.")
cl2array = self[f"{nsnap}__clump_to_array"]
if clump in cl2array:
k = cl2array[clump]
else:
raise ValueError("Clump ID not found.")
if len(k) > 1:
raise ValueError("Found more than one main progenitor.")
k = k[0]
progenitor = abs(self[f"{nsnap}__progenitor"][k])
progenitor_snap = self[f"{nsnap}__progenitor_outputnr"][k]
if (self.min_snap is not None) and (nsnap < self.min_snap):
return 0, numpy.nan
return progenitor, progenitor_snap
def find_minor_progenitors(self, clump, nsnap):
"""
Find the minor progenitors of a clump at a given snapshot. This means
that `clump < 0`, `progenitor > 0`, i.e. this clump also has another
main progenitor.
If there are no minor progenitors, return `None` for both lists.
Parameters
----------
clump : int
Clump ID.
nsnap : int
Snapshot index.
Returns
-------
prog : list
List of minor progenitor clump IDs.
prog_snap : list
List of minor progenitor snapshot indices.
"""
if not clump > 0:
raise ValueError("Clump ID must be positive.")
try:
ks = self[f"{nsnap}__clump_to_array"][-clump]
except KeyError:
return None, None
prog = [self[f"{nsnap}__progenitor"][k] for k in ks]
prog_nsnap = [self[f"{nsnap}__progenitor_outputnr"][k] for k in ks]
if (self.min_snap is not None) and (nsnap < self.min_snap):
return None, None
return prog, prog_nsnap
def find_progenitors(self, clump, nsnap):
"""
Find all progenitors of a clump at a given snapshot. The main
progenitor is the first element of the list.
Parameters
----------
clump : int
Clump ID.
nsnap : int
Snapshot index.
Returns
-------
prog : list
List of progenitor clump IDs.
prog_nsnap : list
List of progenitor snapshot indices.
"""
main_prog, main_prog_nsnap = self.find_main_progenitor(clump, nsnap)
min_prog, min_prog_nsnap = self.find_minor_progenitors(clump, nsnap)
# Check that if the main progenitor is not in the adjacent snapshot,
# then the minor progenitor are also in that snapshot (if any).
        if (min_prog is not None) and (main_prog_nsnap != nsnap - 1) and not all(main_prog_nsnap == mprog for mprog in min_prog_nsnap):  # noqa
            raise ValueError(f"For clump {clump} at snapshot {nsnap} we have "
                             f"main progenitor at {main_prog_nsnap} and "
                             f"minor progenitors at {min_prog_nsnap}.")
if min_prog is None:
prog = [main_prog,]
prog_nsnap = [main_prog_nsnap,]
else:
prog = [main_prog,] + min_prog
prog_nsnap = [main_prog_nsnap,] + min_prog_nsnap
if prog[0] == 0 and len(prog) > 1:
raise ValueError("No main progenitor but minor progenitors "
"found for clump {clump} at snapshot {nsnap}.")
return prog, prog_nsnap
def tree_mass_at_snapshot(self, clump, nsnap, target_snap):
"""
Calculate the total mass of nodes in a tree at a given snapshot.
"""
# If clump is 0 (i.e., we've reached the end of the tree), return 0
if clump == 0:
return 0
# Find the progenitors for the given clump and nsnap
prog, prog_nsnap = self.find_progenitors(clump, nsnap)
if prog[0] == 0:
print(prog)
return 0
# Sum the mass of the current clump's progenitors
tot = 0
for p, psnap in zip(prog, prog_nsnap):
if psnap == target_snap:
tot += self.get_mass(p, psnap)
# Recursively sum the mass of each progenitor's progenitors
        for p, psnap in zip(prog, prog_nsnap):
            tot += self.tree_mass_at_snapshot(p, psnap, target_snap)
return tot
def is_jumper(self, clump, nsnap, nsnap_descendant):
pass
def make_tree(self, current_clump, current_nsnap,
above_clump=None, above_nsnap=None,
tree=None, is_main=None, verbose=False):
"""
Make a merger tree for a clump at a given snapshot.
Parameters
----------
current_clump : int
Clump ID of the descendant clump.
current_nsnap : int
Snapshot index of the descendent clump.
above_clump : int, optional
Clump ID of a clump above the current clump in the tree.
above_nsnap : int, optional
Snapshot index of a clump above the current clump in the tree.
tree : treelib.Tree, optional
Tree to add to.
is_main : bool, optional
Whether this is the main progenitor.
verbose : bool, optional
Verbosity flag.
Returns
-------
treelib.Tree
Tree with the current clump as the root.
"""
if verbose:
print(f"{datetime.now()}: Node of a clump {current_clump} at "
f"snapshot {current_nsnap}.", flush=True)
# Terminate if we are at the end of the tree
if current_clump == 0:
return
# Create the root node or add a new node
if tree is None:
tree = Tree()
tree.create_node(
"root",
identifier=clump_identifier(current_clump, current_nsnap),
data=self.get_info(current_clump, current_nsnap, True),
)
else:
tree.create_node(
identifier=clump_identifier(current_clump, current_nsnap),
parent=clump_identifier(above_clump, above_nsnap),
data=self.get_info(current_clump, current_nsnap, is_main),
)
# This returns a list of progenitors and their snapshots. The first
# element is the main progenitor.
prog, prog_nsnap = self.find_progenitors(current_clump, current_nsnap)
for i, (p, psnap) in enumerate(zip(prog, prog_nsnap)):
self.make_tree(p, psnap, current_clump, current_nsnap, tree,
is_main=i == 0, verbose=verbose)
return tree
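    # A usage sketch; treelib can then render or serialise the hierarchy
    # (clump and snapshot values illustrative):
    #
    # >>> reader = MergerReader(7444, paths, min_snap=900)
    # >>> tree = reader.make_tree(clump, nsnap)  # root descendant clump
    # >>> tree.show()                            # ASCII rendering
    # >>> tree.depth()                           # number of generations walked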
def walk_main_progenitor(self, main_clump, main_nsnap, verbose=False):
"""
Walk the main progenitor branch of a clump.
Each snapshot contains information about the clump at that snapshot.
Parameters
----------
clump : int
Clump ID.
nsnap : int
Snapshot index.
Returns
-------
structured array
"""
out = []
pbar = tqdm(disable=not verbose)
while True:
prog, prog_nsnap = self.find_progenitors(main_clump, main_nsnap)
# Unpack the main and minor progenitor
mainprog, mainprog_nsnap = prog[0], prog_nsnap[0]
if len(prog) > 1:
minprog, minprog_nsnap = prog[1:], prog_nsnap[1:]
else:
minprog, minprog_nsnap = None, None
# If there is no progenitor, then set the main progenitor mass to 0
if mainprog == 0:
mainprog_mass = numpy.nan
else:
mainprog_mass = self.get_mass(mainprog, mainprog_nsnap)
totprog_mass = mainprog_mass
# Unpack masses of the progenitors
if minprog is not None:
minprog, minprog_nsnap = prog[1:], prog_nsnap[1:]
minprog_masses = [self.get_mass(c, n)
for c, n in zip(minprog, minprog_nsnap)]
max_minprog_mass = max(minprog_masses)
minprog_totmass = sum(minprog_masses)
totprog_mass += minprog_totmass
else:
minprog_totmass = numpy.nan
max_minprog_mass = numpy.nan
out += [
[main_nsnap,]
+ self.get_info(main_clump, main_nsnap)
+ [mainprog_nsnap, totprog_mass, mainprog_mass, minprog_totmass, max_minprog_mass / mainprog_mass] # noqa
]
pbar.update(1)
pbar.set_description(f"Clump {main_clump} ({main_nsnap})")
if mainprog == 0:
pbar.close()
break
main_clump = mainprog
main_nsnap = mainprog_nsnap
# Convert output to a structured array. We store integers as float
# to avoid errors because of converting NaNs to integers.
out = numpy.vstack(out)
dtype = [("desc_snapshot_index", numpy.float32),
("desc_mass", numpy.float32),
("desc_x", numpy.float32),
("desc_y", numpy.float32),
("desc_z", numpy.float32),
("prog_snapshot_index", numpy.float32),
("prog_totmass", numpy.float32),
("mainprog_mass", numpy.float32),
("minprog_totmass", numpy.float32),
("merger_ratio", numpy.float32),
]
return numpy.array([tuple(row) for row in out], dtype=dtype)
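    # Consuming the structured array, e.g. to follow the main-branch mass
    # growth (field names as in the dtype above; the 0.1 threshold is
    # illustrative):
    #
    # >>> hist = reader.walk_main_progenitor(main_clump, main_nsnap)
    # >>> growth = hist["desc_mass"] / hist["desc_mass"][0]
    # >>> major = hist["merger_ratio"] > 0.1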
def match_mass_to_phewcat(self, phewcat):
"""
For each clump mass in the PHEW catalogue, find the corresponding
clump mass in the merger tree file. If no match is found returns NaN.
These are not equal because the PHEW catalogue mass is the mass without
unbinding.
Parameters
----------
        phewcat : csiborgtools.read.CSiBORGPHEWReader
            PHEW catalogue reader.

        Returns
        -------
        mass : np.ndarray
"""
if phewcat.nsim != self.nsim:
raise ValueError("Simulation indices do not match.")
nsnap = phewcat.nsnap
indxs = phewcat["index"]
mergertree_mass = numpy.full(len(indxs), numpy.nan,
dtype=numpy.float32)
for i, ind in enumerate(indxs):
try:
mergertree_mass[i] = self.get_mass(ind, nsnap)
except KeyError:
continue
return mergertree_mass
def match_pos_to_phewcat(self, phewcat):
"""
        For each clump in the PHEW catalogue, find the corresponding clump
        position in the merger tree file. If no match is found returns NaN.

        Parameters
        ----------
        phewcat : csiborgtools.read.CSiBORGPHEWReader
            PHEW catalogue reader.

        Returns
        -------
        pos : np.ndarray
"""
if phewcat.nsim != self.nsim:
raise ValueError("Simulation indices do not match.")
nsnap = phewcat.nsnap
indxs = phewcat["index"]
mergertree_pos = numpy.full((len(indxs), 3), numpy.nan,
dtype=numpy.float32)
for i, ind in enumerate(indxs):
try:
mergertree_pos[i] = self.get_pos(ind, nsnap)
except KeyError:
continue
return mergertree_pos[:, ::-1] # TODO later remove
###############################################################################
# Manual halo tracking. #
###############################################################################
def track_halo_manually(cats, hid, maxdist=0.15, max_dlogm=0.35):
"""
Manually track a halo without using the merger tree. Searches for nearby
halo of similar mass in adjacent snapshots. Supports only main haloes and
can only work for the most massive haloes in a simulation, however even
then significant care should be taken.
Selects the most massive halo within a search radius to be a match.
    In case a progenitor is not found in the adjacent snapshot, the search
    continues in the next snapshot; occasionally some haloes disappear entirely.
Parameters
----------
cats : dict
Dictionary of halo catalogues, keys are snapshot indices.
hid : int
Halo ID.
maxdist : float, optional
Maximum comoving distance for a halo to move between adjacent
snapshots.
max_dlogm : float, optional
Maximum |log mass ratio| for a halo to be considered a progenitor.
Returns
-------
hist : structured array
History of the halo.
"""
nsnap0 = max(cats.keys())
k = cats[nsnap0]["hid_to_array_index"][hid]
pos = cats[nsnap0]["cartesian_pos"][k]
mass = cats[nsnap0]["summed_mass"][k]
if not cats[nsnap0]["is_main"][k]:
raise ValueError("Only main haloes are supported.")
if not mass > 1e13:
raise ValueError("Only the most massive haloes are supported.")
if not cats[nsnap0]["dist"][k] < 155.5:
raise ValueError("Only high-resolution region haloes are supported.")
dtype = [("snapshot_index", numpy.float32),
("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("mass", numpy.float32),
("desc_dist", numpy.float32),
]
hist = numpy.full(len(cats), numpy.nan, dtype=dtype)
hist["snapshot_index"][0] = nsnap0
hist["x"][0], hist["y"][0], hist["z"][0] = pos
hist["mass"][0] = mass
for n in trange(1, len(cats), desc="Tracking halo"):
nsnap = nsnap0 - n
hist["snapshot_index"][n] = nsnap
        # Find indices of all main haloes within a box of width 2 * maxdist.
indxs = cats[nsnap].select_in_box(pos, 2 * maxdist)
if len(indxs) == 0:
continue
nearby_pos = cats[nsnap]["cartesian_pos"][indxs]
nearby_mass = cats[nsnap]["summed_mass"][indxs]
# Distance from the previous position and |log mass ratio|
dist = periodic_distance(nearby_pos, pos, cats[nsnap].box.boxsize)
dlogm = numpy.abs(numpy.log10(nearby_mass / mass))
k = numpy.argmin(dlogm)
if (dlogm[k] < max_dlogm) & (dist[k] < maxdist):
hist["x"][n], hist["y"][n], hist["z"][n] = nearby_pos[k]
hist["mass"][n] = nearby_mass[k]
hist["desc_dist"][n] = dist[k]
pos = nearby_pos[k]
mass = nearby_mass[k]
return hist
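# A hedged sketch of driving the manual tracker, assuming `load_catalogue` is
# a user-supplied (hypothetical) helper returning catalogues that support the
# keys used above:
#
# >>> cats = {nsnap: load_catalogue(nsim, nsnap) for nsnap in snapshots}
# >>> hist = track_halo_manually(cats, hid=123)
# >>> found = numpy.isfinite(hist["mass"])  # snapshots with a matched progenitor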
@ -98,7 +98,7 @@ def sort_fofid(nsim, verbose=True):
    reader = csiborgtools.read.CSiBORGReader(paths)
    pars_extract = ["x"]  # Dummy variable
    __, pids = reader.read_snapshot(nsnap, nsim, pars_extract,
                                    return_structured=False, verbose=verbose)
    del __
    collect()
old/mv_fofmembership.sh
@ -0,0 +1,17 @@
nthreads=1
memory=100
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="mv_fofmembership.py"
nsims="5511"
pythoncm="$env $file --nsims $nsims"
# echo $pythoncm
# $pythoncm
cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
echo "Submitting:"
echo $cm
echo
$cm
@ -12,7 +12,7 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to load in the simulation particles, sort them by their halo ID and
dump into a HDF5 file. Stores the first and last index of each halo in the
particle array. This can be used for fast slicing of the array to access
particles of a single clump.
@ -108,7 +108,7 @@ def main(nsim, simname, verbose):
        pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
    else:
        pars_extract = None
    parts, pids = partreader.read_snapshot(
        nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
    # In case of CSiBORG, we need to convert the mass and velocities from
old/pre_dumppart.sh
@ -0,0 +1,18 @@
nthreads=1
memory=40
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="pre_dumppart.py"
simname="csiborg"
nsims="5511"
pythoncm="$env $file --nsims $nsims --simname $simname"
# echo $pythoncm
# $pythoncm
cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
echo "Submitting:"
echo $cm
echo
$cm
@ -67,14 +67,13 @@ def sort_particle_membership(nsim, nsnap, method):
    fout = fpath + "_sorted.hdf5"
    print(f"{datetime.now()}: saving the sorted data to ... `{fout}`")

    with h5py.File(fout, 'w') as hdf:
        dset = hdf.create_dataset('hids', data=hids)
        dset.attrs['header'] = """
        This dataset represents (sub)halo indices for each particle.
        - The particles are ordered as they appear in the simulation snapshot.
        - Unassigned particles are given an index of 0.
        """

if __name__ == "__main__":
old/sort_halomaker.sh
@ -0,0 +1,19 @@
nthreads=1
memory=64
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="sort_halomaker.py"
method="FOF"
nsim="7444"
pythoncm="$env $file --method $method --nsim $nsim"
# echo $pythoncm
# $pythoncm
cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
echo "Submitting:"
echo $cm
echo
$cm
@ -61,13 +61,13 @@ def positions_to_ascii(positions, output_filename, boxsize=None,
        out_file.write(chunk_str + "\n")

def extract_positions(nsim, simname, paths, kind):
    """
    Extract either the particle or halo positions.
    """
    if kind == "particles":
        fname = paths.processed_output(nsim, simname, "FOF")
        return h5py.File(fname, 'r')["snapshot_final/pos"][:]

    if kind == "particles_rsp":
        raise NotImplementedError("RSP of particles is not implemented yet.")
@ -75,23 +75,23 @@ def extract_positions(nsim, paths, kind):
    fpath = paths.observer_peculiar_velocity("PCS", 512, nsim)
    vpec_observer = numpy.load(fpath)["observer_vp"][0, :]
    cat = csiborgtools.read.CSiBORGHaloCatalogue(
        nsim, paths, "halo_catalogue", "FOF", bounds={"dist": (0, 155.5)},
        observer_velocity=vpec_observer)

    if kind == "halos":
        return cat["cartesian_pos"]

    if kind == "halos_rsp":
        return cat["cartesian_redshift_pos"]

    raise ValueError(f"Unknown kind `{kind}`. Allowed values are: "
                     "`particles`, `particles_rsp`, `halos`, `halos_rsp`.")

def main(args, paths):
    boxsize = 677.7 if "particles" in args.kind else None
    pos = extract_positions(args.nsim, args.simname, paths, args.kind)
    output_filename = paths.ascii_positions(args.nsim, args.kind)
    positions_to_ascii(pos, output_filename, boxsize=boxsize)
@ -28,6 +28,16 @@ from taskmaster import work_delegation
import csiborgtools
from utils import get_nsims

###############################################################################
#                  Cosmotool SPH density & velocity field                     #
###############################################################################

def cosmotool_sph(nsim, parser_args):
    pass

###############################################################################
#                               Density field                                 #
###############################################################################

@ -40,13 +50,15 @@ def density_field(nsim, parser_args, to_save=True):
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsnap = max(paths.get_snapshots(nsim, "csiborg"))
    box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
    fname = paths.processed_output(nsim, "csiborg", "halo_catalogue")

    if not parser_args.in_rsp:
        snap = csiborgtools.read.read_h5(fname)["snapshot_final"]
        pos = snap["pos"]
        mass = snap["mass"]

        gen = csiborgtools.field.DensityField(box, parser_args.MAS)
        field = gen(pos, mass, parser_args.grid, verbose=parser_args.verbose)
    else:
        field = numpy.load(paths.field(
            "density", parser_args.MAS, parser_args.grid, nsim, False))
@ -83,12 +95,15 @@ def velocity_field(nsim, parser_args, to_save=True):
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsnap = max(paths.get_snapshots(nsim, "csiborg"))
    box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
    fname = paths.processed_output(nsim, "csiborg", "halo_catalogue")

    snap = csiborgtools.read.read_h5(fname)["snapshot_final"]
    pos = snap["pos"]
    vel = snap["vel"]
    mass = snap["mass"]

    gen = csiborgtools.field.VelocityField(box, parser_args.MAS)
    field = gen(pos, vel, mass, parser_args.grid, verbose=parser_args.verbose)

    if to_save:
        fout = paths.field("velocity", parser_args.MAS, parser_args.grid,
@ -247,6 +262,7 @@ if __name__ == "__main__":
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                        help="Verbosity flag for reading in particles.")
    parser.add_argument("--simname", type=str, default="csiborg",
                        choices=["csiborg", "csiborg2"],
                        help="Simulation name.")
    parser_args = parser.parse_args()
    comm = MPI.COMM_WORLD
@ -53,12 +53,20 @@ def open_galaxy_positions(survey_name, comm):
    if rank == 0:
        if survey_name == "SDSS":
            survey = csiborgtools.SDSS()()
            pos = numpy.vstack([survey["DIST_UNCORRECTED"],
                                survey["RA"],
                                survey["DEC"]],
                               ).T
            pos = pos.astype(numpy.float32)
            indxs = survey["INDEX"]
        if survey_name == "SDSSxALFALFA":
            survey = csiborgtools.SDSSxALFALFA()()
            pos = numpy.vstack([survey["DIST_UNCORRECTED"],
                                survey["RA_1"],
                                survey["DEC_1"]],
                               ).T
            pos = pos.astype(numpy.float32)
            indxs = survey["INDEX"]
        elif survey_name == "GW170817":
            samples = File("/mnt/extraspace/rstiskalek/GWLSS/H1L1V1-EXTRACT_POSTERIOR_GW170817-1187008600-400.hdf", 'r')["samples"]  # noqa
@ -110,7 +118,7 @@ def evaluate_field(field, pos, nrand, smooth_scales=None, seed=42,
            field_smoothed = csiborgtools.field.smoothen_field(
                field, scale * MPC2BOX, boxsize=1, make_copy=True)
        else:
            field_smoothed = numpy.copy(field)

        val[:, i] = csiborgtools.field.evaluate_sky(
            field_smoothed, pos=pos, mpc2box=MPC2BOX)
@ -164,7 +172,7 @@ if __name__ == "__main__":
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="IC realisations. If `-1` processes all.")
    parser.add_argument("--survey", type=str, required=True,
                        choices=["SDSS", "SDSSxALFALFA", "GW170817"],
                        help="Galaxy survey")
    parser.add_argument("--smooth_scales", type=float, nargs="+", default=None,
                        help="Smoothing scales in Mpc / h.")
@ -189,12 +197,6 @@ if __name__ == "__main__":
    pos, indxs = open_galaxy_positions(args.survey, MPI.COMM_WORLD)

    if MPI.COMM_WORLD.Get_rank() == 0 and args.survey != "GW170817":
        fout = f"/mnt/extraspace/rstiskalek/CSiBORG/ascii_positions/{args.survey}_positions.npz"  # noqa
        pos = csiborgtools.utils.radec_to_cartesian(pos) + 677.7 / 2
        print(f"Saving to ... `{fout}`.")
        numpy.savez(fout, pos=pos, indxs=indxs)

    def _main(nsim):
        main(nsim, args, pos, indxs, paths,
             verbose=MPI.COMM_WORLD.Get_size() == 1)
@ -1,118 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass, Lagrangian patch size in the
initial snapshot.
The initial snapshot particles are read from the sorted files.
"""
from argparse import ArgumentParser
from datetime import datetime
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import tqdm
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def _main(nsim, simname, verbose):
"""
Calculate the Lagrangian halo centre of mass and Lagrangian patch size in
the initial snapshot.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cols = [("index", numpy.int32),
("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("lagpatch_size", numpy.float32),
("lagpatch_ncells", numpy.int32),]
fname = paths.initmatch(nsim, simname, "particles")
parts = csiborgtools.read.read_h5(fname)
parts = parts['particles']
halo_map = csiborgtools.read.read_h5(paths.particles(nsim, simname))
halo_map = halo_map["halomap"]
if simname == "csiborg":
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, bounds=None, load_fitted=False, load_initial=False)
else:
cat = csiborgtools.read.QuijoteHaloCatalogue(
nsim, paths, nsnap=4, load_fitted=False, load_initial=False)
hid2map = {hid: i for i, hid in enumerate(halo_map[:, 0])}
# Initialise the overlapper.
if simname == "csiborg":
kwargs = {"box_size": 2048, "bckg_halfsize": 512}
else:
kwargs = {"box_size": 512, "bckg_halfsize": 256}
overlapper = csiborgtools.match.ParticleOverlap(**kwargs)
out = csiborgtools.read.cols_to_structured(len(cat), cols)
for i, hid in enumerate(tqdm(cat["index"]) if verbose else cat["index"]):
out["index"][i] = hid
part = csiborgtools.read.load_halo_particles(hid, parts, halo_map,
hid2map)
# Skip if the halo has no particles or is too small.
if part is None or part.size < 40:
continue
pos, mass = part[:, :3], part[:, 3]
# Calculate the centre of mass and the Lagrangian patch size.
cm = csiborgtools.center_of_mass(pos, mass, boxsize=1.0)
distances = csiborgtools.periodic_distance(pos, cm, boxsize=1.0)
out["x"][i], out["y"][i], out["z"][i] = cm
out["lagpatch_size"][i] = numpy.percentile(distances, 99)
# Calculate the number of cells with > 0 density.
delta = overlapper.make_delta(pos, mass, subbox=True)
out["lagpatch_ncells"][i] = csiborgtools.delta2ncells(delta)
# Now save it
fout = paths.initmatch(nsim, simname, "fit")
if verbose:
print(f"{datetime.now()}: dumping fits to .. `{fout}`.", flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def main(nsim):
_main(nsim, args.simname, MPI.COMM_WORLD.Get_size() == 1)
work_delegation(main, nsims, MPI.COMM_WORLD)
@ -69,7 +69,7 @@ def pair_match_max(nsim0, nsimx, simname, min_logmass, mult, verbose):
raise ValueError(f"Unknown simulation `{simname}`.") raise ValueError(f"Unknown simulation `{simname}`.")
reader = csiborgtools.summary.PairOverlap(cat0, catx, paths, min_logmass, reader = csiborgtools.summary.PairOverlap(cat0, catx, paths, min_logmass,
maxdist=maxdist) maxdist=maxdist)
out = csiborgtools.match.matching_max( out = csiborgtools.match.matching_max(
cat0, catx, mass_kind, mult=mult, periodic=periodic, cat0, catx, mass_kind, mult=mult, periodic=periodic,
overlap=reader.overlap(from_smoothed=True), overlap=reader.overlap(from_smoothed=True),
@ -106,54 +106,36 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
""" """
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
smooth_kwargs = {"sigma": sigma, "mode": "constant", "cval": 0} smooth_kwargs = {"sigma": sigma, "mode": "constant", "cval": 0}
bounds = {"lagpatch_size": (0, None)}
if simname == "csiborg": if simname == "csiborg":
overlapper_kwargs = {"box_size": 2048, "bckg_halfsize": 512} overlapper_kwargs = {"box_size": 2048, "bckg_halfsize": 512}
mass_kind = "fof_totpartmass" mass_kind = "fof_totpartmass"
bounds = {"dist": (0, 155), mass_kind: (10**min_logmass, None)} bounds |= {"dist": (0, 155), mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.CSiBORGCatalogue(
cat0 = csiborgtools.read.CSiBORGHaloCatalogue( nsim0, paths, "halo_catalogue", "FOF", mass_kind, bounds)
nsim0, paths, bounds=bounds, load_fitted=False, catx = csiborgtools.read.CSiBORGCatalogue(
with_lagpatch=True) nsimx, paths, "halo_catalogue", "FOF", mass_kind, bounds)
catx = csiborgtools.read.CSiBORGHaloCatalogue(
nsimx, paths, bounds=bounds, load_fitted=False,
with_lagpatch=True)
elif simname == "quijote": elif simname == "quijote":
overlapper_kwargs = {"box_size": 512, "bckg_halfsize": 256} overlapper_kwargs = {"box_size": 512, "bckg_halfsize": 256}
mass_kind = "group_mass" mass_kind = "group_mass"
bounds = {mass_kind: (10**min_logmass, None)} bounds |= {mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.QuijoteHaloCatalogue( cat0 = csiborgtools.read.QuijoteCatalogue(
nsim0, paths, 4, bounds=bounds, load_fitted=False, nsim0, paths, "halo_catalogue", "FOF", mass_kind, bounds=bounds)
with_lagpatch=True) catx = csiborgtools.read.QuijoteCatalogue(
catx = csiborgtools.read.QuijoteHaloCatalogue( nsimx, paths, "halo_catalogue", "FOF", mass_kind, bounds=bounds)
nsimx, paths, 4, bounds=bounds, load_fitted=False,
with_lagpatch=True)
else: else:
raise ValueError(f"Unknown simulation name: `{simname}`.") raise ValueError(f"Unknown simulation name: `{simname}`.")
halomap0 = csiborgtools.read.read_h5(
paths.particles(nsim0, simname))["halomap"]
parts0 = csiborgtools.read.read_h5(
paths.initmatch(nsim0, simname, "particles"))["particles"]
hid2map0 = {hid: i for i, hid in enumerate(halomap0[:, 0])}
halomapx = csiborgtools.read.read_h5(
paths.particles(nsimx, simname))["halomap"]
partsx = csiborgtools.read.read_h5(
paths.initmatch(nsimx, simname, "particles"))["particles"]
hid2mapx = {hid: i for i, hid in enumerate(halomapx[:, 0])}
overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs) overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs)
delta_bckg = overlapper.make_bckg_delta(parts0, halomap0, hid2map0, cat0, delta_bckg = overlapper.make_bckg_delta(cat0, verbose=verbose)
delta_bckg = overlapper.make_bckg_delta(catx, delta=delta_bckg,
verbose=verbose) verbose=verbose)
delta_bckg = overlapper.make_bckg_delta(partsx, halomapx, hid2mapx, catx,
delta=delta_bckg, verbose=verbose)
matcher = csiborgtools.match.RealisationsMatcher( matcher = csiborgtools.match.RealisationsMatcher(mass_kind=mass_kind,
mass_kind=mass_kind, **overlapper_kwargs) **overlapper_kwargs)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx, match_indxs, ngp_overlap = matcher.cross(cat0, catx, delta_bckg,
halomap0, halomapx, delta_bckg,
verbose=verbose) verbose=verbose)
# We want to store the halo IDs of the matches, not their array positions # We want to store the halo IDs of the matches, not their array positions
@ -177,8 +159,7 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs) gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0. # We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx, smoothed_overlap = matcher.smoothed_cross(cat0, catx, delta_bckg,
halomap0, halomapx, delta_bckg,
match_indxs, smooth_kwargs, match_indxs, smooth_kwargs,
verbose=verbose) verbose=verbose)
@ -0,0 +1,979 @@
# Copyright (C) 2023 Mladen Ivkovic, Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import copy
import os
from os.path import exists, join
from os import makedirs
from sys import argv
from datetime import datetime
import numpy as np
from joblib import dump, load
from tqdm import trange
errmsg = """
------------------------------------
mergertree-extract.py
------------------------------------
---------------
Usage
---------------
This script extracts the masses of clumps and haloes written by the mergertree
patch.
It needs output_XXXXX/mergertree_XXXXX.txtYYYYY and
output_XXXXX/clump_XXXXX.txtYYYYY files to work.
You need to run it from the directory where the output_XXXXX directories are
in.
There are three working modes defined:
1) do for one clump only.
You need to provide the clump ID you want it done for.
You can provide a starting directory, but by default the script will
search for the directory where z = 0.
run with `python3 mergertree-extract.py <clumpid> [--options] `
this creates the file mergertree_XXXXX_halo-<halo-ID>.txt. Its contents are
discussed below.
2) do for one halo.
You need to provide the halo ID you want it done for, and the flag
-c or --children.
The script will by itself find all the child clumps and walk through
their main branches as well, and write them down.
run with `python3 mergertree-extract.py <haloid> -c [--options]`
or `python3 mergertree-extract.py <haloid> --children [--options]`
this creates the following files:
- halo_hierarchy_XXXXX-<halo-ID>.txt
contains the halo ID, how many children it has, and the children
IDs
- mergertree_XXXXX_halo-<halo-ID>.txt
mergertree data for halo that you chose.
- mergertree_XXXXX_subhalo-<child-ID>.txt
mergertree data for subhalos of the halo you chose. One file will
be created for each subhalo.
The contents of the mergertree_XXXXX* files are discussed below.
3) do for all haloes
The script will simply walk all haloes in the z = 0 directory. Note:
Haloes, not clumps!
run with `python3 mergertree-extract.py -a [--options]`
or `python3 mergertree-extract.py --all [--options]`
This will create the same type of files as in mode (2), just for all
haloes.
If only an integer is given as cmdline arg, mode (1) [one clump only] will be
run. If no cmd line argument is given, mode (3) [--all] will be run.
---------------
Output
---------------
the mergertree_XXXXX* files have 6 columns:
snapshot The snapshot from which this data is taken from
redshift The redshift of that snapshot
clump_ID The clump ID of the clump at that snapshot
mass The mass of the clump at that snapshot, based on what's in
the output_XXXXX/mergertree_XXXXX.txtYYYYY files, not the
output_XXXXX/clump_XXXXX.txtYYYYY files.
mass_from_mergers how much mass has been merged into this clump in this
snapshot, i.e. the sum of all the clump masses that have
been found to merge with this clump at this snapshot. This
does not include the mass of clumps which only seem to
merge with this clump, but re-emerge later.
mass_from_jumpers The mass of all clumps that seem to merge with this clump,
but re-emerge at a later time.
----------------
Options
----------------
List of all flags:
Running modes
-a, --all: make trees for all clumps in output where z = 0
-c --children: make trees for a halo and all its subhaloes. You need to
specify which halo via its halo ID.
-h, --help: print this help and exit.
Options:
--start-at=INT don't start at z = 0 snapshot, but with the specified
directory output_00INT.
--prefix=some/path/ path where you want your output written to.
-v, --verbose: be more verbose about what you're doing
-----------------
Requirements
-----------------
It needs output_XXXXX/mergertree_XXXXX.txtYYYYY and
output_XXXXX/clump_XXXXX.txtYYYYY files to work, which are created using the
mergertree patch in ramses.
Also needs numpy.
"""
###############################################################################
# Clump data #
###############################################################################
class ClumpData:
"""
Data from clump_XXXXX.txt
Parameters
----------
par : params object
"""
def __init__(self, par):
self.clumpids = np.zeros(1) # clump ID
self.parent = np.zeros(1) # parent ID
self.level = np.zeros(1) # clump level
def read_clumpdata(self, par):
"""Reads in the clump data for the z = 0 directory."""
if par.verbose:
print("Reading clump data.")
        out = par.z0
raw_data = [None for i in range(par.ncpu)]
dirnrstr = str(par.outputnrs[out]).zfill(5)
dirname = 'output_' + dirnrstr
i = 0
        for cpu in range(1):  # NOTE: only the first clump file is read here
fname = join(par.workdir, dirname, 'clump_' + dirnrstr + '.dat')
new_data = np.loadtxt(fname, dtype='int', skiprows=1,
usecols=[0, 1, 2])
if new_data.ndim == 2:
raw_data[i] = new_data
i += 1
elif new_data.shape[0] == 3: # if only 1 row is present in file
raw_data[i] = np.atleast_2d(new_data)
i += 1
fulldata = np.concatenate(raw_data[:i], axis=0)
self.clumpids = fulldata[:, 0]
self.level = fulldata[:, 1]
self.parent = fulldata[:, 2]
def cleanup_clumpdata(self, par, mtd):
"""
The particle unbinding can remove entire clumps from the catalogue.
If the option isn't set in the namelist, the clumpfinder output will
still be made not based on the clumpfinder. If that is the case, the
clumpfinder catalogue will contain clumps which the mergertree data
doesn't have, leading to problems. So remove those here.
"""
for i, c in enumerate(self.clumpids):
if c not in mtd.descendants[par.z0]:
self.clumpids[i] = 0
self.level[i] = 0
self.parent[i] = -1 # don't make it the same as clumpid
def find_children(self, clumpid):
"""Find the children for given clump ID."""
children = []
last_added = [clumpid]
loopcounter = 0
while True:
loopcounter += 1
this_level_parents = copy.copy(last_added)
children += this_level_parents
last_added = []
for i, cid in enumerate(self.clumpids):
if self.parent[i] in this_level_parents and cid != clumpid:
last_added.append(cid)
if len(last_added) == 0:
break
if loopcounter == 100:
print("Finished 100 iterations, we shouldn't be this deep")
break
return children[1:] # don't return top level parent
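    # Breadth-first walk over the parent pointers above, with a toy hierarchy
    # (halo 1 hosts clumps 2 and 3; clump 4 sits inside 2; top-level clumps
    # pointing to themselves is an assumption of this sketch):
    #
    # >>> cd.clumpids = np.array([1, 2, 3, 4])
    # >>> cd.parent = np.array([1, 1, 1, 2])
    # >>> cd.find_children(1)
    # [2, 3, 4]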
def write_children(self, par, clumpid, children):
"""Write the children to file."""
hfile = join(par.outdir, f"{par.halofilename}-{str(clumpid)}.txt")
with open(hfile, 'w') as f:
f.write("# {0:>18} {1:>18} {2:>18}\n".format("halo", "nr_of_children", "children")) # noqa
nc = len(children)
dumpstring = " {0:18d} {1:18d}".format(clumpid, nc)
dumpstring = "".join([dumpstring] + [" {0:18d}".format(c) for c in children] + ['\n']) # noqa
f.write(dumpstring)
###############################################################################
# Constants object #
###############################################################################
class Constants:
"""
Class holding constants.
"""
def __init__(self):
self.Mpc = 3.086e24 # cm
self.M_Sol = 1.98855e33 # g
self.Gyr = (24 * 3600 * 365 * 1e9) # s
self.G = 4.492e-15 # Mpc^3/(M_sol Gyr^2)
self.H0 = 100 # km/s/Mpc
self.omega_m = 0.307000011205673
self.omega_l = 0.693000018596649
self.omega_k = 0.0
self.omega_b = 0.0
###############################################################################
# Params object #
###############################################################################
class Params:
"""
Global parameters to be stored
"""
def __init__(self):
# self.workdir = f"/mnt/extraspace/hdesmond/ramses_out_{self.nsim}"
# self.outdir = f"/mnt/extraspace/rstiskalek/CSiBORG/cleaned_mtree/ramses_out_{self.nsim}" # noqa
# if not exists(self.outdir):
# makedirs(self.outdir)
self.lastdir = "" # last output_XXXXX directory
self.lastdirnr = -1 # XXXX from lastdir
self.ncpu = 1 # Number of CPUs used
self.noutput = 1 # how many output_XXXXX dirs exist
self.nout = 1 # how many outputs we're gonna deal with. (Some might not have merger tree data) # noqa
self.outputnrs = None # numpy array of output numbers
self.output_lowest = 0 # lowest snapshot number that we're dealing with (>= 1) # noqa
self.z0 = 0 # index of z=0 snapshot (or whichever you want to start with) # noqa
# NOTE: params.nout will be defined such that you can easily loop
self.verbose = False # verbosity
self.start_at = 0 # output dir to start with, if given
self.output_prefix = "" # user given prefix for output files
self.outputfilename = "" # output filename. Stores prefix/mergertree_XXXXX part of name only # noqa
self.halofilename = "" # output filename for halo hierarchy. Stores prefix/halo_hierarchy_XXXXX part of filename only # noqa
self.one_halo_only = False # do the tree for one clump only
self.halo_and_children = False # do the tree for one halo, including subhaloes # noqa
self.do_all = False # do for all clumps at z=0 output
self.clumpid = 0 # which clump ID to work for.
self.nsim = None
        # Dictionary of accepted keyword command line arguments
self.accepted_flags = {
'-a': self.set_do_all,
'--all': self.set_do_all,
'-r': self.set_halo_and_children,
'--recursive': self.set_halo_and_children,
'-c': self.set_halo_and_children,
'--children': self.set_halo_and_children,
'-h': self.get_help,
'--help': self.get_help,
'-v': self.set_verbose,
'--verbose': self.set_verbose,
}
self.accepted_flags_with_args = {
"--nsim": self.set_nsim,
'--start-at': self.set_startnr,
'--prefix': self.set_prefix,
}
# -----------------------------
# Setter methods
# -----------------------------
def set_do_all(self):
self.do_all = True
return
def set_halo_and_children(self):
self.halo_and_children = True
return
def get_help(self):
print(errmsg)
quit()
return
def set_verbose(self):
self.verbose = True
return
def set_startnr(self, arg):
flag, startnr = arg.split("=")
try:
self.start_at = int(startnr)
except ValueError:
print("given value for --start-at=INT isn't an integer?")
def set_prefix(self, arg):
flag, prefix = arg.split("=")
# try:
self.output_prefix = prefix
try:
os.makedirs(self.output_prefix)
except FileExistsError:
pass
return
def set_nsim(self, arg):
flag, nsim = arg.split("=")
try:
self.nsim = int(nsim)
except ValueError:
print("given value for --nsim=INT isn't an integer?")
def read_cmdlineargs(self):
"""
Reads in the command line arguments and store them in the
global_params object.
"""
nargs = len(argv)
i = 1 # first cmdlinearg is filename of this file, so skip it
while i < nargs:
arg = argv[i]
arg = arg.strip()
if arg in self.accepted_flags.keys():
self.accepted_flags[arg]()
else:
for key in self.accepted_flags_with_args.keys():
if arg.startswith(key):
self.accepted_flags_with_args[key](arg)
break
else:
try:
self.clumpid = int(arg)
except ValueError:
print(f"I didn't recognize the argument '{arg}'. Use "
"mergertre-extract.py -h or --help to print "
"help message.")
quit()
i += 1
if self.nsim is None:
raise ValueError("nsim not set. Use --nsim=INT to set it.")
@property
def workdir(self):
return f"/mnt/extraspace/hdesmond/ramses_out_{self.nsim}"
@property
def outdir(self):
fname = f"/mnt/extraspace/rstiskalek/CSiBORG/cleaned_mtree/ramses_out_{self.nsim}" # noqa
if not exists(fname):
makedirs(fname)
return fname
def get_output_info(self):
"""
Read in the output info based on the files in the working directory:
the last output directory, ncpu and the number of outputs. Doesn't
read the info files.
"""
filelist = os.listdir(self.workdir)
outputlist = []
for filename in filelist:
if filename.startswith('output_'):
outputlist.append(filename)
if len(outputlist) < 1:
print("I didn't find any output_XXXXX directories in current "
"working directory. Are you in the correct workdir? "
"Use mergertree-extract.py -h or --help to print help "
"message.")
quit()
outputlist.sort()
self.lastdir = outputlist[-1]
self.lastdirnr = int(self.lastdir[-5:])
self.noutput = len(outputlist)
if self.start_at > 0:
# check that directory exists
startnrstr = str(self.start_at).zfill(5)
if 'output_' + startnrstr not in outputlist:
print("Didn't find specified starting directory "
f"output_{startnrstr}. Use mergertree-extract.py -h or "
"--help to print the help message.")
quit()
# read ncpu from infofile in last output directory
infofile = join(self.workdir, self.lastdir,
f"info_{self.lastdir[-5:]}.txt")
with open(infofile, 'r') as f:
ncpuline = f.readline()
line = ncpuline.split()
self.ncpu = int(line[-1])
def setup_and_checks(self, sd):
"""
Perform checks and additional setup once all command line arguments
and output info have been read.
Parameters
----------
sd: snapshotdata object
# set running mode
if not self.do_all:
if self.clumpid <= 0:
print("No or wrong clump id given. Setting the --all mode.")
self.set_do_all()
else:
if not self.halo_and_children:
self.one_halo_only = True
# generate list of outputdirnumbers
startnr = self.lastdirnr
self.outputnrs = np.array(range(startnr, startnr - self.noutput, -1))
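# e.g. (hypothetical numbers) lastdirnr = 60 and noutput = 60 give
# outputnrs = [60, 59, ..., 1]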
# find starting output directory
self.z0 = np.argmin(np.absolute(sd.redshift))
if self.start_at > 0:
# replace z0 dir with starting dir
self.z0 = self.lastdirnr - self.start_at
# generate output filename
dirnrstr = str(self.outputnrs[self.z0]).zfill(5)
fname = "mergertree_" + dirnrstr
self.outputfilename = join(self.output_prefix, fname)
# generate halo output filename
fname = "halo_hierarchy_" + dirnrstr
self.halofilename = join(self.output_prefix, fname)
# rename output_prefix to something if it wasn't set
if self.output_prefix == "":
self.output_prefix = os.path.relpath(self.workdir)
# find self.nout; i.e. how many outputs we are actually going to have
for out in range(self.noutput - 1, -1, -1):
dirnrstr = str(self.outputnrs[out]).zfill(5)
mtreefile = join(self.workdir,
f"output_{dirnrstr}",
f"mergertree_{dirnrstr}.dat")
if os.path.exists(mtreefile):
print("Loading mergertree data from ", mtreefile)
# if there is a file, this is lowest snapshot number directory
# that we'll be dealing with, and hence will have the highest
# index number in the arrays I'm using
# NOTE: params.nout will be defined such that you can easily
# loop for out in range(p.z0, p.nout)
self.nout = out + 1
break
def print_params(self):
"""Prints out the parameters that are set."""
if self.do_all:
print("Working mode: all clumps")
else:
if self.halo_and_children:
print("Working mode: halo", self.clumpid, "and its children")
else:
print("Working mode: clump", self.clumpid)
print("workdir:               ", self.workdir)
print("snapshot of tree root: ", self.outputnrs[self.z0])
print("do_all:                ", self.do_all)
print("halo_and_children:     ", self.halo_and_children)
print("one_halo_only:         ", self.one_halo_only)
###############################################################################
# Merger tree data #
###############################################################################
class MTreeData:
"""
Merger tree data lists
Parameters
----------
par : params object
"""
def __init__(self, par):
self.progenitors = [np.zeros(1) for i in range(par.noutput)]  # progenitor IDs  # noqa
self.descendants = [np.zeros(1) for i in range(par.noutput)]  # descendant IDs  # noqa
self.progenitor_outputnrs = [np.zeros(1) for i in range(par.noutput)]  # snapshot number of progenitor  # noqa
self.mass = [np.zeros(1) for i in range(par.noutput)]  # descendant mass  # noqa
self.mass_to_remove = [np.zeros(1) for i in range(par.noutput)]  # mass to subtract due to cleaned-out jumpers  # noqa
def read_mergertree_data(self, par, sd):
"""Reads in mergertree data."""
if par.verbose:
print("Reading in mergertree data")
# Preparation
# define new datatype for mergertree output
mtree = np.dtype([('clump', 'i4'),
('prog', 'i4'),
('prog_outnr', 'i4'),
('mass', 'f8'),
('npart', 'f8'),
('x', 'f8'),
('y', 'f8'),
('z', 'f8'),
('vx', 'f8'),
('vy', 'f8'),
('vz', 'f8')
])
# ---------------------------
# Loop over directories
# ---------------------------
startnr = par.lastdirnr
# READ THE ONES BEFORE z0 TOO!
for output in trange(par.nout, desc="Reading merger"):
dirnr = str(startnr - output).zfill(5)
srcdir = 'output_' + dirnr
fnames = [join(par.workdir, srcdir, f"mergertree_{dirnr}.dat")]
datalist = [np.zeros((1, 3)) for i in range(par.ncpu)]
i = 0
nofile = 0
for f in fnames:
if os.path.exists(f):
datalist[i] = np.atleast_1d(np.genfromtxt(f, dtype=mtree,
skip_header=1))
i += 1
else:
nofile += 1
if nofile == len(fnames):
print("Didn't find any mergertree data in", srcdir)
# ---------------------------------
# Sort out data
# ---------------------------------
if i > 0:
fulldata = np.concatenate(datalist[:i], axis=0)
self.descendants[output] = fulldata[:]['clump']
self.progenitors[output] = fulldata[:]['prog']
self.progenitor_outputnrs[output] = fulldata[:]['prog_outnr']
self.mass[output] = fulldata[:]['mass']
# self.npart[output] = fulldata[:]['npart']
# self.x[output] = fulldata[:]['x']
# self.y[output] = fulldata[:]['y']
# self.z[output] = fulldata[:]['z']
# self.vx[output] = fulldata[:]['vx']
# self.vy[output] = fulldata[:]['vy']
# self.vz[output] = fulldata[:]['vz']
# --------------------------------------
# Transform units to physical units
# --------------------------------------
for i in range(len(self.descendants)):
self.mass[i] *= sd.unit_m[i]
# self.x[i] *= sd.unit_l[i] # only transform later when needed; Need to check for periodicity first! # noqa
# self.y[i] *= sd.unit_l[i]
# self.z[i] *= sd.unit_l[i]
# self.vx[i] *= sd.unit_l[i]/sd.unit_t[i]
# self.vy[i] *= sd.unit_l[i]/sd.unit_t[i]
# self.vz[i] *= sd.unit_l[i]/sd.unit_t[i]
def clean_up_jumpers(self, par):
"""
Remove jumpers from the merger list, and take note of how much mass
should be subtracted from the descendant that each removed jumper
appeared to merge into.
"""
# First initialize mass_to_remove arrays
self.mass_to_remove = [np.zeros(self.descendants[out].shape)
for out in range(par.noutput)]
nreplaced = 0
for out in trange(par.nout + par.z0 - 1, desc="Cleaning jumpers"):
for i, pr in enumerate(self.progenitors[out]):
if pr < 0:
# Subtract 1 here from snapind:
# progenitor_outputnrs gives the snapshot number where the
# jumper was a descendant for the last time
# so you need to overwrite the merging one snapshot later,
# where the clump is the progenitor
snapind = get_snap_ind(par, self.progenitor_outputnrs[out][i]) - 1  # noqa
# NOTE bottleneck
jumpind = self.progenitors[snapind] == -pr
# NOTE bottleneck
# find index of descendant into which this clump will
# appearingly merge into
mergerind = self.descendants[snapind] == - self.descendants[snapind][jumpind] # noqa
# overwrite merging event so it won't count
self.descendants[snapind][jumpind] = 0
# find mass of jumper in previous snapshot
jumpmassind = self.descendants[snapind + 1] == -pr
# note how much mass might need to be removed for whatever
# you need it
self.mass_to_remove[snapind][mergerind] += self.mass[snapind + 1][jumpmassind] # noqa
nreplaced += 1
print("Cleaned out", nreplaced, "jumpers")
def get_tree(self, par, tree, sd, clumpid):
"""Follow the main branch down."""
if par.verbose:
print("Computing tree for clump", clumpid)
dind = self.descendants[par.z0] == clumpid
desc_snap_ind = par.z0
desc = self.descendants[par.z0][dind]
prog = self.progenitors[par.z0][dind]
def get_prog_indices(prog, desc_snap_ind):
"""
Compute the snapshot index at which a given progenitor was a
descendant, and its index within that snapshot's arrays.
Parameters
----------
prog : progenitor ID (positive, or negative for a jumper)
desc_snap_ind : snapshot index of the descendant of `prog`
Returns
-------
p_snap_ind : snapshot index of the progenitor
pind : boolean mask locating the progenitor in the arrays where
it appears as a descendant
"""
if prog > 0: # if progenitor isn't jumper
# find progenitor's index in previous snapshot
p_snap_ind = desc_snap_ind + 1
pind = self.descendants[p_snap_ind] == prog
elif prog < 0:
p_snap_ind = get_snap_ind(
par, self.progenitor_outputnrs[desc_snap_ind][dind])
pind = self.descendants[p_snap_ind] == -prog
return p_snap_ind, pind
while True:
# first calculate merger mass
mergers = self.descendants[desc_snap_ind] == -desc
mergermass = 0.0
if mergers.any():
for m in self.progenitors[desc_snap_ind][mergers]:
# find mass of merger. That's been written down at the
# place where merger was descendant.
m_snap_ind, mergerind = get_prog_indices(m, desc_snap_ind)
mergermass += self.mass[m_snap_ind][mergerind]
# add the descendant to the tree
tree.add_snap(par.outputnrs[desc_snap_ind],
sd.redshift[desc_snap_ind], desc,
self.mass[desc_snap_ind][dind], mergermass,
self.mass_to_remove[desc_snap_ind][dind])
# now descend down the main branch
if prog != 0:
p_snap_ind, pind = get_prog_indices(prog, desc_snap_ind)
else:
# stop at progenitor = 0
break
# prepare for next round
desc_snap_ind = p_snap_ind
dind = pind
desc = abs(prog)
prog = self.progenitors[p_snap_ind][pind]
###############################################################################
# Snapshot data #
###############################################################################
class SnapshotData:
"""Snapshot-specific data."""
def __init__(self, par):
# read in
self.aexp = np.zeros(par.noutput)
self.unit_l = np.zeros(par.noutput)
self.unit_m = np.zeros(par.noutput)
self.unit_t = np.zeros(par.noutput)
self.unit_dens = np.zeros(par.noutput)
# to be computed
self.redshift = np.zeros(par.noutput) # z
def read_infofiles(self, par, const):
"""Read the info_XXXXX.txt files."""
if par.verbose:
print("Reading info files.")
startnr = par.lastdirnr
for output in range(par.noutput):
# Start with the last directory (e.g. output_00060) and work your
# way to the first one (e.g. output_00001). par.z0 isn't decided
# yet, so just read in everything here.
dirnr = str(startnr - output).zfill(5)
srcdir = 'output_' + dirnr
try:
# ------------------------------------------------------
# get time, redshift, and units even for output_00001
# ------------------------------------------------------
fileloc = join(par.workdir, srcdir, 'info_' + dirnr + '.txt')
with open(fileloc) as infofile:
for i in range(9):
infofile.readline()  # skip first 9 lines
# get expansion factor
aline = infofile.readline()
astring, equal, aval = aline.partition("=")
self.aexp[output] = float(aval)
for i in range(5):
infofile.readline()  # skip 5 lines
# get unit_l
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
self.unit_l[output] = float(unitval)
# get unit_dens
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
self.unit_dens[output] = float(unitval)
# get unit_t
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
self.unit_t[output] = float(unitval)
except IOError:  # if the file doesn't exist
print("Didn't find any info data in", srcdir)
break
self.unit_m = self.unit_dens * self.unit_l ** 3 / const.M_Sol
self.unit_l /= const.Mpc
self.unit_t /= const.Gyr
self.redshift = 1. / self.aexp - 1
###############################################################################
# Tree object #
###############################################################################
class Tree:
"""
Holds the tree results. Strictly speaking it stores only the values along
the main branch, but we call it a tree anyway.
Parameters
----------
nelements : int
Estimate of how many snapshots to allocate space for.
"""
def __init__(self, nelements):
self.n = 0  # number of elements in the tree  # noqa
self.snapshotnr = -np.ones(nelements, dtype=int)  # snapshot number of array values  # noqa
self.redshift = -np.ones(nelements, dtype=float)  # redshift at that snapshot  # noqa
self.clumpids = -np.ones(nelements, dtype=int)  # clump ID of the halo in that snapshot  # noqa
self.mass = np.zeros(nelements, dtype=float)  # mass at that snapshot  # noqa
self.mergermass = np.zeros(nelements, dtype=float)  # sum of the masses of swallowed-up clumps  # noqa
self.mass_to_remove = np.zeros(nelements, dtype=float)  # sum of the masses of cleaned-out jumpers  # noqa
def add_snap(self, nr, z, ID, m, mm, mdel):
"""Add new result."""
n = self.n
self.snapshotnr[n] = nr
self.redshift[n] = z
self.clumpids[n] = ID
self.mass[n] = m
self.mergermass[n] = mm
self.mass_to_remove[n] = mdel
self.n += 1
def write_tree(self, par, case='halo'):
"""Write the results to file."""
resfile = join(
par.outdir,
f"{par.outputfilename}_{case}-{str(self.clumpids[0])}.txt")
with open(resfile, 'w') as f:
f.write('# {0:>12} {1:>12} {2:>16} {3:>18} {4:>18} {5:>18}\n'.format( # noqa
"snapshot", "redshift", "clump_ID", "mass[M_sol]",
"mass_from_mergers", "mass_from_jumpers"))
for i in range(self.n):
f.write(' {0:12d} {1:12.4f} {2:16d} {3:18.6e} {4:18.6e} {5:18.6e}\n'.format( # noqa
self.snapshotnr[i], self.redshift[i], self.clumpids[i],
self.mass[i], self.mergermass[i], self.mass_to_remove[i]))
return
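# A reading sketch (not used by this script): the file written above is
# plain text, so e.g.
#   nr, z, cid, m, mm, mdel = np.loadtxt(resfile, unpack=True)
# recovers the six columns in the order given in the header.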
def get_snap_ind(p, snap):
"""
Compute the index of snapshot number `snap` in the
mtreedata/halodata/snapshotdata arrays. Assumes the outputs are numbered
contiguously from 1 up to p.noutput.
"""
return (p.noutput - snap).item()
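# Example of the mapping above (hypothetical numbers): with p.noutput = 60
# and directories output_00001 ... output_00060, snapshot 60 maps to
# index 0 and snapshot 58 to index 2.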
if __name__ == '__main__':
p = Params()
c = Constants()
# Read cmdlineargs, available output, get global parameters
p.read_cmdlineargs()
p.get_output_info()
sd = SnapshotData(p)
sd.read_infofiles(p, c)
# finish setup
p.setup_and_checks(sd)
p.print_params()
# now read in mergertree data
fname = join(p.outdir, "mtreedata.p")
if exists(fname):
print(f"{datetime.now()}: loading mergertree data from `{fname}`.",
flush=True)
mtd = load(fname)
print(f"{datetime.now()}: finished loading mergertree data from `{fname}`.", # noqa
flush=True)
else:
print("Generating mergertree data.", flush=True)
mtd = MTreeData(p)
mtd.read_mergertree_data(p, sd)
# clean up jumpers
mtd.clean_up_jumpers(p)
print("Saving mergertree data.", flush=True)
dump(mtd, fname)
# read in clump data if required
if p.do_all or p.halo_and_children:
cd = ClumpData(p)
cd.read_clumpdata(p)
# clean up halo catalogue
cd.cleanup_clumpdata(p, mtd)
# find children, and write them down
if p.verbose:
print("Searching for child clumps.")
if p.halo_and_children:
children = cd.find_children(p.clumpid)
cd.write_children(p, p.clumpid, children)
if p.do_all:
is_halo = cd.clumpids == cd.parent
childlist = [None for c in cd.clumpids[is_halo]]
for i, halo in enumerate(cd.clumpids[is_halo]):
children = cd.find_children(halo)
cd.write_children(p, halo, children)
childlist[i] = children
# finally, build the tree
if p.one_halo_only:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, p.clumpid)
newtree.write_tree(p, 'halo')
if p.halo_and_children:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, p.clumpid)
newtree.write_tree(p, 'halo')
for c in children:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, c)
newtree.write_tree(p, 'subhalo')
if p.do_all:
for i, halo in enumerate(cd.clumpids[is_halo]):
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, halo)
newtree.write_tree(p, 'halo')
for c in childlist[i]:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, c)
newtree.write_tree(p, 'subhalo')
print('Finished.')

scripts/process_snapshot.py Normal file

@ -0,0 +1,457 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to process simulation files and create a single HDF5 file, in which
particles are sorted by the particle halo IDs.
"""
from argparse import ArgumentParser
from gc import collect
import h5py
import numpy
from mpi4py import MPI
import csiborgtools
from csiborgtools import fprint
from numba import jit
from taskmaster import work_delegation
from tqdm import trange, tqdm
from utils import get_nsims
@jit(nopython=True, boundscheck=False)
def minmax_halo(hid, halo_ids, start_loop=0):
"""
Find the start and end index of a halo in a sorted array of halo IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, halo_ids.size):
n = halo_ids[i]
if n == hid:
if start is None:
start = i
end = i
elif n > hid:
break
return start, end
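# A minimal check of the search above (hypothetical data): in the sorted
# array [0, 0, 3, 3, 3, 7] halo 3 spans indices 2 to 4, so
#   minmax_halo(3, numpy.array([0, 0, 3, 3, 3, 7]))
# returns (2, 4).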
def process_snapshot(nsim, simname, halo_finder, verbose):
"""
Read in the snapshot particles, sort them by their halo ID and dump
into a HDF5 file. Stores the first and last index of each halo in the
particle array for fast slicing of the array to acces particles of a single
halo.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, simname))
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
box = None
desc = {"hid": f"Halo finder ID ({halo_finder})of the particle.",
"pos": "DM particle positions in box units.",
"vel": "DM particle velocity in km / s.",
"mass": "DM particle mass in Msun / h.",
"pid": "DM particle ID",
}
fname = paths.processed_output(nsim, simname, halo_finder)
fprint(f"loading HIDs of IC {nsim}.", verbose)
hids = partreader.read_halo_id(nsnap, nsim, halo_finder, verbose)
collect()
fprint(f"sorting HIDs of IC {nsim}.")
sort_indxs = numpy.argsort(hids)
with h5py.File(fname, "w") as f:
group = f.create_group("snapshot_final")
group.attrs["header"] = "Snapshot data at z = 0."
fprint("dumping halo IDs.", verbose)
dset = group.create_dataset("halo_ids", data=hids[sort_indxs])
dset.attrs["header"] = desc["hid"]
del hids
collect()
fprint("reading, sorting and dumping the snapshot particles.", verbose)
for kind in ["pos", "vel", "mass", "pid"]:
x = partreader.read_snapshot(nsnap, nsim, kind)[sort_indxs]
if simname == "csiborg" and kind == "vel":
x = box.box2vel(x) if simname == "csiborg" else x
if simname == "csiborg" and kind == "mass":
x = box.box2solarmass(x) if simname == "csiborg" else x
dset = f["snapshot_final"].create_dataset(kind, data=x)
dset.attrs["header"] = desc[kind]
del x
collect()
del sort_indxs
collect()
fprint(f"creating a halo map for IC {nsim}.")
with h5py.File(fname, "r") as f:
part_hids = f["snapshot_final"]["halo_ids"][:]
# We loop over the unique halo IDs and remove the 0 halo ID
unique_halo_ids = numpy.unique(part_hids)
unique_halo_ids = unique_halo_ids[unique_halo_ids != 0]
# use zeros, not NaN: NaN cannot be represented in an unsigned integer
# array (every row is overwritten in the loop below anyway)
halo_map = numpy.zeros((unique_halo_ids.size, 3), dtype=numpy.uint64)
start_loop, niters = 0, unique_halo_ids.size
for i in trange(niters, disable=not verbose):
hid = unique_halo_ids[i]
k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
halo_map[i, :] = hid, k0, kf
start_loop = kf
# Dump the halo mapping.
with h5py.File(fname, "r+") as f:
dset = f["snapshot_final"].create_dataset("halo_map", data=halo_map)
dset.attrs["header"] = """
Halo to particle mapping. Columns are HID, start index, end index.
"""
del part_hids
collect()
# Add the halo finder catalogue
with h5py.File(fname, "r+") as f:
group = f.create_group("halofinder_catalogue")
group.attrs["header"] = f"Original {halo_finder} halo catalogue."
cat = partreader.read_catalogue(nsnap, nsim, halo_finder)
hid2pos = {hid: i for i, hid in enumerate(unique_halo_ids)}
for key in cat.dtype.names:
x = numpy.full(unique_halo_ids.size, numpy.nan,
dtype=cat[key].dtype)
for i in range(len(cat)):
j = hid2pos[cat["index"][i]]
x[j] = cat[key][i]
group.create_dataset(key, data=x)
# Lastly create the halo catalogue
with h5py.File(fname, "r+") as f:
group = f.create_group("halo_catalogue")
group.attrs["header"] = f"{halo_finder} halo catalogue."
group.create_dataset("index", data=unique_halo_ids)
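def _example_load_halo_particles(fname, i):
    # A usage sketch (hypothetical helper, not called anywhere in this
    # script): slice out the particles of the i-th halo via the `halo_map`
    # written by `process_snapshot` above. Its rows are (hid, start, end)
    # with an inclusive end index.
    with h5py.File(fname, "r") as f:
        hid, k0, kf = f["snapshot_final"]["halo_map"][i, :]
        pos = f["snapshot_final"]["pos"][k0:kf + 1]
    return hid, pos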
def add_initial_snapshot(nsim, simname, halo_finder, verbose):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fname = paths.processed_output(nsim, simname, halo_finder)
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
fprint(f"processing simulation `{nsim}`.", verbose)
if simname == "csiborg":
nsnap0 = 1
elif simname == "quijote":
nsnap0 = -1
else:
raise ValueError(f"Unknown simulation `{simname}`.")
fprint("loading and sorting the initial PID.", verbose)
sort_indxs = numpy.argsort(partreader.read_snapshot(nsnap0, nsim, "pid"))
fprint("loading the final particles.", verbose)
with h5py.File(fname, "r") as f:
sort_indxs_final = f["snapshot_final/pid"][:]
fprint("sorting the particles according to the final snapshot.", verbose)
sort_indxs_final = numpy.argsort(numpy.argsort(sort_indxs_final))
sort_indxs = sort_indxs[sort_indxs_final]
del sort_indxs_final
collect()
fprint("loading and sorting the initial particle position.", verbose)
pos = partreader.read_snapshot(nsnap0, nsim, "pos")[sort_indxs]
del sort_indxs
collect()
# In Quijote some particles are positioned exactly at the edge of the
# box. Move them to be just inside.
if simname == "quijote":
mask = pos >= 1
if numpy.any(mask):
spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-5
pos[mask] -= spacing
fprint(f"dumping particles for `{nsim}` to `{fname}`.", verbose)
with h5py.File(fname, "r+") as f:
if "snapshot_initial" in f.keys():
del f["snapshot_initial"]
group = f.create_group("snapshot_initial")
group.attrs["header"] = "Initial snapshot data."
dset = group.create_dataset("pos", data=pos)
dset.attrs["header"] = "DM particle positions in box units."
def calculate_initial(nsim, simname, halo_finder, verbose):
"""Calculate the Lagrangian patch centre of mass and size."""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fname = paths.processed_output(nsim, simname, halo_finder)
fprint("loading the particle information.", verbose)
f = h5py.File(fname, "r")
pos = f["snapshot_initial/pos"]
mass = f["snapshot_final/mass"]
hid = f["halo_catalogue/index"][:]
hid2map = csiborgtools.read.make_halomap_dict(
f["snapshot_final/halo_map"][:])
if simname == "csiborg":
kwargs = {"box_size": 2048, "bckg_halfsize": 512}
else:
kwargs = {"box_size": 512, "bckg_halfsize": 256}
overlapper = csiborgtools.match.ParticleOverlap(**kwargs)
lagpatch_pos = numpy.full((len(hid), 3), numpy.nan, dtype=numpy.float32)
lagpatch_size = numpy.full(len(hid), numpy.nan, dtype=numpy.float32)
lagpatch_ncells = numpy.full(len(hid), numpy.nan, dtype=numpy.int32)
for i in trange(len(hid), disable=not verbose):
h = hid[i]
# These are unassigned particles.
if h == 0:
continue
parts_pos = csiborgtools.read.load_halo_particles(h, pos, hid2map)
parts_mass = csiborgtools.read.load_halo_particles(h, mass, hid2map)
# Skip if the halo has no particles or is too small.
if parts_pos is None or parts_pos.size < 5:
continue
cm = csiborgtools.center_of_mass(parts_pos, parts_mass, boxsize=1.0)
sep = csiborgtools.periodic_distance(parts_pos, cm, boxsize=1.0)
delta = overlapper.make_delta(parts_pos, parts_mass, subbox=True)
lagpatch_pos[i] = cm
lagpatch_size[i] = numpy.percentile(sep, 99)
lagpatch_ncells[i] = csiborgtools.delta2ncells(delta)
f.close()
collect()
with h5py.File(fname, "r+") as f:
grp = f["halo_catalogue"]
dset = grp.create_dataset("lagpatch_pos", data=lagpatch_pos)
dset.attrs["header"] = "Lagrangian patch centre of mass in box units."
dset = grp.create_dataset("lagpatch_size", data=lagpatch_size)
dset.attrs["header"] = "Lagrangian patch size in box units."
dset = grp.create_dataset("lagpatch_ncells", data=lagpatch_ncells)
dset.attrs["header"] = f"Lagrangian patch number of cells on a {kwargs['box_size']}^3 grid." # noqa
f.close()
def make_phew_halo_catalogue(nsim, verbose):
"""
Process the PHEW halo catalogue for a CSiBORG simulation at all snapshots.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
snapshots = paths.get_snapshots(nsim, "csiborg")
reader = csiborgtools.read.CSiBORGReader(paths)
keys_write = ["index", "x", "y", "z", "mass_cl", "parent",
"ultimate_parent", "summed_mass"]
# Create a HDF5 file to store all this.
fname = paths.processed_phew(nsim)
with h5py.File(fname, "w") as f:
f.close()
for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):
try:
data = reader.read_phew_clumps(nsnap, nsim, verbose=False)
except FileExistsError:
continue
with h5py.File(fname, "r+") as f:
if str(nsnap) in f:
print(f"Group {nsnap} already exists. Deleting.", flush=True)
del f[str(nsnap)]
grp = f.create_group(str(nsnap))
for key in keys_write:
grp.create_dataset(key, data=data[key])
grp.attrs["header"] = f"CSiBORG PHEW clumps at snapshot {nsnap}."
# Now write the redshifts
scale_factors = numpy.full(len(snapshots), numpy.nan, dtype=numpy.float32)
for i, nsnap in enumerate(snapshots):
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
scale_factors[i] = box._aexp
redshifts = scale_factors[-1] / scale_factors - 1
with h5py.File(fname, "r+") as f:
grp = f.create_group("info")
grp.create_dataset("redshift", data=redshifts)
grp.create_dataset("snapshots", data=snapshots)
grp.create_dataset("Om0", data=[box.Om0])
grp.create_dataset("boxsize", data=[box.boxsize])
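def _example_read_phew(fname, nsnap):
    # A reading sketch (hypothetical helper, not called anywhere in this
    # script): load one snapshot group and the shared info written by
    # `make_phew_halo_catalogue` above.
    with h5py.File(fname, "r") as f:
        summed_mass = f[str(nsnap)]["summed_mass"][:]
        redshifts = f["info"]["redshift"][:]
    return summed_mass, redshifts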
def make_merger_tree_file(nsim, verbose):
"""
Process the `.dat` merger tree files and dump them into a HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
reader = csiborgtools.read.CSiBORGReader(paths)
snaps = paths.get_snapshots(nsim, "csiborg")
fname = paths.processed_merger_tree(nsim)
with h5py.File(fname, "w") as f:
f.close()
for nsnap in tqdm(snaps, desc="Loading merger files",
disable=not verbose):
try:
data = reader.read_merger_tree(nsnap, nsim)
except FileExistsError:
continue
with h5py.File(fname, "r+") as f:
grp = f.create_group(str(nsnap))
grp.create_dataset("clump",
data=data[:, 0].astype(numpy.int32))
grp.create_dataset("progenitor",
data=data[:, 1].astype(numpy.int32))
grp.create_dataset("progenitor_outputnr",
data=data[:, 2].astype(numpy.int32))
grp.create_dataset("desc_mass",
data=data[:, 3].astype(numpy.float32))
grp.create_dataset("desc_npart",
data=data[:, 4].astype(numpy.int32))
grp.create_dataset("desc_pos",
data=data[:, 5:8].astype(numpy.float32))
grp.create_dataset("desc_vel",
data=data[:, 8:11].astype(numpy.float32))
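# The per-snapshot groups written above can be read back individually,
# e.g. (a usage sketch) `f[str(nsnap)]["desc_mass"][:]` for the
# descendant masses.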
def append_merger_tree_mass_to_phew_catalogue(nsim, verbose):
"""
Append the mass of haloes from the merger tree files to the PHEW
catalogue. The difference from the PHEW value is that the latter is
written before unbinding is performed.
Note that currently this is done only for the highest snapshots.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
snapshots = paths.get_snapshots(nsim, "csiborg")
merger_reader = csiborgtools.read.MergerReader(nsim, paths)
for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):
# TODO do this for all later
if nsnap < 930:
continue
try:
phewcat = csiborgtools.read.CSiBORGPHEWCatalogue(nsnap, nsim,
paths)
except ValueError:
# `phewcat` is unbound if the constructor itself raised, so there
# is nothing to close here.
continue
mergertree_mass = merger_reader.match_mass_to_phewcat(phewcat)
phewcat.close()
fname = paths.processed_phew(nsim)
with h5py.File(fname, "r+") as f:
grp = f[str(nsnap)]
grp.create_dataset("mergertree_mass_new", data=mergertree_mass)
def main(nsim, args):
if args.make_final:
process_snapshot(nsim, args.simname, args.halofinder, True)
if args.make_initial:
add_initial_snapshot(nsim, args.simname, args.halofinder, True)
calculate_initial(nsim, args.simname, args.halofinder, True)
if args.make_phew:
make_phew_halo_catalogue(nsim, True)
if args.make_merger:
make_merger_tree_file(nsim, True)
if args.append_merger_mass:
append_merger_tree_mass_to_phew_catalogue(nsim, True)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
parser.add_argument("--halofinder", type=str, help="Halo finder")
parser.add_argument("--make_final", action="store_true", default=False,
help="Process the final snapshot.")
parser.add_argument("--make_initial", action="store_true", default=False,
help="Process the initial snapshot.")
parser.add_argument("--make_phew", action="store_true", default=False,
help="Process the PHEW halo catalogue.")
parser.add_argument("--make_merger", action="store_true", default=False,
help="Process the merger tree files.")
parser.add_argument("--append_merger_mass", action="store_true",
default=False,
help="Append the merger tree mass to the PHEW cat.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def _main(nsim):
main(nsim, args)
work_delegation(_main, nsims, MPI.COMM_WORLD)


@ -1,114 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to sort the initial snapshot particles according to their final
snapshot ordering, which is sorted by the halo IDs.
Ensures the following units:
- Positions in box units.
- Masses in :math:`M_\odot / h`.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
import h5py
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
import csiborgtools
from utils import get_nsims
def _main(nsim, simname, verbose):
"""
Sort the initial snapshot particles according to their final snapshot
ordering and dump them into a HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
print(f"{datetime.now()}: processing simulation `{nsim}`.", flush=True)
# We first load the particle IDs in the final snapshot.
pidf = csiborgtools.read.read_h5(paths.particles(nsim, simname))
pidf = pidf["particle_ids"]
# Then we load the particles in the initial snapshot and make sure that
# their particle IDs are sorted as in the final snapshot. Again, because of
# precision this must be read as structured.
if simname == "csiborg":
pars_extract = ["x", "y", "z", "M", "ID"]
# CSiBORG's initial snapshot ID
nsnap = 1
else:
pars_extract = None
# Use this to point the reader to the ICs snapshot
nsnap = -1
part0, pid0 = partreader.read_particle(
nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
# In CSiBORG we need to convert particle masses from box units.
if simname == "csiborg":
box = csiborgtools.read.CSiBORGBox(
max(paths.get_snapshots(nsim, simname)), nsim, paths)
part0[:, 3] = box.box2solarmass(part0[:, 3])
# Quijote's initial snapshot information also contains velocities but we
# don't need those.
if simname == "quijote":
part0 = part0[:, [0, 1, 2, 6]]
# In Quijote some particles are positioned exactly at the edge of the
# box. Move them to be just inside.
pos = part0[:, :3]
mask = pos >= 1
if numpy.any(mask):
spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-5
pos[mask] -= spacing
# First sort the particles by their IDs, then apply the reverse sorting
# from the final snapshot.
part0 = part0[numpy.argsort(pid0)]
del pid0
collect()
part0 = part0[numpy.argsort(numpy.argsort(pidf))]
fout = paths.initmatch(nsim, simname, "particles")
if verbose:
print(f"{datetime.now()}: dumping particles for `{nsim}` to `{fout}`",
flush=True)
with h5py.File(fout, "w") as f:
f.create_dataset("particles", data=part0)
if __name__ == "__main__":
# Argument parser
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def main(nsim):
_main(nsim, args.simname, MPI.COMM_WORLD.Get_size() == 1)
work_delegation(main, nsims, MPI.COMM_WORLD)


@ -1,52 +1,28 @@
 from setuptools import find_packages, setup

-# List of dependencies:
-# - Corrfunc -> To be moved to a separate package.
-# - NumPy
-# - SciPy
-# - Numba
-# - Pylians
-# - tqdm
-# - healpy
-# - astropy
-# - scikit-learn
-# - joblib
-# - h5py
-# - MPI
-# - pyyaml
-# - taskmaster
-# - matplotlib
-# - scienceplots
-# - cache_to_disk
 BUILD_REQ = ["numpy", "scipy"]
 INSTALL_REQ = BUILD_REQ
-INSTALL_REQ += ["Corrfunc",
-                "Pylians",
-                "numba",
-                "tqdm",
-                "healpy",
-                "astropy",
-                "scikit-learn",
-                "h5py",
-                "matplotlib",
-                "scienceplots",
-                "mpi4py",
-                "pyyaml",
-                "joblib",]
+INSTALL_REQ += [
+    "numba",
+    "tqdm",
+    "healpy",
+    "astropy",
+    "scikit-learn",
+    "h5py",
+    "pynbody",
+    "joblib",
+]

 setup(
     name="csiborgtools",
-    version="0.2",
+    version="0.3",
     description="CSiBORG analysis tools",
     url="https://github.com/Richard-Sti/csiborgtools",
     author="Richard Stiskalek",
     author_email="richard.stiskalek@protonmail.com",
     license="GPL-3.0",
     packages=find_packages(),
-    python_requires=">=3.8",
+    python_requires=">=3.6",
     build_requires=BUILD_REQ,
     setup_requires=BUILD_REQ,
     install_requires=INSTALL_REQ,
@ -55,5 +31,6 @@ setup(
         "Intended Audience :: Science/Research",
         "Operating System :: POSIX :: Linux",
         "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9"]
+        "Programming Language :: Python :: 3.9"
+    ]
 )