Add mmain and other major updates (#44)

* Move paths to a separate file

* Add mmain reader

* Add a verbosity flag

* Fix imports

* Fix bug

* Rename files

* Return ultimate parents

* Add script to generate mmain

* Remove mmain path

* edit path

* Add mmain path

* Change function name

* Rename function

* Turn off verbose

* Fix list requirement

* Edit init match paths

* Fix init pathing

* Edit paths docs

* Edit dumpdir name

* Rename path

* Fix split paths

* Remove unused import

* Add comment

* Update readme

* remove read mmain

* Rename halocatalogue

* Fix minor bugs

* Update nbs

* Add create directory option

* Move split jobs

* Move split jobs

* Remove splitting

* Add import

* Edit script

* Deeper split folder

* Fix paths bug

* Rename catalogue

* Rename Catalogue

* Add new clumpread

* Edit paths

* add knn paths

* Update commenting

* Update imports

* Add more conversions

* Update temp file

* Add a note

* Add catalogue

* Comment

* Update TODO

* Update script

* add nb

* Update

* pep8

* edit paths & pep8

* Fix knn auto paths

* add paths docs

* Add auto and cross knn paths

* Add new paths

* Simplify tpcf reading

* pep8 patch

* update readme

* Update progress

* pep8 (repeated across fifteen consecutive commits)

* Pep 8 and restructure

* add lambda spin

* add clump and halo

* add checks

* Edit halo profile fit

* Update gitignore

* backup script
This commit is contained in:
Richard Stiskalek 2023-04-18 11:02:36 +02:00 committed by GitHub
parent e0d3854277
commit fdb0df8d4c
50 changed files with 2152 additions and 1844 deletions

.gitignore — 1 line changed

@ -17,3 +17,4 @@ Pylians3/*
 scripts/plot_correlation.ipynb
 scripts/*.sh
 venv/
+.trunk/*

README.md

@ -2,8 +2,16 @@
 ## Project Overlap
+- [x] Clean up the kNN paths in the summary.
+- [x] Clean up the 2PCF paths in the summary.
+- [ ] Sort out the splitting of individual clumps.
+- [ ] Update the fitting scripts to work for clumps and parent halos.
+- [ ] Calculate fitted quantities for clumps and parent halos and add them to the catalogues.
+- [ ] Update overlap scripts to work with summed parent halos.
+- [ ] Update the clustering scripts to work with clumps instead.
 - [ ] Calculate the overlap between all 101 IC realisations on DiRAC.

 ## Project Clustering
@ -20,7 +28,7 @@
 - [x] For the cross-correlation try making the second field randoms.
 - [x] Clean up the reader code.
 - [x] Correct the crossing script.
-- [ ] Get started with the 2PCF calculation.
+- [x] Get started with the 2PCF calculation.

 ## Project Environmental Dependence
 - [ ] Add gradient and Hessian of the overdensity field.

csiborgtools/__init__.py

@ -12,11 +12,10 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-from csiborgtools import (read, match, utils, units, fits, field, clustering)  # noqa
+from csiborgtools import clustering, field, fits, match, read  # noqa

 # Arguments to csiborgtools.read.CSiBORGPaths.
 paths_glamdring = {
     "srcdir": "/mnt/extraspace/hdesmond/",
-    "dumpdir": "/mnt/extraspace/rstiskalek/csiborg/",
-    "mmain_path": "/mnt/zfsusers/hdesmond/Mmain",
-    "initmatch_path": "/mnt/extraspace/rstiskalek/csiborg/initmatch/"}
+    "postdir": "/mnt/extraspace/rstiskalek/csiborg/"
+}

csiborgtools/clustering/__init__.py

@ -13,10 +13,19 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 from warnings import warn
-from .knn import kNN_CDF  # noqa
-from .utils import (RVSinsphere, RVSinbox, RVSonsphere, BaseRVS, normalised_marks)  # noqa
+
+from csiborgtools.clustering.knn import kNN_CDF  # noqa
+from csiborgtools.clustering.utils import (  # noqa
+    BaseRVS,
+    RVSinbox,
+    RVSinsphere,
+    RVSonsphere,
+    normalised_marks,
+)

 try:
-    import Corrfunc
+    import Corrfunc  # noqa
+
     from .tpcf import Mock2PCF  # noqa
 except ImportError:
     warn("`Corrfunc` not installed. 2PCF modules will not be available (`Mock2PCF`).")  # noqa

csiborgtools/clustering/knn.py

@ -13,11 +13,12 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 """
-kNN-CDF calculation
+kNN-CDF calculation.
 """
 import numpy
 from scipy.interpolate import interp1d
 from scipy.stats import binned_statistic
+
 from .utils import BaseRVS
@ -105,7 +106,7 @@ class kNN_CDF:
            Catalogue NN object.
        rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
            Uniform RVS generator matching `knn`.
-       neighbours : int
+       nneighbours : int
            Maximum number of neighbours to use for the kNN-CDF calculation.
        nsamples : int
            Number of random points to sample for the knn-CDF calculation.
@ -155,7 +156,7 @@ class kNN_CDF:
            NN object of the second catalogue.
        rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
            Uniform RVS generator matching `knn1` and `knn2`.
-       neighbours : int
+       nneighbours : int
            Maximum number of neighbours to use for the kNN-CDF calculation.
        Rmax : float
            Maximum radius of the sphere in which to sample random points for
@ -246,7 +247,7 @@ class kNN_CDF:
            Catalogue NN object.
        rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
            Uniform RVS generator matching `knn1` and `knn2`.
-       neighbours : int
+       nneighbours : int
            Maximum number of neighbours to use for the kNN-CDF calculation.
        nsamples : int
            Number of random points to sample for the knn-CDF calculation.
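The `neighbours` → `nneighbours` renames above are docstring fixes for the kNN-CDF machinery. As a plain illustration of the statistic itself (not the class's API): sample random query points, record each point's distance to its k-th nearest data neighbour, and read the CDF off the sorted distances.

```python
import numpy
from sklearn.neighbors import NearestNeighbors

# Illustrative kNN-CDF construction; sizes and k are arbitrary choices.
data = numpy.random.uniform(0, 1, (5000, 3))
queries = numpy.random.uniform(0, 1, (20000, 3))
k = 1  # which nearest neighbour to track
dist, _ = NearestNeighbors().fit(data).kneighbors(queries, n_neighbors=k)
rs = numpy.sort(dist[:, k - 1])
cdf = numpy.arange(1, rs.size + 1) / rs.size  # CDF evaluated at radii `rs`
```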

csiborgtools/clustering/tpcf.py

@ -16,6 +16,7 @@
 import numpy
 from Corrfunc.theory.DD import DD
 from Corrfunc.utils import convert_3d_counts_to_cf
+
 from .utils import BaseRVS
@ -63,6 +64,7 @@ class Mock2PCF:
                   periodic=False)
         ndata = pos.shape[0]
-        xi = convert_3d_counts_to_cf(ndata, ndata, nrandom, nrandom, dd, dr, dr, rr)
+        xi = convert_3d_counts_to_cf(ndata, ndata, nrandom, nrandom,
+                                     dd, dr, dr, rr)
         rp = 0.5 * (bins[1:] + bins[:-1])
         return rp, xi
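The reflowed call passes `dr` for both cross terms, which with equal data and random counts corresponds to the Landy-Szalay estimator. A hand-rolled equivalent in terms of raw pair counts (a sketch of the estimator, not of Corrfunc's internals):

```python
def landy_szalay(dd, dr, rr, nd, nr):
    # f rescales the random pair counts to the data sample size,
    # giving xi = (f^2 DD - 2 f DR + RR) / RR.
    f = nr / nd
    return (f**2 * dd - 2 * f * dr + rr) / rr
```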

csiborgtools/clustering/utils.py

@ -13,10 +13,10 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 """Clustering support functions."""
-from abc import (ABC, abstractmethod)
+from abc import ABC, abstractmethod
 from warnings import warn
-import numpy
+
+import numpy

 ###############################################################################
 #                             Random points                                   #
 ###############################################################################

@ -77,7 +77,7 @@ class RVSinsphere(BaseRVS):
 class RVSinbox(BaseRVS):
-    """
+    r"""
     Generator of uniform RVS in a box of width `L` in Cartesian coordinates in
     :math:`[0, L]^3`.
@ -100,7 +100,7 @@ class RVSinbox(BaseRVS):
 class RVSonsphere(BaseRVS):
-    """
+    r"""
     Generator of uniform RVS on the surface of a unit sphere. RA is in
     :math:`[0, 2\pi)` and dec in :math:`[-\pi / 2, \pi / 2]`, respectively.
     If `indeg` is `True` then converted to degrees.
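The `r"""` fixes above touch the RVS generator docstrings. For reference, the inverse-CDF construction such generators typically rely on, sketched independently of the library: radii drawn as u^(1/3) are uniform inside a sphere, and directions need uniform sampling in cos(theta).

```python
import numpy

def rvs_in_sphere(n, R=1.0, seed=None):
    """Uniform random points in a sphere of radius R (standalone sketch)."""
    rng = numpy.random.default_rng(seed)
    r = R * rng.random(n) ** (1 / 3)       # P(< r) grows as r^3
    phi = 2 * numpy.pi * rng.random(n)     # azimuthal (RA-like) angle
    costheta = rng.uniform(-1, 1, n)       # uniform in cos(theta)
    sintheta = numpy.sqrt(1 - costheta**2)
    return numpy.vstack([r * sintheta * numpy.cos(phi),
                         r * sintheta * numpy.sin(phi),
                         r * costheta]).T
```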
@ -148,7 +148,7 @@ def wrapRA(ra, indeg):
     """
     mask = ra < 0
     if numpy.sum(mask) == 0:
-        warn("No negative right ascension found.", UserWarning())
+        warn("No negative right ascension found.", UserWarning, stacklevel=1)
     ra[mask] += 360 if indeg else 2 * numpy.pi
     return ra
@ -177,7 +177,7 @@ def normalised_marks(x, y, nbins):
     """
     assert x.ndim == y.ndim == 1
     if y.dtype not in [numpy.float32, numpy.float64]:
-        raise NotImplemented("Marks from integers are not supported.")
+        raise NotImplementedError("Marks from integers are not supported.")
     bins = numpy.percentile(x, q=numpy.linspace(0, 100, nbins + 1))
     marks = numpy.full_like(y, numpy.nan)
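`normalised_marks` bins `x` into equal-occupancy percentile bins via the `numpy.percentile` call above and assigns marks within each bin. A compact sketch of that idea as a within-bin rank transform (the library's exact normalisation may differ):

```python
import numpy

def marks_sketch(x, y, nbins):
    edges = numpy.percentile(x, numpy.linspace(0, 100, nbins + 1))
    which = numpy.clip(numpy.digitize(x, edges[1:-1]), 0, nbins - 1)
    marks = numpy.full(y.size, numpy.nan)
    for i in range(nbins):
        m = which == i
        ranks = numpy.argsort(numpy.argsort(y[m]))  # in-bin ranks of y
        marks[m] = ranks / max(m.sum() - 1, 1)      # normalised to [0, 1]
    return marks
```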

csiborgtools/field/__init__.py

@ -13,8 +13,10 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 from warnings import warn

 try:
-    import MAS_library as MASL
-    from .density import DensityField
+    import MAS_library as MASL  # noqa
+
+    from .density import DensityField  # noqa
 except ImportError:
     warn("MAS_library not found, `DensityField` will not be available", UserWarning)  # noqa

csiborgtools/field/density.py

@ -16,12 +16,12 @@
 Density field and cross-correlation calculations.
 """
 from warnings import warn
-from tqdm import trange
-import numpy
 import MAS_library as MASL
+import numpy
 import Pk_library as PKL
 import smoothing_library as SL
-from ..units import (BoxUnits, radec_to_cartesian)
+from tqdm import trange

 class DensityField:
@ -56,8 +56,7 @@ class DensityField:
     def __init__(self, particles, boxsize, box, MAS="CIC"):
         self.particles = particles
         assert boxsize > 0
-        self._boxsize = boxsize
-        assert isinstance(box, BoxUnits)
+        self.boxsize = boxsize
         self.box = box
         assert MAS in ["NGP", "CIC", "TSC", "PCS"]
         self._MAS = MAS
@ -103,6 +102,14 @@ class DensityField:
         """
         return self._box

+    @box.setter
+    def box(self, box):
+        try:
+            assert box._name == "box_units"
+            self._box = box
+        except AttributeError as err:
+            raise TypeError from err
+
     @property
     def MAS(self):
         """
@ -117,7 +124,7 @@ class DensityField:
     @staticmethod
     def _force_f32(x, name):
         if x.dtype != numpy.float32:
-            warn("Converting `{}` to float32.".format(name))
+            warn("Converting `{}` to float32.".format(name), stacklevel=1)
             x = x.astype(numpy.float32)
         return x
@ -348,13 +355,15 @@ class DensityField:
         -------
         interp_field : (list of) 1-dimensional array of shape `(n_samples,).
         """
-        self._force_f32(pos, "pos")
-        X = numpy.vstack(
-            radec_to_cartesian(*(pos[:, i] for i in range(3)), isdeg)).T
-        X = X.astype(numpy.float32)
-        # Place the observer at the center of the box
-        X += 0.5 * self.boxsize
-        return self.evaluate_field(*field, pos=X)
+        # TODO: implement this
+        raise NotImplementedError("This method is not yet implemented.")
+        # self._force_f32(pos, "pos")
+        # X = numpy.vstack(
+        #     radec_to_cartesian(*(pos[:, i] for i in range(3)), isdeg)).T
+        # X = X.astype(numpy.float32)
+        # # Place the observer at the center of the box
+        # X += 0.5 * self.boxsize
+        # return self.evaluate_field(*field, pos=X)

     @staticmethod
     def gravitational_field_norm(gx, gy, gz):
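For reference, the `MAS` flag selects the mass-assignment scheme handed to `MAS_library`; under CIC each particle's mass is shared among neighbouring cells with linear weights. A one-dimensional, periodic sketch of the weighting (not `MAS_library`'s implementation):

```python
import numpy

def cic_1d(positions, weights, ncells, boxsize):
    field = numpy.zeros(ncells)
    x = positions / boxsize * ncells - 0.5   # cell centres at (i + 0.5) dx
    left = numpy.floor(x).astype(int)
    w = x - left                             # fractional offset in [0, 1)
    numpy.add.at(field, left % ncells, (1 - w) * weights)
    numpy.add.at(field, (left + 1) % ncells, w * weights)
    return field
```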

csiborgtools/fits/__init__.py

@ -12,7 +12,5 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-from .haloprofile import (NFWProfile, NFWPosterior)  # noqa
-from .halo import (distribute_halos, clump_with_particles,  # noqa
-                   dump_split_particles, load_split_particles,  # noqa
-                   split_jobs, pick_single_clump, Clump)  # noqa
+from .halo import Clump, Halo  # noqa
+from .haloprofile import NFWPosterior, NFWProfile  # noqa

csiborgtools/fits/halo.py

@ -12,555 +12,195 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-"""
-Tools for splitting the particles and a clump object.
-"""
-from os import remove
-from warnings import warn
-from os.path import join
+"""A clump object."""
+from abc import ABC

 import numpy
-from tqdm import trange

-from ..read import ParticleReader
def clump_with_particles(particle_clumps, clumps): class BaseStructure(ABC):
""" """
Count how many particles does each clump have. Basic structure object for handling operations on its particles.
Parameters
----------
particle_clumps : 1-dimensional array
Array of particles' clump IDs.
clumps : structured array
Clumps array.
Returns
-------
with_particles : 1-dimensional boolean array
Array of whether a clump has any particles.
""" """
return numpy.isin(clumps["index"], particle_clumps) _particles = None
_info = None
_box = None
@property
def distribute_halos(n_splits, clumps): def particles(self):
""" """
Evenly distribute clump indices to smaller splits. Clumps should only be
clumps that contain particles.
Parameters
----------
n_splits : int
Number of splits.
clumps : structured array
Clumps array.
Returns
-------
splits : 2-dimensional array of shape `(njobs, 2)`
Array of starting and ending indices of each CPU.
"""
# Make sure these are unique IDs
indxs = clumps["index"]
if indxs.size > numpy.unique((indxs)).size:
raise ValueError("`clump_indxs` contains duplicate indices.")
Ntotal = indxs.size
njobs_per_cpu = numpy.ones(n_splits, dtype=int) * Ntotal // n_splits
# Split the remainder Ntotal % njobs among the CPU
njobs_per_cpu[:Ntotal % n_splits] += 1
start = ParticleReader.nparts_to_start_ind(njobs_per_cpu)
return numpy.vstack([start, start + njobs_per_cpu]).T
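The even-split arithmetic of `distribute_halos` (removed by this commit), in isolation: every split receives `Ntotal // n_splits` clumps and the first `Ntotal % n_splits` splits take one extra. A standalone sketch:

```python
import numpy

def even_splits(ntotal, n_splits):
    per = numpy.full(n_splits, ntotal // n_splits, dtype=int)
    per[:ntotal % n_splits] += 1           # spread the remainder
    start = numpy.cumsum(per) - per        # starting index of each split
    return numpy.vstack([start, start + per]).T

# even_splits(10, 3) -> [[0, 4], [4, 7], [7, 10]]
```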
def dump_split_particles(particles, particle_clumps, clumps, n_splits,
nsnap, nsim, paths, verbose=True):
"""
Save the data needed for each split so that a process does not have to load
everything.
Parameters
----------
particles : structured array
The particle array.
particle_clumps : 1-dimensional array
Array of particles' clump IDs.
clumps : structured array
The clumps array.
n_splits : int
Number of times to split the clumps.
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths-handling object with set `n_sim` and `n_snap`.
verbose : bool, optional
Verbosity flag. By default `True`.
Returns
-------
None
"""
if particles.size != particle_clumps.size:
raise ValueError("`particles` must correspond to `particle_clumps`.")
# Calculate which clumps have particles
with_particles = clump_with_particles(particle_clumps, clumps)
clumps = numpy.copy(clumps)[with_particles]
if verbose:
warn(r"There are {:.4f}% clumps that have identified particles."
.format(with_particles.sum() / with_particles.size * 100))
# The starting clump index of each split
splits = distribute_halos(n_splits, clumps)
fname = join(paths.temp_dumpdir, "out_{}_snap_{}_{}.npz")
tot = 0
for n in trange(n_splits) if verbose else range(n_splits):
# Lower and upper array index of the clumps array
i, j = splits[n, :]
# Clump indices in this split
indxs = clumps["index"][i:j]
hmin, hmax = indxs.min(), indxs.max()
mask = (particle_clumps >= hmin) & (particle_clumps <= hmax)
# Check number of clumps
npart_unique = numpy.unique(particle_clumps[mask]).size
if indxs.size > npart_unique:
raise RuntimeError(
"Split `{}` contains more unique clumps (`{}`) than there are "
"unique particles' clump indices (`{}`)after removing clumps "
"with no particles.".format(n, indxs.size, npart_unique))
# Dump it!
tot += mask.sum()
fout = fname.format(nsim, nsnap, n)
numpy.savez(fout, particles[mask], particle_clumps[mask], clumps[i:j])
# There are particles whose clump ID is > 1 and have no counterpart in the
# clump file. Therefore can save fewer particles, depending on the cut.
if tot > particle_clumps.size:
raise RuntimeError(
"Num. of dumped particles `{}` is greater than the particle file "
"size `{}`.".format(tot, particle_clumps.size))
def split_jobs(njobs, ncpu):
"""
Split `njobs` amongst `ncpu`.
Parameters
----------
njobs : int
Number of jobs.
ncpu : int
Number of CPUs.
Returns
-------
jobs : list of lists of integers
Outer list of each CPU and inner lists for CPU's jobs.
"""
njobs_per_cpu, njobs_remainder = divmod(njobs, ncpu)
jobs = numpy.arange(njobs_per_cpu * ncpu).reshape((njobs_per_cpu, ncpu)).T
jobs = jobs.tolist()
for i in range(njobs_remainder):
jobs[i].append(njobs_per_cpu * ncpu + i)
return jobs
def load_split_particles(nsplit, nsnap, nsim, paths, remove_split=False):
"""
Load particles of a split saved by `dump_split_particles`.
Parameters
--------
n_split : int
Split index.
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths-handling object with set `n_sim` and `n_snap`.
remove_split : bool, optional
Whether to remove the split file. By default `False`.
Returns
-------
particles : structured array
Particle array of this split.
clumps_indxs : 1-dimensional array
Array of particles' clump IDs of this split.
clumps : 1-dimensional array
Clumps belonging to this split.
"""
fname = join(paths.temp_dumpdir,
"out_{}_snap_{}_{}.npz".format(nsim, nsnap, nsplit))
file = numpy.load(fname)
particles, clump_indxs, clumps = (file[f] for f in file.files)
if remove_split:
remove(fname)
return particles, clump_indxs, clumps
def pick_single_clump(n, particles, particle_clumps, clumps):
"""
Get particles belonging to the `n`th clump in `clumps` arrays.
Parameters
----------
n : int
Clump position in `clumps` array. Not its halo finder index!
particles : structured array
Particle array. Particle array.
particle_clumps : 1-dimensional array
Array of particles' clump IDs.
clumps : structured array
Array of clumps.
Returns Returns
------- -------
sel_particles : structured array particles : structured array
Particles belonging to the requested clump. """
sel_clump : array return self._particles
A slice of a `clumps` array corresponding to this clump. Must
contain `["peak_x", "peak_y", "peak_z", "mass_cl"]`.
"""
# Clump index on the nth position
k = clumps["index"][n]
# Mask of which particles belong to this clump
mask = particle_clumps == k
return particles[mask], clumps[n]
@particles.setter
def particles(self, particles):
pars = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
assert all(p in particles.dtype.names for p in pars)
self._particles = particles
############################################################################### @property
# Clump object # def info(self):
############################################################################### """
Array containing information from the clump finder.
Returns
-------
info : structured array
"""
return self._info
class Clump: @info.setter
r""" def info(self, info):
A clump (halo) object to handle the particles and their clump's data. # TODO turn this into a structured array and add some checks
self._info = info
Parameters @property
---------- def box(self):
x : 1-dimensional array """
Particle coordinates along the x-axis. CSiBORG box object handling unit conversion.
y : 1-dimensional array
Particle coordinates along the y-axis.
z : 1-dimensional array
Particle coordinates along the z-axis.
m : 1-dimensional array
Particle masses.
x0 : float
Clump center coordinate along the x-axis.
y0 : float
Clump center coordinate along the y-axis.
z0 : float
Clump center coordinate along the z-axis.
clump_mass : float, optional
Mass of the clump. By default not set.
vx : 1-dimensional array, optional
Particle velocity along the x-axis. By default not set.
vy : 1-dimensional array, optional
Particle velocity along the y-axis. By default not set.
vz : 1-dimensional array, optional
Particle velocity along the z-axis. By default not set.
index : int, optional
The halo finder index of this clump. By default not set.
rhoc : float, optional
The critical density :math:`\rho_c` at this snapshot in box units. By
default not set.
G : float, optional
The gravitational constant :math:`G` in box units. By default not set.
"""
_pos = None
_clump_pos = None
_clump_mass = None
_vel = None
_index = None
_rhoc = None
_G = None
def __init__(self, x, y, z, m, x0, y0, z0, clump_mass=None, Returns
vx=None, vy=None, vz=None, index=None, rhoc=None, G=None): -------
self._pos = numpy.vstack([x - x0, y - y0, z - z0]).T box : :py:class:`csiborgtools.units.BoxUnits`
self._clump_pos = numpy.asarray((x0, y0, z0)) """
assert clump_mass is None or isinstance(clump_mass, float) return self._box
self._clump_mass = clump_mass
if all(v is not None for v in (vx, vy, vz)): @box.setter
self._vel = numpy.vstack([vx, vy, vz]).T def box(self, box):
assert self._vel.shape == self.pos.shape try:
assert m.ndim == 1 and m.size == self.Npart assert box._name == "box_units"
self._m = m self._box = box
assert index is None or (isinstance(index, (int, numpy.int64)) and index >= 0) # noqa except AttributeError as err:
self._index = index raise TypeError from err
assert rhoc is None or rhoc > 0
self._rhoc = rhoc
assert G is None or G > 0
self._G = G
@property @property
def pos(self): def pos(self):
""" """
Cartesian particle coordinates centered at the clump. Cartesian particle coordinates centered at the object.
Returns Returns
------- -------
pos : 2-dimensional array of shape `(n_particles, 3)`. pos : 2-dimensional array of shape `(n_particles, 3)`.
""" """
return self._pos ps = ('x', 'y', 'z')
return numpy.vstack([self[p] - self.info[p] for p in ps]).T
@property
def Npart(self):
"""
Number of particles associated with this clump.
Returns
-------
Npart : int
"""
return self.pos.shape[0]
@property @property
def r(self): def r(self):
""" """
Radial distance of the particles from the clump peak. Radial separation of the particles from the centre of the object.
Returns Returns
------- -------
r : 1-dimensional array of shape `(n_particles, )`. r : 1-dimensional array of shape `(n_particles, )`.
""" """
return numpy.sum(self.pos**2, axis=1)**0.5 return numpy.linalg.norm(self.pos, axis=1)
@property
def rmin(self):
"""
The minimum radial distance of a particle.
Returns
-------
rmin : float
"""
return numpy.min(self.r)
@property
def rmax(self):
"""
The maximum radial distance of a particle.
Returns
-------
rmin : float
"""
return numpy.max(self.r)
@property
def clump_pos(self):
"""
Cartesian position components of the clump.
Returns
-------
pos : 1-dimensional array of shape `(3, )`
"""
return self._clump_pos
@property
def clump_mass(self):
"""
Clump mass.
Returns
-------
mass : float
"""
if self._clump_mass is None:
raise ValueError("Clump mass `clump_mass` has not been set.")
return self._clump_mass
@property @property
def vel(self): def vel(self):
""" """
Cartesian velocity components of the clump. Cartesian particle velocity components.
Returns Returns
------- -------
vel : 2-dimensional array of shape (`n_particles, 3`) vel : 2-dimensional array of shape (`n_particles, 3`)
""" """
if self._vel is None: return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
raise ValueError("Velocities `vel` have not been set.")
return self._vel
@property @property
def m(self): def cmass(self):
""" """
Particle masses. Cartesian position components of the object's centre of mass. Note that
this is already in a frame centered at the clump's potential minimum,
Returns so its distance from origin indicates the separation of the centre of
------- mass and potential minimum.
m : 1-dimensional array of shape `(n_particles, )`
"""
return self._m
@property
def center_mass(self):
"""
Cartesian position components of the clump centre of mass. Note that
this is already in a frame centered at the clump's potential minimum.
Returns Returns
------- -------
cm : 1-dimensional array of shape `(3, )` cm : 1-dimensional array of shape `(3, )`
""" """
return numpy.average(self.pos, axis=0, weights=self.m) return numpy.average(self.pos, axis=0, weights=self['M'])
@property @property
def angular_momentum(self): def angular_momentum(self):
""" """
Clump angular momentum in the box coordinates. Angular momentum in the box coordinates.
NOTE: here also change velocities to the CM and appropriately edit the
docs.
Returns Returns
------- -------
J : 1-dimensional array or shape `(3, )` J : 1-dimensional array or shape `(3, )`
""" """
J = numpy.cross(self.pos - self.center_mass, self.vel) J = numpy.cross(self.pos - self.cmass, self.vel)
return numpy.einsum("i,ij->j", self.m, J) return numpy.einsum("i,ij->j", self['M'], J)
@property def enclosed_mass(self, rmax, rmin=0):
def lambda200c(self):
r"""
Clump Bullock spin, see Eq. 5 in [1], in a radius of
:math:`R_{\rm 200c}`.
References
----------
[1] A Universal Angular Momentum Profile for Galactic Halos; 2001;
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R.
Returns
-------
lambda200c : float
""" """
J = self.angular_momentum Sum of particle masses between two radii.
R, M = self.spherical_overdensity_mass(200)
V = numpy.sqrt(self.G * M / R)
return numpy.linalg.norm(J) / (numpy.sqrt(2) * M * V * R)
@property
def index(self):
"""
Halo finder clump index.
Returns
-------
hindex : int
"""
if self._index is None:
raise ValueError("Halo index `hindex` has not been set.")
return self._index
@property
def rhoc(self):
r"""
Critical density :math:`\rho_c` at this snapshot in box units.
Returns
-------
rhoc : float
"""
if self._rhoc is None:
raise ValueError("The critical density `rhoc` has not been set.")
return self._rhoc
@property
def G(self):
r"""
Gravitational constant :math:`G` in box units.
Returns
-------
G : float
"""
if self._G is None:
raise ValueError("The grav. constant `G` has not been set.")
return self._G
@property
def total_particle_mass(self):
"""
Total mass of all particles.
Returns
-------
tot_mass : float
"""
return numpy.sum(self.m)
@property
def mean_particle_pos(self):
"""
Mean Cartesian particle coordinate. Not centered at the halo!
Returns
-------
pos : 1-dimensional array of shape `(3, )`
"""
return numpy.mean(self.pos + self.clump_pos, axis=0)
def enclosed_spherical_mass(self, rmax, rmin=0):
"""
Enclosed spherical mass between two radii in box units.
Parameters
----------
rmax : float
The maximum radial distance.
rmin : float, optional
The minimum radial distance. By default 0.
Returns
-------
M_enclosed : float
The enclosed mass.
"""
return numpy.sum(self.m[(self.r >= rmin) & (self.r <= rmax)])
def enclosed_spherical_volume(self, rmax, rmin=0):
"""
Enclosed spherical volume within two radii in box units.
Parameters Parameters
---------- ----------
rmax : float rmax : float
Maximum radial distance. Maximum radial distance.
rmin : float, optional rmin : float, optional
Minimum radial distance. By default 0. Minimum radial distance.
Returns Returns
------- -------
vol : float enclosed_mass : float
""" """
return 4 * numpy.pi / 3 * (rmax**3 - rmin**3) r = self.r
return numpy.sum(self['M'][(r >= rmin) & (r <= rmax)])
def spherical_overdensity_mass(self, delta, n_particles_min=10): def lambda_bullock(self, radius, npart_min=10):
r""" r"""
Spherical overdensity mass and radius. The mass is defined as the Bullock spin, see Eq. 5 in [1], in a radius of `radius`, which should
enclosed mass within a radius of where the mean enclosed spherical define to some overdensity radius.
density reaches a multiple of the critical radius at a given redshift
`self.rho_c`.
Starts from the furthest particle, working its way inside the halo
through an ordered list of particles. The corresponding values is the
radial distance of the first particle whose addition sufficiently
increases the mean density.
Parameters Parameters
---------- ----------
delta : list of int or float radius : float
The :math:`\delta_{\rm x}` parameters where :math:`\mathrm{x}` is Radius in which to calculate the spin.
the overdensity multiple. npart_min : int
n_particles_min : int Minimum number of enclosed particles for a radius to be
considered trustworthy.
Returns
-------
lambda_bullock : float
References
----------
[1] A Universal Angular Momentum Profile for Galactic Halos; 2001;
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R.
"""
mask = self.r <= radius
if numpy.sum(mask) < npart_min:
return numpy.nan
mass = self.enclosed_mass(radius)
V = numpy.sqrt(self.box.box_G * mass / radius)
return (numpy.linalg.norm(self.angular_momentum[mask])
/ (numpy.sqrt(2) * mass * V * radius))
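The Bullock spin evaluated above is lambda = |J| / (sqrt(2) M V R) with circular velocity V = sqrt(G M / R). A toy check with illustrative numbers, taking G = 1 in box units:

```python
import numpy

J = numpy.array([0.0, 0.0, 2.0e-4])  # made-up angular momentum vector
M, R, G = 1.0, 0.5, 1.0
V = numpy.sqrt(G * M / R)
lam = numpy.linalg.norm(J) / (numpy.sqrt(2) * M * V * R)  # = 2.0e-4
```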
def spherical_overdensity_mass(self, delta_mult, npart_min=10):
r"""
Calculate spherical overdensity mass and radius. The mass is defined as
the enclosed mass within an outermost radius where the mean enclosed
spherical density reaches a multiple of the critical density `delta`.
Parameters
----------
delta_mult : list of int or float
Overdensity multiple.
npart_min : int
Minimum number of enclosed particles for a radius to be Minimum number of enclosed particles for a radius to be
considered trustworthy. considered trustworthy.
@ -571,83 +211,75 @@ class Clump:
mx : float mx : float
Corresponding spherical enclosed mass. Corresponding spherical enclosed mass.
""" """
# If single `delta` turn to list # We first sort the particles in an increasing separation
delta = [delta] if isinstance(delta, (float, int)) else delta rs = self.r
# If given a list or tuple turn to array order = numpy.argsort(rs)
_istlist = isinstance(delta, (list, tuple)) rs = rs[order]
delta = numpy.asarray(delta, dtype=float) if _istlist else delta cmass = numpy.cumsum(self['M'][order]) # Cumulative mass
# We calculate the enclosed volume and indices where it is above target
vol = 4 * numpy.pi / 3 * (rs**3 - rs[0]**3)
ks = numpy.where(cmass / vol > delta_mult * self.box.rhoc)[0]
if ks.size == 0: # Never above the threshold?
return numpy.nan, numpy.nan
k = numpy.max(ks)
if k < npart_min: # Too few particles?
return numpy.nan, numpy.nan
return rs[k], cmass[k]
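A standalone restatement of the spherical-overdensity logic above, with particles sorted by radius and the full sphere volume used for the mean enclosed density (a sketch under those simplifications):

```python
import numpy

def spherical_overdensity(r, m, delta_mult, rhoc, npart_min=10):
    order = numpy.argsort(r)
    rs, cmass = r[order], numpy.cumsum(m[order])
    vol = 4 / 3 * numpy.pi * rs**3
    ks = numpy.where(cmass / vol > delta_mult * rhoc)[0]
    if ks.size == 0 or ks.max() < npart_min:   # never overdense / too few
        return numpy.nan, numpy.nan
    k = ks.max()                               # outermost overdense particle
    return rs[k], cmass[k]
```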
# Ordering of deltas @property
order_delta = numpy.argsort(delta) def keys(self):
# Sort the particles """
order_particles = numpy.argsort(self.r)[::-1] Particle array keys.
# Density to aim for
n_delta = delta.size
target_density = delta * self.rhoc
# The sum of particle masses, starting from the outside
# Adding the furthest particle ensures that the 0th index is the total mass
cummass_ordered = (self.total_particle_mass
+ self.m[order_particles][0]
- numpy.cumsum(self.m[order_particles]))
# Enclosed volumes at particle radii
volumes = self.enclosed_spherical_volume(self.r[order_particles])
densities = cummass_ordered / volumes
# Pre-allocate arrays
rfound = numpy.full_like(delta, numpy.nan)
mfound = numpy.full_like(rfound, numpy.nan)
for n in order_delta:
overdense_mask = densities > target_density[n]
# Enforce that we have at least several particles enclosed
if numpy.sum(overdense_mask) < n_particles_min:
continue
# The outermost particle radius where the overdensity is achieved
k = numpy.where(overdense_mask)[0][0]
rfound[n] = self.r[order_particles][k]
mfound[n] = cummass_ordered[k]
# If only one delta return simply numbers
if n_delta == 1:
rfound = rfound[0]
mfound = mfound[0]
return rfound, mfound
@classmethod
def from_arrays(cls, particles, clump, rhoc=None, G=None):
r"""
Initialises `Clump` from `particles` containing the relevant particle
information and its `clump` information.
Parameters
----------
particles : structured array
Array of particles belonging to this clump. Must contain
`["x", "y", "z", "M"]` and optionally also `["vx", "vy", "vz"]`.
clump : array
A slice of a `clumps` array corresponding to this clump. Must
contain `["peak_x", "peak_y", "peak_z", "mass_cl"]`.
rhoc : float, optional
The critical density :math:`\rho_c` at this snapshot in box units.
By default not set.
G : float, optional
The gravitational constant :math:`G` in box units. By default not
set.
Returns Returns
------- -------
clump : `Clump` key : list of str
""" """
x, y, z, m = (particles[p] for p in ["x", "y", "z", "M"]) return self.data.dtype.names
x0, y0, z0, cl_mass, hindex = (
clump[p] for p in ["peak_x", "peak_y", "peak_z", "mass_cl", def __getitem__(self, key):
"index"]) if key not in self.keys:
try: raise RuntimeError("Invalid key `{}`!".format(key))
vx, vy, vz = (particles[p] for p in ["vx", "vy", "vz"]) return self.particles[key]
except ValueError:
vx, vy, vz = None, None, None def __len__(self):
return cls(x, y, z, m, x0, y0, z0, cl_mass, return self.particles.size
vx, vy, vz, hindex, rhoc, G)
class Clump(BaseStructure):
"""
Clump object to handle operations on its particles.
Parameters
----------
particles : structured array
Particle array. Must contain `['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']`.
info : structured array
Array containing information from the clump finder.
box : :py:class:`csiborgtools.read.BoxUnits`
Box units object.
"""
def __init__(self, particles, info, box):
self.particles = particles
self.info = info
self.box = box
class Halo(BaseStructure):
"""
Ultimate halo object to handle operations on its particles, i.e. the summed
particles halo.
Parameters
----------
particles : structured array
Particle array. Must contain `['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']`.
info : structured array
Array containing information from the clump finder.
box : :py:class:`csiborgtools.read.BoxUnits`
Box units object.
"""
def __init__(self, particles, info, box):
self.particles = particles
self.info = info
self.box = box

csiborgtools/fits/haloprofile.py

@ -12,109 +12,78 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
""" """Halo profiles functions and posteriors."""
Halo profiles functions and posteriors.
"""
from jax import numpy as jnumpy
from jax import grad
import numpy import numpy
from scipy.optimize import minimize_scalar from scipy.optimize import minimize_scalar
from scipy.stats import uniform from scipy.stats import uniform
from .halo import Clump from .halo import Clump
class NFWProfile: class NFWProfile:
r""" r"""
The Navarro-Frenk-White (NFW) density profile defined as The Navarro-Frenk-White (NFW) density profile.
.. math:: .. math::
\rho(r) = \frac{\rho_0}{x(1 + x)^2} \rho(r) = \frac{\rho_0}{x(1 + x)^2},
where :math:`x = r / R_s` with free parameters :math:`R_s, \rho_0`. :math:`x = r / R_s` and its free parameters are :math:`R_s, \rho_0`: scale
radius and NFW density parameter.
Parameters
----------
Rs : float
Scale radius :math:`R_s`.
rho0 : float
NFW density parameter :math:`\rho_0`.
""" """
@staticmethod @staticmethod
def profile(r, Rs, rho0): def profile(r, Rs, rho0):
r""" """
Halo profile evaluated at :math:`r`. Evaluate the halo profile at `r`.
Parameters Parameters
---------- ----------
r : float or 1-dimensional array r : 1-dimensional array
Radial distance :math:`r`. Radial distance.
Rs : float Rs : float
Scale radius :math:`R_s`. Scale radius.
rho0 : float rho0 : float
NFW density parameter :math:`\rho_0`. NFW density parameter.
Returns Returns
------- -------
density : float or 1-dimensional array density : 1-dimensional array
Density of the NFW profile at :math:`r`.
""" """
x = r / Rs x = r / Rs
return rho0 / (x * (1 + x)**2) return rho0 / (x * (1 + x)**2)
@staticmethod @staticmethod
def logprofile(r, Rs, rho0, use_jax=False): def _logprofile(r, Rs, rho0):
r""" """Natural logarithm of `NFWPprofile.profile(...)`."""
Natural logarithm of the halo profile evaluated at :math:`r`.
Parameters
----------
r : float or 1-dimensional array
Radial distance :math:`r`.
Rs : float
Scale radius :math:`R_s`.
rho0 : float
NFW density parameter :math:`\rho_0`.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
Returns
-------
logdensity : float or 1-dimensional array
Logarithmic density of the NFW profile at :math:`r`.
"""
log = jnumpy.log if use_jax else numpy.log
x = r / Rs x = r / Rs
return log(rho0) - log(x) - 2 * log(1 + x) return numpy.log(rho0) - numpy.log(x) - 2 * numpy.log(1 + x)
@staticmethod @staticmethod
def enclosed_mass(r, Rs, rho0, use_jax=False): def mass(r, Rs, rho0):
r""" r"""
Enclosed mass of a NFW profile in radius :math:`r`. Calculate the enclosed mass of a NFW profile in radius `r`.
Parameters Parameters
---------- ----------
r : float or 1-dimensional array r : 1-dimensional array
Radial distance :math:`r`. Radial distance.
Rs : float Rs : float
Scale radius :math:`R_s`. Scale radius.
rho0 : float rho0 : float
NFW density parameter :math:`\rho_0`. NFW density parameter.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
Returns Returns
------- -------
M : float or 1-dimensional array M : 1-dimensional array
The enclosed mass. The enclosed mass.
""" """
log = jnumpy.log if use_jax else numpy.log
x = r / Rs x = r / Rs
out = log(1 + x) - x / (1 + x) out = numpy.log(1 + x) - x / (1 + x)
return 4 * numpy.pi * rho0 * Rs**3 * out return 4 * numpy.pi * rho0 * Rs**3 * out
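A quick numerical sanity check of the enclosed-mass expression above: integrating 4 pi r^2 rho(r) for the NFW profile reproduces 4 pi rho0 Rs^3 [ln(1 + x) - x / (1 + x)] with x = r / Rs.

```python
import numpy
from scipy.integrate import quad

Rs, rho0, r = 1.0, 1.0, 2.0
x = r / Rs
analytic = 4 * numpy.pi * rho0 * Rs**3 * (numpy.log(1 + x) - x / (1 + x))
numeric, _ = quad(
    lambda s: 4 * numpy.pi * s**2 * rho0 / ((s / Rs) * (1 + s / Rs)**2),
    0, r)
assert numpy.isclose(analytic, numeric)
```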
def bounded_enclosed_mass(self, rmin, rmax, Rs, rho0, use_jax=False): def bounded_mass(self, rmin, rmax, Rs, rho0):
r""" r"""
Calculate the enclosed mass between :math:`r_min <= r <= r_max`. Calculate the enclosed mass between `rmin` and `rmax`.
Parameters Parameters
---------- ----------
@ -125,51 +94,46 @@ class NFWProfile:
Rs : float Rs : float
Scale radius :math:`R_s`. Scale radius :math:`R_s`.
rho0 : float rho0 : float
NFW density parameter :math:`\rho_0`. NFW density parameter.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
Returns Returns
------- -------
M : float M : float
Enclosed mass within the radial range. Enclosed mass within the radial range.
""" """
return (self.enclosed_mass(rmax, Rs, rho0, use_jax) return self.mass(rmax, Rs, rho0) - self.mass(rmin, Rs, rho0)
- self.enclosed_mass(rmin, Rs, rho0, use_jax))
def pdf(self, r, Rs, rmin, rmax): def pdf(self, r, Rs, rmin, rmax):
r""" r"""
The radial probability density function of the NFW profile calculated Calculate the radial PDF of the NFW profile, defined below.
as
.. math:: .. math::
\frac{4\pi r^2 \rho(r)} {M(r_\min, r_\max)} \frac{4\pi r^2 \rho(r)} {M(r_\min, r_\max)},
where :math:`M(r_\min, r_\max)` is the enclosed mass between where :math:`M(r_\min, r_\max)` is the enclosed mass between
:math:`r_\min` and :math:`r_\max`. Note that the dependence on :math:`r_\min` and :math:`r_\max`. Note that the dependence on
:math:`\rho_0` is cancelled. :math:`\rho_0` is cancelled and must be accounted for in the
normalisation term to match the total mass.
Parameters Parameters
---------- ----------
r : float or 1-dimensional array r : 1-dimensional array
Radial distance :math:`r`. Radial distance.
Rs : float Rs : float
Scale radius :math:`R_s`. Scale radius.
rmin : float rmin : float
Minimum radius. Minimum radius to evaluate the PDF (denominator term).
rmax : float rmax : float
Maximum radius. Maximum radius to evaluate the PDF (denominator term).
Returns Returns
------- -------
pdf : float or 1-dimensional array pdf : 1-dimensional array
Probability density of the NFW profile at :math:`r`.
""" """
norm = self.bounded_enclosed_mass(rmin, rmax, Rs, 1) norm = self.bounded_mass(rmin, rmax, Rs, 1)
return 4 * numpy.pi * r**2 * self.profile(r, Rs, 1) / norm return 4 * numpy.pi * r**2 * self.profile(r, Rs, 1) / norm
def rvs(self, rmin, rmax, Rs, N=1): def rvs(self, rmin, rmax, Rs, size=1):
""" """
Generate random samples from the NFW profile via rejection sampling. Generate random samples from the NFW profile via rejection sampling.
@ -180,8 +144,8 @@ class NFWProfile:
rmax : float rmax : float
Maximum radius. Maximum radius.
Rs : float Rs : float
Scale radius :math:`R_s`. Scale radius.
N : int, optional size : int, optional
Number of samples to generate. By default 1. Number of samples to generate. By default 1.
Returns Returns
@ -190,15 +154,15 @@ class NFWProfile:
Samples following the NFW profile. Samples following the NFW profile.
""" """
gen = uniform(rmin, rmax-rmin) gen = uniform(rmin, rmax-rmin)
samples = numpy.full(N, numpy.nan) samples = numpy.full(size, numpy.nan)
for i in range(N): for i in range(size):
while True: while True:
r = gen.rvs() r = gen.rvs()
if self.pdf(r, Rs, rmin, rmax) > numpy.random.rand(): if self.pdf(r, Rs, rmin, rmax) > numpy.random.rand():
samples[i] = r samples[i] = r
break break
if N == 1: if size == 1:
return samples[0] return samples[0]
return samples return samples
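The acceptance test in `rvs` compares the pdf directly against a uniform draw, which implicitly assumes the pdf is bounded by one on the sampling range. For contrast, generic rejection sampling with an explicit bound (a sketch, not the library's method):

```python
import numpy

def rejection_sample(pdf, rmin, rmax, bound, size, seed=None):
    rng = numpy.random.default_rng(seed)
    out = numpy.empty(size)
    n = 0
    while n < size:
        r = rng.uniform(rmin, rmax)
        if rng.random() < pdf(r) / bound:  # accept w.p. pdf(r) / bound
            out[n] = r
            n += 1
    return out
```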
@ -206,11 +170,10 @@ class NFWProfile:
class NFWPosterior(NFWProfile): class NFWPosterior(NFWProfile):
r""" r"""
Posterior for fitting the NFW profile in the range specified by the Posterior for fitting the NFW profile in the range specified by the
closest particle and the :math:`r_{200c}` radius. The likelihood is closest particle and the :math:`r_{200c}` radius, calculated as below.
calculated as
.. math:: .. math::
\frac{4\pi r^2 \rho(r)} {M(r_{\min} r_{200c})} \frac{m}{M / N} \frac{4\pi r^2 \rho(r)} {M(r_{\min} r_{200c})} \frac{m}{M / N},
where :math:`M(r_{\min} r_{200c}))` is the NFW enclosed mass between the where :math:`M(r_{\min} r_{200c}))` is the NFW enclosed mass between the
closest particle and the :math:`r_{200c}` radius, :math:`m` is the particle closest particle and the :math:`r_{200c}` radius, :math:`m` is the particle
@ -223,18 +186,9 @@ class NFWPosterior(NFWProfile):
clump : `Clump` clump : `Clump`
Clump object containing the particles and clump information. Clump object containing the particles and clump information.
""" """
_clump = None
_binsguess = 10
_r = None
_Npart = None
_m = None
_rmin = None
_rmax = None
def __init__(self, clump): def __init__(self):
# Initialise the NFW profile
super().__init__() super().__init__()
self.clump = clump
@property @property
def clump(self): def clump(self):
@ -248,97 +202,12 @@ class NFWPosterior(NFWProfile):
""" """
return self._clump return self._clump
@property
def r(self):
r"""
Radial distance of particles used to fit the NFW profile, i.e. the ones
whose radial distance is less than :math:`R_{\rm 200c}`.
Returns
-------
r : 1-dimensional array
"""
return self._r
@property
def Npart(self):
r"""
Number of particles used to fit the NFW profile, i.e. the ones
whose radial distance is less than :math:`R_{\rm 200c}`.
Returns
-------
Npart : int
"""
return self._Npart
@property
def m(self):
r"""
Mass of particles used to fit the NFW profile, i.e. the ones
whose radial distance is less than :math:`R_{\rm 200c}`.
Returns
-------
r : 1-dimensional array
"""
return self._m
@property
def rmin(self):
"""
The minimum radial distance of a particle.
Returns
-------
rmin : float
"""
return self._rmin
@property
def rmax(self):
r"""
The maximum radial distance used to fit the profile, here taken to be
the :math:`R_{\rm 200c}`.
Returns
-------
rmax : float
"""
return self._rmax
@clump.setter @clump.setter
def clump(self, clump): def clump(self, clump):
"""Sets `clump` and precalculates useful things.""" assert isinstance(clump, Clump)
if not isinstance(clump, Clump):
raise TypeError(
"`clump` must be :py:class:`csiborgtools.fits.Clump` type. "
"Currently `{}`".format(type(clump)))
self._clump = clump self._clump = clump
# The minimum separation
rmin = self.clump.rmin
rmax, __ = self.clump.spherical_overdensity_mass(200)
# Set the distances
self._rmin = rmin
self._rmax = rmax
# Set particles that will be used to fit the halo
mask_r200 = (self.clump.r >= rmin) & (self.clump.r <= rmax)
self._r = self.clump.r[mask_r200]
self._m = self.clump.m[mask_r200]
self._Npart = self._r.size
# Ensure that the minimum separation is > 0 for finite log
if self.rmin > 0:
self._logrmin = numpy.log10(self.rmin)
else:
self._logrmin = numpy.log10(numpy.min(self.r[self.r > 0]))
self._logrmax = numpy.log10(self.rmax)
self._logprior_volume = numpy.log(self._logrmax - self._logrmin)
# Precalculate useful things
self._logMtot = numpy.log(numpy.sum(self.m))
gamma = 4 * numpy.pi * self.r**2 * self.m * self.Npart
self._ll0 = numpy.sum(numpy.log(gamma)) - self.Npart * self._logMtot
def rho0_from_Rs(self, Rs): def rho0_from_Rs(self, Rs, rmin, rmax, mass):
r""" r"""
Obtain :math:`\rho_0` of the NFW profile from the integral constraint Obtain :math:`\rho_0` of the NFW profile from the integral constraint
on total mass. Calculated as the ratio between the total particle mass on total mass. Calculated as the ratio between the total particle mass
@ -346,75 +215,96 @@ class NFWPosterior(NFWProfile):
Parameters Parameters
---------- ----------
logRs : float Rs : float
Logarithmic scale factor in units matching the coordinates. Scale radius in units matching the coordinates.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
mass : float
Mass enclosed within the radius used to fit the NFW profile.
Returns Returns
------- -------
rho0: float rho0: float
""" """
Mtot = numpy.exp(self._logMtot) return mass / self.bounded_mass(rmin, rmax, Rs, 1)
Mnfw_norm = self.bounded_enclosed_mass(self.rmin, self.rmax, Rs, 1)
return Mtot / Mnfw_norm
def logprior(self, logRs): def initlogRs(self, r, rmin, rmax, binsguess=10):
r""" r"""
Logarithmic uniform prior on :math:`\log R_{\rm s}`. Calculate the most often occurring value of :math:`r` used as initial
guess of :math:`R_{\rm s}` since :math:`r^2 \rho(r)` peaks at
:math:`r = R_{\rm s}`.
Parameters
----------
r : 1-dimensional array
Radial distance of particles used to fit the profile.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
binsguess : int
Number of bins to initially guess :math:`R_{\rm s}`.
Returns
-------
initlogRs : float
"""
bins = numpy.linspace(rmin, rmax, binsguess)
counts, edges = numpy.histogram(r, bins)
return numpy.log10(edges[numpy.argmax(counts)])
def logprior(self, logRs, rmin, rmax):
r"""
Logarithmic uniform prior on :math:`\log R_{\rm s}`. Unnormalised but
that does not matter.
Parameters Parameters
---------- ----------
logRs : float logRs : float
Logarithmic scale factor in units matching the coordinates. Logarithmic scale factor.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
Returns Returns
------- -------
lp : float lp : float
""" """
if not self._logrmin < logRs < self._logrmax: if not rmin < 10**logRs < rmax:
return - numpy.infty return - numpy.infty
return - self._logprior_volume return 0.
def loglikelihood(self, logRs, use_jax=False): def loglikelihood(self, logRs, r, rmin, rmax, npart):
""" """
Logarithmic likelihood. Logarithmic likelihood.
Parameters Parameters
---------- ----------
r : 1-dimensional array
Radial distance of particles used to fit the profile.
logRs : float logRs : float
Logarithmic scale factor in units matching the coordinates. Logarithmic scale factor in units matching the coordinates.
use_jax : bool, optional rmin : float
Whether to use `JAX` expressions. By default `False`. Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
npart : int
Number of particles used to fit the profile.
Returns Returns
------- -------
ll : float ll : float
""" """
Rs = 10**logRs Rs = 10**logRs
log = jnumpy.log if use_jax else numpy.log mnfw = self.bounded_mass(rmin, rmax, Rs, 1)
# Expected enclosed mass from a NFW return numpy.sum(self._logprofile(r, Rs, 1)) - npart * numpy.log(mnfw)
Mnfw = self.bounded_enclosed_mass(self.rmin, self.rmax,
Rs, 1, use_jax)
fsum = jnumpy.sum if use_jax else numpy.sum
ll = fsum(self.logprofile(self.r, Rs, 1, use_jax)) + self._ll0
return ll - self.Npart * log(Mnfw)
@property
def initlogRs(self):
r"""
The most often occurring value of :math:`r` used as initial guess of
:math:`R_{\rm s}` since :math:`r^2 \rho(r)` peaks at
:math:`r = R_{\rm s}`.
Returns def __call__(self, logRs, r, rmin, rmax, npart):
-------
initlogRs : float
"""
bins = numpy.linspace(self.rmin, self.rmax,
self._binsguess)
counts, edges = numpy.histogram(self.r, bins)
return numpy.log10(edges[numpy.argmax(counts)])
def __call__(self, logRs, use_jax=False):
""" """
Logarithmic posterior. Sum of the logarithmic prior and likelihood. Logarithmic posterior. Sum of the logarithmic prior and likelihood.
@ -422,83 +312,64 @@ class NFWPosterior(NFWProfile):
---------- ----------
logRs : float logRs : float
Logarithmic scale factor in units matching the coordinates. Logarithmic scale factor in units matching the coordinates.
use_jax : bool, optional r : 1-dimensional array
Whether to use `JAX` expressions. By default `False`. Radial distance of particles used to fit the profile.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
npart : int
Number of particles used to fit the profile.
Returns Returns
------- -------
lpost : float lpost : float
""" """
lp = self.logprior(logRs) lp = self.logprior(logRs, rmin, rmax)
if not numpy.isfinite(lp): if not numpy.isfinite(lp):
return - numpy.infty return - numpy.infty
return self.loglikelihood(logRs, use_jax) + lp return self.loglikelihood(logRs, r, rmin, rmax, npart) + lp
def uncertainty_at_maxpost(self, logRs_max): def fit(self, clump, eps=1e-4):
r""" r"""
Calculate Gaussian approximation of the uncertainty at `logRs_max`, the Fit the NFW profile. If the fit is not converged returns NaNs.
maximum a-posteriori estimate. This is the square root of the negative
inverse second derivative of the logarithmic posterior with respect to the
logarithm of the scale factor. This is only valid if `logRs_max` is the
maximum of the posterior!
This uses `JAX`. The functions should be compiled but unless there is Checks whether :math:`\log r_{\rm max} / R_{\rm s} > \epsilon`,
a need for more speed this is fine as it is.
Parameters
----------
logRs_max : float
Position :math:`\log R_{\rm s}` to evaluate the uncertainty. Must
be the maximum.
Returns
-------
uncertainty : float
"""
def f(x):
return self(x, use_jax=True)
# Evaluate the second derivative
h = grad(grad(f))(logRs_max)
h = float(h)
if not h < 0:
return numpy.nan
return (- 1 / h)**0.5
def maxpost_logRs(self, calc_err=False, eps=1e-4):
r"""
Maximum a-posteriori estimate of the scale radius
:math:`\log R_{\rm s}`. Returns the scale radius if the fit converged,
otherwise `numpy.nan`. Checks whether
:math:`\log r_{\rm max} / R_{\rm s} > \epsilon`, where
to ensure that the scale radius is not too close to the boundary which to ensure that the scale radius is not too close to the boundary which
occurs if the fit fails. occurs if the fit fails.
Parameters Parameters
---------- ----------
calc_err : bool, optional clump : :py:class:`csiborgtools.fits.Clump`
Optional toggle to calculate the uncertainty on the scale radius. Clump being fitted.
By default false. eps : float
Tolerance to ensure we are sufficiently far from math:`R_{200c}`.
Returns Returns
------- -------
logRs: float Rs: float
Log scale radius. Best fit scale radius.
uncertainty : float rho0: float
Uncertainty on the scale radius. Calculated following Best fit NFW central density.
`self.uncertainty_at_maxpost`.
""" """
assert isinstance(clump, Clump)
r = clump.r
rmin = numpy.min(r)
rmax, mtot = clump.spherical_overdensity_mass(200)
npart = numpy.sum((rmin <= r) & (r <= rmax))
# Loss function to optimize # Loss function to optimize
def loss(logRs): def loss(logRs):
return - self(logRs) return - self(logRs, r, rmin, rmax, npart)
res = minimize_scalar(loss, bounds=(self._logrmin, self._logrmax), res = minimize_scalar(
method='bounded') loss, bounds=(numpy.log10(rmin), numpy.log10(rmax)),
method='bounded')
if self._logrmax - res.x < eps: if numpy.log10(rmax) - res.x < eps:
res.success = False res.success = False
if not res.success: if not res.success:
return numpy.nan, numpy.nan return numpy.nan, numpy.nan
e_logRs = self.uncertainty_at_maxpost(res.x) if calc_err else numpy.nan
return res.x, e_logRs rho0 = self.rho0_from_Rs(10**res.x, rmin, rmax, mtot)
return 10**res.x, rho0

csiborgtools/match/__init__.py

@ -12,9 +12,16 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-from .match import (RealisationsMatcher, cosine_similarity,  # noqa
-                    ParticleOverlap, get_clumplims, fill_delta, fill_delta_indxs,  # noqa
-                    calculate_overlap, calculate_overlap_indxs,  # noqa
-                    dist_centmass, dist_percentile)  # noqa
-from .num_density import (binned_counts, number_density)  # noqa
+from csiborgtools.match.match import (  # noqa
+    ParticleOverlap,
+    RealisationsMatcher,
+    calculate_overlap,
+    calculate_overlap_indxs,
+    cosine_similarity,
+    dist_centmass,
+    dist_percentile,
+    fill_delta,
+    fill_delta_indxs,
+    get_clumplims,
+)
+from csiborgtools.match.num_density import binned_counts, number_density  # noqa

csiborgtools/match/match.py

@ -15,14 +15,15 @@
 """
 Support for matching halos between CSiBORG IC realisations.
 """
+from datetime import datetime
 from gc import collect
-import numpy
-from scipy.ndimage import gaussian_filter
-from numba import jit
-from tqdm import (tqdm, trange)
-from ..read import concatenate_clumps
-from ..utils import now
+
+import numpy
+from numba import jit
+from scipy.ndimage import gaussian_filter
+from tqdm import tqdm, trange
+
+from .utils import concatenate_clumps

 ###############################################################################
 #              Realisations matcher for calculating overlaps                 #
@ -47,24 +48,20 @@ class RealisationsMatcher:
The mass kind whose similarity is to be checked. Must be a valid The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo. mass associated with a halo.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`.
""" """
_nmult = None _nmult = None
_dlogmass = None _dlogmass = None
_mass_kind = None _mass_kind = None
_overlapper = None _overlapper = None
def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass", def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass"):
overlapper_kwargs={}):
assert nmult > 0 assert nmult > 0
assert dlogmass > 0 assert dlogmass > 0
assert isinstance(mass_kind, str) assert isinstance(mass_kind, str)
self._nmult = nmult self._nmult = nmult
self._dlogmass = dlogmass self._dlogmass = dlogmass
self._mass_kind = mass_kind self._mass_kind = mass_kind
self._overlapper = ParticleOverlap(**overlapper_kwargs) self._overlapper = ParticleOverlap()
@property @property
def nmult(self): def nmult(self):
@ -121,9 +118,9 @@ class RealisationsMatcher:
Parameters Parameters
---------- ----------
cat0 : :py:class:`csiborgtools.read.HaloCatalogue` cat0 : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the reference simulation. Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.HaloCatalogue` catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the cross simulation. Halo catalogue of the cross simulation.
clumps0 : list of structured arrays clumps0 : list of structured arrays
List of clump structured arrays of the reference simulation, keys List of clump structured arrays of the reference simulation, keys
@ -133,7 +130,7 @@ class RealisationsMatcher:
List of clump structured arrays of the cross simulation, keys must List of clump structured arrays of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers. converted to cell numbers.
delta_bcgk : 3-dimensional array delta_bckg : 3-dimensional array
Summed background density field of the reference and cross Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells final snapshot. Assumed to only be sampled in cells
@ -153,7 +150,8 @@ class RealisationsMatcher:
Overlaps with the cross catalogue. Overlaps with the cross catalogue.
""" """
# Query the KNN # Query the KNN
verbose and print("{}: querying the KNN.".format(now()), flush=True) verbose and print("{}: querying the KNN."
.format(datetime.now()), flush=True)
match_indxs = radius_neighbours( match_indxs = radius_neighbours(
catx.knn(select_initial=True), cat0.positions(in_initial=True), catx.knn(select_initial=True), cat0.positions(in_initial=True),
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"], radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
@ -229,7 +227,7 @@ class RealisationsMatcher:
List of clump structured arrays of the cross simulation, keys must List of clump structured arrays of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers. converted to cell numbers.
delta_bcgk : 3-dimensional array delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells final snapshot. Assumed to only be sampled in cells
@ -582,7 +580,7 @@ class ParticleOverlap:
must include `x`, `y`, `z` and `M`. must include `x`, `y`, `z` and `M`.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
delta_bcgk : 3-dimensional array delta_bckg : 3-dimensional array
Summed background density field of the reference and cross Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells final snapshot. Assumed to only be sampled in cells
@ -735,7 +733,7 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
Density field of the second halo. Density field of the second halo.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
delta_bcgk : 3-dimensional array delta_bckg : 3-dimensional array
Summed background density field of the reference and cross simulations Summed background density field of the reference and cross simulations
calculated with particles assigned to halos at the final snapshot. calculated with particles assigned to halos at the final snapshot.
Assumed to only be sampled in cells :math:`[512, 1536)^3`. Assumed to only be sampled in cells :math:`[512, 1536)^3`.
@ -787,7 +785,7 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
Density field of the second halo. Density field of the second halo.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
delta_bcgk : 3-dimensional array delta_bckg : 3-dimensional array
Summed background density field of the reference and cross simulations Summed background density field of the reference and cross simulations
calculated with particles assigned to halos at the final snapshot. calculated with particles assigned to halos at the final snapshot.
Assumed to only be sampled in cells :math:`[512, 1536)^3`. Assumed to only be sampled in cells :math:`[512, 1536)^3`.
@ -876,8 +874,8 @@ def dist_percentile(dist, qs, distmax=0.075):
return x return x
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., enforce_in32=False, def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.,
verbose=True): enforce_int32=False, verbose=True):
""" """
Find all neigbours of a trained KNN model whose center of mass separation Find all neigbours of a trained KNN model whose center of mass separation
is less than `nmult` times the sum of their respective radii. is less than `nmult` times the sum of their respective radii.
@ -922,7 +920,7 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., enforce_in32=False,
# so we take the first item where appropriate # so we take the first item where appropriate
mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
indxs[i] = indx[0][mask] indxs[i] = indx[0][mask]
if enforce_in32: if enforce_int32:
indxs[i] = indxs[i].astype(numpy.int32) indxs[i] = indxs[i].astype(numpy.int32)
return numpy.asarray(indxs, dtype=object) return numpy.asarray(indxs, dtype=object)
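Stripped of the catalogue machinery, `radius_neighbours` relies on one trick: query the kNN out to the largest admissible separation, then keep only the neighbours closer than `nmult` times the sum of the pair's radii. A self-contained sketch of that trick on random points (toy data, not the CSiBORG catalogues):

import numpy
from sklearn.neighbors import NearestNeighbors

rng = numpy.random.default_rng(42)
pos, radiusKNN = rng.random((100, 3)), 0.05 * rng.random(100)
X, radiusX = rng.random((10, 3)), 0.05 * rng.random(10)
nmult = 1.0

knn = NearestNeighbors().fit(pos)
patchknn_max = numpy.max(radiusKNN)  # upper bound on any admissible match
indxs = [None] * X.shape[0]
for i in range(X.shape[0]):
    dist, indx = knn.radius_neighbors(X[i].reshape(1, -1),
                                      radiusX[i] + patchknn_max,
                                      sort_results=True)
    # Keep neighbours whose separation is below `nmult` summed radii.
    mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
    indxs[i] = indx[0][mask].astype(numpy.int32)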


@@ -0,0 +1,55 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Useful functions."""
import numpy
def concatenate_clumps(clumps):
"""
Concatenate an array of clumps to a single array containing all particles.
Parameters
----------
clumps : list of structured arrays
List of clumps. Each clump must be a structured array with keys `x`, `y`, `z` and `M`.
Returns
-------
particles : structured array
"""
# Count how large an array is needed
N = 0
for clump, __ in clumps:
N += clump.size
# Infer dtype of positions
if clumps[0][0]['x'].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32
else:
posdtype = numpy.float32
# Pre-allocate array
dtype = {"names": ['x', 'y', 'z', 'M'],
"formats": [posdtype] * 3 + [numpy.float32]}
particles = numpy.full(N, numpy.nan, dtype)
# Fill it one clump by another
start = 0
for clump, __ in clumps:
end = start + clump.size
for p in ('x', 'y', 'z', 'M'):
particles[p][start:end] = clump[p]
start = end
return particles
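A quick sanity check of the function, with two toy clumps in the `(clump, index)` tuple layout that the loop above expects (the second tuple element is ignored):

import numpy

dtype = {"names": ['x', 'y', 'z', 'M'], "formats": [numpy.float32] * 4}
clump1, clump2 = numpy.ones(3, dtype=dtype), numpy.zeros(2, dtype=dtype)
particles = concatenate_clumps([(clump1, 0), (clump2, 1)])
assert particles.size == 5  # all particles, with keys 'x', 'y', 'z', 'M'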


@@ -12,13 +12,18 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .halo_cat import ClumpsCatalogue, HaloCatalogue  # noqa
from .knn_summary import kNNCDFReader  # noqa
from .obs import (  # noqa
    SDSS,
    MCXCClusters,
    PlanckClusters,
    TwoMPPGalaxies,
    TwoMPPGroups,
)
from .outsim import combine_splits, dump_split  # noqa
from .overlap_summary import NPairsOverlap, PairOverlap, binned_resample_mean  # noqa
from .paths import CSiBORGPaths  # noqa
from .pk_summary import PKReader  # noqa
from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm  # noqa
from .tpcf_summary import TPCFReader  # noqa
View file

@@ -16,17 +16,17 @@
Simulation box unit transformations.
"""
import numpy
from astropy import constants, units
from astropy.cosmology import LambdaCDM
from scipy.interpolate import interp1d

from .readsim import ParticleReader

# Map of unit conversions
CONV_NAME = {
    "length": ['x', 'y', 'z', "peak_x", "peak_y", "peak_z", "Rs", "rmin",
               "rmax", "r200", "r500", "x0", "y0", "z0", "lagpatch"],
    "mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain", 'M'],
    "density": ["rho0"]
}
@@ -44,6 +44,7 @@ class BoxUnits:
    paths : :py:class:`csiborgtools.read.CSiBORGPaths`
        CSiBORG paths object.
    """
    _name = "box_units"
    _cosmo = None

    def __init__(self, nsnap, nsim, paths):

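The conversion routine itself is outside this hunk, but a name-to-kind map like `CONV_NAME` typically drives a column-wise rescaling. A schematic sketch with hypothetical conversion factors (the real factors are derived from the RAMSES info files read by `BoxUnits`):

# Schematic only: BOX2CMPC and BOX2MSUN are hypothetical placeholders.
def convert_from_boxunits(data, cols, BOX2CMPC=677.7, BOX2MSUN=1e15):
    for col in cols:
        if col in CONV_NAME["length"]:
            data[col] *= BOX2CMPC
        elif col in CONV_NAME["mass"]:
            data[col] *= BOX2MSUN
        elif col in CONV_NAME["density"]:
            data[col] *= BOX2MSUN / BOX2CMPC**3
        else:
            raise ValueError("Unknown column `{}`.".format(col))
    return data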

@@ -12,54 +12,75 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""CSiBORG halo catalogue."""
from abc import ABC

import numpy
from sklearn.neighbors import NearestNeighbors

from .box_units import BoxUnits
from .paths import CSiBORGPaths
from .readsim import ParticleReader, read_initcm
from .utils import add_columns, cartesian_to_radec, flip_cols


class BaseCatalogue(ABC):
r"""
Processed halo catalogue, the data should be calculated in `run_fit_halos`.
Parameters
----------
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
min_mass : float, optional
The minimum :math:`M_{rm tot} / M_\odot` mass. By default no threshold.
max_dist : float, optional
The maximum comoving distance of a halo. By default no upper limit.
load_init : bool, optional
Whether to load the initial snapshot information. By default False.
""" """
Base (sub)halo catalogue.
"""
_data = None
_paths = None _paths = None
_nsim = None _nsim = None
_data = None
_selmask = None
def __init__(self, nsim, paths, min_mass=None, max_dist=None,
load_init=False):
assert isinstance(paths, CSiBORGPaths)
self._nsim = nsim
self._paths = paths
self._set_data(min_mass, max_dist, load_init)
@property @property
def nsim(self): def nsim(self):
"""
The IC realisation index.
Returns
-------
nsim : int
"""
if self._nsim is None:
raise RuntimeError("`nsim` is not set!")
return self._nsim return self._nsim
@nsim.setter
def nsim(self, nsim):
assert isinstance(nsim, int)
self._nsim = nsim
@property @property
def paths(self): def paths(self):
"""
CSiBORG paths manager.
Returns
-------
paths : :py:class:`csiborgtools.read.CSiBORGPaths`
"""
if self._paths is None:
raise RuntimeError("`paths` is not set!")
return self._paths return self._paths
@paths.setter
def paths(self, paths):
assert isinstance(paths, CSiBORGPaths)
self._paths = paths
@property
def data(self):
"""
The catalogue.
Returns
-------
data : structured array
"""
if self._data is None:
raise RuntimeError("Catalogue data not loaded!")
return self._data
@property @property
def nsnap(self): def nsnap(self):
""" """
@@ -83,163 +104,51 @@ class HaloCatalogue:
        """
        return BoxUnits(self.nsnap, self.nsim, self.paths)

    @box.setter
    def box(self, box):
        try:
            assert box._name == "box_units"
            self._box = box
        except AttributeError as err:
            raise TypeError from err
-------
cat : structured array
"""
return self._data
def _set_data(self, min_mass, max_dist, load_init):
"""
Loads the data, merges with mmain, does various coordinate transforms.
"""
# Load the processed data
data = numpy.load(self.paths.hcat_path(self.nsim))
# Load the mmain file and add it to the data
mmain = read_mmain(self.nsim, self.paths.mmain_path)
data = self.merge_mmain_to_clumps(data, mmain)
flip_cols(data, "peak_x", "peak_z")
# Cut on number of particles and finite m200. Do not change! Hardcoded
data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
# Now also load the initial positions
if load_init:
initcm = read_initcm(self.nsim, self.paths.initmatch_path)
if initcm is not None:
data = self.merge_initmatch_to_clumps(data, initcm)
flip_cols(data, "x0", "z0")
# # Calculate redshift
# pos = [data["peak_{}".format(p)] - 0.5 for p in ("x", "y", "z")]
# vel = [data["v{}".format(p)] for p in ("x", "y", "z")]
# zpec = self.box.box2pecredshift(*vel, *pos)
# zobs = self.box.box2obsredshift(*vel, *pos)
# zcosmo = self.box.box2cosmoredshift(
# sum(pos[i]**2 for i in range(3))**0.5)
# data = add_columns(data, [zpec, zobs, zcosmo],
# ["zpec", "zobs", "zcosmo"])
# Unit conversion
convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
"r200", "r500", "Rs", "rho0",
"peak_x", "peak_y", "peak_z"]
data = self.box.convert_from_boxunits(data, convert_cols)
# Now calculate spherical coordinates
d, ra, dec = cartesian_to_radec(
data["peak_x"], data["peak_y"], data["peak_z"])
data = add_columns(data, [d, ra, dec], ["dist", "ra", "dec"])
# And do the unit transform
if load_init and initcm is not None:
data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "lagpatch"])
# Convert all that is not an integer to float32
names = list(data.dtype.names)
formats = []
for name in names:
if data[name].dtype.char in numpy.typecodes["AllInteger"]:
formats.append(numpy.int32)
else:
formats.append(numpy.float32)
dtype = numpy.dtype({"names": names, "formats": formats})
# Apply cuts on distance and total particle mass if any
data = data[data["dist"] < max_dist] if max_dist is not None else data
data = (data[data["totpartmass"] > min_mass]
if min_mass is not None else data)
self._data = data.astype(dtype)
def merge_mmain_to_clumps(self, clumps, mmain):
"""
Merge columns from the `mmain` files to the `clump` file, matches them
by their halo index while assuming that the indices `index` in both
arrays are sorted.
Parameters
----------
clumps : structured array
Clumps structured array.
mmain : structured array
Parent halo array whose information is to be merged into `clumps`.
Returns
-------
out : structured array
Array with added columns.
"""
X = numpy.full((clumps.size, 2), numpy.nan)
# Mask of which clumps have a mmain index
mask = numpy.isin(clumps["index"], mmain["index"])
X[mask, 0] = mmain["mass_cl"]
X[mask, 1] = mmain["sub_frac"]
return add_columns(clumps, X, ["mass_mmain", "sub_frac"])
def merge_initmatch_to_clumps(self, clumps, initcat):
"""
Merge columns from the `init_cm` files to the `clump` file.
Parameters
----------
clumps : structured array
Clumps structured array.
initcat : structured array
Catalog with the clumps initial centre of mass at z = 70.
Returns
-------
out : structured array
"""
# There are more initcat clumps, so check which ones have z = 0
# and then downsample
mask = numpy.isin(initcat["ID"], clumps["index"])
initcat = initcat[mask]
# Now the index ordering should match
if not numpy.alltrue(initcat["ID"] == clumps["index"]):
raise ValueError(
"Ordering of `initcat` and `clumps` is inconsistent.")
X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z', "lagpatch"]):
X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0", "lagpatch"])
    def position(self, in_initial=False, cartesian=True):
r""" r"""
Cartesian position components of halos in :math:`\mathrm{cMpc}`. Position components. If Cartesian, then in :math:`\mathrm{cMpc}`. If
spherical, then radius is in :math:`\mathrm{cMpc}`, RA in
:math:`[0, 360)` degrees and DEC in :math:`[-90, 90]` degrees. Note
that the position is defined as the minimum of the gravitationl
potential.
Parameters Parameters
---------- ----------
in_initial : bool, optional in_initial : bool, optional
Whether to define the kNN on the initial or final snapshot. Whether to return the initial snapshot positions.
cartesian : bool, optional
Whether to return the Cartesian or spherical position components.
By default Cartesian.
Returns Returns
------- -------
pos : 2-dimensional array of shape `(nhalos, 3)` pos : 2-dimensional array of shape `(nobjects, 3)`
""" """
if in_initial: if in_initial:
ps = ["x0", "y0", "z0"] ps = ['x0', 'y0', 'z0']
else: else:
ps = ["peak_x", "peak_y", "peak_z"] ps = ['x', 'y', 'z']
return numpy.vstack([self[p] for p in ps]).T pos = [self[p] for p in ps]
if cartesian:
return numpy.vstack(pos).T
else:
return numpy.vstack([cartesian_to_radec(*pos)]).T
def velocities(self): def velocity(self):
""" """
Cartesian velocity components of halos. Likely in box units. Cartesian velocity components in box units.
Returns Returns
------- -------
vel : 2-dimensional array of shape `(nhalos, 3)` vel : 2-dimensional array of shape `(nobjects, 3)`
""" """
return numpy.vstack([self["v{}".format(p)] for p in ("x", "y", "z")]).T return numpy.vstack([self["v{}".format(p)] for p in ("x", "y", "z")]).T
@@ -250,13 +159,13 @@
        Returns
        -------
        angmom : 2-dimensional array of shape `(nobjects, 3)`
        """
        return numpy.vstack([self["L{}".format(p)] for p in ("x", "y", "z")]).T

    def knn(self, in_initial):
        """
        kNN object fitted on all catalogue objects.

        Parameters
        ----------
@@ -307,50 +216,205 @@
        initpars = ["x0", "y0", "z0"]
        if key in initpars and key not in self.keys:
            raise RuntimeError("Initial positions are not set!")
        return self.data[key]

    def __len__(self):
        return self.data.size


class ClumpsCatalogue(BaseCatalogue):
    r"""
    Clumps catalogue, defined in the final snapshot.

    TODO:
        Add fitted quantities.
        Add threshold on number of particles.

    Parameters
    ----------
    nsim : int
        IC realisation index.
    paths : :py:class:`csiborgtools.read.CSiBORGPaths`
        CSiBORG paths object.
    maxdist : float, optional
        The maximum comoving distance of a halo. By default
        :math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`,
        which corresponds to the high-resolution region.
    """
    def __init__(self, nsim, paths, maxdist=155.5 / 0.705):
        self.nsim = nsim
        self.paths = paths
        # Read in the clumps from the final snapshot
        partreader = ParticleReader(self.paths)
        cols = ["index", "parent", 'x', 'y', 'z', "mass_cl"]
        data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols)
        # Overwrite the parent with the ultimate parent
        mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
        data["parent"] = mmain["ultimate_parent"]
        # Flip positions and convert from code units to cMpc. Convert M too
        flip_cols(data, "x", "z")
        for p in ('x', 'y', 'z'):
            data[p] -= 0.5
        data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', "mass_cl"])
        mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist
        self._data = data[mask]

    @property
    def ismain(self):
        """
        Whether the clump is a main halo.

        Returns
        -------
        ismain : 1-dimensional array
        """
        return self["index"] == self["parent"]
def _set_data(self, min_mass, max_dist, load_init):
"""
TODO: old, later remove.
Loads the data, merges with mmain, does various coordinate transforms.
"""
# Load the processed data
data = numpy.load(self.paths.hcat_path(self.nsim))
# Load the mmain file and add it to the data
# TODO: read the mmain here
# mmain = read_mmain(self.nsim, self.paths.mmain_dir)
# data = self.merge_mmain_to_clumps(data, mmain)
flip_cols(data, "peak_x", "peak_z")
# Cut on number of particles and finite m200. Do not change! Hardcoded
data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
# Now also load the initial positions
if load_init:
initcm = read_initcm(self.nsim,
self.paths.initmatch_path(self.nsim, "cm"))
if initcm is not None:
data = self.merge_initmatch_to_clumps(data, initcm)
flip_cols(data, "x0", "z0")
# Unit conversion
convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
"r200", "r500", "Rs", "rho0",
"peak_x", "peak_y", "peak_z"]
data = self.box.convert_from_boxunits(data, convert_cols)
# And do the unit transform
if load_init and initcm is not None:
data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "lagpatch"])
# Convert all that is not an integer to float32
names = list(data.dtype.names)
formats = []
for name in names:
if data[name].dtype.char in numpy.typecodes["AllInteger"]:
formats.append(numpy.int32)
else:
formats.append(numpy.float32)
dtype = numpy.dtype({"names": names, "formats": formats})
# Apply cuts on distance and total particle mass if any
data = data[data["dist"] < max_dist] if max_dist is not None else data
data = (data[data["totpartmass"] > min_mass]
if min_mass is not None else data)
self._data = data.astype(dtype)
def merge_mmain_to_clumps(self, clumps, mmain):
"""
TODO: old, later remove.
Merge columns from the `mmain` files to the `clump` file, matches them
by their halo index while assuming that the indices `index` in both
arrays are sorted.
Parameters
----------
clumps : structured array
Clumps structured array.
mmain : structured array
Parent halo array whose information is to be merged into `clumps`.
Returns
-------
out : structured array
Array with added columns.
"""
X = numpy.full((clumps.size, 2), numpy.nan)
# Mask of which clumps have a mmain index
mask = numpy.isin(clumps["index"], mmain["index"])
X[mask, 0] = mmain["mass_cl"]
X[mask, 1] = mmain["sub_frac"]
return add_columns(clumps, X, ["mass_mmain", "sub_frac"])
def merge_initmatch_to_clumps(self, clumps, initcat):
"""
TODO: old, later remove.
Merge columns from the `init_cm` files to the `clump` file.
Parameters
----------
clumps : structured array
Clumps structured array.
initcat : structured array
Catalog with the clumps initial centre of mass at z = 70.
Returns
-------
out : structured array
"""
# There are more initcat clumps, so check which ones have z = 0
# and then downsample
mask = numpy.isin(initcat["ID"], clumps["index"])
initcat = initcat[mask]
# Now the index ordering should match
if not numpy.alltrue(initcat["ID"] == clumps["index"]):
raise ValueError(
"Ordering of `initcat` and `clumps` is inconsistent.")
X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z', "lagpatch"]):
X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0", "lagpatch"])
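Both merge helpers rely on the same pattern: downselect the larger array with `numpy.isin`, check that the sorted indices line up, then copy the columns across. On toy arrays:

import numpy

clump_index = numpy.array([2, 5, 9])
init_id = numpy.array([1, 2, 3, 5, 9])       # superset of clump_index
init_x = numpy.array([10., 20., 30., 50., 90.])

mask = numpy.isin(init_id, clump_index)      # downsample the superset
assert numpy.all(init_id[mask] == clump_index)
matched_x = init_x[mask]                     # now aligned with clump_index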
class HaloCatalogue(BaseCatalogue):
r"""
Halo catalogue, i.e. parent halos with summed substructure, defined in the
final snapshot.
TODO:
Add the fitted quantities
Add threshold on number of particles
Parameters
----------
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
maxdist : float, optional
The maximum comoving distance of a halo. By default
:math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`,
which corresponds to the high-resolution region.
"""
def __init__(self, nsim, paths, maxdist=155.5 / 0.705):
self.nsim = nsim
self.paths = paths
# Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
data = mmain["mmain"]
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(data, "x", "z")
for p in ('x', 'y', 'z'):
data[p] -= 0.5
data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', 'M'])
mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist
self._data = data[mask]
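The parent-halo catalogue is used the same way, and `position` switches between coordinate frames (a sketch; it requires the `mmain` files to exist and a paths object as in the example above):

halos = HaloCatalogue(7444, paths)      # illustrative IC index
pos = halos.position(cartesian=True)    # (nobjects, 3) in cMpc
sky = halos.position(cartesian=False)   # distance, RA, DEC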


@@ -13,18 +13,41 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""kNN-CDF reader."""
import joblib
import numpy
from scipy.special import factorial


class kNNCDFReader:
    """
    Shortcut object to read in the kNN CDF data.

    Parameters
    ----------
    paths : :py:class:`csiborgtools.read.CSiBORGPaths`
    """
    _paths = None

    def __init__(self, paths):
        self.paths = paths

    @property
    def paths(self):
        """
        Paths manager.

        Parameters
        ----------
        paths : :py:class:`csiborgtools.read.CSiBORGPaths`
        """
        return self._paths

    @paths.setter
    def paths(self, paths):
        # assert isinstance(paths, CSiBORGPaths)  # REMOVE
        self._paths = paths

    def read(self, run, kind, rmin=None, rmax=None, to_clip=True):
        """
        Read the auto- or cross-correlation kNN-CDF data. Infers the type from
        the data files.
@@ -33,8 +56,8 @@ class kNNCDFReader:
        ----------
        run : str
            Run ID to read in.
        kind : str
            Type of correlation. Can be either `auto` or `cross`.
        rmin : float, optional
            Minimum separation. By default ignored.
        rmax : float, optional
@@ -50,10 +73,13 @@
        out : 3-dimensional array of shape `(len(files), len(ks), neval)`
            Array of CDFs or cross-correlations.
        """
        assert kind in ["auto", "cross"]
        if kind == "auto":
            files = self.paths.knnauto_path(run)
        else:
            files = self.paths.knncross_path(run)
        if len(files) == 0:
            raise RuntimeError("No files found for run `{}`.".format(run))

        for i, file in enumerate(files):
            data = joblib.load(file)
@@ -200,22 +226,3 @@
        """
        V = 4 * numpy.pi / 3 * rs**3
        return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
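This is the standard Poisson expectation: for a homogeneous point process with number density :math:`n`, the probability of finding exactly :math:`k` points in a sphere of volume :math:`V` is :math:`(nV)^k e^{-nV} / k!`. A quick numerical check of that line, with hypothetical values:

import numpy
from scipy.special import factorial

ndensity, k = 0.01, 2
rs = numpy.linspace(1., 10., 5)
V = 4 * numpy.pi / 3 * rs**3
pk = (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
assert numpy.all((0 <= pk) & (pk <= 1))  # valid probabilities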
@staticmethod
def cross_files(ic, folder):
"""
Return the file paths corresponding to the cross-correlation of a given
IC.
Parameters
----------
ic : int
The desired IC.
folder : str
The folder containing the cross-correlation files.
Returns
-------
filepath : list of str
"""
return [file for file in glob(join(folder, "*")) if str(ic) in file]


@@ -18,13 +18,14 @@ Scripts to read in observation.
from abc import ABC, abstractproperty
from os.path import join
from warnings import warn

import numpy
from astropy import units
from astropy.coordinates import SkyCoord
from astropy.io import fits
from scipy import constants

from .utils import cols_to_structured

###############################################################################
#                        Text survey base class                               #
@@ -101,10 +102,8 @@ class TwoMPPGalaxies(TextSurvey):
        self._set_data(fpath)

    def _set_data(self, fpath):
        from scipy.constants import c
        # Read the catalogue and select non-fake galaxies
        cat = numpy.genfromtxt(fpath, delimiter="|", )
        cat = cat[cat[:, 12] == 0, :]
@@ -151,9 +150,6 @@ class TwoMPPGroups(TextSurvey):
        self._set_data(fpath)

    def _set_data(self, fpath):
        cat = numpy.genfromtxt(fpath, delimiter="|", )
        # Pre-allocate and fill the array
        cols = [("RA", numpy.float64), ("DEC", numpy.float64),
@@ -218,13 +214,12 @@ class FitsSurvey(ABC):
    @h.setter
    def h(self, h):
        self._h = h

    @staticmethod
    def _check_in_list(member, members, kind):
        """
        Check that `member` is a member of a list `members`, `kind` is a
        member type name.
        """
        if member not in members:
@@ -247,7 +242,7 @@
    @abstractproperty
    def size(self):
        """
        Return the number of samples in the catalogue.

        Returns
        -------
@@ -274,7 +269,7 @@
    @selection_mask.setter
    def selection_mask(self, mask):
        """Set the selection mask."""
        if not (isinstance(mask, numpy.ndarray)
                and mask.ndim == 1
                and mask.dtype == bool):
@@ -311,6 +306,7 @@
        Parameters
        ----------
        key : str
            FITS key.

        Returns
        -------
@@ -331,7 +327,7 @@
    def make_mask(self, steps):
        """
        Make a survey mask from a series of steps, expected to look as below.

        ```
        def steps(cls):
@@ -343,6 +339,7 @@
        Parameters
        ----------
        steps : list of steps
            Selection steps.

        Returns
        -------
@@ -359,20 +356,17 @@
        return out

    def __getitem__(self, key):
        # Check duplicates
        if key in self.routine_keys and key in self.fits_keys:
            warn("Key `{}` found in both `routine_keys` and `fits_keys`. "
                 "Returning `routine_keys` value.".format(key), stacklevel=1)

        if key in self.routine_keys:
            func, args = self.routines[key]
            out = func(*args)
        elif key in self.fits_keys:
            warn("Returning a FITS property. Be careful about little h!",
                 stacklevel=1)
            out = self.get_fitsitem(key)
        else:
            raise KeyError("Unrecognised key `{}`.".format(key))
@@ -541,7 +535,7 @@ class MCXCClusters(FitsSurvey):
        return self.get_fitsitem(key) * 1e14 * (self._hdata / self.h)**2

    def _lum(self, key):
        """Get luminosity, puts back units to be in ergs/s."""
        return self.get_fitsitem(key) * 1e44 * (self._hdata / self.h)**2
@@ -669,14 +663,14 @@ class SDSS(FitsSurvey):
        return self._absmag(photo, band1) - self._absmag(photo, band2)

    def _dist(self):
        r"""
        Get the corresponding distance estimate from `ZDIST`, defined as below.

            "Distance estimate using pecular velocity model of Willick et al.
            (1997), expressed as a redshift equivalent; multiply by c/H0 for
            Mpc"

        Distance is converted to :math:`h != 1` units.
        """
        return self.get_fitsitem("ZDIST") * constants.c * 1e-3 / (100 * self.h)
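Numerically the conversion is just :math:`d = z_{\rm ZDIST} \, c / (100 h)` with :math:`c` in km/s, giving a distance in Mpc; for instance:

from scipy import constants

zdist, h = 0.025, 0.705
dist = zdist * constants.c * 1e-3 / (100 * h)  # roughly 106 Mpc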


@@ -15,9 +15,10 @@
"""
I/O functions for analysing the CSiBORG realisations.
"""
from os import remove
from os.path import join

import numpy
from tqdm import trange


@@ -15,7 +15,8 @@
"""
Tools for summarising various results.
"""
from os.path import isfile, join

import numpy
from tqdm import tqdm
@@ -26,7 +27,7 @@ class PairOverlap:
    Parameters
    ----------
    cat0, catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
        Halo catalogues corresponding to the reference and cross
        simulations.
    fskel : str, optional
@@ -121,9 +122,9 @@
        inv_ngp_overlap = [[] for __ in range(cross_size)]
        inv_smoothed_overlap = [[] for __ in range(cross_size)]
        for ref_id in range(match_indxs.size):
            iters = zip(match_indxs[ref_id], ngp_overlap[ref_id],
                        smoothed_overlap[ref_id], strict=True)
            for cross_id, ngp_cross, smoothed_cross in iters:
                inv_match_indxs[cross_id].append(ref_id)
                inv_ngp_overlap[cross_id].append(ngp_cross)
                inv_smoothed_overlap[cross_id].append(smoothed_cross)
@@ -198,8 +199,8 @@
    def summed_overlap(self, from_smoothed):
        """
        Calculate summed overlap of each halo in the reference simulation
        with the cross simulation.

        Parameters
        ----------
@@ -319,7 +320,7 @@
        simulation from the crossed simulation.

        Parameters
        ----------
        from_smoothed : bool
            Whether to use the smoothed overlap or not.
        overlap_threshold : float, optional
@@ -408,7 +409,7 @@
        Returns
        -------
        out : :py:class:`csiborgtools.read.ClumpsCatalogue` or array
        """
        if key is None:
            return self._cat0
@@ -429,7 +430,7 @@
        Returns
        -------
        out : :py:class:`csiborgtools.read.ClumpsCatalogue` or array
        """
        if key is None:
            return self._catx
@@ -456,9 +457,9 @@ class NPairsOverlap:
    Parameters
    ----------
    cat0 : :py:class:`csiborgtools.read.ClumpsCatalogue`
        Reference simulation halo catalogue.
    catxs : list of :py:class:`csiborgtools.read.ClumpsCatalogue`
        List of cross simulation halo catalogues.
    fskel : str, optional
        Path to the overlap. By default `None`, i.e.
@@ -478,8 +479,8 @@
    def summed_overlap(self, from_smoothed, verbose=False):
        """
        Calculate summed overlap of each halo in the reference simulation
        with the cross simulations.

        Parameters
        ----------
@@ -526,7 +527,7 @@
        simulation from the crossed simulation.

        Parameters
        ----------
        from_smoothed : bool
            Whether to use the smoothed overlap or not.
        overlap_threshold : float, optional
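The inversion loop near the top of this file turns the one-to-many reference-to-cross mapping into its cross-to-reference inverse; stripped of the overlap bookkeeping, the bare technique is:

match_indxs = [[1, 2], [2], [0]]   # reference halo -> matched cross halos
inv_match_indxs = [[] for __ in range(3)]
for ref_id, crosses in enumerate(match_indxs):
    for cross_id in crosses:
        inv_match_indxs[cross_id].append(ref_id)
# inv_match_indxs == [[2], [0], [0, 1]]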

csiborgtools/read/paths.py (new file, 366 lines)

@@ -0,0 +1,366 @@
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""CSiBORG paths manager."""
from glob import glob
from os import makedirs, mkdir
from os.path import isdir, join
from warnings import warn
import numpy
class CSiBORGPaths:
"""
Paths manager for CSiBORG IC realisations.
Parameters
----------
srcdir : str
Path to the folder where the RAMSES outputs are stored.
postdir: str
Path to the folder where post-processed files are stored.
"""
_srcdir = None
_postdir = None
def __init__(self, srcdir=None, postdir=None):
self.srcdir = srcdir
self.postdir = postdir
@staticmethod
def _check_directory(path):
if not isdir(path):
raise IOError("Invalid directory `{}`!".format(path))
@property
def srcdir(self):
"""
Path to the folder where CSiBORG simulations are stored.
Returns
-------
path : str
"""
if self._srcdir is None:
raise ValueError("`srcdir` is not set!")
return self._srcdir
@srcdir.setter
def srcdir(self, path):
if path is None:
return
self._check_directory(path)
self._srcdir = path
@property
def postdir(self):
"""
Path to the folder where post-processed files are stored.
Returns
-------
path : str
"""
if self._postdir is None:
raise ValueError("`postdir` is not set!")
return self._postdir
@postdir.setter
def postdir(self, path):
if path is None:
return
self._check_directory(path)
self._postdir = path
@property
def temp_dumpdir(self):
"""
Path to a temporary dumping folder.
Returns
-------
path : str
"""
fpath = join(self.postdir, "temp")
if not isdir(fpath):
mkdir(fpath)
warn("Created directory `{}`.".format(fpath), UserWarning,
stacklevel=1)
return fpath
def mmain_path(self, nsnap, nsim):
"""
Path to the `mmain` summed substructure files.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "mmain")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
return join(
fdir,
"mmain_{}_{}.npz".format(str(nsim).zfill(5), str(nsnap).zfill(5))
)
def initmatch_path(self, nsim, kind):
"""
Path to the `initmatch` files where the clump match between the
initial and final snapshot is stored.
Parameters
----------
nsim : int
IC realisation index.
kind : str
Type of match. Can be either `cm` or `particles`.
Returns
-------
path : str
"""
assert kind in ["cm", "particles"]
fdir = join(self.postdir, "initmatch")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
return join(fdir, "{}_{}.npy".format(kind, str(nsim).zfill(5)))
def split_path(self, nsnap, nsim):
"""
Path to the `split` files from `pre_splithalos`.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "split")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
return join(fdir, "clumps_{}_{}.npz"
.format(str(nsim).zfill(5), str(nsnap).zfill(5)))
def get_ics(self, tonew):
"""
Get CSiBORG IC realisation IDs from the list of folders in
`self.srcdir`.
Parameters
----------
tonew : bool
If `True`, path to the '_new' ICs is returned.
Returns
-------
ids : 1-dimensional array
"""
files = glob(join(self.srcdir, "ramses_out*"))
files = [f.split("/")[-1] for f in files] # Select only file names
if tonew:
files = [f for f in files if "_new" in f]
ids = [int(f.split("_")[2]) for f in files] # Take the IC IDs
else:
files = [f for f in files if "_inv" not in f] # Remove inv. ICs
files = [f for f in files if "_new" not in f] # Remove _new
files = [f for f in files if "OLD" not in f] # Remove _old
ids = [int(f.split("_")[-1]) for f in files]
try:
ids.remove(5511)
except ValueError:
pass
return numpy.sort(ids)
def ic_path(self, nsim, tonew=False):
"""
Path to a CSiBORG IC realisation folder.
Parameters
----------
nsim : int
IC realisation index.
tonew : bool, optional
Whether to return the path to the '_new' IC realisation.
Returns
-------
path : str
"""
fname = "ramses_out_{}"
if tonew:
fname += "_new"
return join(self.srcdir, fname.format(nsim))
def get_snapshots(self, nsim):
"""
List of available snapshots of a CSiBORG IC realisation.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
snapshots : 1-dimensional array
"""
simpath = self.ic_path(nsim, tonew=False)
# Get all files in simpath that start with output_
snaps = glob(join(simpath, "output_*"))
# Take just the last _00XXXX from each file and strip zeros
snaps = [int(snap.split('_')[-1].lstrip('0')) for snap in snaps]
return numpy.sort(snaps)
def snapshot_path(self, nsnap, nsim):
"""
Path to a CSiBORG IC realisation snapshot.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
snappath : str
"""
tonew = nsnap == 1
simpath = self.ic_path(nsim, tonew=tonew)
return join(simpath, "output_{}".format(str(nsnap).zfill(5)))
def hcat_path(self, nsim):
"""
Path to the final snapshot halo catalogue from `fit_halos.py`.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
path : str
"""
nsnap = str(max(self.get_snapshots(nsim))).zfill(5)
fname = "ramses_out_{}_{}.npy".format(str(self.nsim).zfill(5), nsnap)
return join(self.postdir, fname)
def knnauto_path(self, run, nsim=None):
"""
Path to the `knn` auto-correlation files. If `nsim` is not specified
returns a list of files for this run for all available simulations.
Parameters
----------
run : str
Type of run.
nsim : int, optional
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "knn", "auto")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
if nsim is not None:
return join(fdir, "knncdf_{}_{}.p".format(str(nsim).zfill(5), run))
files = glob(join(fdir, "knncdf*"))
run = "__" + run
return [f for f in files if run in f]
def knncross_path(self, run, nsims=None):
"""
Path to the `knn` cross-correlation files. If `nsims` is not specified
returns a list of files for this run for all available simulations.
Parameters
----------
run : str
Type of run.
nsims : len-2 tuple of int, optional
IC realisation indices.
Returns
-------
path : str
"""
fdir = join(self.postdir, "knn", "cross")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
if nsims is not None:
assert isinstance(nsims, (list, tuple)) and len(nsims) == 2
nsim0 = str(nsims[0]).zfill(5)
nsimx = str(nsims[1]).zfill(5)
return join(fdir, "knncdf_{}_{}__{}.p".format(nsim0, nsimx, run))
files = glob(join(fdir, "knncdf*"))
run = "__" + run
return [f for f in files if run in f]
def tpcfauto_path(self, run, nsim=None):
"""
Path to the `tpcf` auto-correlation files. If `nsim` is not specified
returns a list of files for this run for all available simulations.
Parameters
----------
run : str
Type of run.
nsim : int, optional
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "tpcf", "auto")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
if nsim is not None:
return join(fdir, "tpcf{}_{}.p".format(str(nsim).zfill(5), run))
files = glob(join(fdir, "tpcf*"))
run = "__" + run
return [f for f in files if run in f]
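End to end, the manager resolves everything from the two root directories. A usage sketch (illustrative paths; both directories must already exist and be populated):

paths = CSiBORGPaths(srcdir="/path/to/ramses",
                     postdir="/path/to/postprocessed")
nsim = paths.get_ics(tonew=False)[0]
nsnap = paths.get_snapshots(nsim)[-1]
print(paths.snapshot_path(nsnap, nsim))
print(paths.mmain_path(nsnap, nsim))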


@@ -13,8 +13,8 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Power spectrum reader."""
import joblib
import numpy
from tqdm import tqdm
@@ -24,7 +24,7 @@ class PKReader:
    Parameters
    ----------
    get_ics : list of int
        IC IDs to be read.
    hw : float
        Box half-width.
@@ -35,8 +35,8 @@
    dtype : dtype, optional
        Output precision. By default `numpy.float32`.
    """
    def __init__(self, get_ics, hw, fskel=None, dtype=numpy.float32):
        self.get_ics = get_ics
        self.hw = hw
        if fskel is None:
            fskel = "/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p"
@@ -46,7 +46,7 @@
    @staticmethod
    def _set_klim(kmin, kmax):
        """
        Set limits on the wavenumber to 0 and infinity if `None`s provided.
        """
        if kmin is None:
            kmin = 0
@@ -62,27 +62,27 @@
        ----------
        kmin : float, optional
            The minimum wavenumber. By default `None`, i.e. 0.
        kmax : float, optional
            The maximum wavenumber. By default `None`, i.e. infinity.

        Returns
        -------
        ks : 1-dimensional array
            Array of wavenumbers.
        pks : 2-dimensional array of shape `(len(self.get_ics), ks.size)`
            Autocorrelation of each simulation.
        """
        kmin, kmax = self._set_klim(kmin, kmax)
        ks, pks, sel = None, None, None
        for i, nsim in enumerate(self.get_ics):
            pk = joblib.load(self.fskel.format(nsim, nsim, self.hw))
            # Get cuts and pre-allocate arrays
            if i == 0:
                x = pk.k3D
                sel = (kmin < x) & (x < kmax)
                ks = x[sel].astype(self.dtype)
                pks = numpy.full((len(self.get_ics), numpy.sum(sel)),
                                 numpy.nan, dtype=self.dtype)
            pks[i, :] = pk.Pk[sel, 0, 0]

        return ks, pks
@@ -99,7 +99,7 @@
            The second IC ID.
        kmin : float, optional
            The minimum wavenumber. By default `None`, i.e. 0.
        kmax : float, optional
            The maximum wavenumber. By default `None`, i.e. infinity.

        Returns
@@ -133,7 +133,7 @@
        ----------
        kmin : float, optional
            The minimum wavenumber. By default `None`, i.e. 0.
        kmax : float, optional
            The maximum wavenumber. By default `None`, i.e. infinity.

        Returns
@@ -144,12 +144,12 @@
            Cross-correlations. The first column is the IC being
            cross-correlated with the remaining ICs, in the second column.
        """
        nics = len(self.get_ics)

        ks, xpks = None, None
        for i, ic0 in enumerate(tqdm(self.get_ics)):
            k = 0
            for ic1 in self.get_ics:
                # Skip the auto-correlation
                if ic0 == ic1:
                    continue
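The reading loop above follows a common pattern: let the first file fix the wavenumber grid and selection mask, pre-allocate the output, then fill it row by row. In miniature, with fake in-memory spectra instead of joblib dumps:

import numpy

fake = {7444: numpy.random.rand(50), 7468: numpy.random.rand(50)}
x = numpy.linspace(0.01, 10, 50)   # shared wavenumber grid
kmin, kmax = 0.1, 5.0

ks, pks, sel = None, None, None
for i, nsim in enumerate(fake):
    if i == 0:                     # first file fixes the grid and mask
        sel = (kmin < x) & (x < kmax)
        ks = x[sel]
        pks = numpy.full((len(fake), sel.sum()), numpy.nan)
    pks[i, :] = fake[nsim][sel]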


@@ -15,274 +15,18 @@
"""
Functions to read in the particle and clump files.
"""
from os.path import isfile, join
from warnings import warn

import numpy
from scipy.io import FortranFile
from tqdm import tqdm, trange

from .paths import CSiBORGPaths
from .utils import cols_to_structured

###############################################################################
#                        Fortran particle reader                              #
###############################################################################
class CSiBORGPaths:
"""
Paths manager for CSiBORG IC realisations.
Parameters
----------
srcdir : str
Path to the folder where CSiBORG simulations are stored.
dumpdir : str
Path to the folder where files from `run_fit_halos` are stored.
mmain_path : str
Path to folder where mmain files are stored.
initmatch_path : str
Path to the folder where particle ID match between the first and final
snapshot is stored.
"""
_srcdir = None
_dumpdir = None
_mmain_path = None
_initmatch_path = None
def __init__(self, srcdir=None, dumpdir=None, mmain_path=None,
initmatch_path=None):
self.srcdir = srcdir
self.dumpdir = dumpdir
self.mmain_path = mmain_path
self.initmatch_path = initmatch_path
@staticmethod
def _check_directory(path):
if not isdir(path):
raise IOError("Invalid directory `{}`!".format(path))
@property
def srcdir(self):
"""
Path to the folder where CSiBORG simulations are stored.
Returns
-------
path : str
"""
if self._srcdir is None:
raise ValueError("`srcdir` is not set!")
return self._srcdir
@srcdir.setter
def srcdir(self, path):
if path is None:
return
self._check_directory(path)
self._srcdir = path
@property
def dumpdir(self):
"""
Path to the folder where files from `run_fit_halos` are stored.
Returns
-------
path : str
"""
if self._dumpdir is None:
raise ValueError("`dumpdir` is not set!")
return self._dumpdir
@dumpdir.setter
def dumpdir(self, path):
if path is None:
return
self._check_directory(path)
self._dumpdir = path
@property
def temp_dumpdir(self):
"""
Path to a temporary dumping folder.
Returns
-------
path : str
"""
fpath = join(self.dumpdir, "temp")
if not isdir(fpath):
raise IOError("Invalid directory `{}`.".format(fpath))
return fpath
@property
def mmain_path(self):
"""
Path to the folder where mmain files are stored.
Returns
-------
path : str
"""
if self._mmain_path is None:
raise ValueError("`mmain_path` is not set!")
return self._mmain_path
@mmain_path.setter
def mmain_path(self, path):
if path is None:
return
self._check_directory(path)
self._mmain_path = path
@property
def initmatch_path(self):
"""
Path to the folder where the match between the first and final
snapshot is stored.
Returns
-------
path : str
"""
if self._initmatch_path is None:
raise ValueError("`initmatch_path` is not set!")
return self._initmatch_path
@initmatch_path.setter
def initmatch_path(self, path):
if path is None:
return
self._check_directory(path)
self._initmatch_path = path
def ic_ids(self, tonew):
"""
CSiBORG IC realisation IDs from the list of folders in `self.srcdir`.
Parameters
----------
tonew : bool
If `True`, path to the '_new' ICs is returned.
Returns
-------
ids : 1-dimensional array
"""
files = glob(join(self.srcdir, "ramses_out*"))
files = [f.split("/")[-1] for f in files] # Select only file names
if tonew:
files = [f for f in files if "_new" in f]
ids = [int(f.split("_")[2]) for f in files] # Take the IC IDs
else:
files = [f for f in files if "_inv" not in f] # Remove inv. ICs
files = [f for f in files if "_new" not in f] # Remove _new
files = [f for f in files if "OLD" not in f] # Remove _old
ids = [int(f.split("_")[-1]) for f in files]
try:
ids.remove(5511)
except ValueError:
pass
return numpy.sort(ids)
def ic_path(self, nsim, tonew=False):
"""
Path to a CSiBORG IC realisation folder.
Parameters
----------
nsim : int
IC realisation index.
tonew : bool, optional
Whether to return the path to the '_new' IC realisation.
Returns
-------
path : str
"""
fname = "ramses_out_{}"
if tonew:
fname += "_new"
return join(self.srcdir, fname.format(nsim))
def get_snapshots(self, nsim):
"""
List of available snapshots of a CSiBORG IC realisation.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
snapshots : 1-dimensional array
"""
simpath = self.ic_path(nsim, tonew=False)
# Get all files in simpath that start with output_
snaps = glob(join(simpath, "output_*"))
# Take just the last _00XXXX from each file and strip zeros
snaps = [int(snap.split('_')[-1].lstrip('0')) for snap in snaps]
return numpy.sort(snaps)
def clump0_path(self, nsim):
"""
Path to a single dumped clump's particles. Expected to point to a
dictionary whose keys are the clump indices and whose items are
structured arrays with the clump's particles in the initial snapshot.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
path : str
"""
cdir = join(self.dumpdir, "initmatch")
return join(cdir, "clump_{}_{}.npy".format(nsim, "particles"))
def snapshot_path(self, nsnap, nsim):
"""
Path to a CSiBORG IC realisation snapshot.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
snappath : str
"""
if nsnap == 1:
tonew = True
simpath = self.ic_path(nsim, tonew=tonew)
return join(simpath, "output_{}".format(str(nsnap).zfill(5)))
def hcat_path(self, nsim):
"""
Path to the final snapshot halo catalogue from `fit_halos.py`.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
path : str
"""
nsnap = str(max(self.get_snapshots(nsim))).zfill(5)
fname = "ramses_out_{}_{}.npy".format(str(self.nsim).zfill(5), nsnap)
return join(self.dumpdir, fname)
###############################################################################
# Fortran readers #
############################################################################### ###############################################################################
@ -297,13 +41,24 @@ class ParticleReader:
_paths = None _paths = None
def __init__(self, paths): def __init__(self, paths):
# assert isinstance(paths, CSiBORGPaths) self.paths = paths
self._paths = paths
@property @property
def paths(self): def paths(self):
"""
Paths manager.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
"""
return self._paths return self._paths
@paths.setter
def paths(self, paths):
# assert isinstance(paths, CSiBORGPaths) # REMOVE
self._paths = paths
def read_info(self, nsnap, nsim): def read_info(self, nsnap, nsim):
""" """
Read CSiBORG simulation snapshot info. Read CSiBORG simulation snapshot info.
@ -332,7 +87,7 @@ class ParticleReader:
keys = info[eqs - 1] keys = info[eqs - 1]
vals = info[eqs + 1] vals = info[eqs + 1]
return {key: val for key, val in zip(keys, vals)} return {key: val for key, val in zip(keys, vals, strict=True)}
def open_particle(self, nsnap, nsim, verbose=True): def open_particle(self, nsnap, nsim, verbose=True):
""" """
@ -392,7 +147,7 @@ class ParticleReader:
@staticmethod @staticmethod
def read_sp(dtype, partfile): def read_sp(dtype, partfile):
""" """
Utility function to read a single particle file. Read a single particle file.
Parameters Parameters
---------- ----------
@ -490,7 +245,7 @@ class ParticleReader:
for cpu in iters: for cpu in iters:
i = start_ind[cpu] i = start_ind[cpu]
j = nparts[cpu] j = nparts[cpu]
for (fname, fdtype) in zip(fnames, fdtypes): for (fname, fdtype) in zip(fnames, fdtypes, strict=True):
if fname in pars_extract: if fname in pars_extract:
out[fname][i:i + j] = self.read_sp(fdtype, partfiles[cpu]) out[fname][i:i + j] = self.read_sp(fdtype, partfiles[cpu])
else: else:
@ -522,7 +277,7 @@ class ParticleReader:
""" """
nsnap = str(nsnap).zfill(5) nsnap = str(nsnap).zfill(5)
cpu = str(cpu + 1).zfill(5) cpu = str(cpu + 1).zfill(5)
fpath = join(self.paths.ic_path(nsim, to_new=False), fpath = join(self.paths.ic_path(nsim, tonew=False),
"output_{}".format(nsnap), "output_{}".format(nsnap),
"unbinding_{}.out{}".format(nsnap, cpu)) "unbinding_{}.out{}".format(nsnap, cpu))
return FortranFile(fpath) return FortranFile(fpath)
@ -561,31 +316,9 @@ class ParticleReader:
return clumpid return clumpid
@staticmethod
def drop_zero_indx(clump_ids, particles):
"""
Drop from `clump_ids` and `particles` entries whose clump index is 0.
Parameters
----------
clump_ids : 1-dimensional array
Array of clump IDs.
particles : structured array
Array of the particle data.
Returns
-------
clump_ids : 1-dimensional array
The array of clump IDs after removing zero clump ID entries.
particles : structured array
The particle data after removing zero clump ID entries.
"""
mask = clump_ids != 0
return clump_ids[mask], particles[mask]
def read_clumps(self, nsnap, nsim, cols=None): def read_clumps(self, nsnap, nsim, cols=None):
""" """
Read in a clump file `clump_Nsnap.dat`. Read in a clump file `clump_XXXXX.dat`.
Parameters Parameters
---------- ----------
@ -593,7 +326,6 @@ class ParticleReader:
Snapshot index. Snapshot index.
nsim : int nsim : int
IC realisation index. IC realisation index.
cols : list of str, optional. cols : list of str, optional.
Columns to extract. By default `None` and all columns are Columns to extract. By default `None` and all columns are
extracted. extracted.
@ -601,84 +333,150 @@ class ParticleReader:
Returns Returns
------- -------
out : structured array out : structured array
Structured array of the clumps.
""" """
nsnap = str(nsnap).zfill(5) nsnap = str(nsnap).zfill(5)
fname = join(self.paths.ic_path(nsim, to_new=False), fname = join(self.paths.ic_path(nsim, tonew=False),
"output_{}".format(nsnap), "output_{}".format(nsnap),
"clump_{}.dat".format(nsnap)) "clump_{}.dat".format(nsnap))
# Check the file exists.
if not isfile(fname): if not isfile(fname):
raise FileExistsError( raise FileNotFoundError("Clump file `{}` does not exist."
"Clump file `{}` does not exist.".format(fname)) .format(fname))
# Read in the clump array. This is how the columns must be written!
data = numpy.genfromtxt(fname) data = numpy.genfromtxt(fname)
clump_cols = [("index", numpy.int64), ("level", numpy.int64), # How the data is stored in the clump file.
("parent", numpy.int64), ("ncell", numpy.float64), clump_cols = {"index": (0, numpy.int32),
("peak_x", numpy.float64), ("peak_y", numpy.float64), "level": (1, numpy.int32),
("peak_z", numpy.float64), ("rho-", numpy.float64), "parent": (2, numpy.int32),
("rho+", numpy.float64), ("rho_av", numpy.float64), "ncell": (3, numpy.float32),
("mass_cl", numpy.float64), ("relevance", numpy.float64)] "x": (4, numpy.float32),
out0 = cols_to_structured(data.shape[0], clump_cols) "y": (5, numpy.float32),
for i, name in enumerate(out0.dtype.names): "z": (6, numpy.float32),
out0[name] = data[:, i] "rho-": (7, numpy.float32),
# If take all cols then return "rho+": (8, numpy.float32),
if cols is None: "rho_av": (9, numpy.float32),
return out0 "mass_cl": (10, numpy.float32),
# Make sure we have a list "relevance": (11, numpy.float32),
}
# Return the requested columns.
cols = [cols] if isinstance(cols, str) else cols cols = [cols] if isinstance(cols, str) else cols
# Get the indxs of clump_cols to output cols = list(clump_cols.keys()) if cols is None else cols
clump_names = [col[0] for col in clump_cols]
indxs = [None] * len(cols)
for i, col in enumerate(cols):
if col not in clump_names:
raise KeyError("...")
indxs[i] = clump_names.index(col)
# Make an array and fill it
out = cols_to_structured(out0.size, [clump_cols[i] for i in indxs])
for name in out.dtype.names:
out[name] = out0[name]
dtype = [(col, clump_cols[col][1]) for col in cols]
out = cols_to_structured(data.shape[0], dtype)
for col in cols:
out[col] = data[:, clump_cols[col][0]]
return out return out
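A short usage sketch of the rewritten column selection; it assumes a valid `paths` manager and that realisation 7444 is available on disk:

reader = ParticleReader(paths)
nsnap = max(paths.get_snapshots(7444))
clumps = reader.read_clumps(nsnap, 7444, cols=["index", "parent", "mass_cl"])
print(clumps.dtype.names)  # -> ('index', 'parent', 'mass_cl')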
############################################################################### ###############################################################################
# Supplementary reading functions # # Summed substructure catalogue #
############################################################################### ###############################################################################
def read_mmain(nsim, srcdir, fname="Mmain_{}.npy"): class MmainReader:
""" """
Read `mmain` numpy arrays of central halos whose mass contains their Object to generate the summed substructure catalogue.
substructure contribution.
Parameters
----------
nsim : int
IC realisation index.
srcdir : str
Path to the folder containing the files.
fname : str, optional
File name convention. By default `Mmain_{}.npy`, where the
substituted value is `n`.
Returns
-------
out : structured array
Array with the central halo information.
""" """
fpath = join(srcdir, fname.format(nsim)) _paths = None
arr = numpy.load(fpath)
cols = [("index", numpy.int64), ("peak_x", numpy.float64), def __init__(self, paths):
("peak_y", numpy.float64), ("peak_z", numpy.float64), assert isinstance(paths, CSiBORGPaths) # REMOVE
("mass_cl", numpy.float64), ("sub_frac", numpy.float64)] self._paths = paths
out = cols_to_structured(arr.shape[0], cols)
for i, name in enumerate(out.dtype.names):
out[name] = arr[:, i]
return out @property
def paths(self):
return self._paths
def find_parents(self, clumparr, verbose=False):
"""
Find ultimate parent haloes for every clump in a final snapshot.
Parameters
----------
clumparr : structured array
Clump array. Read from `ParticleReader.read_clumps`. Must contain
`index` and `parent` columns.
verbose : bool, optional
Verbosity flag.
Returns
-------
parent_arr : 1-dimensional array of shape `(nclumps, )`
The ultimate parent halo index for every clump, i.e. referring to
its ultimate parent clump.
"""
clindex = clumparr["index"]
parindex = clumparr["parent"]
# The ultimate parent for every clump
parent_arr = numpy.zeros(clindex.size, dtype=numpy.int32)
for i in trange(clindex.size) if verbose else range(clindex.size):
tocont = clindex[i] != parindex[i] # Continue if not a main halo
par = parindex[i] # First we try the parent of this clump
while tocont:
# The element of the array corresponding to the parent clump to
# the one we're looking at
element = numpy.where(clindex == par)[0][0]
# We stop if the parent is its own parent, so a main halo. Else
# move onto the parent of the parent. Eventually this is its
# own parent and we stop, with ultimate parent=par
if clindex[element] == parindex[element]:
tocont = False
else:
par = parindex[element]
parent_arr[i] = par
return parent_arr
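A toy check of the parent walk (with the comparison fixed as above); the clump array is self-contained and only a valid `paths` manager is assumed:

import numpy

# Clumps 1 and 4 are main haloes (index == parent); 2 sits in 1 and 3 sits
# in 2, so the ultimate parent of 3 is 1.
clumparr = numpy.array([(1, 1), (2, 1), (3, 2), (4, 4)],
                       dtype=[("index", numpy.int32), ("parent", numpy.int32)])
reader = MmainReader(paths)  # `paths` must be a CSiBORGPaths instance
print(reader.find_parents(clumparr))  # -> [1 1 1 4]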
def make_mmain(self, nsim, verbose=False):
"""
Make the summed substructure catalogue for a final snapshot. Includes
the position of the parent, the summed mass and the fraction of mass in
substructure.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
Returns
-------
mmain : structured array
The `mmain` catalogue.
ultimate_parent : 1-dimensional array of shape `(nclumps,)`
The ultimate parent halo index for every clump, i.e. referring to
its ultimate parent clump.
"""
nsnap = max(self.paths.get_snapshots(nsim))
partreader = ParticleReader(self.paths)
cols = ["index", "parent", "mass_cl", 'x', 'y', 'z']
clumparr = partreader.read_clumps(nsnap, nsim, cols)
ultimate_parent = self.find_parents(clumparr, verbose=verbose)
mask_main = clumparr["index"] == clumparr["parent"]
nmain = numpy.sum(mask_main)
# Preallocate already the output array
out = cols_to_structured(
nmain, [("ID", numpy.int32), ("x", numpy.float32),
("y", numpy.float32), ("z", numpy.float32),
("M", numpy.float32), ("subfrac", numpy.float32)])
out["ID"] = clumparr["index"][mask_main]
# Because for these index == parent
for p in ('x', 'y', 'z'):
out[p] = clumparr[p][mask_main]
# We want a total mass for each halo in ID_main
for i in range(nmain):
# Should include the main halo itself, i.e. its own ultimate parent
out["M"][i] = numpy.sum(
clumparr["mass_cl"][ultimate_parent == out["ID"][i]])
out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"]
return out, ultimate_parent
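A hedged usage sketch for one realisation, assuming `MmainReader` is exported from `csiborgtools.read` like the other readers and the glamdring path kwargs from the scripts below are available:

import numpy
import csiborgtools

paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
mmain, ultimate_parent = csiborgtools.read.MmainReader(paths).make_mmain(
    7444, verbose=True)
# Mass fractions in substructure must lie in [0, 1].
assert numpy.all((mmain["subfrac"] >= 0) & (mmain["subfrac"] <= 1))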
###############################################################################
# Supplementary reading functions #
###############################################################################
def read_initcm(nsim, srcdir, fname="clump_{}_cm.npy"): def read_initcm(nsim, srcdir, fname="clump_{}_cm.npy"):
@ -704,7 +502,8 @@ def read_initcm(nsim, srcdir, fname="clump_{}_cm.npy"):
try: try:
return numpy.load(fpath) return numpy.load(fpath)
except FileNotFoundError: except FileNotFoundError:
warn("File {} does not exist.".format(fpath)) warn("File {} does not exist.".format(fpath), UserWarning,
stacklevel=1)
return None return None

View file

@ -13,17 +13,42 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""2PCF reader.""" """2PCF reader."""
from os.path import join
from glob import glob
import numpy
import joblib import joblib
import numpy
from .paths import CSiBORGPaths
class TPCFReader: class TPCFReader:
""" """
Shortcut object to read in the 2PCF data. Shortcut object to read in the 2PCF data.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
""" """
def read(self, run, folder): _paths = None
def __init__(self, paths):
self.paths = paths
@property
def paths(self):
"""
Paths manager.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
"""
return self._paths
@paths.setter
def paths(self, paths):
assert isinstance(paths, CSiBORGPaths)
self._paths = paths
def read(self, run):
""" """
Read the auto- or cross-correlation kNN-CDF data. Infers the type from Read the auto- or cross-correlation kNN-CDF data. Infers the type from
the data files. the data files.
@ -32,8 +57,6 @@ class TPCFReader:
---------- ----------
run : str run : str
Run ID to read in. Run ID to read in.
folder : str
Path to the folder where the auto-2PCF is stored.
Returns Returns
------- -------
@ -42,8 +65,7 @@ class TPCFReader:
out : 2-dimensional array of shape `(len(files), len(rp))` out : 2-dimensional array of shape `(len(files), len(rp))`
Array of 2PCFs. Array of 2PCFs.
""" """
run += ".p" files = self.paths.tpcfauto_path(run)
files = [f for f in glob(join(folder, "*")) if run in f]
if len(files) == 0: if len(files) == 0:
raise RuntimeError("No files found for run `{}`.".format(run[:-2])) raise RuntimeError("No files found for run `{}`.".format(run))
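Usage sketch of the new path-managed read; the run ID here is made up:

reader = TPCFReader(paths)       # assumes a valid CSiBORGPaths instance
rp, wp = reader.read("mass001")  # "mass001" is a hypothetical run ID
wp_mean = numpy.nanmean(wp, axis=0)  # average 2PCF over realisations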

View file

@ -13,10 +13,68 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
""" """
Utilility functions for manipulation structured arrays. Various coordinate transformations.
""" """
import numpy import numpy
###############################################################################
# Coordinate transforms #
###############################################################################
def cartesian_to_radec(x, y, z):
"""
Calculate the radial distance, right ascension in [0, 360) degrees and
declination [-90, 90] degrees. Note, the observer should be placed in the
middle of the box.
Parameters
----------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
Returns
-------
dist, ra, dec : 1-dimensional arrays
Radial distance, right ascension and declination.
"""
dist = numpy.sqrt(x**2 + y**2 + z**2)
dec = numpy.rad2deg(numpy.arcsin(z/dist))
ra = numpy.rad2deg(numpy.arctan2(y, x))
# Make sure RA in the correct range
ra[ra < 0] += 360
return dist, ra, dec
def radec_to_cartesian(dist, ra, dec, isdeg=True):
"""
Convert distance, right ascension and declination to Cartesian coordinates.
Parameters
----------
dist, ra, dec : 1-dimensional arrays
Spherical coordinates.
isdeg : bool, optional
Whether `ra` and `dec` are in degrees. By default `True`.
Returns
-------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
"""
if isdeg:
ra = numpy.deg2rad(ra)
dec = numpy.deg2rad(dec)
x = dist * numpy.cos(dec) * numpy.cos(ra)
y = dist * numpy.cos(dec) * numpy.sin(ra)
z = dist * numpy.sin(dec)
return x, y, z
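A quick self-contained round-trip check of the two transforms above:

import numpy

rng = numpy.random.default_rng(42)
x, y, z = rng.normal(size=(3, 100))
dist, ra, dec = cartesian_to_radec(x, y, z)
assert numpy.allclose(radec_to_cartesian(dist, ra, dec), [x, y, z])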
###############################################################################
# Array manipulation #
###############################################################################
def cols_to_structured(N, cols): def cols_to_structured(N, cols):
""" """
@ -108,7 +166,7 @@ def rm_columns(arr, cols):
# Get a new dtype without the cols to be deleted # Get a new dtype without the cols to be deleted
new_dtype = [] new_dtype = []
for dtype, name in zip(arr.dtype.descr, arr.dtype.names): for dtype, name in zip(arr.dtype.descr, arr.dtype.names, strict=True):
if name not in cols: if name not in cols:
new_dtype.append(dtype) new_dtype.append(dtype)
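For reference, a minimal sketch of how these helpers combine, with signatures as shown in this file and `rm_columns` assumed to return the trimmed array:

import numpy

cols = [("index", numpy.int32), ("mass_cl", numpy.float32)]
arr = cols_to_structured(3, cols)        # preallocate one row per clump
arr["index"] = [1, 2, 3]
trimmed = rm_columns(arr, ["mass_cl"])   # drop a column, keep the rest
print(trimmed.dtype.names)               # -> ('index',)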

View file

@ -1,17 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .transforms import cartesian_to_radec, radec_to_cartesian # noqa
from .box_units import BoxUnits # noqa

View file

@ -1,66 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Various coordinate transformations.
"""
import numpy
def cartesian_to_radec(x, y, z):
"""
Calculate the radial distance, right ascension in [0, 360) degrees and
declination [-90, 90] degrees. Note, the observer should be placed in the
middle of the box.
Parameters
----------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
Returns
-------
dist, ra, dec : 1-dimensional arrays
Radial distance, right ascension and declination.
"""
dist = numpy.sqrt(x**2 + y**2 + z**2)
dec = numpy.rad2deg(numpy.arcsin(z/dist))
ra = numpy.rad2deg(numpy.arctan2(y, x))
# Make sure RA in the correct range
ra[ra < 0] += 360
return dist, ra, dec
def radec_to_cartesian(dist, ra, dec, isdeg=True):
"""
Convert distance, right ascension and declination to Cartesian coordinates.
Parameters
----------
dist, ra, dec : 1-dimensional arrays
Spherical coordinates.
isdeg : bool, optional
Whether `ra` and `dec` are in degrees. By default `True`.
Returns
-------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
"""
if isdeg:
ra = numpy.deg2rad(ra)
dec = numpy.deg2rad(dec)
x = dist * numpy.cos(dec) * numpy.cos(ra)
y = dist * numpy.cos(dec) * numpy.sin(ra)
z = dist * numpy.sin(dec)
return x, y, z

View file

@ -1,24 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from datetime import datetime
from .recarray_manip import (cols_to_structured, add_columns, rm_columns, # noqa
list_to_ndarray, array_to_structured, # noqa
flip_cols, extract_from_structured) # noqa
def now(tz=None):
"""Shortcut to `datetime.datetime.now`."""
return datetime.now(tz=tz)

View file

@ -51,8 +51,8 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"cat0 = csiborgtools.read.HaloCatalogue(7468)\n", "cat0 = csiborgtools.read.ClumpsCatalogue(7468)\n",
"catxs = [csiborgtools.read.HaloCatalogue(nsim) for nsim in (7588, 8020, 8452, 8836)]\n", "catxs = [csiborgtools.read.ClumpsCatalogue(nsim) for nsim in (7588, 8020, 8452, 8836)]\n",
"reader = csiborgtools.read.NPairsOverlap(cat0, catxs, max_dist=150 / 0.705)" "reader = csiborgtools.read.NPairsOverlap(cat0, catxs, max_dist=150 / 0.705)"
] ]
}, },

View file

@ -75,6 +75,17 @@
"wp3 = reader.mean_wp(wp3)" "wp3 = reader.mean_wp(wp3)"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "c05b4db6",
"metadata": {},
"outputs": [],
"source": [
"\n",
"connect ECONNRFUSED"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 33,
@ -657,7 +668,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"cat = csiborgtools.read.HaloCatalogue(7444, paths, min_mass=1e12, max_dist=155/0.705)" "cat = csiborgtools.read.ClumpsCatalogue(7444, paths, min_mass=1e12, max_dist=155/0.705)"
] ]
}, },
{ {
@ -813,7 +824,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"cat = csiborgtools.read.HaloCatalogue(7444, paths)" "cat = csiborgtools.read.ClumpsCatalogue(7444, paths)"
] ]
}, },
{ {
@ -831,7 +842,7 @@
"from tqdm import trange\n", "from tqdm import trange\n",
"x = np.full((len(ics), 3), np.nan)\n", "x = np.full((len(ics), 3), np.nan)\n",
"for i in trange(len(ics)):\n", "for i in trange(len(ics)):\n",
" cat = csiborgtools.read.HaloCatalogue(ics[i], paths, max_dist=155 / 0.705)\n", " cat = csiborgtools.read.ClumpsCatalogue(ics[i], paths, max_dist=155 / 0.705)\n",
" for j, th in enumerate([1e12, 1e13, 1e14]):\n", " for j, th in enumerate([1e12, 1e13, 1e14]):\n",
" mask = cat[\"totpartmass\"] > th\n", " mask = cat[\"totpartmass\"] > th\n",
" x[i, j] = np.nanmedian(cat[\"lambda200c\"][mask])" " x[i, j] = np.nanmedian(cat[\"lambda200c\"][mask])"
@ -1149,8 +1160,8 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"cat1 = csiborgtools.read.HaloCatalogue(7444, min_mass=1e13, max_dist=155 / 0.705)\n", "cat1 = csiborgtools.read.ClumpsCatalogue(7444, min_mass=1e13, max_dist=155 / 0.705)\n",
"cat2 = csiborgtools.read.HaloCatalogue(7468, min_mass=1e13, max_dist=155 / 0.705)" "cat2 = csiborgtools.read.ClumpsCatalogue(7468, min_mass=1e13, max_dist=155 / 0.705)"
] ]
}, },
{ {

View file

@ -62,8 +62,8 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"cat0 = csiborgtools.read.HaloCatalogue(7468)\n", "cat0 = csiborgtools.read.ClumpsCatalogue(7468)\n",
"catx = csiborgtools.read.HaloCatalogue(7588)" "catx = csiborgtools.read.ClumpsCatalogue(7588)"
] ]
}, },
{ {

View file

@ -78,7 +78,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"pkreader = csiborgtools.read.PKReader(paths.ic_ids, hw)\n", "pkreader = csiborgtools.read.PKReader(paths.get_ics, hw)\n",
"\n", "\n",
"autoks, pks = pkreader.read_autos()\n", "autoks, pks = pkreader.read_autos()\n",
"\n", "\n",
@ -1134,7 +1134,7 @@
"axs[0].set_title(\"hw = {}\".format(hw))\n", "axs[0].set_title(\"hw = {}\".format(hw))\n",
"m = autoks < 40\n", "m = autoks < 40\n",
"mu = np.mean(pks, axis=0)\n", "mu = np.mean(pks, axis=0)\n",
"for i in range(len(paths.ic_ids)):\n", "for i in range(len(paths.get_ics)):\n",
" axs[0].plot(autoks[m], pks[i, m], c=\"k\", lw=0.1)\n", " axs[0].plot(autoks[m], pks[i, m], c=\"k\", lw=0.1)\n",
" axs[1].plot(autoks[m], pks[i, m] / mu[m], c=\"k\", lw=0.1)\n", " axs[1].plot(autoks[m], pks[i, m] / mu[m], c=\"k\", lw=0.1)\n",
"axs[0].plot(autoks[m], mu[m], c=\"red\", lw=1, label=r\"$\\langle P(k) \\rangle$\")\n", "axs[0].plot(autoks[m], mu[m], c=\"red\", lw=1, label=r\"$\\langle P(k) \\rangle$\")\n",
@ -2156,7 +2156,7 @@
"axs[0].set_title(r\"$\\mathrm{{hw}} = {}$\".format(hw))\n", "axs[0].set_title(r\"$\\mathrm{{hw}} = {}$\".format(hw))\n",
"m = autoks < 22\n", "m = autoks < 22\n",
"mu = np.mean(pks, axis=0)\n", "mu = np.mean(pks, axis=0)\n",
"for i in range(len(paths.ic_ids)):\n", "for i in range(len(paths.get_ics)):\n",
" axs[0].plot(autoks[m], pks[i, m], c=\"k\", lw=0.1)\n", " axs[0].plot(autoks[m], pks[i, m], c=\"k\", lw=0.1)\n",
" axs[1].plot(autoks[m], pks[i, m] / mu[m], c=\"k\", lw=0.1)\n", " axs[1].plot(autoks[m], pks[i, m] / mu[m], c=\"k\", lw=0.1)\n",
"axs[0].plot(autoks[m], mu[m], c=\"red\", lw=1, label=r\"$\\langle P(k) \\rangle$\")\n", "axs[0].plot(autoks[m], mu[m], c=\"red\", lw=1, label=r\"$\\langle P(k) \\rangle$\")\n",
@ -3242,13 +3242,13 @@
"fskel = \"/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p\"\n", "fskel = \"/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p\"\n",
"\n", "\n",
"autoks, autopks = None, None\n", "autoks, autopks = None, None\n",
"for i, nsim in enumerate(paths.ic_ids):\n", "for i, nsim in enumerate(paths.get_ics):\n",
" pk = joblib.load(fskel.format(nsim, nsim, hw))\n", " pk = joblib.load(fskel.format(nsim, nsim, hw))\n",
" x = pk.k3D\n", " x = pk.k3D\n",
" y = pk.Pk[:, 0, 0]\n", " y = pk.Pk[:, 0, 0]\n",
" sel = x < 20\n", " sel = x < 20\n",
" if i == 0:\n", " if i == 0:\n",
" autoks = np.full((len(paths.ic_ids), np.sum(sel)), np.nan)\n", " autoks = np.full((len(paths.get_ics), np.sum(sel)), np.nan)\n",
" autopks = np.full_like(autoks, np.nan)\n", " autopks = np.full_like(autoks, np.nan)\n",
" \n", " \n",
" autoks[i, :] = x[sel]\n", " autoks[i, :] = x[sel]\n",
@ -3268,7 +3268,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"plt.figure()\n", "plt.figure()\n",
"for i in range(len(paths.ic_ids)):\n", "for i in range(len(paths.get_ics)):\n",
" plt.plot(autoks[i, :], autopks[i, :], c=\"k\", lw=0.1)\n", " plt.plot(autoks[i, :], autopks[i, :], c=\"k\", lw=0.1)\n",
"plt.plot(np.mean(autoks, axis=0), np.mean(autopks, axis=0), label=\"CSiBORG\", c=\"k\", lw=1)\n", "plt.plot(np.mean(autoks, axis=0), np.mean(autopks, axis=0), label=\"CSiBORG\", c=\"k\", lw=1)\n",
" \n", " \n",
@ -3308,10 +3308,10 @@
"source": [ "source": [
"fskel = \"/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p\"\n", "fskel = \"/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p\"\n",
"\n", "\n",
"ic0 = paths.ic_ids[25]\n", "ic0 = paths.get_ics[25]\n",
"crossks, crosspks = None, None\n", "crossks, crosspks = None, None\n",
"i = 0\n", "i = 0\n",
"for ic in paths.ic_ids:\n", "for ic in paths.get_ics:\n",
" if ic == ic0:\n", " if ic == ic0:\n",
" continue\n", " continue\n",
" ics = (ic0, ic)\n", " ics = (ic0, ic)\n",
@ -3700,7 +3700,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"paths = csiborgtools.read.CSiBORGPaths()\n", "paths = csiborgtools.read.CSiBORGPaths()\n",
"cat = csiborgtools.read.CombinedHaloCatalogue(paths, min_m500=1e13, max_dist=210)" "cat = csiborgtools.read.CombinedCatalogue(paths, min_m500=1e13, max_dist=210)"
] ]
}, },
{ {
@ -4144,7 +4144,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"cat = csiborgtools.io.HaloCatalogue(9844, 1016, minimum_m500=0)" "cat = csiborgtools.io.Catalogue(9844, 1016, minimum_m500=0)"
] ]
}, },
{ {

View file

@ -515,7 +515,7 @@
} }
], ],
"source": [ "source": [
"len(paths.ic_ids(tonew=True))" "len(paths.get_ics(tonew=True))"
] ]
}, },
{ {
@ -542,8 +542,8 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"cat0 = csiborgtools.read.HaloCatalogue(7468)\n", "cat0 = csiborgtools.read.ClumpsCatalogue(7468)\n",
"catx = csiborgtools.read.HaloCatalogue(7588)" "catx = csiborgtools.read.ClumpsCatalogue(7588)"
] ]
}, },
{ {

322
notebooks/test_mmain.ipynb Normal file
View file

@ -0,0 +1,322 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "5a38ed25",
"metadata": {
"ExecuteTime": {
"end_time": "2022-12-31T17:12:28.663839Z",
"start_time": "2022-12-31T17:12:25.134607Z"
}
},
"outputs": [],
"source": [
"import sys\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import joblib\n",
"import scienceplots\n",
"sys.path.append(\"../\")\n",
"import csiborgtools\n",
"\n",
"plt.style.use([\"science\", \"notebook\"])\n",
"%matplotlib widget\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "22130d0b",
"metadata": {},
"outputs": [],
"source": [
"d = np.load(\"/mnt/extraspace/rstiskalek/csiborg/split/clumps_07444_00951.npz\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "4d9d9d11",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"772 µs ± 4.01 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
"source": [
"%timeit d[\"232\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66f32cef",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8a24c0fa",
"metadata": {},
"outputs": [],
"source": [
"paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)\n",
"# cat = csiborgtools.read.ClumpsCatalogue(7444, paths)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6e3ba9f4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/mnt/extraspace/rstiskalek/csiborg/knn/auto/knncdf_07444_la.npz'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"paths.knn_path(7444, \"auto\", \"la\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "2f4793b9",
"metadata": {},
"outputs": [],
"source": [
"np.savez(\"test.npz\", a=np.random.rand(510, 510, 510), b=np.random.rand(510, 510, 510))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "041d80d8",
"metadata": {},
"outputs": [],
"source": [
"d = np.load(\"test.npz\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "dc320130",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['a', 'b']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.files"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "11231e20",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['a', 'b']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.files"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b6d02f8",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 26,
"id": "4ae2a2a8",
"metadata": {},
"outputs": [],
"source": [
"np.save(\"test.npy\", np.array([]))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "b675510f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([], dtype=float64)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.load(\"test.npy\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f8f96b7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4b63c20",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 21,
"id": "1cdcf448",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/mnt/extraspace/rstiskalek/csiborg/split/ic_00952/out_07444_123.npz'"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# paths.split_path(123, 7444, 952)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d07431f5",
"metadata": {},
"outputs": [],
"source": [
"nsim = 7444\n",
"nsnap = max(paths.get_snapshots(7444))\n",
"reader = csiborgtools.read.ParticleReader(paths)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "def3c21f",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# clumpind = reader.read_clumps(nsnap, nsim, cols=\"index\")[\"index\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "832e82ce",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa69261b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 17,
"id": "105dd2e2",
"metadata": {},
"outputs": [],
"source": [
"parts = np.load(\"/mnt/extraspace/rstiskalek/csiborg/initmatch/clump_7468_particles.npy\", allow_pickle=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c49f174b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "38e9490d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv_galomatch",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -16,16 +16,18 @@
MPI script to calculate the matter cross power spectrum between CSiBORG MPI script to calculate the matter cross power spectrum between CSiBORG
IC realisations. Units are Mpc/h. IC realisations. Units are Mpc/h.
""" """
from gc import collect
from argparse import ArgumentParser from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from itertools import combinations
from os import remove from os import remove
from os.path import join from os.path import join
from itertools import combinations
from datetime import datetime
import numpy
import joblib import joblib
from mpi4py import MPI import numpy
import Pk_library as PKL import Pk_library as PKL
from mpi4py import MPI
try: try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
@ -47,9 +49,9 @@ nproc = comm.Get_size()
MAS = "CIC" # mass asignment scheme MAS = "CIC" # mass asignment scheme
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
box = csiborgtools.units.BoxUnits(paths) box = csiborgtools.read.BoxUnits(paths)
reader = csiborgtools.read.ParticleReader(paths) reader = csiborgtools.read.ParticleReader(paths)
ics = paths.ic_ids(tonew=False) ics = paths.get_ics(tonew=False)
nsims = len(ics) nsims = len(ics)
# File paths # File paths
@ -59,7 +61,7 @@ fout = join(dumpdir, "crosspk",
"out_{}_{}" + "_{}.p".format(args.halfwidth)) "out_{}_{}" + "_{}.p".format(args.halfwidth))
jobs = csiborgtools.fits.split_jobs(nsims, nproc)[rank] jobs = csiborgtools.utils.split_jobs(nsims, nproc)[rank]
for n in jobs: for n in jobs:
print("Rank {}@{}: saving {}th delta.".format(rank, datetime.now(), n)) print("Rank {}@{}: saving {}th delta.".format(rank, datetime.now(), n))
nsim = ics[n] nsim = ics[n]
@ -99,7 +101,7 @@ for i in range(nsims):
combs.append((i, i)) combs.append((i, i))
prev_delta = [-1, None, None, None] # i, delta, aexp, length prev_delta = [-1, None, None, None] # i, delta, aexp, length
jobs = csiborgtools.fits.split_jobs(len(combs), nproc)[rank] jobs = csiborgtools.utils.split_jobs(len(combs), nproc)[rank]
for n in jobs: for n in jobs:
i, j = combs[n] i, j = combs[n]
print("Rank {}@{}: combination {}.".format(rank, datetime.now(), (i, j))) print("Rank {}@{}: combination {}.".format(rank, datetime.now(), (i, j)))

View file

@ -13,17 +13,18 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.""" """A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
from os.path import join
from warnings import warn
from argparse import ArgumentParser from argparse import ArgumentParser
from copy import deepcopy from copy import deepcopy
from datetime import datetime from datetime import datetime
from mpi4py import MPI from warnings import warn
from TaskmasterMPI import master_process, worker_process
import numpy
from sklearn.neighbors import NearestNeighbors
import joblib import joblib
import numpy
import yaml import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from TaskmasterMPI import master_process, worker_process
try: try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
@ -58,8 +59,6 @@ ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532, 9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796, 9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844] 9820, 9844]
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
fout = join(dumpdir, "auto", "knncdf_{}_{}.p")
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
knncdf = csiborgtools.clustering.kNN_CDF() knncdf = csiborgtools.clustering.kNN_CDF()
@ -67,6 +66,7 @@ knncdf = csiborgtools.clustering.kNN_CDF()
# Analysis # # Analysis #
############################################################################### ###############################################################################
def read_single(selection, cat): def read_single(selection, cat):
"""Positions for single catalogue auto-correlation.""" """Positions for single catalogue auto-correlation."""
mmask = numpy.ones(len(cat), dtype=bool) mmask = numpy.ones(len(cat), dtype=bool)
@ -101,11 +101,13 @@ def read_single(selection, cat):
return pos[smask, ...] return pos[smask, ...]
def do_auto(run, cat, ic): def do_auto(run, cat, ic):
"""Calculate the kNN-CDF single catalgoue autocorrelation.""" """Calculate the kNN-CDF single catalgoue autocorrelation."""
_config = config.get(run, None) _config = config.get(run, None)
if _config is None: if _config is None:
warn("No configuration for run {}.".format(run)) warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
return return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax) rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -119,13 +121,15 @@ def do_auto(run, cat, ic):
batch_size=int(config["batch_size"]), random_state=config["seed"]) batch_size=int(config["batch_size"]), random_state=config["seed"])
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol}, joblib.dump({"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
fout.format(str(ic).zfill(5), run)) paths.knnauto_path(run, ic))
def do_cross_rand(run, cat, ic): def do_cross_rand(run, cat, ic):
"""Calculate the kNN-CDF cross catalogue random correlation.""" """Calculate the kNN-CDF cross catalogue random correlation."""
_config = config.get(run, None) _config = config.get(run, None)
if _config is None: if _config is None:
warn("No configuration for run {}.".format(run)) warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
return return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax) rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -143,14 +147,11 @@ def do_cross_rand(run, cat, ic):
nsamples=int(config["nsamples"]), neval=int(config["neval"]), nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"]) batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf) corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knnauto_path(run, ic))
joblib.dump({"rs": rs, "corr": corr}, fout.format(str(ic).zfill(5), run))
def do_runs(ic): def do_runs(ic):
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax, cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
min_mass=minmass)
for run in args.runs: for run in args.runs:
if "random" in run: if "random" in run:
do_cross_rand(run, cat, ic) do_cross_rand(run, cat, ic)
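For intuition, the random points drawn by `RVSinsphere` are uniform inside a sphere of radius `Rmax`; a standalone sketch of that sampling (not the library's exact class):

import numpy

def rvs_in_sphere(n, rmax, seed=42):
    rng = numpy.random.default_rng(seed)
    r = rmax * rng.random(n) ** (1 / 3)          # uniform in volume
    theta = numpy.arccos(1 - 2 * rng.random(n))  # polar angle
    phi = 2 * numpy.pi * rng.random(n)           # azimuth
    sin_t = numpy.sin(theta)
    return numpy.vstack([r * sin_t * numpy.cos(phi),
                         r * sin_t * numpy.sin(phi),
                         r * numpy.cos(theta)]).T  # shape (n, 3)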

View file

@ -13,18 +13,19 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.""" """A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
from warnings import warn
from os.path import join
from argparse import ArgumentParser from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime from datetime import datetime
from itertools import combinations from itertools import combinations
from mpi4py import MPI from os.path import join
from TaskmasterMPI import master_process, worker_process from warnings import warn
import numpy
from sklearn.neighbors import NearestNeighbors
import joblib import joblib
import numpy
import yaml import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from TaskmasterMPI import master_process, worker_process
try: try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
@ -67,6 +68,7 @@ knncdf = csiborgtools.clustering.kNN_CDF()
# Analysis # # Analysis #
############################################################################### ###############################################################################
def read_single(selection, cat): def read_single(selection, cat):
mmask = numpy.ones(len(cat), dtype=bool) mmask = numpy.ones(len(cat), dtype=bool)
pos = cat.positions(False) pos = cat.positions(False)
@ -79,19 +81,20 @@ def read_single(selection, cat):
mmask &= (cat[psel["name"]] < pmax) mmask &= (cat[psel["name"]] < pmax)
return pos[mmask, ...] return pos[mmask, ...]
def do_cross(run, ics): def do_cross(run, ics):
_config = config.get(run, None) _config = config.get(run, None)
if _config is None: if _config is None:
warn("No configuration for run {}.".format(run)) warn("No configuration for run {}.".format(run), stacklevel=1)
return return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax) rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
knn1, knn2 = NearestNeighbors(), NearestNeighbors() knn1, knn2 = NearestNeighbors(), NearestNeighbors()
cat1 = csiborgtools.read.HaloCatalogue(ics[0], paths, max_dist=Rmax) cat1 = csiborgtools.read.ClumpsCatalogue(ics[0], paths, max_dist=Rmax)
pos1 = read_single(_config, cat1) pos1 = read_single(_config, cat1)
knn1.fit(pos1) knn1.fit(pos1)
cat2 = csiborgtools.read.HaloCatalogue(ics[1], paths, max_dist=Rmax) cat2 = csiborgtools.read.ClumpsCatalogue(ics[1], paths, max_dist=Rmax)
pos2 = read_single(_config, cat2) pos2 = read_single(_config, cat2)
knn2.fit(pos2) knn2.fit(pos2)
@ -102,9 +105,8 @@ def do_cross(run, ics):
batch_size=int(config["batch_size"]), random_state=config["seed"]) batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf) corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knncross_path(run, ics))
joblib.dump({"rs": rs, "corr": corr},
fout.format(str(ics[0]).zfill(5), str(ics[1]).zfill(5), run))
def do_runs(ics): def do_runs(ics):
print(ics) print(ics)

View file

@ -13,16 +13,18 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the auto-2PCF of CSiBORG catalogues.""" """A script to calculate the auto-2PCF of CSiBORG catalogues."""
from os.path import join
from warnings import warn
from argparse import ArgumentParser from argparse import ArgumentParser
from copy import deepcopy from copy import deepcopy
from datetime import datetime from datetime import datetime
from os.path import join
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process from TaskmasterMPI import master_process, worker_process
import numpy
import joblib
import yaml
try: try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
@ -65,6 +67,7 @@ tpcf = csiborgtools.clustering.Mock2PCF()
# Analysis # # Analysis #
############################################################################### ###############################################################################
def read_single(selection, cat): def read_single(selection, cat):
"""Positions for single catalogue auto-correlation.""" """Positions for single catalogue auto-correlation."""
mmask = numpy.ones(len(cat), dtype=bool) mmask = numpy.ones(len(cat), dtype=bool)
@ -99,10 +102,11 @@ def read_single(selection, cat):
return pos[smask, ...] return pos[smask, ...]
def do_auto(run, cat, ic): def do_auto(run, cat, ic):
_config = config.get(run, None) _config = config.get(run, None)
if _config is None: if _config is None:
warn("No configuration for run {}.".format(run)) warn("No configuration for run {}.".format(run), stacklevel=1)
return return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax) rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -112,12 +116,11 @@ def do_auto(run, cat, ic):
nrandom = int(config["randmult"] * pos.shape[0]) nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins) rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
joblib.dump({"rp": rp, "wp": wp}, fout.format(str(ic).zfill(5), run)) joblib.dump({"rp": rp, "wp": wp}, paths.tpcfauto_path(run, ic))
def do_runs(ic): def do_runs(ic):
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax, cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
min_mass=minmass)
for run in args.runs: for run in args.runs:
do_auto(run, cat, ic) do_auto(run, cat, ic)

View file

@ -16,17 +16,20 @@
MPI script to evaluate field properties at the galaxy positions. MPI script to evaluate field properties at the galaxy positions.
""" """
from argparse import ArgumentParser from argparse import ArgumentParser
from os.path import join
from os import remove
from datetime import datetime from datetime import datetime
from os import remove
from os.path import join
import numpy import numpy
from mpi4py import MPI from mpi4py import MPI
try: try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
import utils import utils
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/" dumpdir = "/mnt/extraspace/rstiskalek/csiborg/"
@ -61,16 +64,16 @@ dtype = {"names": ["delta", "phi"], "formats": [numpy.float32] * 2}
# CSiBORG simulation paths # CSiBORG simulation paths
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
ics = paths.ic_ids(tonew=False) ics = paths.get_ics(tonew=False)
nsims = len(ics) nsims = len(ics)
for n in csiborgtools.fits.split_jobs(nsims, nproc)[rank]: for n in csiborgtools.utils.split_jobs(nsims, nproc)[rank]:
print("Rank {}@{}: working on {}th IC.".format(rank, datetime.now(), n), print("Rank {}@{}: working on {}th IC.".format(rank, datetime.now(), n),
flush=True) flush=True)
nsim = ics[n] nsim = ics[n]
nsnap = max(paths.get_snapshots(nsim)) nsnap = max(paths.get_snapshots(nsim))
reader = csiborgtools.read.ParticleReader(paths) reader = csiborgtools.read.ParticleReader(paths)
box = csiborgtools.units.BoxUnits(nsnap, nsim, paths) box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
# Read particles and select a subset of them # Read particles and select a subset of them
particles = reader.read_particle(nsnap, nsim, ["x", "y", "z", "M"], particles = reader.read_particle(nsnap, nsim, ["x", "y", "z", "M"],

View file

@ -13,17 +13,20 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate overlap between two CSiBORG realisations.""" """A script to calculate overlap between two CSiBORG realisations."""
from os.path import join
from argparse import ArgumentParser from argparse import ArgumentParser
from datetime import datetime from datetime import datetime
from os.path import join
import numpy import numpy
from scipy.ndimage import gaussian_filter from scipy.ndimage import gaussian_filter
try: try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
import utils import utils
# Argument parser # Argument parser
@ -44,18 +47,19 @@ overlapper = csiborgtools.match.ParticleOverlap()
# Load catalogues # Load catalogues
print("{}: loading catalogues {} and {}." print("{}: loading catalogues {} and {}."
.format(datetime.now(), args.nsim0, args.nsimx), flush=True) .format(datetime.now(), args.nsim0, args.nsimx), flush=True)
cat0 = csiborgtools.read.HaloCatalogue(args.nsim0, paths) cat0 = csiborgtools.read.ClumpsCatalogue(args.nsim0, paths)
catx = csiborgtools.read.HaloCatalogue(args.nsimx, paths) catx = csiborgtools.read.ClumpsCatalogue(args.nsimx, paths)
print("{}: loading simulation {} and converting positions to cell numbers." print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsim0), flush=True) .format(datetime.now(), args.nsim0), flush=True)
with open(paths.clump0_path(args.nsim0), "rb") as f:
with open(paths.initmatch_path(args.nsim0, "particles"), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True) clumps0 = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumps0) overlapper.clumps_pos2cell(clumps0)
print("{}: loading simulation {} and converting positions to cell numbers." print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsimx), flush=True) .format(datetime.now(), args.nsimx), flush=True)
with open(paths.clump0_path(args.nsimx), 'rb') as f: with open(paths.initmatch_path(args.nsimx, "particles"), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True) clumpsx = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumpsx) overlapper.clumps_pos2cell(clumpsx)

View file

@ -16,17 +16,17 @@
A script to fit halos (concentration, ...). The particle array of each CSiBORG A script to fit halos (concentration, ...). The particle array of each CSiBORG
realisation must have been split in advance by `runsplit_halos`. realisation must have been split in advance by `runsplit_halos`.
""" """
from os.path import join
from datetime import datetime from datetime import datetime
import numpy import numpy
from mpi4py import MPI from mpi4py import MPI
try: try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
import utils
# Get MPI things # Get MPI things
@ -35,8 +35,8 @@ rank = comm.Get_rank()
nproc = comm.Get_size() nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/" partreader = csiborgtools.read.ParticleReader(paths)
loaddir = join(dumpdir, "temp")
cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64), cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64),
("Rs", numpy.float64), ("vx", numpy.float64), ("Rs", numpy.float64), ("vx", numpy.float64),
("vy", numpy.float64), ("vz", numpy.float64), ("vy", numpy.float64), ("vz", numpy.float64),
@ -47,14 +47,48 @@ cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64),
("r500", numpy.float64), ("m200", numpy.float64), ("r500", numpy.float64), ("m200", numpy.float64),
("m500", numpy.float64), ("lambda200c", numpy.float64)] ("m500", numpy.float64), ("lambda200c", numpy.float64)]
def fit_clump(particles, clump, box):
    out["npart"][n] = clump.Npart
    out["rmin"][n] = clump.rmin
    out["rmax"][n] = clump.rmax
    out["totpartmass"][n] = clump.total_particle_mass
    out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m)
    out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m)
    out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m)
    out["Lx"][n], out["Ly"][n], out["Lz"][n] = clump.angular_momentum
for i, nsim in enumerate(paths.ic_ids(tonew=False)): for i, nsim in enumerate(paths.get_ics(tonew=False)):
if rank == 0: if rank == 0:
print("{}: calculating {}th simulation.".format(datetime.now(), i)) print("{}: calculating {}th simulation `{}`."
.format(datetime.now(), i, nsim), flush=True)
nsnap = max(paths.get_snapshots(nsim)) nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.units.BoxUnits(nsnap, nsim, paths) box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
jobs = csiborgtools.fits.split_jobs(utils.Nsplits, nproc)[rank] # Archive of clumps, keywords are their clump IDs
particle_archive = paths.split_path(nsnap, nsim)
clumpsarr = partreader.read_clumps(nsnap, nsim,
cols=["index", 'x', 'y', 'z'])
clumpid2arrpos = {ind: ii for ii, ind in enumerate(clumpsarr["index"])}
nclumps = len(particle_archive.files)
# Fit 5000 clumps at a time, then dump results
batchsize = 5000
# This rank does these `batchsize` clumps/halos
jobs = csiborgtools.utils.split_jobs(nclumps, nclumps // batchsize)[rank]
for clumpid in jobs:
... = fit_clump(particle_archive[str(clumpid)], clumpsarr[clumpid2arrpos[clumpid]])
jobs = csiborgtools.utils.split_jobs(nclumps, nproc)[rank]
for nsplit in jobs: for nsplit in jobs:
parts, part_clumps, clumps = csiborgtools.fits.load_split_particles( parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
nsplit, nsnap, nsim, paths, remove_split=False) nsplit, nsnap, nsim, paths, remove_split=False)
@@ -111,18 +145,18 @@ for i, nsim in enumerate(paths.ic_ids(tonew=False)):
     # Wait until all jobs finished before moving to another simulation
     comm.Barrier()
-    # Use the rank 0 to combine outputs for this CSiBORG realisation
-    if rank == 0:
-        print("Collecting results!")
-        partreader = csiborgtools.read.ParticleReader(paths)
-        out_collected = csiborgtools.read.combine_splits(
-            utils.Nsplits, nsnap, nsim, partreader, cols_collect,
-            remove_splits=True, verbose=False)
-        fname = paths.hcat_path(nsim)
-        print("Saving results to `{}`.".format(fname))
-        numpy.save(fname, out_collected)
-
-    comm.Barrier()
-
-if rank == 0:
-    print("All finished! See ya!")
+    # # Use the rank 0 to combine outputs for this CSiBORG realisation
+    # if rank == 0:
+    #     print("Collecting results!")
+    #     partreader = csiborgtools.read.ParticleReader(paths)
+    #     out_collected = csiborgtools.read.combine_splits(
+    #         utils.Nsplits, nsnap, nsim, partreader, cols_collect,
+    #         remove_splits=True, verbose=False)
+    #     fname = paths.hcat_path(nsim)
+    #     print("Saving results to `{}`.".format(fname))
+    #     numpy.save(fname, out_collected)
+    #
+    # comm.Barrier()
+    #
+    # if rank == 0:
+    #     print("All finished! See ya!")
View file
@@ -19,14 +19,16 @@ are grouped in a clump at present redshift.
 Optionally also dumps the clumps information, however watch out as this will
 eat up a lot of memory.
 """
-from gc import collect
-from os.path import join
-from os import remove
 from argparse import ArgumentParser
 from datetime import datetime
 from distutils.util import strtobool
+from gc import collect
+from os import remove
+from os.path import join
 import numpy
 from mpi4py import MPI
 try:
     import csiborgtools
 except ModuleNotFoundError:
@@ -45,12 +47,10 @@ parser.add_argument("--dump_clumps", type=lambda x: bool(strtobool(x)))
 args = parser.parse_args()
 paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
-nsims = paths.ic_ids(tonew=True)
+nsims = paths.get_ics(tonew=True)
-# Output files
-ftemp = join(paths.dumpdir, "temp_initmatch", "temp_{}_{}_{}.npy")
-fpermcm = join(paths.dumpdir, "initmatch", "clump_{}_cm.npy")
-fpermpart = join(paths.dumpdir, "initmatch", "clump_{}_particles.npy")
+# Temporary output file
+ftemp = join(paths.dumpdir, "temp", "initmatch_{}_{}_{}.npy")
 for nsim in nsims:
     if rank == 0:
@@ -87,7 +87,7 @@ for nsim in nsims:
     unique_clumpids = numpy.unique(clump_ids)
     njobs = unique_clumpids.size
-    jobs = csiborgtools.fits.split_jobs(njobs, nproc)[rank]
+    jobs = csiborgtools.utils.split_jobs(njobs, nproc)[rank]
     for i in jobs:
         n = unique_clumpids[i]
         x0 = part0[clump_ids == n]
@@ -139,8 +139,8 @@ for nsim in nsims:
             out["ID"][i] = n
         print("{}: dumping to .. `{}`.".format(
-            datetime.now(), fpermcm.format(nsim)), flush=True)
-        with open(fpermcm.format(nsim), 'wb') as f:
+            datetime.now(), paths.initmatch_path(nsim, "cm")), flush=True)
+        with open(paths.initmatch_path(nsim, "cm"), 'wb') as f:
             numpy.save(f, out)
         if args.dump_clumps:
@@ -157,9 +157,11 @@ for nsim in nsims:
                 out["clump"][i] = fin
                 out["ID"][i] = n
                 remove(fpath)
-            print("{}: dumping to .. `{}`.".format(
-                datetime.now(), fpermpart.format(nsim)), flush=True)
-            with open(fpermpart.format(nsim), "wb") as f:
+            fout = paths.initmatch_path(nsim, "particles")
+            print("{}: dumping to .. `{}`.".format(datetime.now(), fout),
+                  flush=True)
+            with open(fout, "wb") as f:
                 numpy.save(f, out)
             del out
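The `paths.initmatch_path(nsim, kind)` calls replace the hand-built `fpermcm` and `fpermpart` templates above. Judging only by the strings being replaced, the helper presumably resolves as in this hypothetical sketch (the real method lives on `csiborgtools.read.CSiBORGPaths` and may differ):

from os.path import join


def initmatch_path(dumpdir, nsim, kind):
    # `kind` distinguishes the centre-of-mass ("cm") and particle dumps.
    return join(dumpdir, "initmatch", "clump_{}_{}.npy".format(nsim, kind))


print(initmatch_path("/mnt/extraspace", 42, "cm"))
# /mnt/extraspace/initmatch/clump_42_cm.npy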
64
scripts/pre_mmain.py Normal file
View file
@@ -0,0 +1,64 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to generate the mmain files, i.e. sums up the substructe of children.
"""
from datetime import datetime

import numpy
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools

# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
mmain_reader = csiborgtools.read.MmainReader(paths)


def do_mmain(nsim):
    nsnap = max(paths.get_snapshots(nsim))
    # NOTE: currently works for highest snapshot anyway
    mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
    numpy.savez(paths.mmain_path(nsnap, nsim),
                mmain=mmain, ultimate_parent=ultimate_parent)


###############################################################################
#                           MPI task delegation                               #
###############################################################################

if nproc > 1:
    if rank == 0:
        tasks = list(paths.get_ics(tonew=False))
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_mmain, comm, verbose=False)
else:
    tasks = paths.get_ics(tonew=False)
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_mmain(task)

comm.Barrier()
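Per the docstring, `make_mmain` attributes every clump to its ultimate parent and sums the substructure onto it. A toy illustration of that reduction on plain arrays (not the `MmainReader` internals):

import numpy

# Toy clump catalogue: a clump whose parent is itself is a main halo.
clump_id = numpy.array([1, 2, 3, 4, 5])
parent_id = numpy.array([1, 1, 2, 4, 4])
mass = numpy.array([10.0, 5.0, 2.0, 8.0, 3.0])

# Walk each clump's parent links up to the top of its hierarchy.
idmap = {cid: i for i, cid in enumerate(clump_id)}
ultimate = numpy.copy(clump_id)
for i in range(clump_id.size):
    while parent_id[idmap[ultimate[i]]] != ultimate[i]:
        ultimate[i] = parent_id[idmap[ultimate[i]]]

# Sum substructure masses onto the ultimate parents.
totmass = {int(p): float(mass[ultimate == p].sum())
           for p in numpy.unique(ultimate)}
print(totmass)  # {1: 17.0, 4: 11.0}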
115
scripts/pre_splithalos.py Normal file
View file
@@ -0,0 +1,115 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Script to split particles to indivudual files according to their clump."""
from datetime import datetime
from gc import collect
from glob import glob
from os import remove
from os.path import join

import numpy
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
from tqdm import tqdm

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools

# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
verbose = nproc == 1
partcols = ['x', 'y', 'z', "vx", "vy", "vz", 'M']
def do_split(nsim):
    nsnap = max(paths.get_snapshots(nsim))
    reader = csiborgtools.read.ParticleReader(paths)
    ftemp_base = join(
        paths.temp_dumpdir,
        "split_{}_{}".format(str(nsim).zfill(5), str(nsnap).zfill(5))
    )
    ftemp = ftemp_base + "_{}.npz"

    # Load the particles and their clump IDs
    particles = reader.read_particle(nsnap, nsim, partcols, verbose=verbose)
    particle_clumps = reader.read_clumpid(nsnap, nsim, verbose=verbose)
    # Drop all particles whose clump index is 0 (not assigned to any clump)
    assigned_mask = particle_clumps != 0
    particle_clumps = particle_clumps[assigned_mask]
    particles = particles[assigned_mask]
    del assigned_mask
    collect()

    # Load the clump indices
    clumpinds = reader.read_clumps(nsnap, nsim, cols="index")["index"]
    # Some of the clumps have no particles, so we do not loop over them
    clumpinds = clumpinds[numpy.isin(clumpinds, particle_clumps)]

    # Loop over the clump indices and save the particles to a temporary file
    # every 10000 clumps. We will later read this back and combine into a
    # single file.
    out = {}
    for i, clind in enumerate(tqdm(clumpinds) if verbose else clumpinds):
        key = str(clind)
        out.update({key: particles[particle_clumps == clind]})
        # REMOVE bump this back up
        if i % 10000 == 0 or i == clumpinds.size - 1:
            numpy.savez(ftemp.format(i), **out)
            out = {}

    # Clear up memory because we will be loading everything back
    del particles, particle_clumps, clumpinds
    collect()

    # Now load back in every temporary file, combine them into a single
    # dictionary and save as a single .npz file.
    out = {}
    for file in glob(ftemp_base + '*'):
        inp = numpy.load(file)
        for key in inp.files:
            out.update({key: inp[key]})
        remove(file)

    numpy.savez(paths.split_path(nsnap, nsim), **out)
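The `.npz` archive written here is keyed by stringified clump ID, which is exactly how the updated `fit_halos.py` consumes it above (`particle_archive.files`, `particle_archive[str(clumpid)]`). Reading it back looks roughly like this; the file name is illustrative and would come from `paths.split_path(nsnap, nsim)`:

import numpy

archive = numpy.load("split_archive.npz")  # lazy NpzFile, keys are clump IDs
print(len(archive.files))                  # number of clumps with particles
parts = archive[archive.files[0]]          # one clump's structured array
print(parts["x"][:3], parts["M"][:3])      # fields follow `partcols` above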
###############################################################################
#                           MPI task delegation                               #
###############################################################################

if nproc > 1:
    if rank == 0:
        tasks = list(paths.get_ics(tonew=False))
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_split, comm, verbose=False)
else:
    tasks = paths.get_ics(tonew=False)
    tasks = [tasks[0]]  # REMOVE
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_split(task)

comm.Barrier()
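The `master_process`/`worker_process` pair implements an MPI task farm: rank 0 hands out one simulation at a time and every worker requests more work as it finishes, so slow simulations do not stall the rest. A minimal sketch of that protocol, assuming a TaskmasterMPI-like design (this is not the library's actual code):

from mpi4py import MPI


def master(tasks, comm):
    # Hand out tasks on demand, then dismiss each worker with None.
    nworkers = comm.Get_size() - 1
    status = MPI.Status()
    while tasks or nworkers > 0:
        comm.recv(source=MPI.ANY_SOURCE, status=status)  # "I am ready"
        if tasks:
            comm.send(tasks.pop(), dest=status.Get_source())
        else:
            comm.send(None, dest=status.Get_source())
            nworkers -= 1


def worker(func, comm):
    # Keep requesting tasks until the master sends None.
    while True:
        comm.send(None, dest=0)
        task = comm.recv(source=0)
        if task is None:
            break
        func(task)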
View file
@@ -1,58 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to split particles into smaller files according to their clump
membership for faster manipulation. Currently does this for the maximum
snapshot of each simulation. Running this requires a lot of memory.
"""
from mpi4py import MPI
from datetime import datetime
try:
    import csiborgtools
except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools
import utils

# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
sims = paths.ic_ids(False)
partcols = ["x", "y", "z", "vx", "vy", "vz", "M", "level"]
jobs = csiborgtools.fits.split_jobs(len(sims), nproc)[rank]
for icount, sim_index in enumerate(jobs):
    print("{}: rank {} working {} / {} jobs."
          .format(datetime.now(), rank, icount + 1, len(jobs)), flush=True)
    nsim = sims[sim_index]
    nsnap = max(paths.get_snapshots(nsim))
    partreader = csiborgtools.read.ParticleReader(paths)
    # Load the clumps, particles' clump IDs and particles.
    clumps = partreader.read_clumps(nsnap, nsim)
    particle_clumps = partreader.read_clumpid(nsnap, nsim, verbose=False)
    particles = partreader.read_particle(nsnap, nsim, partcols, verbose=False)
    # Drop all particles whose clump index is 0 (not assigned to any halo)
    particle_clumps, particles = partreader.drop_zero_indx(
        particle_clumps, particles)
    # Dump it!
    csiborgtools.fits.dump_split_particles(particles, particle_clumps, clumps,
                                           utils.Nsplits, nsnap, nsim, paths,
                                           verbose=False)

print("All finished!", flush=True)