Add mmain and other major updates (#44)

* Move paths to a separate file

* Add mmain reader

* Add a verbosity flag

* Fix imports

* Fix bug

* Rename files

* Return ultimate parents

* Add script to generate mmain

* Remove mmain path

* edit path

* Add mmain path

* Change function name

* Rename function

* Turn off verbose

* Fix list requirement

* Edit init match paths

* Fix init pathing

* Edit paths docs

* Edit dumpdir name

* Rename path

* Fix split paths

* Remove unused import

* Add comment

* Update readme

* remove read mmain

* Rename halo catalogue

* Fix minor bugs

* Update nbs

* Add create directory option

* Move split jobs

* Move split jobs

* Remove splitting

* Add import

* Edit script

* Deeper split folder

* Fix paths bug

* Rename catalogue

* Rename Catalogue

* Add new clumpread

* Edit paths

* add knn paths

* Update commenting

* Update imports

* Add more conversions

* Update temp file

* Add a note

* Add catalogue

* Comment

* Update TODO

* Update script

* add nb

* Update

* pep8

* edit paths & pep8

* Fix knn auto paths

* add paths docs

* Add auto and cross knn paths

* Add new paths

* Simplify tpcf reading

* pep8 patch

* update readme

* Update progress

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* Pep 8 and restructure

* add lambda spin

* add clump and halo

* add checks

* Edit halo profile fit

* Update gitignore

* backup script
Richard Stiskalek 2023-04-18 11:02:36 +02:00 committed by GitHub
parent e0d3854277
commit fdb0df8d4c
50 changed files with 2152 additions and 1844 deletions

.gitignore (vendored), 1 change
View File

@@ -17,3 +17,4 @@ Pylians3/*
scripts/plot_correlation.ipynb
scripts/*.sh
venv/
.trunk/*

View File

@@ -2,8 +2,16 @@
## Project Overlap
- [ ] Calculate the overlap between all 101 IC realisations on DiRAC.
- [x] Clean up the kNN paths in the summary.
- [x] Clean up the 2PCF paths in the summary.
- [ ] Sort out the splitting of individual clumps.
- [ ] Update the fitting scripts to work for clumps and parent halos.
- [ ] Calculated fitted quantities for clumps and parent halos and add them to the catalogues.
- [ ] Update overlap scripts to work with summed parent halos.
- [ ] Update the clustering scripts to work with clumps instead.
- [ ] Calculate the overlap between all 101 IC realisations on DiRAC.
## Project Clustering
@@ -20,7 +28,7 @@
- [x] For the cross-correlation try making the second field randoms.
- [x] Clean up the reader code.
- [x] Correct the crossing script.
- [ ] Get started with the 2PCF calculation.
- [x] Get started with the 2PCF calculation.
## Project Environmental Dependence
- [ ] Add gradient and Hessian of the overdensity field.

View File

@@ -12,11 +12,10 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from csiborgtools import (read, match, utils, units, fits, field, clustering) # noqa
from csiborgtools import clustering, field, fits, match, read # noqa
# Arguments to csiborgtools.read.CSiBORGPaths.
paths_glamdring = {
"srcdir": "/mnt/extraspace/hdesmond/",
"dumpdir": "/mnt/extraspace/rstiskalek/csiborg/",
"mmain_path": "/mnt/zfsusers/hdesmond/Mmain",
"initmatch_path": "/mnt/extraspace/rstiskalek/csiborg/initmatch/"}
"postdir": "/mnt/extraspace/rstiskalek/csiborg/"
}
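
A minimal usage sketch of the new-style configuration (hedged: that `CSiBORGPaths` accepts these keys as keyword arguments is assumed from the comment above, not shown in the diff):

import csiborgtools

# Hypothetical: unpack the dictionary straight into the paths manager.
paths = csiborgtools.read.CSiBORGPaths(**paths_glamdring)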

View File

@@ -13,10 +13,19 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from warnings import warn
from .knn import kNN_CDF # noqa
from .utils import (RVSinsphere, RVSinbox, RVSonsphere, BaseRVS, normalised_marks) # noqa
from csiborgtools.clustering.knn import kNN_CDF # noqa
from csiborgtools.clustering.utils import ( # noqa
BaseRVS,
RVSinbox,
RVSinsphere,
RVSonsphere,
normalised_marks,
)
try:
import Corrfunc
import Corrfunc # noqa
from .tpcf import Mock2PCF # noqa
except ImportError:
warn("`Corrfunc` not installed. 2PCF modules will not be available (`Mock2PCF`).") # noqa

View File

@@ -13,11 +13,12 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
kNN-CDF calculation
kNN-CDF calculation.
"""
import numpy
from scipy.interpolate import interp1d
from scipy.stats import binned_statistic
from .utils import BaseRVS
@@ -105,7 +106,7 @@ class kNN_CDF:
Catalogue NN object.
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
Uniform RVS generator matching `knn`.
neighbours : int
nneighbours : int
Maximum number of neighbours to use for the kNN-CDF calculation.
nsamples : int
Number of random points to sample for the knn-CDF calculation.
@@ -155,7 +156,7 @@
NN object of the second catalogue.
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
Uniform RVS generator matching `knn1` and `knn2`.
neighbours : int
nneighbours : int
Maximum number of neighbours to use for the kNN-CDF calculation.
Rmax : float
Maximum radius of the sphere in which to sample random points for
@@ -246,7 +247,7 @@ class kNN_CDF:
Catalogue NN object.
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
Uniform RVS generator matching `knn1` and `knn2`.
neighbours : int
nneighbours : int
Maximum number of neighbours to use for the kNN-CDF calculation.
nsamples : int
Number of random points to sample for the knn-CDF calculation.
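
For orientation, a self-contained sketch of the statistic these methods compute, the CDF of the distance from uniform random query points to their k-th nearest catalogue member. This is illustrative only, not the class API; all names below are assumptions:

import numpy
from sklearn.neighbors import NearestNeighbors

def knn_cdf_sketch(data, nneighbours, nsamples, boxsize, rbins, seed=42):
    gen = numpy.random.default_rng(seed)
    # Uniform random queries in the box, mirroring the RVS generators above.
    queries = gen.uniform(0, boxsize, size=(nsamples, 3))
    dist, _ = NearestNeighbors().fit(data).kneighbors(queries, nneighbours)
    # One empirical CDF per neighbour order k, evaluated at `rbins`.
    return numpy.stack([(dist[:, k, None] <= rbins[None, :]).mean(axis=0)
                        for k in range(nneighbours)])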

View File

@@ -16,6 +16,7 @@
import numpy
from Corrfunc.theory.DD import DD
from Corrfunc.utils import convert_3d_counts_to_cf
from .utils import BaseRVS
@@ -63,6 +64,7 @@ class Mock2PCF:
periodic=False)
ndata = pos.shape[0]
xi = convert_3d_counts_to_cf(ndata, ndata, nrandom, nrandom, dd, dr, dr, rr)
xi = convert_3d_counts_to_cf(ndata, ndata, nrandom, nrandom,
dd, dr, dr, rr)
rp = 0.5 * (bins[1:] + bins[:-1])
return rp, xi
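
`convert_3d_counts_to_cf` evaluates the Landy-Szalay estimator from the pair counts; the same arithmetic written out by hand (a sketch assuming plain integer counts rather than Corrfunc result arrays):

def landy_szalay(dd, dr, rr, ndata, nrandom):
    # Normalise each count by the number of possible pairs, then combine.
    fdd = dd / (ndata * (ndata - 1))
    fdr = dr / (ndata * nrandom)
    frr = rr / (nrandom * (nrandom - 1))
    return (fdd - 2 * fdr + frr) / frr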

View File

@@ -13,10 +13,10 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Clustering support functions."""
from abc import (ABC, abstractmethod)
from abc import ABC, abstractmethod
from warnings import warn
import numpy
import numpy
###############################################################################
# Random points #
@@ -77,7 +77,7 @@ class RVSinsphere(BaseRVS):
class RVSinbox(BaseRVS):
"""
r"""
Generator of uniform RVS in a box of width `L` in Cartesian coordinates in
:math:`[0, L]^3`.
@@ -100,7 +100,7 @@ class RVSinbox(BaseRVS):
class RVSonsphere(BaseRVS):
"""
r"""
Generator of uniform RVS on the surface of a unit sphere. RA is in
:math:`[0, 2\pi)` and dec in :math:`[-\pi / 2, \pi / 2]`, respectively.
If `indeg` is `True` then converted to degrees.
@@ -148,7 +148,7 @@ def wrapRA(ra, indeg):
"""
mask = ra < 0
if numpy.sum(mask) == 0:
warn("No negative right ascension found.", UserWarning())
warn("No negative right ascension found.", UserWarning, stacklevel=1)
ra[mask] += 360 if indeg else 2 * numpy.pi
return ra
@@ -177,7 +177,7 @@ def normalised_marks(x, y, nbins):
"""
assert x.ndim == y.ndim == 1
if y.dtype not in [numpy.float32, numpy.float64]:
raise NotImplemented("Marks from integers are not supported.")
raise NotImplementedError("Marks from integers are not supported.")
bins = numpy.percentile(x, q=numpy.linspace(0, 100, nbins + 1))
marks = numpy.full_like(y, numpy.nan)
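
The diff cuts off before the loop that fills `marks`. One plausible completion of the idea, rank-normalising `y` within each percentile bin of `x`; this is a sketch, not necessarily the repository's implementation:

import numpy

def normalised_marks_sketch(x, y, nbins):
    bins = numpy.percentile(x, q=numpy.linspace(0, 100, nbins + 1))
    marks = numpy.full_like(y, numpy.nan)
    for i in range(nbins):
        mask = (x >= bins[i]) & (x < bins[i + 1])
        if i == nbins - 1:  # include the right edge in the last bin
            mask |= x == bins[-1]
        if mask.sum() > 0:
            # Normalised rank of y within this bin of x, in [0, 1).
            marks[mask] = numpy.argsort(numpy.argsort(y[mask])) / mask.sum()
    return marks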

View File

@@ -13,8 +13,10 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from warnings import warn
try:
import MAS_library as MASL
from .density import DensityField
import MAS_library as MASL # noqa
from .density import DensityField # noqa
except ImportError:
warn("MAS_library not found, `DensityField` will not be available", UserWarning) # noqa

View File

@@ -16,12 +16,12 @@
Density field and cross-correlation calculations.
"""
from warnings import warn
from tqdm import trange
import numpy
import MAS_library as MASL
import numpy
import Pk_library as PKL
import smoothing_library as SL
from ..units import (BoxUnits, radec_to_cartesian)
from tqdm import trange
class DensityField:
@@ -56,8 +56,7 @@ class DensityField:
def __init__(self, particles, boxsize, box, MAS="CIC"):
self.particles = particles
assert boxsize > 0
self._boxsize = boxsize
assert isinstance(box, BoxUnits)
self.boxsize = boxsize
self.box = box
assert MAS in ["NGP", "CIC", "TSC", "PCS"]
self._MAS = MAS
@@ -103,6 +102,14 @@
"""
return self._box
@box.setter
def box(self, box):
try:
assert box._name == "box_units"
self._box = box
except AttributeError as err:
raise TypeError from err
@property
def MAS(self):
"""
@@ -117,7 +124,7 @@
@staticmethod
def _force_f32(x, name):
if x.dtype != numpy.float32:
warn("Converting `{}` to float32.".format(name))
warn("Converting `{}` to float32.".format(name), stacklevel=1)
x = x.astype(numpy.float32)
return x
@@ -348,13 +355,15 @@
-------
interp_field : (list of) 1-dimensional array of shape `(n_samples,)`.
"""
self._force_f32(pos, "pos")
X = numpy.vstack(
radec_to_cartesian(*(pos[:, i] for i in range(3)), isdeg)).T
X = X.astype(numpy.float32)
# Place the observer at the center of the box
X += 0.5 * self.boxsize
return self.evaluate_field(*field, pos=X)
# TODO: implement this
raise NotImplementedError("This method is not yet implemented.")
# self._force_f32(pos, "pos")
# X = numpy.vstack(
# radec_to_cartesian(*(pos[:, i] for i in range(3)), isdeg)).T
# X = X.astype(numpy.float32)
# # Place the observer at the center of the box
# X += 0.5 * self.boxsize
# return self.evaluate_field(*field, pos=X)
@staticmethod
def gravitational_field_norm(gx, gy, gz):

View File

@@ -12,7 +12,5 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .haloprofile import (NFWProfile, NFWPosterior) # noqa
from .halo import (distribute_halos, clump_with_particles, # noqa
dump_split_particles, load_split_particles, # noqa
split_jobs, pick_single_clump, Clump) # noqa
from .halo import Clump, Halo # noqa
from .haloprofile import NFWPosterior, NFWProfile # noqa

View File

@@ -12,555 +12,195 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Tools for splitting the particles and a clump object.
"""
from os import remove
from warnings import warn
from os.path import join
"""A clump object."""
from abc import ABC
import numpy
from tqdm import trange
from ..read import ParticleReader
def clump_with_particles(particle_clumps, clumps):
class BaseStructure(ABC):
"""
Count how many particles does each clump have.
Parameters
----------
particle_clumps : 1-dimensional array
Array of particles' clump IDs.
clumps : structured array
Clumps array.
Returns
-------
with_particles : 1-dimensional boolean array
Array of whether a clump has any particles.
Basic structure object for handling operations on its particles.
"""
return numpy.isin(clumps["index"], particle_clumps)
_particles = None
_info = None
_box = None
def distribute_halos(n_splits, clumps):
"""
Evenly distribute clump indices to smaller splits. Clumps should only be
clumps that contain particles.
Parameters
----------
n_splits : int
Number of splits.
clumps : structured array
Clumps array.
Returns
-------
splits : 2-dimensional array of shape `(njobs, 2)`
Array of starting and ending indices of each CPU.
"""
# Make sure these are unique IDs
indxs = clumps["index"]
if indxs.size > numpy.unique((indxs)).size:
raise ValueError("`clump_indxs` constains duplicate indices.")
Ntotal = indxs.size
njobs_per_cpu = numpy.ones(n_splits, dtype=int) * Ntotal // n_splits
# Split the remainder Ntotal % njobs among the CPU
njobs_per_cpu[:Ntotal % n_splits] += 1
start = ParticleReader.nparts_to_start_ind(njobs_per_cpu)
return numpy.vstack([start, start + njobs_per_cpu]).T
def dump_split_particles(particles, particle_clumps, clumps, n_splits,
nsnap, nsim, paths, verbose=True):
"""
Save the data needed for each split so that a process does not have to load
everything.
Parameters
----------
particles : structured array
The particle array.
particle_clumps : 1-dimensional array
Array of particles' clump IDs.
clumps : structured array
The clumps array.
n_splits : int
Number of times to split the clumps.
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths-handling object with set `n_sim` and `n_snap`.
verbose : bool, optional
Verbosity flag. By default `True`.
Returns
-------
None
"""
if particles.size != particle_clumps.size:
raise ValueError("`particles` must correspond to `particle_clumps`.")
# Calculate which clumps have particles
with_particles = clump_with_particles(particle_clumps, clumps)
clumps = numpy.copy(clumps)[with_particles]
if verbose:
warn(r"There are {:.4f}% clumps that have identified particles."
.format(with_particles.sum() / with_particles.size * 100))
# The starting clump index of each split
splits = distribute_halos(n_splits, clumps)
fname = join(paths.temp_dumpdir, "out_{}_snap_{}_{}.npz")
tot = 0
for n in trange(n_splits) if verbose else range(n_splits):
# Lower and upper array index of the clumps array
i, j = splits[n, :]
# Clump indices in this split
indxs = clumps["index"][i:j]
hmin, hmax = indxs.min(), indxs.max()
mask = (particle_clumps >= hmin) & (particle_clumps <= hmax)
# Check number of clumps
npart_unique = numpy.unique(particle_clumps[mask]).size
if indxs.size > npart_unique:
raise RuntimeError(
"Split `{}` contains more unique clumps (`{}`) than there are "
"unique particles' clump indices (`{}`)after removing clumps "
"with no particles.".format(n, indxs.size, npart_unique))
# Dump it!
tot += mask.sum()
fout = fname.format(nsim, nsnap, n)
numpy.savez(fout, particles[mask], particle_clumps[mask], clumps[i:j])
# There are particles whose clump ID is > 1 and have no counterpart in the
# clump file. Therefore can save fewer particles, depending on the cut.
if tot > particle_clumps.size:
raise RuntimeError(
"Num. of dumped particles `{}` is greater than the particle file "
"size `{}`.".format(tot, particle_clumps.size))
def split_jobs(njobs, ncpu):
"""
Split `njobs` amongst `ncpu`.
Parameters
----------
njobs : int
Number of jobs.
ncpu : int
Number of CPUs.
Returns
-------
jobs : list of lists of integers
Outer list of each CPU and inner lists for CPU's jobs.
"""
njobs_per_cpu, njobs_remainder = divmod(njobs, ncpu)
jobs = numpy.arange(njobs_per_cpu * ncpu).reshape((njobs_per_cpu, ncpu)).T
jobs = jobs.tolist()
for i in range(njobs_remainder):
jobs[i].append(njobs_per_cpu * ncpu + i)
return jobs
def load_split_particles(nsplit, nsnap, nsim, paths, remove_split=False):
"""
Load particles of a split saved by `dump_split_particles`.
Parameters
--------
n_split : int
Split index.
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths-handling object with set `n_sim` and `n_snap`.
remove_split : bool, optional
Whether to remove the split file. By default `False`.
Returns
-------
particles : structured array
Particle array of this split.
clumps_indxs : 1-dimensional array
Array of particles' clump IDs of this split.
clumps : 1-dimensional array
Clumps belonging to this split.
"""
fname = join(paths.temp_dumpdir,
"out_{}_snap_{}_{}.npz".format(nsim, nsnap, nsplit))
file = numpy.load(fname)
particles, clump_indxs, clumps = (file[f] for f in file.files)
if remove_split:
remove(fname)
return particles, clump_indxs, clumps
def pick_single_clump(n, particles, particle_clumps, clumps):
"""
Get particles belonging to the `n`th clump in `clumps` arrays.
Parameters
----------
n : int
Clump position in `clumps` array. Not its halo finder index!
particles : structured array
@property
def particles(self):
"""
Particle array.
particle_clumps : 1-dimensional array
Array of particles' clump IDs.
clumps : structured array
Array of clumps.
Returns
-------
sel_particles : structured array
Particles belonging to the requested clump.
sel_clump : array
A slice of a `clumps` array corresponding to this clump. Must
contain `["peak_x", "peak_y", "peak_z", "mass_cl"]`.
"""
# Clump index on the nth position
k = clumps["index"][n]
# Mask of which particles belong to this clump
mask = particle_clumps == k
return particles[mask], clumps[n]
Returns
-------
particles : structured array
"""
return self._particles
@particles.setter
def particles(self, particles):
pars = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
assert all(p in particles.dtype.names for p in pars)
self._particles = particles
###############################################################################
# Clump object #
###############################################################################
@property
def info(self):
"""
Array containing information from the clump finder.
Returns
-------
info : structured array
"""
return self._info
class Clump:
r"""
A clump (halo) object to handle the particles and their clump's data.
@info.setter
def info(self, info):
# TODO turn this into a structured array and add some checks
self._info = info
Parameters
----------
x : 1-dimensional array
Particle coordinates along the x-axis.
y : 1-dimensional array
Particle coordinates along the y-axis.
z : 1-dimensional array
Particle coordinates along the z-axis.
m : 1-dimensional array
Particle masses.
x0 : float
Clump center coordinate along the x-axis.
y0 : float
Clump center coordinate along the y-axis.
z0 : float
Clump center coordinate along the z-axis.
clump_mass : float, optional
Mass of the clump. By default not set.
vx : 1-dimensional array, optional
Particle velocity along the x-axis. By default not set.
vy : 1-dimensional array, optional
Particle velocity along the y-axis. By default not set.
vz : 1-dimensional array, optional
Particle velocity along the z-axis. By default not set.
index : int, optional
The halo finder index of this clump. By default not set.
rhoc : float, optional
The critical density :math:`\rho_c` at this snapshot in box units. By
default not set.
G : float, optional
The gravitational constant :math:`G` in box units. By default not set.
"""
_pos = None
_clump_pos = None
_clump_mass = None
_vel = None
_index = None
_rhoc = None
_G = None
@property
def box(self):
"""
CSiBORG box object handling unit conversion.
def __init__(self, x, y, z, m, x0, y0, z0, clump_mass=None,
vx=None, vy=None, vz=None, index=None, rhoc=None, G=None):
self._pos = numpy.vstack([x - x0, y - y0, z - z0]).T
self._clump_pos = numpy.asarray((x0, y0, z0))
assert clump_mass is None or isinstance(clump_mass, float)
self._clump_mass = clump_mass
if all(v is not None for v in (vx, vy, vz)):
self._vel = numpy.vstack([vx, vy, vz]).T
assert self._vel.shape == self.pos.shape
assert m.ndim == 1 and m.size == self.Npart
self._m = m
assert index is None or (isinstance(index, (int, numpy.int64)) and index >= 0) # noqa
self._index = index
assert rhoc is None or rhoc > 0
self._rhoc = rhoc
assert G is None or G > 0
self._G = G
Returns
-------
box : :py:class:`csiborgtools.units.BoxUnits`
"""
return self._box
@box.setter
def box(self, box):
try:
assert box._name == "box_units"
self._box = box
except AttributeError as err:
raise TypeError from err
@property
def pos(self):
"""
Cartesian particle coordinates centered at the clump.
Cartesian particle coordinates centered at the object.
Returns
-------
pos : 2-dimensional array of shape `(n_particles, 3)`.
"""
return self._pos
@property
def Npart(self):
"""
Number of particles associated with this clump.
Returns
-------
Npart : int
"""
return self.pos.shape[0]
ps = ('x', 'y', 'z')
return numpy.vstack([self[p] - self.info[p] for p in ps]).T
@property
def r(self):
"""
Radial distance of the particles from the clump peak.
Radial separation of the particles from the centre of the object.
Returns
-------
r : 1-dimensional array of shape `(n_particles, )`.
"""
return numpy.sum(self.pos**2, axis=1)**0.5
@property
def rmin(self):
"""
The minimum radial distance of a particle.
Returns
-------
rmin : float
"""
return numpy.min(self.r)
@property
def rmax(self):
"""
The maximum radial distance of a particle.
Returns
-------
rmin : float
"""
return numpy.max(self.r)
@property
def clump_pos(self):
"""
Cartesian position components of the clump.
Returns
-------
pos : 1-dimensional array of shape `(3, )`
"""
return self._clump_pos
@property
def clump_mass(self):
"""
Clump mass.
Returns
-------
mass : float
"""
if self._clump_mass is None:
raise ValueError("Clump mass `clump_mass` has not been set.")
return self._clump_mass
return numpy.linalg.norm(self.pos, axis=1)
@property
def vel(self):
"""
Cartesian velocity components of the clump.
Cartesian particle velocity components.
Returns
-------
vel : 2-dimensional array of shape (`n_particles, 3`)
"""
if self._vel is None:
raise ValueError("Velocities `vel` have not been set.")
return self._vel
return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
@property
def m(self):
def cmass(self):
"""
Particle masses.
Returns
-------
m : 1-dimensional array of shape `(n_particles, )`
"""
return self._m
@property
def center_mass(self):
"""
Cartesian position components of the clump centre of mass. Note that
this is already in a frame centered at the clump's potential minimum.
Cartesian position components of the object's centre of mass. Note that
this is already in a frame centered at the clump's potential minimum,
so its distance from origin indicates the separation of the centre of
mass and potential minimum.
Returns
-------
cm : 1-dimensional array of shape `(3, )`
"""
return numpy.average(self.pos, axis=0, weights=self.m)
return numpy.average(self.pos, axis=0, weights=self['M'])
@property
def angular_momentum(self):
"""
Clump angular momentum in the box coordinates.
Angular momentum in the box coordinates.
NOTE: here also change velocities to the CM and appropriately edit the
docs.
Returns
-------
J : 1-dimensional array or shape `(3, )`
"""
J = numpy.cross(self.pos - self.center_mass, self.vel)
J = numpy.cross(self.pos - self.cmass, self.vel)
return numpy.einsum("i,ij->j", self['M'], J)
@property
def lambda200c(self):
r"""
Clump Bullock spin, see Eq. 5 in [1], in a radius of
:math:`R_{\rm 200c}`.
References
----------
[1] A Universal Angular Momentum Profile for Galactic Halos; 2001;
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R.
Returns
-------
lambda200c : float
def enclosed_mass(self, rmax, rmin=0):
"""
J = self.angular_momentum
R, M = self.spherical_overdensity_mass(200)
V = numpy.sqrt(self.G * M / R)
return numpy.linalg.norm(J) / (numpy.sqrt(2) * M * V * R)
@property
def index(self):
"""
Halo finder clump index.
Returns
-------
hindex : int
"""
if self._index is None:
raise ValueError("Halo index `hindex` has not been set.")
return self._index
@property
def rhoc(self):
r"""
Critical density :math:`\rho_c` at this snapshot in box units.
Returns
-------
rhoc : float
"""
if self._rhoc is None:
raise ValueError("The critical density `rhoc` has not been set.")
return self._rhoc
@property
def G(self):
r"""
Gravitational constant :math:`G` in box units.
Returns
-------
G : float
"""
if self._G is None:
raise ValueError("The grav. constant `G` has not been set.")
return self._G
@property
def total_particle_mass(self):
"""
Total mass of all particles.
Returns
-------
tot_mass : float
"""
return numpy.sum(self.m)
@property
def mean_particle_pos(self):
"""
Mean Cartesian particle coordinate. Not centered at the halo!
Returns
-------
pos : 1-dimensional array of shape `(3, )`
"""
return numpy.mean(self.pos + self.clump_pos, axis=0)
def enclosed_spherical_mass(self, rmax, rmin=0):
"""
Enclosed spherical mass between two radii in box units.
Parameters
----------
rmax : float
The maximum radial distance.
rmin : float, optional
The minimum radial distance. By default 0.
Returns
-------
M_enclosed : float
The enclosed mass.
"""
return numpy.sum(self.m[(self.r >= rmin) & (self.r <= rmax)])
def enclosed_spherical_volume(self, rmax, rmin=0):
"""
Enclosed spherical volume within two radii in box units.
Sum of particle masses between two radii.
Parameters
----------
rmax : float
Maximum radial distance.
rmin : float, optional
Minimum radial distance. By default 0.
Minimum radial distance.
Returns
-------
vol : float
enclosed_mass : float
"""
return 4 * numpy.pi / 3 * (rmax**3 - rmin**3)
r = self.r
return numpy.sum(self['M'][(r >= rmin) & (r <= rmax)])
def spherical_overdensity_mass(self, delta, n_particles_min=10):
def lambda_bullock(self, radius, npart_min=10):
r"""
Spherical overdensity mass and radius. The mass is defined as the
enclosed mass within a radius of where the mean enclosed spherical
density reaches a multiple of the critical radius at a given redshift
`self.rho_c`.
Starts from the furthest particle, working its way inside the halo
through an ordered list of particles. The corresponding values is the
radial distance of the first particle whose addition sufficiently
increases the mean density.
Bullock spin, see Eq. 5 in [1], within a radius of `radius`, which should
correspond to some overdensity radius.
Parameters
----------
delta : list of int or float
The :math:`\delta_{\rm x}` parameters where :math:`\mathrm{x}` is
the overdensity multiple.
n_particles_min : int
radius : float
Radius in which to calculate the spin.
npart_min : int
Minimum number of enclosed particles for a radius to be
considered trustworthy.
Returns
-------
lambda_bullock : float
References
----------
[1] A Universal Angular Momentum Profile for Galactic Halos; 2001;
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R.
"""
mask = self.r <= radius
if numpy.sum(mask) < npart_min:
return numpy.nan
mass = self.enclosed_mass(radius)
V = numpy.sqrt(self.box.box_G * mass / radius)
return (numpy.linalg.norm(self.angular_momentum[mask])
/ (numpy.sqrt(2) * mass * V * radius))
def spherical_overdensity_mass(self, delta_mult, npart_min=10):
r"""
Calculate spherical overdensity mass and radius. The mass is defined as
the enclosed mass within an outermost radius where the mean enclosed
spherical density reaches a multiple of the critical density `delta`.
Parameters
----------
delta_mult : list of int or float
Overdensity multiple.
npart_min : int
Minimum number of enclosed particles for a radius to be
considered trustworthy.
@@ -571,83 +211,75 @@ class Clump:
mx : float
Corresponding spherical enclosed mass.
"""
# If single `delta` turn to list
delta = [delta] if isinstance(delta, (float, int)) else delta
# If given a list or tuple turn to array
_istlist = isinstance(delta, (list, tuple))
delta = numpy.asarray(delta, dtype=float) if _istlist else delta
# We first sort the particles in an increasing separation
rs = self.r
order = numpy.argsort(rs)
rs = rs[order]
cmass = numpy.cumsum(self['M'][order])  # Cumulative mass, radially ordered
# We calculate the enclosed volume and indices where it is above target
vol = 4 * numpy.pi / 3 * (rs**3 - rs[0]**3)
ks = numpy.where(cmass / vol > delta_mult * self.box.rhoc)[0]
if ks.size == 0: # Never above the threshold?
return numpy.nan, numpy.nan
k = numpy.max(ks)
if k < npart_min: # Too few particles?
return numpy.nan, numpy.nan
return rs[k], cmass[k]
# Ordering of deltas
order_delta = numpy.argsort(delta)
# Sort the particles
order_particles = numpy.argsort(self.r)[::-1]
# Density to aim for
n_delta = delta.size
target_density = delta * self.rhoc
# The sum of particle masses, starting from the outside
# Adds the furtherst particle ensure that the 0th index is tot mass
cummass_ordered = (self.total_particle_mass
+ self.m[order_particles][0]
- numpy.cumsum(self.m[order_particles]))
# Enclosed volumes at particle radii
volumes = self.enclosed_spherical_volume(self.r[order_particles])
densities = cummass_ordered / volumes
# Pre-allocate arrays
rfound = numpy.full_like(delta, numpy.nan)
mfound = numpy.full_like(rfound, numpy.nan)
for n in order_delta:
overdense_mask = densities > target_density[n]
# Enforce that we have at least several particles enclosed
if numpy.sum(overdense_mask) < n_particles_min:
continue
# The outermost particle radius where the overdensity is achieved
k = numpy.where(overdense_mask)[0][0]
rfound[n] = self.r[order_particles][k]
mfound[n] = cummass_ordered[k]
# If only one delta return simply numbers
if n_delta == 1:
rfound = rfound[0]
mfound = mfound[0]
return rfound, mfound
@classmethod
def from_arrays(cls, particles, clump, rhoc=None, G=None):
r"""
Initialises `Clump` from `particles` containing the relevant particle
information and its `clump` information.
Paramaters
----------
particles : structured array
Array of particles belonging to this clump. Must contain
`["x", "y", "z", "M"]` and optionally also `["vx", "vy", "vz"]`.
clump : array
A slice of a `clumps` array corresponding to this clump. Must
contain `["peak_x", "peak_y", "peak_z", "mass_cl"]`.
rhoc : float, optional
The critical density :math:`\rho_c` at this snapshot in box units.
By default not set.
G : float, optional
The gravitational constant :math:`G` in box units. By default not
set.
@property
def keys(self):
"""
Particle array keys.
Returns
-------
clump : `Clump`
key : list of str
"""
x, y, z, m = (particles[p] for p in ["x", "y", "z", "M"])
x0, y0, z0, cl_mass, hindex = (
clump[p] for p in ["peak_x", "peak_y", "peak_z", "mass_cl",
"index"])
try:
vx, vy, vz = (particles[p] for p in ["vx", "vy", "vz"])
except ValueError:
vx, vy, vz = None, None, None
return cls(x, y, z, m, x0, y0, z0, cl_mass,
vx, vy, vz, hindex, rhoc, G)
return self.particles.dtype.names
def __getitem__(self, key):
if key not in self.keys:
raise RuntimeError("Invalid key `{}`!".format(key))
return self.particles[key]
def __len__(self):
return self.particles.size
class Clump(BaseStructure):
"""
Clump object to handle operations on its particles.
Parameters
----------
particles : structured array
Particle array. Must contain `['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']`.
info : structured array
Array containing information from the clump finder.
box : :py:class:`csiborgtools.read.BoxUnits`
Box units object.
"""
def __init__(self, particles, info, box):
self.particles = particles
self.info = info
self.box = box
class Halo(BaseStructure):
"""
Ultimate parent halo object to handle operations on its particles, i.e. a
halo obtained by summing the particles of its clumps.
Parameters
----------
particles : structured array
Particle array. Must contain `['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']`.
info : structured array
Array containing information from the clump finder.
box : :py:class:`csiborgtools.read.BoxUnits`
Box units object.
"""
def __init__(self, particles, info, box):
self.particles = particles
self.info = info
self.box = box
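
To make the refactor concrete, a standalone numpy sketch of the two fitted quantities the new classes provide, the spherical-overdensity radius/mass and the Bullock spin. This mirrors the logic above but is illustrative only; `rhoc` and `G` stand in for the box-unit values:

import numpy

def so_mass_sketch(r, m, delta_mult, rhoc, npart_min=10):
    # Sort particles by radius and accumulate mass outwards.
    order = numpy.argsort(r)
    rs, cmass = r[order], numpy.cumsum(m[order])
    vol = 4 * numpy.pi / 3 * rs**3
    ks = numpy.where(cmass / vol > delta_mult * rhoc)[0]
    if ks.size == 0 or ks.max() < npart_min:
        return numpy.nan, numpy.nan
    k = ks.max()  # outermost radius still above the density threshold
    return rs[k], cmass[k]

def lambda_bullock_sketch(J, mass, radius, G):
    # Eq. 5 of Bullock et al. (2001): |J| / (sqrt(2) M V R).
    V = numpy.sqrt(G * mass / radius)
    return numpy.linalg.norm(J) / (numpy.sqrt(2) * mass * V * radius)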

View File

@@ -12,109 +12,78 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Halo profiles functions and posteriors.
"""
from jax import numpy as jnumpy
from jax import grad
"""Halo profiles functions and posteriors."""
import numpy
from scipy.optimize import minimize_scalar
from scipy.stats import uniform
from .halo import Clump
class NFWProfile:
r"""
The Navarro-Frenk-White (NFW) density profile defined as
The Navarro-Frenk-White (NFW) density profile.
.. math::
\rho(r) = \frac{\rho_0}{x(1 + x)^2}
\rho(r) = \frac{\rho_0}{x(1 + x)^2},
where :math:`x = r / R_s` with free parameters :math:`R_s, \rho_0`.
Parameters
----------
Rs : float
Scale radius :math:`R_s`.
rho0 : float
NFW density parameter :math:`\rho_0`.
:math:`x = r / R_s` and its free parameters are :math:`R_s, \rho_0`: scale
radius and NFW density parameter.
"""
@staticmethod
def profile(r, Rs, rho0):
r"""
Halo profile evaluated at :math:`r`.
"""
Evaluate the halo profile at `r`.
Parameters
----------
r : float or 1-dimensional array
Radial distance :math:`r`.
r : 1-dimensional array
Radial distance.
Rs : float
Scale radius :math:`R_s`.
Scale radius.
rho0 : float
NFW density parameter :math:`\rho_0`.
NFW density parameter.
Returns
-------
density : float or 1-dimensional array
Density of the NFW profile at :math:`r`.
density : 1-dimensional array
"""
x = r / Rs
return rho0 / (x * (1 + x)**2)
@staticmethod
def logprofile(r, Rs, rho0, use_jax=False):
r"""
Natural logarithm of the halo profile evaluated at :math:`r`.
Parameters
----------
r : float or 1-dimensional array
Radial distance :math:`r`.
Rs : float
Scale radius :math:`R_s`.
rho0 : float
NFW density parameter :math:`\rho_0`.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
Returns
-------
logdensity : float or 1-dimensional array
Logarithmic density of the NFW profile at :math:`r`.
"""
log = jnumpy.log if use_jax else numpy.log
def _logprofile(r, Rs, rho0):
"""Natural logarithm of `NFWPprofile.profile(...)`."""
x = r / Rs
return log(rho0) - log(x) - 2 * log(1 + x)
return numpy.log(rho0) - numpy.log(x) - 2 * numpy.log(1 + x)
@staticmethod
def enclosed_mass(r, Rs, rho0, use_jax=False):
def mass(r, Rs, rho0):
r"""
Enclosed mass of a NFW profile in radius :math:`r`.
Calculate the enclosed mass of a NFW profile in radius `r`.
Parameters
----------
r : float or 1-dimensional array
Radial distance :math:`r`.
r : 1-dimensional array
Radial distance.
Rs : float
Scale radius :math:`R_s`.
Scale radius.
rho0 : float
NFW density parameter :math:`\rho_0`.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
NFW density parameter.
Returns
-------
M : float or 1-dimensional array
M : 1-dimensional array
The enclosed mass.
"""
log = jnumpy.log if use_jax else numpy.log
x = r / Rs
out = log(1 + x) - x / (1 + x)
out = numpy.log(1 + x) - x / (1 + x)
return 4 * numpy.pi * rho0 * Rs**3 * out
def bounded_enclosed_mass(self, rmin, rmax, Rs, rho0, use_jax=False):
def bounded_mass(self, rmin, rmax, Rs, rho0):
r"""
Calculate the enclosed mass between :math:`r_min <= r <= r_max`.
Calculate the enclosed mass between `rmin` and `rmax`.
Parameters
----------
@@ -125,51 +94,46 @@ class NFWProfile:
Rs : float
Scale radius :math:`R_s`.
rho0 : float
NFW density parameter :math:`\rho_0`.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
NFW density parameter.
Returns
-------
M : float
Enclosed mass within the radial range.
"""
return (self.enclosed_mass(rmax, Rs, rho0, use_jax)
- self.enclosed_mass(rmin, Rs, rho0, use_jax))
return self.mass(rmax, Rs, rho0) - self.mass(rmin, Rs, rho0)
def pdf(self, r, Rs, rmin, rmax):
r"""
The radial probability density function of the NFW profile calculated
as
Calculate the radial PDF of the NFW profile, defined below.
.. math::
\frac{4\pi r^2 \rho(r)} {M(r_\min, r_\max)}
\frac{4\pi r^2 \rho(r)} {M(r_\min, r_\max)},
where :math:`M(r_\min, r_\max)` is the enclosed mass between
:math:`r_\min` and :math:`r_\max`. Note that the dependence on
:math:`\rho_0` is cancelled.
:math:`\rho_0` is cancelled and must be accounted for in the
normalisation term to match the total mass.
Parameters
----------
r : float or 1-dimensional array
Radial distance :math:`r`.
r : 1-dimensional array
Radial distance.
Rs : float
Scale radius :math:`R_s`.
Scale radius.
rmin : float
Minimum radius.
Minimum radius to evaluate the PDF (denominator term).
rmax : float
Maximum radius.
Maximum radius to evaluate the PDF (denominator term).
Returns
-------
pdf : float or 1-dimensional array
Probability density of the NFW profile at :math:`r`.
pdf : 1-dimensional array
"""
norm = self.bounded_enclosed_mass(rmin, rmax, Rs, 1)
return 4 * numpy.pi * r**2 * self.profile(r, Rs, 1) / norm
def rvs(self, rmin, rmax, Rs, N=1):
def rvs(self, rmin, rmax, Rs, size=1):
"""
Generate random samples from the NFW profile via rejection sampling.
@@ -180,8 +144,8 @@ class NFWProfile:
rmax : float
Maximum radius.
Rs : float
Scale radius :math:`R_s`.
N : int, optional
Scale radius.
size : int, optional
Number of samples to generate. By default 1.
Returns
@@ -190,15 +154,15 @@
Samples following the NFW profile.
"""
gen = uniform(rmin, rmax-rmin)
samples = numpy.full(N, numpy.nan)
for i in range(N):
samples = numpy.full(size, numpy.nan)
for i in range(size):
while True:
r = gen.rvs()
if self.pdf(r, Rs, rmin, rmax) > numpy.random.rand():
samples[i] = r
break
if N == 1:
if size == 1:
return samples[0]
return samples
@@ -206,11 +170,10 @@
class NFWPosterior(NFWProfile):
r"""
Posterior for fitting the NFW profile in the range specified by the
closest particle and the :math:`r_{200c}` radius. The likelihood is
calculated as
closest particle and the :math:`r_{200c}` radius, calculated as below.
.. math::
\frac{4\pi r^2 \rho(r)} {M(r_{\min} r_{200c})} \frac{m}{M / N}
\frac{4\pi r^2 \rho(r)} {M(r_{\min}, r_{200c})} \frac{m}{M / N},
where :math:`M(r_{\min}, r_{200c})` is the NFW enclosed mass between the
closest particle and the :math:`r_{200c}` radius, :math:`m` is the particle
@@ -223,18 +186,9 @@ class NFWPosterior(NFWProfile):
clump : `Clump`
Clump object containing the particles and clump information.
"""
_clump = None
_binsguess = 10
_r = None
_Npart = None
_m = None
_rmin = None
_rmax = None
def __init__(self, clump):
# Initialise the NFW profile
def __init__(self):
super().__init__()
self.clump = clump
@property
def clump(self):
@@ -248,97 +202,12 @@ class NFWPosterior(NFWProfile):
"""
return self._clump
@property
def r(self):
r"""
Radial distance of particles used to fit the NFW profile, i.e. the ones
whose radial distance is less than :math:`R_{\rm 200c}`.
Returns
-------
r : 1-dimensional array
"""
return self._r
@property
def Npart(self):
r"""
Number of particles used to fit the NFW profile, i.e. the ones
whose radial distance is less than :math:`R_{\rm 200c}`.
Returns
-------
Npart : int
"""
return self._Npart
@property
def m(self):
r"""
Mass of particles used to fit the NFW profile, i.e. the ones
whose radial distance is less than :math:`R_{\rm 200c}`.
Returns
-------
r : 1-dimensional array
"""
return self._m
@property
def rmin(self):
"""
The minimum radial distance of a particle.
Returns
-------
rmin : float
"""
return self._rmin
@property
def rmax(self):
r"""
The maximum radial distance used to fit the profile, here takem to be
the :math:`R_{\rm 200c}`.
Returns
-------
rmax : float
"""
return self._rmax
@clump.setter
def clump(self, clump):
"""Sets `clump` and precalculates useful things."""
if not isinstance(clump, Clump):
raise TypeError(
"`clump` must be :py:class:`csiborgtools.fits.Clump` type. "
"Currently `{}`".format(type(clump)))
assert isinstance(clump, Clump)
self._clump = clump
# The minimum separation
rmin = self.clump.rmin
rmax, __ = self.clump.spherical_overdensity_mass(200)
# Set the distances
self._rmin = rmin
self._rmax = rmax
# Set particles that will be used to fit the halo
mask_r200 = (self.clump.r >= rmin) & (self.clump.r <= rmax)
self._r = self.clump.r[mask_r200]
self._m = self.clump.m[mask_r200]
self._Npart = self._r.size
# Ensure that the minimum separation is > 0 for finite log
if self.rmin > 0:
self._logrmin = numpy.log10(self.rmin)
else:
self._logrmin = numpy.log10(numpy.min(self.r[self.r > 0]))
self._logrmax = numpy.log10(self.rmax)
self._logprior_volume = numpy.log(self._logrmax - self._logrmin)
# Precalculate useful things
self._logMtot = numpy.log(numpy.sum(self.m))
gamma = 4 * numpy.pi * self.r**2 * self.m * self.Npart
self._ll0 = numpy.sum(numpy.log(gamma)) - self.Npart * self._logMtot
def rho0_from_Rs(self, Rs):
def rho0_from_Rs(self, Rs, rmin, rmax, mass):
r"""
Obtain :math:`\rho_0` of the NFW profile from the integral constraint
on total mass. Calculated as the ratio between the total particle mass
@@ -346,75 +215,96 @@ class NFWPosterior(NFWProfile):
Parameters
----------
logRs : float
Rs : float
Scale radius in units matching the coordinates.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
mass : float
Mass enclosed within the radius used to fit the NFW profile.
Returns
-------
rho0: float
"""
Mtot = numpy.exp(self._logMtot)
Mnfw_norm = self.bounded_enclosed_mass(self.rmin, self.rmax, Rs, 1)
return Mtot / Mnfw_norm
return mass / self.bounded_enclosed_mass(rmin, rmax, Rs, 1)
def logprior(self, logRs):
def initlogRs(self, r, rmin, rmax, binsguess=10):
r"""
Logarithmic uniform prior on :math:`\log R_{\rm s}`.
Calculate the most often occurring value of :math:`r`, used as the initial
guess of :math:`R_{\rm s}` since :math:`r^2 \rho(r)` peaks at
:math:`r = R_{\rm s}`.
Parameters
----------
r : 1-dimensional array
Radial distance of particles used to fit the profile.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
binsguess : int
Number of bins to initially guess :math:`R_{\rm s}`.
Returns
-------
initlogRs : float
"""
bins = numpy.linspace(rmin, rmax, binsguess)
counts, edges = numpy.histogram(r, bins)
return numpy.log10(edges[numpy.argmax(counts)])
def logprior(self, logRs, rmin, rmax):
r"""
Logarithmic uniform prior on :math:`\log R_{\rm s}`. Unnormalised but
that does not matter.
Parameters
----------
logRs : float
Logarithmic scale factor in units matching the coordinates.
Logarithmic scale factor.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
Returns
-------
lp : float
"""
if not self._logrmin < logRs < self._logrmax:
if not rmin < 10**logRs < rmax:
return - numpy.infty
return - self._logprior_volume
return 0.
def loglikelihood(self, logRs, use_jax=False):
def loglikelihood(self, logRs, r, rmin, rmax, npart):
"""
Logarithmic likelihood.
Parameters
----------
r : 1-dimensional array
Radial distance of particles used to fit the profile.
logRs : float
Logarithmic scale factor in units matching the coordinates.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
npart : int
Number of particles used to fit the profile.
Returns
-------
ll : float
"""
Rs = 10**logRs
log = jnumpy.log if use_jax else numpy.log
# Expected enclosed mass from a NFW
Mnfw = self.bounded_enclosed_mass(self.rmin, self.rmax,
Rs, 1, use_jax)
fsum = jnumpy.sum if use_jax else numpy.sum
ll = fsum(self.logprofile(self.r, Rs, 1, use_jax)) + self._ll0
return ll - self.Npart * log(Mnfw)
mnfw = self.bounded_mass(rmin, rmax, Rs, 1)
return numpy.sum(self._logprofile(r, Rs, 1)) - npart * numpy.log(mnfw)
@property
def initlogRs(self):
r"""
The most often occuring value of :math:`r` used as initial guess of
:math:`R_{\rm s}` since :math:`r^2 \rho(r)` peaks at
:math:`r = R_{\rm s}`.
Returns
-------
initlogRs : float
"""
bins = numpy.linspace(self.rmin, self.rmax,
self._binsguess)
counts, edges = numpy.histogram(self.r, bins)
return numpy.log10(edges[numpy.argmax(counts)])
def __call__(self, logRs, use_jax=False):
def __call__(self, logRs, r, rmin, rmax, npart):
"""
Logarithmic posterior. Sum of the logarithmic prior and likelihood.
@@ -422,83 +312,64 @@ class NFWPosterior(NFWProfile):
----------
logRs : float
Logarithmic scale factor in units matching the coordinates.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
r : 1-dimensional array
Radial distance of particles used to fit the profile.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
npart : int
Number of particles used to fit the profile.
Returns
-------
lpost : float
"""
lp = self.logprior(logRs)
lp = self.logprior(logRs, rmin, rmax)
if not numpy.isfinite(lp):
return - numpy.infty
return self.loglikelihood(logRs, use_jax) + lp
return self.loglikelihood(logRs, r, rmin, rmax, npart) + lp
def uncertainty_at_maxpost(self, logRs_max):
def fit(self, clump, eps=1e-4):
r"""
Calculate Gaussian approximation of the uncertainty at `logRs_max`, the
maximum a-posteriori estimate. This is the square root of the negative
inverse 2nd derivate of the logarithimic posterior with respect to the
logarithm of the scale factor. This is only valid `logRs_max` is the
maximum of the posterior!
Fit the NFW profile. If the fit does not converge, returns NaNs.
This uses `JAX`. The functions should be compiled but unless there is
a need for more speed this is fine as it is.
Parameters
----------
logRs_max : float
Position :math:`\log R_{\rm s}` to evaluate the uncertainty. Must
be the maximum.
Returns
-------
uncertainty : float
"""
def f(x):
return self(x, use_jax=True)
# Evaluate the second derivative
h = grad(grad(f))(logRs_max)
h = float(h)
if not h < 0:
return numpy.nan
return (- 1 / h)**0.5
def maxpost_logRs(self, calc_err=False, eps=1e-4):
r"""
Maximum a-posteriori estimate of the scale radius
:math:`\log R_{\rm s}`. Returns the scale radius if the fit converged,
otherwise `numpy.nan`. Checks whether
:math:`log r_{\rm max} / R_{\rm s} > \epsilon`, where
Checks whether :math:`\log r_{\rm max} / R_{\rm s} > \epsilon`,
to ensure that the scale radius is not too close to the boundary which
occurs if the fit fails.
Parameters
----------
calc_err : bool, optional
Optional toggle to calculate the uncertainty on the scale radius.
By default false.
clump : :py:class:`csiborgtools.fits.Clump`
Clump being fitted.
eps : float
Tolerance to ensure we are sufficiently far from math:`R_{200c}`.
Returns
-------
logRs: float
Log scale radius.
uncertainty : float
Uncertainty on the scale radius. Calculated following
`self.uncertainty_at_maxpost`.
Rs: float
Best fit scale radius.
rho0: float
Best fit NFW central density.
"""
assert isinstance(clump, Clump)
r = clump.r
rmin = numpy.min(r)
rmax, mtot = clump.spherical_overdensity_mass(200)
npart = numpy.sum((rmin <= r) & (r <= rmax))
# Loss function to optimize
def loss(logRs):
return - self(logRs)
return - self(logRs, r, rmin, rmax, npart)
res = minimize_scalar(loss, bounds=(self._logrmin, self._logrmax),
method='bounded')
res = minimize_scalar(
loss, bounds=(numpy.log10(rmin), numpy.log10(rmax)),
method='bounded')
if self._logrmax - res.x < eps:
if numpy.log10(rmax) - res.x < eps:
res.success = False
if not res.success:
return numpy.nan, numpy.nan
e_logRs = self.uncertainty_at_maxpost(res.x) if calc_err else numpy.nan
return res.x, e_logRs
rho0 = self.rho0_from_Rs(10**res.x, rmin, rmax, mtot)
return 10**res.x, rho0
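
As a quick cross-check of the refactored profile, the closed-form NFW enclosed mass and the integral constraint behind `rho0_from_Rs`, written standalone (a sketch under the same definitions as the docstrings above):

import numpy

def nfw_mass(r, Rs, rho0):
    # M(<r) = 4 pi rho0 Rs^3 * [ln(1 + x) - x / (1 + x)], with x = r / Rs.
    x = r / Rs
    return 4 * numpy.pi * rho0 * Rs**3 * (numpy.log(1 + x) - x / (1 + x))

def rho0_from_mass(mass, rmin, rmax, Rs):
    # The bounded mass is linear in rho0, so the constraint inverts directly.
    return mass / (nfw_mass(rmax, Rs, 1) - nfw_mass(rmin, Rs, 1))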

View File

@@ -12,9 +12,16 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import (RealisationsMatcher, cosine_similarity, # noqa
ParticleOverlap, get_clumplims, fill_delta, fill_delta_indxs, # noqa
calculate_overlap, calculate_overlap_indxs, # noqa
dist_centmass, dist_percentile) # noqa
from .num_density import (binned_counts, number_density) # noqa
from csiborgtools.match.match import ( # noqa
ParticleOverlap,
RealisationsMatcher,
calculate_overlap,
calculate_overlap_indxs,
cosine_similarity,
dist_centmass,
dist_percentile,
fill_delta,
fill_delta_indxs,
get_clumplims,
)
from csiborgtools.match.num_density import binned_counts, number_density # noqa

View File

@@ -15,14 +15,15 @@
"""
Support for matching halos between CSiBORG IC realisations.
"""
from datetime import datetime
from gc import collect
import numpy
from scipy.ndimage import gaussian_filter
from numba import jit
from tqdm import (tqdm, trange)
from ..read import concatenate_clumps
from ..utils import now
import numpy
from numba import jit
from scipy.ndimage import gaussian_filter
from tqdm import tqdm, trange
from .utils import concatenate_clumps
###############################################################################
# Realisations matcher for calculating overlaps #
@@ -47,24 +48,20 @@ class RealisationsMatcher:
The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`.
"""
_nmult = None
_dlogmass = None
_mass_kind = None
_overlapper = None
def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass",
overlapper_kwargs={}):
def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass"):
assert nmult > 0
assert dlogmass > 0
assert isinstance(mass_kind, str)
self._nmult = nmult
self._dlogmass = dlogmass
self._mass_kind = mass_kind
self._overlapper = ParticleOverlap(**overlapper_kwargs)
self._overlapper = ParticleOverlap()
@property
def nmult(self):
@@ -121,9 +118,9 @@
Parameters
----------
cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
cat0 : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.HaloCatalogue`
catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the cross simulation.
clumps0 : list of structured arrays
List of clump structured arrays of the reference simulation, keys
@@ -133,7 +130,7 @@
List of clump structured arrays of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
delta_bcgk : 3-dimensional array
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
@@ -153,7 +150,8 @@
Overlaps with the cross catalogue.
"""
# Query the KNN
verbose and print("{}: querying the KNN.".format(now()), flush=True)
verbose and print("{}: querying the KNN."
.format(datetime.now()), flush=True)
match_indxs = radius_neighbours(
catx.knn(select_initial=True), cat0.positions(in_initial=True),
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
@@ -229,7 +227,7 @@
List of clump structured arrays of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
delta_bcgk : 3-dimensional array
delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
@@ -582,7 +580,7 @@ class ParticleOverlap:
must include `x`, `y`, `z` and `M`.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta_bcgk : 3-dimensional array
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
@@ -735,7 +733,7 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
Density field of the second halo.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta_bcgk : 3-dimensional array
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross simulations
calculated with particles assigned to halos at the final snapshot.
Assumed to only be sampled in cells :math:`[512, 1536)^3`.
@@ -787,7 +785,7 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
Density field of the second halo.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta_bcgk : 3-dimensional array
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross simulations
calculated with particles assigned to halos at the final snapshot.
Assumed to only be sampled in cells :math:`[512, 1536)^3`.
@@ -876,8 +874,8 @@ def dist_percentile(dist, qs, distmax=0.075):
return x
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., enforce_in32=False,
verbose=True):
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.,
enforce_int32=False, verbose=True):
"""
Find all neighbours of a trained KNN model whose center of mass separation
is less than `nmult` times the sum of their respective radii.
@@ -922,7 +920,7 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., enforce_in32=False,
# so we take the first item where appropriate
mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
indxs[i] = indx[0][mask]
if enforce_in32:
if enforce_int32:
indxs[i] = indxs[i].astype(numpy.int32)
return numpy.asarray(indxs, dtype=object)
@@ -980,4 +978,4 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., enforce_in32=False,
# indxs[i, :] = sort
# sep[i, :] = dist[sort]
# return sep, indxs
# return sep, indxs
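
The building block behind `radius_neighbours` is sklearn's fixed-radius query; a minimal sketch of that call (the function above then filters by the sum of per-halo radii instead of a single number):

import numpy
from sklearn.neighbors import NearestNeighbors

gen = numpy.random.default_rng(0)
pos = gen.uniform(0, 1, (1000, 3))    # training positions
queries = gen.uniform(0, 1, (5, 3))   # query centres
knn = NearestNeighbors().fit(pos)
# All neighbours within a fixed radius of each query point.
dist, indx = knn.radius_neighbors(queries, radius=0.1, sort_results=True)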

View File

@@ -0,0 +1,55 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Useful functions."""
import numpy
def concatenate_clumps(clumps):
"""
Concatenate an array of clumps to a single array containing all particles.
Parameters
----------
clumps : list of structured arrays
List of clumps. Each element is a pair whose first entry is a structured
particle array with keys `['x', 'y', 'z', 'M']`.
Returns
-------
particles : structured array
"""
# Count how large array will be needed
N = 0
for clump, __ in clumps:
N += clump.size
# Infer dtype of positions
if clumps[0][0]['x'].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32
else:
posdtype = numpy.float32
# Pre-allocate array
dtype = {"names": ['x', 'y', 'z', 'M'],
"formats": [posdtype] * 3 + [numpy.float32]}
particles = numpy.full(N, numpy.nan, dtype)
# Fill it one clump by another
start = 0
for clump, __ in clumps:
end = start + clump.size
for p in ('x', 'y', 'z', 'M'):
particles[p][start:end] = clump[p]
start = end
return particles
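
A hypothetical usage of `concatenate_clumps`, matching the `(particle array, extra)` pairs that the loops above unpack:

import numpy

dtype = {"names": ['x', 'y', 'z', 'M'], "formats": [numpy.float32] * 4}
c1, c2 = numpy.zeros(5, dtype=dtype), numpy.zeros(3, dtype=dtype)
parts = concatenate_clumps([(c1, None), (c2, None)])
assert parts.size == 8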

View File

@@ -12,13 +12,18 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa
from .halo_cat import (HaloCatalogue, concatenate_clumps) # noqa
from .obs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups, SDSS) # noqa
from .outsim import (dump_split, combine_splits) # noqa
from .overlap_summary import (PairOverlap, NPairsOverlap, binned_resample_mean) # noqa
from .halo_cat import ClumpsCatalogue, HaloCatalogue # noqa
from .knn_summary import kNNCDFReader # noqa
from .obs import ( # noqa
SDSS,
MCXCClusters,
PlanckClusters,
TwoMPPGalaxies,
TwoMPPGroups,
)
from .outsim import combine_splits, dump_split # noqa
from .overlap_summary import NPairsOverlap, PairOverlap, binned_resample_mean # noqa
from .paths import CSiBORGPaths # noqa
from .pk_summary import PKReader # noqa
from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm # noqa
from .tpcf_summary import TPCFReader # noqa

View File

@@ -16,17 +16,17 @@
Simulation box unit transformations.
"""
import numpy
from scipy.interpolate import interp1d
from astropy import constants, units
from astropy.cosmology import LambdaCDM
from astropy import (constants, units)
from ..read import ParticleReader
from scipy.interpolate import interp1d
from .readsim import ParticleReader
# Map of unit conversions
CONV_NAME = {
"length": ["peak_x", "peak_y", "peak_z", "Rs", "rmin", "rmax", "r200",
"r500", "x0", "y0", "z0", "lagpatch"],
"mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain"],
"length": ['x', 'y', 'z', "peak_x", "peak_y", "peak_z", "Rs", "rmin",
"rmax", "r200", "r500", "x0", "y0", "z0", "lagpatch"],
"mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain", 'M'],
"density": ["rho0"]
}
@@ -44,6 +44,7 @@ class BoxUnits:
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
"""
_name = "box_units"
_cosmo = None
def __init__(self, nsnap, nsim, paths):
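
A hypothetical helper showing how a name-to-conversion map like `CONV_NAME` is typically consumed (illustrative only; not a function from this diff):

def conversion_kind(name, conv_name=CONV_NAME):
    # Return which unit conversion applies to a catalogue column, if any.
    for kind, names in conv_name.items():
        if name in names:
            return kind
    return None

assert conversion_kind("m200") == "mass"
assert conversion_kind("rho0") == "density"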

View File

@ -12,54 +12,75 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Functions to read in the particle and clump files.
"""
"""CSiBORG halo catalogue."""
from abc import ABC
import numpy
from os.path import join
from sklearn.neighbors import NearestNeighbors
from .readsim import (CSiBORGPaths, read_mmain, read_initcm)
from ..utils import (flip_cols, add_columns)
from ..units import (BoxUnits, cartesian_to_radec)
from .box_units import BoxUnits
from .paths import CSiBORGPaths
from .readsim import ParticleReader, read_initcm
from .utils import add_columns, cartesian_to_radec, flip_cols
class HaloCatalogue:
r"""
Processed halo catalogue; the data should be calculated in `run_fit_halos`.
Parameters
----------
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
min_mass : float, optional
The minimum :math:`M_{\rm tot} / M_\odot` mass. By default no threshold.
max_dist : float, optional
The maximum comoving distance of a halo. By default no upper limit.
load_init : bool, optional
Whether to load the initial snapshot information. By default False.
class BaseCatalogue(ABC):
"""
Base (sub)halo catalogue.
"""
_data = None
_paths = None
_nsim = None
_selmask = None
def __init__(self, nsim, paths, min_mass=None, max_dist=None,
load_init=False):
assert isinstance(paths, CSiBORGPaths)
self._nsim = nsim
self._paths = paths
self._set_data(min_mass, max_dist, load_init)
@property
def nsim(self):
"""
The IC realisation index.
Returns
-------
nsim : int
"""
if self._nsim is None:
raise RuntimeError("`nsim` is not set!")
return self._nsim
@nsim.setter
def nsim(self, nsim):
assert isinstance(nsim, int)
self._nsim = nsim
@property
def paths(self):
"""
CSiBORG paths manager.
Returns
-------
paths : :py:class:`csiborgtools.read.CSiBORGPaths`
"""
if self._paths is None:
raise RuntimeError("`paths` is not set!")
return self._paths
@paths.setter
def paths(self, paths):
assert isinstance(paths, CSiBORGPaths)
self._paths = paths
@property
def data(self):
"""
The catalogue.
Returns
-------
data : structured array
"""
if self._data is None:
raise RuntimeError("Catalogue data not loaded!")
return self._data
@property
def nsnap(self):
"""
@ -83,163 +104,51 @@ class HaloCatalogue:
"""
return BoxUnits(self.nsnap, self.nsim, self.paths)
@property
def data(self):
"""
Halo catalogue.
@box.setter
def box(self, box):
try:
assert box._name == "box_units"
self._box = box
except AttributeError as err:
raise TypeError from err
Returns
-------
cat : structured array
"""
return self._data
def _set_data(self, min_mass, max_dist, load_init):
"""
Loads the data, merges with mmain, does various coordinate transforms.
"""
# Load the processed data
data = numpy.load(self.paths.hcat_path(self.nsim))
# Load the mmain file and add it to the data
mmain = read_mmain(self.nsim, self.paths.mmain_path)
data = self.merge_mmain_to_clumps(data, mmain)
flip_cols(data, "peak_x", "peak_z")
# Cut on number of particles and finite m200. Do not change! Hardcoded
data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
# Now also load the initial positions
if load_init:
initcm = read_initcm(self.nsim, self.paths.initmatch_path)
if initcm is not None:
data = self.merge_initmatch_to_clumps(data, initcm)
flip_cols(data, "x0", "z0")
# # Calculate redshift
# pos = [data["peak_{}".format(p)] - 0.5 for p in ("x", "y", "z")]
# vel = [data["v{}".format(p)] for p in ("x", "y", "z")]
# zpec = self.box.box2pecredshift(*vel, *pos)
# zobs = self.box.box2obsredshift(*vel, *pos)
# zcosmo = self.box.box2cosmoredshift(
# sum(pos[i]**2 for i in range(3))**0.5)
# data = add_columns(data, [zpec, zobs, zcosmo],
# ["zpec", "zobs", "zcosmo"])
# Unit conversion
convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
"r200", "r500", "Rs", "rho0",
"peak_x", "peak_y", "peak_z"]
data = self.box.convert_from_boxunits(data, convert_cols)
# Now calculate spherical coordinates
d, ra, dec = cartesian_to_radec(
data["peak_x"], data["peak_y"], data["peak_z"])
data = add_columns(data, [d, ra, dec], ["dist", "ra", "dec"])
# And do the unit transform
if load_init and initcm is not None:
data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "lagpatch"])
# Convert all that is not an integer to float32
names = list(data.dtype.names)
formats = []
for name in names:
if data[name].dtype.char in numpy.typecodes["AllInteger"]:
formats.append(numpy.int32)
else:
formats.append(numpy.float32)
dtype = numpy.dtype({"names": names, "formats": formats})
# Apply cuts on distance and total particle mass if any
data = data[data["dist"] < max_dist] if max_dist is not None else data
data = (data[data["totpartmass"] > min_mass]
if min_mass is not None else data)
self._data = data.astype(dtype)
def merge_mmain_to_clumps(self, clumps, mmain):
"""
Merge columns from the `mmain` files into the `clump` file, matching them
by their halo index and assuming that the `index` columns in both
arrays are sorted.
Parameters
----------
clumps : structured array
Clumps structured array.
mmain : structured array
Parent halo array whose information is to be merged into `clumps`.
Returns
-------
out : structured array
Array with added columns.
"""
X = numpy.full((clumps.size, 2), numpy.nan)
# Mask of which clumps have a mmain index
mask = numpy.isin(clumps["index"], mmain["index"])
X[mask, 0] = mmain["mass_cl"]
X[mask, 1] = mmain["sub_frac"]
return add_columns(clumps, X, ["mass_mmain", "sub_frac"])
def merge_initmatch_to_clumps(self, clumps, initcat):
"""
Merge columns from the `init_cm` files to the `clump` file.
Parameters
----------
clumps : structured array
Clumps structured array.
initcat : structured array
Catalog with the clumps initial centre of mass at z = 70.
Returns
-------
out : structured array
"""
# There are more initcat clumps, so check which ones have z = 0
# and then downsample
mask = numpy.isin(initcat["ID"], clumps["index"])
initcat = initcat[mask]
# Now the index ordering should match
if not numpy.alltrue(initcat["ID"] == clumps["index"]):
raise ValueError(
"Ordering of `initcat` and `clumps` is inconsistent.")
X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z', "lagpatch"]):
X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0", "lagpatch"])
def positions(self, in_initial=False):
def position(self, in_initial=False, cartesian=True):
r"""
Cartesian position components of halos in :math:`\mathrm{cMpc}`.
Position components. If Cartesian, then in :math:`\mathrm{cMpc}`. If
spherical, then radius is in :math:`\mathrm{cMpc}`, RA in
:math:`[0, 360)` degrees and DEC in :math:`[-90, 90]` degrees. Note
that the position is defined as the minimum of the gravitational
potential.
Parameters
----------
in_initial : bool, optional
Whether to define the kNN on the initial or final snapshot.
Whether to return the initial snapshot positions.
cartesian : bool, optional
Whether to return the Cartesian or spherical position components.
By default Cartesian.
Returns
-------
pos : 2-dimensional array of shape `(nhalos, 3)`
pos : 2-dimensional array of shape `(nobjects, 3)`
"""
if in_initial:
ps = ["x0", "y0", "z0"]
ps = ['x0', 'y0', 'z0']
else:
ps = ["peak_x", "peak_y", "peak_z"]
return numpy.vstack([self[p] for p in ps]).T
ps = ['x', 'y', 'z']
pos = [self[p] for p in ps]
if cartesian:
return numpy.vstack(pos).T
else:
return numpy.vstack([cartesian_to_radec(*pos)]).T
def velocities(self):
def velocity(self):
"""
Cartesian velocity components of halos. Likely in box units.
Cartesian velocity components in box units.
Returns
-------
vel : 2-dimensional array of shape `(nhalos, 3)`
vel : 2-dimensional array of shape `(nobjects, 3)`
"""
return numpy.vstack([self["v{}".format(p)] for p in ("x", "y", "z")]).T
@ -250,13 +159,13 @@ class HaloCatalogue:
Returns
-------
angmom : 2-dimensional array of shape `(nhalos, 3)`
angmom : 2-dimensional array of shape `(nobjects, 3)`
"""
return numpy.vstack([self["L{}".format(p)] for p in ("x", "y", "z")]).T
def knn(self, in_initial):
"""
kNN object of all halo positions.
kNN object fitted on all catalogue objects.
Parameters
----------
@ -307,50 +216,205 @@ class HaloCatalogue:
initpars = ["x0", "y0", "z0"]
if key in initpars and key not in self.keys:
raise RuntimeError("Initial positions are not set!")
return self._data[key]
return self.data[key]
def __len__(self):
return self.data.size
###############################################################################
# Useful functions #
###############################################################################
class ClumpsCatalogue(BaseCatalogue):
r"""
Clumps catalogue, defined in the final snapshot.
def concatenate_clumps(clumps):
"""
Concatenate an array of clumps to a single array containing all particles.
TODO:
Add fitted quantities.
Add threshold on number of particles.
Parameters
----------
clumps : list of structured arrays
Returns
-------
particles : structured array
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
maxdist : float, optional
The maximum comoving distance of a halo. By default
:math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`,
which corresponds to the high-resolution region.
"""
# Count how large array will be needed
N = 0
for clump, __ in clumps:
N += clump.size
# Infer dtype of positions
if clumps[0][0]['x'].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32
else:
posdtype = numpy.float32
def __init__(self, nsim, paths, maxdist=155.5 / 0.705):
self.nsim = nsim
self.paths = paths
# Read in the clumps from the final snapshot
partreader = ParticleReader(self.paths)
cols = ["index", "parent", 'x', 'y', 'z', "mass_cl"]
data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols)
# Overwrite the parent with the ultimate parent
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
data["parent"] = mmain["ultimate_parent"]
# Pre-allocate array
dtype = {"names": ['x', 'y', 'z', 'M'],
"formats": [posdtype] * 3 + [numpy.float32]}
particles = numpy.full(N, numpy.nan, dtype)
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(data, "x", "z")
for p in ('x', 'y', 'z'):
data[p] -= 0.5
data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', "mass_cl"])
# Fill it one clump by another
start = 0
for clump, __ in clumps:
end = start + clump.size
for p in ('x', 'y', 'z', 'M'):
particles[p][start:end] = clump[p]
start = end
mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist
self._data = data[mask]
return particles
@property
def ismain(self):
"""
Whether the clump is a main halo.
Returns
-------
ismain : 1-dimensional array
"""
return self["index"] == self["parent"]
def _set_data(self, min_mass, max_dist, load_init):
"""
TODO: old, later remove.
Loads the data, merges with mmain, does various coordinate transforms.
"""
# Load the processed data
data = numpy.load(self.paths.hcat_path(self.nsim))
# Load the mmain file and add it to the data
# TODO: read the mmain here
# mmain = read_mmain(self.nsim, self.paths.mmain_dir)
# data = self.merge_mmain_to_clumps(data, mmain)
flip_cols(data, "peak_x", "peak_z")
# Cut on number of particles and finite m200. Do not change! Hardcoded
data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
# Now also load the initial positions
if load_init:
initcm = read_initcm(self.nsim,
self.paths.initmatch_path(self.nsim, "cm"))
if initcm is not None:
data = self.merge_initmatch_to_clumps(data, initcm)
flip_cols(data, "x0", "z0")
# Unit conversion
convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
"r200", "r500", "Rs", "rho0",
"peak_x", "peak_y", "peak_z"]
data = self.box.convert_from_boxunits(data, convert_cols)
# And do the unit transform
if load_init and initcm is not None:
data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "lagpatch"])
# Convert all that is not an integer to float32
names = list(data.dtype.names)
formats = []
for name in names:
if data[name].dtype.char in numpy.typecodes["AllInteger"]:
formats.append(numpy.int32)
else:
formats.append(numpy.float32)
dtype = numpy.dtype({"names": names, "formats": formats})
# Apply cuts on distance and total particle mass if any
data = data[data["dist"] < max_dist] if max_dist is not None else data
data = (data[data["totpartmass"] > min_mass]
if min_mass is not None else data)
self._data = data.astype(dtype)
def merge_mmain_to_clumps(self, clumps, mmain):
"""
TODO: old, later remove.
Merge columns from the `mmain` files into the `clump` file, matching them
by their halo index and assuming that the `index` columns in both
arrays are sorted.
Parameters
----------
clumps : structured array
Clumps structured array.
mmain : structured array
Parent halo array whose information is to be merged into `clumps`.
Returns
-------
out : structured array
Array with added columns.
"""
X = numpy.full((clumps.size, 2), numpy.nan)
# Mask of which clumps have a mmain index
mask = numpy.isin(clumps["index"], mmain["index"])
X[mask, 0] = mmain["mass_cl"]
X[mask, 1] = mmain["sub_frac"]
return add_columns(clumps, X, ["mass_mmain", "sub_frac"])
def merge_initmatch_to_clumps(self, clumps, initcat):
"""
TODO: old, later remove.
Merge columns from the `init_cm` files to the `clump` file.
Parameters
----------
clumps : structured array
Clumps structured array.
initcat : structured array
Catalog with the clumps initial centre of mass at z = 70.
Returns
-------
out : structured array
"""
# There are more initcat clumps, so check which ones have z = 0
# and then downsample
mask = numpy.isin(initcat["ID"], clumps["index"])
initcat = initcat[mask]
# Now the index ordering should match
if not numpy.alltrue(initcat["ID"] == clumps["index"]):
raise ValueError(
"Ordering of `initcat` and `clumps` is inconsistent.")
X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z', "lagpatch"]):
X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0", "lagpatch"])
class HaloCatalogue(BaseCatalogue):
r"""
Halo catalogue, i.e. parent halos with summed substructure, defined in the
final snapshot.
TODO:
Add the fitted quantities.
Add threshold on number of particles.
Parameters
----------
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
maxdist : float, optional
The maximum comoving distance of a halo. By default
:math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`,
which corresponds to the high-resolution region.
"""
def __init__(self, nsim, paths, maxdist=155.5 / 0.705):
self.nsim = nsim
self.paths = paths
# Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
data = mmain["mmain"]
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(data, "x", "z")
for p in ('x', 'y', 'z'):
data[p] -= 0.5
data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', 'M'])
mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist
self._data = data[mask]
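# Illustrative sketch (assumes `paths` is configured and the mmain files
# exist on disk): positions come out in cMpc and obey the distance cut.
cat = HaloCatalogue(7444, paths)
pos = cat.position(cartesian=True)          # shape (nobjects, 3)
assert numpy.all(numpy.linalg.norm(pos, axis=1) < 155.5 / 0.705)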

View File

@ -13,18 +13,41 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""kNN-CDF reader."""
from os.path import join
from glob import glob
import joblib
import numpy
from scipy.special import factorial
import joblib
class kNNCDFReader:
"""
Shortcut object to read in the kNN CDF data.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
"""
def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
_paths = None
def __init__(self, paths):
self.paths = paths
@property
def paths(self):
"""
Paths manager.
Returns
-------
paths : :py:class:`csiborgtools.read.CSiBORGPaths`
"""
return self._paths
@paths.setter
def paths(self, paths):
# assert isinstance(paths, CSiBORGPaths) # REMOVE
self._paths = paths
def read(self, run, kind, rmin=None, rmax=None, to_clip=True):
"""
Read the auto- or cross-correlation kNN-CDF data for a given run.
@ -33,8 +56,8 @@ class kNNCDFReader:
----------
run : str
Run ID to read in.
folder : str
Path to the folder where the auto-correlation kNN-CDF is stored.
kind : str
Type of correlation. Can be either `auto` or `cross`.
rmin : float, optional
Minimum separation. By default ignored.
rmax : float, optional
@ -50,10 +73,13 @@ class kNNCDFReader:
out : 3-dimensional array of shape `(len(files), len(ks), neval)`
Array of CDFs or cross-correlations.
"""
run += ".p"
files = [f for f in glob(join(folder, "*")) if run in f]
assert kind in ["auto", "cross"]
if kind == "auto":
files = self.paths.knnauto_path(run)
else:
files = self.paths.knncross_path(run)
if len(files) == 0:
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
raise RuntimeError("No files found for run `{}`.".format(run))
for i, file in enumerate(files):
data = joblib.load(file)
@ -200,22 +226,3 @@ class kNNCDFReader:
"""
V = 4 * numpy.pi / 3 * rs**3
return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
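# The expression above implements the Poisson counting probability
# P(k) = (n V)^k exp(-n V) / k! with V = 4 pi r^3 / 3. A quick sanity
# check with made-up numbers that the distribution is normalised:
import numpy
from scipy.special import factorial

ndensity, rs = 1e-3, 5.0                    # hypothetical density and radius
V = 4 * numpy.pi / 3 * rs**3
ks = numpy.arange(64)
probs = (ndensity * V)**ks / factorial(ks) * numpy.exp(-ndensity * V)
assert numpy.isclose(probs.sum(), 1.0)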
@staticmethod
def cross_files(ic, folder):
"""
Return the file paths corresponding to the cross-correlation of a given
IC.
Parameters
----------
ic : int
The desired IC.
folder : str
The folder containing the cross-correlation files.
Returns
-------
filepath : list of str
"""
return [file for file in glob(join(folder, "*")) if str(ic) in file]

View File

@ -18,13 +18,14 @@ Scripts to read in observation.
from abc import ABC, abstractproperty
from os.path import join
from warnings import warn
import numpy
from scipy import constants
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy import units
from ..utils import (cols_to_structured)
import numpy
from astropy import units
from astropy.coordinates import SkyCoord
from astropy.io import fits
from scipy import constants
from .utils import cols_to_structured
###############################################################################
# Text survey base class #
@ -101,10 +102,8 @@ class TwoMPPGalaxies(TextSurvey):
self._set_data(fpath)
def _set_data(self, fpath):
"""
Set the catalogue
"""
from scipy.constants import c
# Read the catalogue and select non-fake galaxies
cat = numpy.genfromtxt(fpath, delimiter="|", )
cat = cat[cat[:, 12] == 0, :]
@ -151,9 +150,6 @@ class TwoMPPGroups(TextSurvey):
self._set_data(fpath)
def _set_data(self, fpath):
"""
Set the catalogue
"""
cat = numpy.genfromtxt(fpath, delimiter="|", )
# Pre-allocate and fill the array
cols = [("RA", numpy.float64), ("DEC", numpy.float64),
@ -218,13 +214,12 @@ class FitsSurvey(ABC):
@h.setter
def h(self, h):
"""Sets the little h."""
self._h = h
@staticmethod
def _check_in_list(member, members, kind):
"""
Checks that `member` is a member of a list `members`, `kind` is a
Check that `member` is a member of a list `members`, `kind` is a
member type name.
"""
if member not in members:
@ -247,7 +242,7 @@ class FitsSurvey(ABC):
@abstractproperty
def size(self):
"""
Number of samples in the catalogue.
Return the number of samples in the catalogue.
Returns
-------
@ -274,7 +269,7 @@ class FitsSurvey(ABC):
@selection_mask.setter
def selection_mask(self, mask):
"""Sets the selection mask."""
"""Set the selection mask."""
if not (isinstance(mask, numpy.ndarray)
and mask.ndim == 1
and mask.dtype == bool):
@ -311,6 +306,7 @@ class FitsSurvey(ABC):
Parameters
----------
key : str
FITS key.
Returns
-------
@ -331,7 +327,7 @@ class FitsSurvey(ABC):
def make_mask(self, steps):
"""
Make a survey mask from a series of steps. Expected to look e.g. like
Make a survey mask from a series of steps, expected to look as below.
```
def steps(cls):
@ -343,6 +339,7 @@ class FitsSurvey(ABC):
Parameters
----------
steps : list of steps
Selection steps.
Returns
-------
@ -359,20 +356,17 @@ class FitsSurvey(ABC):
return out
def __getitem__(self, key):
"""
Return values for this `key`. If in both return from `routine_keys`.
"""
# Check duplicates
if key in self.routine_keys and key in self.fits_keys:
warn("Key `{}` found in both `routine_keys` and `fits_keys`. "
"Returning `routine_keys` value.".format(key), UserWarning)
"Returning `routine_keys` value.".format(key), stacklevel=1)
if key in self.routine_keys:
func, args = self.routines[key]
out = func(*args)
elif key in self.fits_keys:
warn("Returning a FITS property. Be careful about little h!",
UserWarning)
stacklevel=1)
out = self.get_fitsitem(key)
else:
raise KeyError("Unrecognised key `{}`.".format(key))
@ -541,7 +535,7 @@ class MCXCClusters(FitsSurvey):
return self.get_fitsitem(key) * 1e14 * (self._hdata / self.h)**2
def _lum(self, key):
"""Get luminosity. Puts back units to be in ergs/s"""
"""Get luminosity, puts back units to be in ergs/s."""
return self.get_fitsitem(key) * 1e44 * (self._hdata / self.h)**2
###############################################################################
@ -669,14 +663,14 @@ class SDSS(FitsSurvey):
return self._absmag(photo, band1) - self._absmag(photo, band2)
def _dist(self):
"""
Get the corresponding distance estimate from `ZDIST`, which is defined
as:
r"""
Get the corresponding distance estimate from `ZDIST`, defined as below.
"Distance estimate using pecular velocity model of Willick et al.
(1997), expressed as a redshift equivalent; multiply by c/H0 for
Mpc"
Converts little h.
Distance is converted to :math:`h \neq 1` units.
"""
return self.get_fitsitem("ZDIST") * constants.c * 1e-3 / (100 * self.h)

View File

@ -15,9 +15,10 @@
"""
I/O functions for analysing the CSiBORG realisations.
"""
import numpy
from os.path import join
from os import remove
from os.path import join
import numpy
from tqdm import trange

View File

@ -15,7 +15,8 @@
"""
Tools for summarising various results.
"""
from os.path import (join, isfile)
from os.path import isfile, join
import numpy
from tqdm import tqdm
@ -26,7 +27,7 @@ class PairOverlap:
Parameters
----------
cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue`
cat0, catx: :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogues corresponding to the reference and cross
simulations.
fskel : str, optional
@ -121,9 +122,9 @@ class PairOverlap:
inv_ngp_overlap = [[] for __ in range(cross_size)]
inv_smoothed_overlap = [[] for __ in range(cross_size)]
for ref_id in range(match_indxs.size):
for cross_id, ngp_cross, smoothed_cross in zip(match_indxs[ref_id],
ngp_overlap[ref_id],
smoothed_overlap[ref_id]): # noqa
iters = zip(match_indxs[ref_id], ngp_overlap[ref_id],
smoothed_overlap[ref_id], strict=True)
for cross_id, ngp_cross, smoothed_cross in iters:
inv_match_indxs[cross_id].append(ref_id)
inv_ngp_overlap[cross_id].append(ngp_cross)
inv_smoothed_overlap[cross_id].append(smoothed_cross)
@ -198,8 +199,8 @@ class PairOverlap:
def summed_overlap(self, from_smoothed):
"""
Summed overlap of each halo in the reference simulation with the cross
simulation.
Calculate summed overlap of each halo in the reference simulation with
the cross simulation.
Parameters
----------
@ -319,7 +320,7 @@ class PairOverlap:
simulation from the crossed simulation.
Parameters
-----------
----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
overlap_threshold : float, optional
@ -408,7 +409,7 @@ class PairOverlap:
Returns
-------
out : :py:class:`csiborgtools.read.HaloCatalogue` or array
out : :py:class:`csiborgtools.read.ClumpsCatalogue` or array
"""
if key is None:
return self._cat0
@ -429,7 +430,7 @@ class PairOverlap:
Returns
-------
out : :py:class:`csiborgtools.read.HaloCatalogue` or array
out : :py:class:`csiborgtools.read.ClumpsCatalogue` or array
"""
if key is None:
return self._catx
@ -456,9 +457,9 @@ class NPairsOverlap:
Parameters
----------
cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
cat0 : :py:class:`csiborgtools.read.ClumpsCatalogue`
Reference simulation halo catalogue.
catxs : list of :py:class:`csiborgtools.read.HaloCatalogue`
catxs : list of :py:class:`csiborgtools.read.ClumpsCatalogue`
List of cross simulation halo catalogues.
fskel : str, optional
Path to the overlap. By default `None`, i.e.
@ -478,8 +479,8 @@ class NPairsOverlap:
def summed_overlap(self, from_smoothed, verbose=False):
"""
Summed overlap of each halo in the reference simulation with the cross
simulations.
Calculate summed overlap of each halo in the reference simulation with
the cross simulations.
Parameters
----------
@ -526,7 +527,7 @@ class NPairsOverlap:
simulation from the crossed simulation.
Parameters
-----------
----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
overlap_threshold : float, optional

366
csiborgtools/read/paths.py Normal file
View File

@ -0,0 +1,366 @@
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""CSiBORG paths manager."""
from glob import glob
from os import makedirs, mkdir
from os.path import isdir, join
from warnings import warn
import numpy
class CSiBORGPaths:
"""
Paths manager for CSiBORG IC realisations.
Parameters
----------
srcdir : str
Path to the folder where the RAMSES outputs are stored.
postdir: str
Path to the folder where post-processed files are stored.
"""
_srcdir = None
_postdir = None
def __init__(self, srcdir=None, postdir=None):
self.srcdir = srcdir
self.postdir = postdir
@staticmethod
def _check_directory(path):
if not isdir(path):
raise IOError("Invalid directory `{}`!".format(path))
@property
def srcdir(self):
"""
Path to the folder where CSiBORG simulations are stored.
Returns
-------
path : str
"""
if self._srcdir is None:
raise ValueError("`srcdir` is not set!")
return self._srcdir
@srcdir.setter
def srcdir(self, path):
if path is None:
return
self._check_directory(path)
self._srcdir = path
@property
def postdir(self):
"""
Path to the folder where post-processed files are stored.
Returns
-------
path : str
"""
if self._postdir is None:
raise ValueError("`postdir` is not set!")
return self._postdir
@postdir.setter
def postdir(self, path):
if path is None:
return
self._check_directory(path)
self._postdir = path
@property
def temp_dumpdir(self):
"""
Path to a temporary dumping folder.
Returns
-------
path : str
"""
fpath = join(self.postdir, "temp")
if not isdir(fpath):
mkdir(fpath)
warn("Created directory `{}`.".format(fpath), UserWarning,
stacklevel=1)
return fpath
def mmain_path(self, nsnap, nsim):
"""
Path to the `mmain` summed substructure files.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "mmain")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
return join(
fdir,
"mmain_{}_{}.npz".format(str(nsim).zfill(5), str(nsnap).zfill(5))
)
def initmatch_path(self, nsim, kind):
"""
Path to the `initmatch` files where the clump match between the
initial and final snapshot is stored.
Parameters
----------
nsim : int
IC realisation index.
kind : str
Type of match. Can be either `cm` or `particles`.
Returns
-------
path : str
"""
assert kind in ["cm", "particles"]
fdir = join(self.postdir, "initmatch")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
return join(fdir, "{}_{}.npy".format(kind, str(nsim).zfill(5)))
def split_path(self, nsnap, nsim):
"""
Path to the `split` files from `pre_splithalos`.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "split")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
return join(fdir, "clumps_{}_{}.npz"
.format(str(nsim).zfill(5), str(nsnap).zfill(5)))
def get_ics(self, tonew):
"""
Get CSiBORG IC realisation IDs from the list of folders in
`self.srcdir`.
Parameters
----------
tonew : bool
If `True`, path to the '_new' ICs is returned.
Returns
-------
ids : 1-dimensional array
"""
files = glob(join(self.srcdir, "ramses_out*"))
files = [f.split("/")[-1] for f in files] # Select only file names
if tonew:
files = [f for f in files if "_new" in f]
ids = [int(f.split("_")[2]) for f in files] # Take the IC IDs
else:
files = [f for f in files if "_inv" not in f] # Remove inv. ICs
files = [f for f in files if "_new" not in f] # Remove _new
files = [f for f in files if "OLD" not in f] # Remove _old
ids = [int(f.split("_")[-1]) for f in files]
try:
ids.remove(5511)
except ValueError:
pass
return numpy.sort(ids)
def ic_path(self, nsim, tonew=False):
"""
Path to a CSiBORG IC realisation folder.
Parameters
----------
nsim : int
IC realisation index.
tonew : bool, optional
Whether to return the path to the '_new' IC realisation.
Returns
-------
path : str
"""
fname = "ramses_out_{}"
if tonew:
fname += "_new"
return join(self.srcdir, fname.format(nsim))
def get_snapshots(self, nsim):
"""
List of available snapshots of a CSiBORG IC realisation.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
snapshots : 1-dimensional array
"""
simpath = self.ic_path(nsim, tonew=False)
# Get all files in simpath that start with output_
snaps = glob(join(simpath, "output_*"))
# Take just the last _00XXXX from each file and strip zeros
snaps = [int(snap.split('_')[-1].lstrip('0')) for snap in snaps]
return numpy.sort(snaps)
def snapshot_path(self, nsnap, nsim):
"""
Path to a CSiBORG IC realisation snapshot.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
snappath : str
"""
tonew = nsnap == 1
simpath = self.ic_path(nsim, tonew=tonew)
return join(simpath, "output_{}".format(str(nsnap).zfill(5)))
def hcat_path(self, nsim):
"""
Path to the final snapshot halo catalogue from `fit_halos.py`.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
path : str
"""
nsnap = str(max(self.get_snapshots(nsim))).zfill(5)
fname = "ramses_out_{}_{}.npy".format(str(self.nsim).zfill(5), nsnap)
return join(self.postdir, fname)
def knnauto_path(self, run, nsim=None):
"""
Path to the `knn` auto-correlation files. If `nsim` is not specified,
a list of files for this run across all available simulations is returned.
Parameters
----------
run : str
Type of run.
nsim : int, optional
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "knn", "auto")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
if nsim is not None:
return join(fdir, "knncdf_{}_{}.p".format(str(nsim).zfill(5), run))
files = glob(join(fdir, "knncdf*"))
run = "__" + run
return [f for f in files if run in f]
def knncross_path(self, run, nsims=None):
"""
Path to the `knn` cross-correlation files. If `nsims` is not specified,
a list of files for this run across all available simulations is returned.
Parameters
----------
run : str
Type of run.
nsims : len-2 tuple of int, optional
IC realisation indices.
Returns
-------
path : str
"""
fdir = join(self.postdir, "knn", "cross")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
if nsims is not None:
assert isinstance(nsims, (list, tuple)) and len(nsims) == 2
nsim0 = str(nsims[0]).zfill(5)
nsimx = str(nsims[1]).zfill(5)
return join(fdir, "knncdf_{}_{}__{}.p".format(nsim0, nsimx, run))
files = glob(join(fdir, "knncdf*"))
run = "__" + run
return [f for f in files if run in f]
def tpcfauto_path(self, run, nsim=None):
"""
Path to the `tpcf` auto-correlation files. If `nsim` is not specified,
a list of files for this run across all available simulations is returned.
Parameters
----------
run : str
Type of run.
nsim : int, optional
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "tpcf", "auto")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
if nsim is not None:
return join(fdir, "tpcf{}_{}.p".format(str(nsim).zfill(5), run))
files = glob(join(fdir, "tpcf*"))
run = "__" + run
return [f for f in files if run in f]
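# Illustrative configuration sketch; the directories below are placeholders
# and must point at an existing CSiBORG layout.
paths = CSiBORGPaths(srcdir="/path/to/ramses_outputs",
                     postdir="/path/to/postprocessed")
nsim = paths.get_ics(tonew=False)[0]        # first available IC realisation
nsnap = max(paths.get_snapshots(nsim))      # its final snapshot
print(paths.mmain_path(nsnap, nsim))        # where the mmain file lives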

View File

@ -13,8 +13,8 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Power spectrum reader."""
import numpy
import joblib
import numpy
from tqdm import tqdm
@ -24,7 +24,7 @@ class PKReader:
Parameters
----------
ic_ids : list of int
get_ics : list of int
IC IDs to be read.
hw : float
Box half-width.
@ -35,8 +35,8 @@ class PKReader:
dtype : dtype, optional
Output precision. By default `numpy.float32`.
"""
def __init__(self, ic_ids, hw, fskel=None, dtype=numpy.float32):
self.ic_ids = ic_ids
def __init__(self, get_ics, hw, fskel=None, dtype=numpy.float32):
self.get_ics = get_ics
self.hw = hw
if fskel is None:
fskel = "/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p"
@ -46,7 +46,7 @@ class PKReader:
@staticmethod
def _set_klim(kmin, kmax):
"""
Sets limits on the wavenumber to 0 and infinity if `None`s provided.
Set limits on the wavenumber to 0 and infinity if `None`s provided.
"""
if kmin is None:
kmin = 0
@ -62,27 +62,27 @@ class PKReader:
----------
kmin : float, optional
The minimum wavenumber. By default `None`, i.e. 0.
kmin : float, optional
kmax : float, optional
The maximum wavenumber. By default `None`, i.e. infinity.
Returns
-------
ks : 1-dimensional array
Array of wavenumbers.
pks : 2-dimensional array of shape `(len(self.ic_ids), ks.size)`
pks : 2-dimensional array of shape `(len(self.get_ics), ks.size)`
Autocorrelation of each simulation.
"""
kmin, kmax = self._set_klim(kmin, kmax)
ks, pks, sel = None, None, None
for i, nsim in enumerate(self.ic_ids):
for i, nsim in enumerate(self.get_ics):
pk = joblib.load(self.fskel.format(nsim, nsim, self.hw))
# Get cuts and pre-allocate arrays
if i == 0:
x = pk.k3D
sel = (kmin < x) & (x < kmax)
ks = x[sel].astype(self.dtype)
pks = numpy.full((len(self.ic_ids), numpy.sum(sel)), numpy.nan,
dtype=self.dtype)
pks = numpy.full((len(self.get_ics), numpy.sum(sel)),
numpy.nan, dtype=self.dtype)
pks[i, :] = pk.Pk[sel, 0, 0]
return ks, pks
@ -99,7 +99,7 @@ class PKReader:
The second IC ID.
kmin : float, optional
The minimum wavenumber. By default `None`, i.e. 0.
kmin : float, optional
kmax : float, optional
The maximum wavenumber. By default `None`, i.e. infinity.
Returns
@ -133,7 +133,7 @@ class PKReader:
----------
kmin : float, optional
The minimum wavenumber. By default `None`, i.e. 0.
kmin : float, optional
kmax : float, optional
The maximum wavenumber. By default `None`, i.e. infinity.
Returns
@ -144,12 +144,12 @@ class PKReader:
Cross-correlations. The first column is the IC being
cross-correlated with the remaining ICs in the second column.
"""
nics = len(self.ic_ids)
nics = len(self.get_ics)
ks, xpks = None, None
for i, ic0 in enumerate(tqdm(self.ic_ids)):
for i, ic0 in enumerate(tqdm(self.get_ics)):
k = 0
for ic1 in self.ic_ids:
for ic1 in self.get_ics:
# Skip the auto-correlation of an IC with itself
if ic0 == ic1:
continue

View File

@ -15,274 +15,18 @@
"""
Functions to read in the particle and clump files.
"""
from os.path import (join, isfile, isdir)
from glob import glob
from os.path import isfile, join
from warnings import warn
import numpy
from scipy.io import FortranFile
from tqdm import tqdm
from ..utils import (cols_to_structured)
from tqdm import tqdm, trange
from .paths import CSiBORGPaths
from .utils import cols_to_structured
###############################################################################
# Paths manager #
###############################################################################
class CSiBORGPaths:
"""
Paths manager for CSiBORG IC realisations.
Parameters
----------
srcdir : str
Path to the folder where CSiBORG simulations are stored.
dumpdir : str
Path to the folder where files from `run_fit_halos` are stored.
mmain_path : str
Path to folder where mmain files are stored.
initmatch_path : str
Path to the folder where particle ID match between the first and final
snapshot is stored.
"""
_srcdir = None
_dumpdir = None
_mmain_path = None
_initmatch_path = None
def __init__(self, srcdir=None, dumpdir=None, mmain_path=None,
initmatch_path=None):
self.srcdir = srcdir
self.dumpdir = dumpdir
self.mmain_path = mmain_path
self.initmatch_path = initmatch_path
@staticmethod
def _check_directory(path):
if not isdir(path):
raise IOError("Invalid directory `{}`!".format(path))
@property
def srcdir(self):
"""
Path to the folder where CSiBORG simulations are stored.
Returns
-------
path : str
"""
if self._srcdir is None:
raise ValueError("`srcdir` is not set!")
return self._srcdir
@srcdir.setter
def srcdir(self, path):
if path is None:
return
self._check_directory(path)
self._srcdir = path
@property
def dumpdir(self):
"""
Path to the folder where files from `run_fit_halos` are stored.
Returns
-------
path : str
"""
if self._dumpdir is None:
raise ValueError("`dumpdir` is not set!")
return self._dumpdir
@dumpdir.setter
def dumpdir(self, path):
if path is None:
return
self._check_directory(path)
self._dumpdir = path
@property
def temp_dumpdir(self):
"""
Path to a temporary dumping folder.
Returns
-------
path : str
"""
fpath = join(self.dumpdir, "temp")
if not isdir(fpath):
raise IOError("Invalid directory `{}`.".format(fpath))
return fpath
@property
def mmain_path(self):
"""
Path to the folder where mmain files are stored.
Returns
-------
path : str
"""
if self._mmain_path is None:
raise ValueError("`mmain_path` is not set!")
return self._mmain_path
@mmain_path.setter
def mmain_path(self, path):
if path is None:
return
self._check_directory(path)
self._mmain_path = path
@property
def initmatch_path(self):
"""
Path to the folder where the match between the first and final
snapshot is stored.
Returns
-------
path : str
"""
if self._initmatch_path is None:
raise ValueError("`initmatch_path` is not set!")
return self._initmatch_path
@initmatch_path.setter
def initmatch_path(self, path):
if path is None:
return
self._check_directory(path)
self._initmatch_path = path
def ic_ids(self, tonew):
"""
CSiBORG IC realisation IDs from the list of folders in `self.srcdir`.
Parameters
----------
tonew : bool
If `True`, path to the '_new' ICs is returned.
Returns
-------
ids : 1-dimensional array
"""
files = glob(join(self.srcdir, "ramses_out*"))
files = [f.split("/")[-1] for f in files] # Select only file names
if tonew:
files = [f for f in files if "_new" in f]
ids = [int(f.split("_")[2]) for f in files] # Take the IC IDs
else:
files = [f for f in files if "_inv" not in f] # Remove inv. ICs
files = [f for f in files if "_new" not in f] # Remove _new
files = [f for f in files if "OLD" not in f] # Remove _old
ids = [int(f.split("_")[-1]) for f in files]
try:
ids.remove(5511)
except ValueError:
pass
return numpy.sort(ids)
def ic_path(self, nsim, tonew=False):
"""
Path to a CSiBORG IC realisation folder.
Parameters
----------
nsim : int
IC realisation index.
tonew : bool, optional
Whether to return the path to the '_new' IC realisation.
Returns
-------
path : str
"""
fname = "ramses_out_{}"
if tonew:
fname += "_new"
return join(self.srcdir, fname.format(nsim))
def get_snapshots(self, nsim):
"""
List of available snapshots of a CSiBORG IC realisation.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
snapshots : 1-dimensional array
"""
simpath = self.ic_path(nsim, tonew=False)
# Get all files in simpath that start with output_
snaps = glob(join(simpath, "output_*"))
# Take just the last _00XXXX from each file and strip zeros
snaps = [int(snap.split('_')[-1].lstrip('0')) for snap in snaps]
return numpy.sort(snaps)
def clump0_path(self, nsim):
"""
Path to a single dumped clump's particles. Expected to point to a
dictionary whose keys are the clump indices and items structured
arrays with the clump's particles in the initial snapshot.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
path : str
"""
cdir = join(self.dumpdir, "initmatch")
return join(cdir, "clump_{}_{}.npy".format(nsim, "particles"))
def snapshot_path(self, nsnap, nsim):
"""
Path to a CSiBORG IC realisation snapshot.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
snappath : str
"""
if nsnap == 1:
tonew = True
simpath = self.ic_path(nsim, tonew=tonew)
return join(simpath, "output_{}".format(str(nsnap).zfill(5)))
def hcat_path(self, nsim):
"""
Path to the final snapshot halo catalogue from `fit_halos.py`.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
path : str
"""
nsnap = str(max(self.get_snapshots(nsim))).zfill(5)
fname = "ramses_out_{}_{}.npy".format(str(self.nsim).zfill(5), nsnap)
return join(self.dumpdir, fname)
###############################################################################
# Fortran readers #
# Fortran particle reader #
###############################################################################
@ -297,13 +41,24 @@ class ParticleReader:
_paths = None
def __init__(self, paths):
# assert isinstance(paths, CSiBORGPaths)
self._paths = paths
self.paths = paths
@property
def paths(self):
"""
Paths manager.
Returns
-------
paths : :py:class:`csiborgtools.read.CSiBORGPaths`
"""
return self._paths
@paths.setter
def paths(self, paths):
# assert isinstance(paths, CSiBORGPaths) # REMOVE
self._paths = paths
def read_info(self, nsnap, nsim):
"""
Read CSiBORG simulation snapshot info.
@ -332,7 +87,7 @@ class ParticleReader:
keys = info[eqs - 1]
vals = info[eqs + 1]
return {key: val for key, val in zip(keys, vals)}
return {key: val for key, val in zip(keys, vals, strict=True)}
def open_particle(self, nsnap, nsim, verbose=True):
"""
@ -392,7 +147,7 @@ class ParticleReader:
@staticmethod
def read_sp(dtype, partfile):
"""
Utility function to read a single particle file.
Read a single particle file.
Parameters
----------
@ -490,7 +245,7 @@ class ParticleReader:
for cpu in iters:
i = start_ind[cpu]
j = nparts[cpu]
for (fname, fdtype) in zip(fnames, fdtypes):
for (fname, fdtype) in zip(fnames, fdtypes, strict=True):
if fname in pars_extract:
out[fname][i:i + j] = self.read_sp(fdtype, partfiles[cpu])
else:
@ -522,7 +277,7 @@ class ParticleReader:
"""
nsnap = str(nsnap).zfill(5)
cpu = str(cpu + 1).zfill(5)
fpath = join(self.paths.ic_path(nsim, to_new=False),
fpath = join(self.paths.ic_path(nsim, tonew=False),
"output_{}".format(nsnap),
"unbinding_{}.out{}".format(nsnap, cpu))
return FortranFile(fpath)
@ -561,31 +316,9 @@ class ParticleReader:
return clumpid
@staticmethod
def drop_zero_indx(clump_ids, particles):
"""
Drop from `clump_ids` and `particles` entries whose clump index is 0.
Parameters
----------
clump_ids : 1-dimensional array
Array of clump IDs.
particles : structured array
Array of the particle data.
Returns
-------
clump_ids : 1-dimensional array
The array of clump IDs after removing zero clump ID entries.
particles : structured array
The particle data after removing zero clump ID entries.
"""
mask = clump_ids != 0
return clump_ids[mask], particles[mask]
def read_clumps(self, nsnap, nsim, cols=None):
"""
Read in a clump file `clump_Nsnap.dat`.
Read in a clump file `clump_xxXXX.dat`.
Parameters
----------
@ -593,7 +326,6 @@ class ParticleReader:
Snapshot index.
nsim : int
IC realisation index.
cols : list of str, optional.
Columns to extract. By default `None` and all columns are
extracted.
@ -601,84 +333,150 @@ class ParticleReader:
Returns
-------
out : structured array
Structured array of the clumps.
"""
nsnap = str(nsnap).zfill(5)
fname = join(self.paths.ic_path(nsim, to_new=False),
fname = join(self.paths.ic_path(nsim, tonew=False),
"output_{}".format(nsnap),
"clump_{}.dat".format(nsnap))
# Check the file exists.
if not isfile(fname):
raise FileExistsError(
"Clump file `{}` does not exist.".format(fname))
# Read in the clump array. This is how the columns must be written!
raise FileExistsError("Clump file `{}` does not exist."
.format(fname))
data = numpy.genfromtxt(fname)
clump_cols = [("index", numpy.int64), ("level", numpy.int64),
("parent", numpy.int64), ("ncell", numpy.float64),
("peak_x", numpy.float64), ("peak_y", numpy.float64),
("peak_z", numpy.float64), ("rho-", numpy.float64),
("rho+", numpy.float64), ("rho_av", numpy.float64),
("mass_cl", numpy.float64), ("relevance", numpy.float64)]
out0 = cols_to_structured(data.shape[0], clump_cols)
for i, name in enumerate(out0.dtype.names):
out0[name] = data[:, i]
# If take all cols then return
if cols is None:
return out0
# Make sure we have a list
# How the data is stored in the clump file.
clump_cols = {"index": (0, numpy.int32),
"level": (1, numpy.int32),
"parent": (2, numpy.int32),
"ncell": (3, numpy.float32),
"x": (4, numpy.float32),
"y": (5, numpy.float32),
"z": (6, numpy.float32),
"rho-": (7, numpy.float32),
"rho+": (8, numpy.float32),
"rho_av": (9, numpy.float32),
"mass_cl": (10, numpy.float32),
"relevance": (11, numpy.float32),
}
# Return the requested columns.
cols = [cols] if isinstance(cols, str) else cols
# Get the indxs of clump_cols to output
clump_names = [col[0] for col in clump_cols]
indxs = [None] * len(cols)
for i, col in enumerate(cols):
if col not in clump_names:
raise KeyError("...")
indxs[i] = clump_names.index(col)
# Make an array and fill it
out = cols_to_structured(out0.size, [clump_cols[i] for i in indxs])
for name in out.dtype.names:
out[name] = out0[name]
cols = list(clump_cols.keys()) if cols is None else cols
dtype = [(col, clump_cols[col][1]) for col in cols]
out = cols_to_structured(data.shape[0], dtype)
for col in cols:
out[col] = data[:, clump_cols[col][0]]
return out
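# Illustrative sketch (assumes `paths`, `nsnap` and `nsim` are set up as
# above): read only the columns needed to walk the substructure tree.
reader = ParticleReader(paths)
clumps = reader.read_clumps(nsnap, nsim, cols=["index", "parent", "mass_cl"])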
###############################################################################
# Supplementary reading functions #
# Summed substructure catalogue #
###############################################################################
def read_mmain(nsim, srcdir, fname="Mmain_{}.npy"):
class MmainReader:
"""
Read `mmain` numpy arrays of central halos whose mass contains their
substructure contribution.
Parameters
----------
nsim : int
IC realisation index.
srcdir : str
Path to the folder containing the files.
fname : str, optional
File name convention. By default `Mmain_{}.npy`, where the
substituted value is `n`.
Returns
-------
out : structured array
Array with the central halo information.
Object to generate the summed substructure catalogue.
"""
fpath = join(srcdir, fname.format(nsim))
arr = numpy.load(fpath)
_paths = None
cols = [("index", numpy.int64), ("peak_x", numpy.float64),
("peak_y", numpy.float64), ("peak_z", numpy.float64),
("mass_cl", numpy.float64), ("sub_frac", numpy.float64)]
out = cols_to_structured(arr.shape[0], cols)
for i, name in enumerate(out.dtype.names):
out[name] = arr[:, i]
def __init__(self, paths):
assert isinstance(paths, CSiBORGPaths) # REMOVE
self._paths = paths
return out
@property
def paths(self):
return self._paths
def find_parents(self, clumparr, verbose=False):
"""
Find ultimate parent haloes for every clump in a final snapshot.
Parameters
----------
clumparr : structured array
Clump array. Read from `ParticleReader.read_clumps`. Must contain
`index` and `parent` columns.
verbose : bool, optional
Verbosity flag.
Returns
-------
parent_arr : 1-dimensional array of shape `(nclumps, )`
The ultimate parent halo index for every clump, i.e. referring to
its ultimate parent clump.
"""
clindex = clumparr["index"]
parindex = clumparr["parent"]
# The ultimate parent for every clump
parent_arr = numpy.zeros(clindex.size, dtype=numpy.int32)
for i in trange(clindex.size) if verbose else range(clindex.size):
tocont = clindex[i] != parindex[i] # Continue if not a main halo
par = parindex[i] # First we try the parent of this clump
while tocont:
# The element of the array corresponding to the parent clump to
# the one we're looking at
element = numpy.where(clindex == par)[0][0]
# We stop if the parent is its own parent, so a main halo. Else
# move onto the parent of the parent. Eventually this is its
# own parent and we stop, with ultimate parent=par
if clindex[element] == parindex[element]:
tocont = False
else:
par = parindex[element]
parent_arr[i] = par
return parent_arr
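# Toy check of the parent walk (clump 3 sits in clump 2, which sits in
# main halo 1; `paths` is assumed to be a configured CSiBORGPaths):
import numpy
toy = numpy.zeros(3, dtype={"names": ["index", "parent"],
                            "formats": [numpy.int32] * 2})
toy["index"] = [1, 2, 3]
toy["parent"] = [1, 1, 2]                   # clump 1 is its own parent
assert list(MmainReader(paths).find_parents(toy)) == [1, 1, 1]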
def make_mmain(self, nsim, verbose=False):
"""
Make the summed substructure catalogue for a final snapshot. Includes
the position of the parent halo, the summed mass and the fraction of mass in
substructure.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
Returns
-------
mmain : structured array
The `mmain` catalogue.
ultimate_parent : 1-dimensional array of shape `(nclumps,)`
The ultimate parent halo index for every clump, i.e. referring to
its ultimate parent clump.
"""
nsnap = max(self.paths.get_snapshots(nsim))
partreader = ParticleReader(self.paths)
cols = ["index", "parent", "mass_cl", 'x', 'y', 'z']
clumparr = partreader.read_clumps(nsnap, nsim, cols)
ultimate_parent = self.find_parents(clumparr, verbose=verbose)
mask_main = clumparr["index"] == clumparr["parent"]
nmain = numpy.sum(mask_main)
# Preallocate already the output array
out = cols_to_structured(
nmain, [("ID", numpy.int32), ("x", numpy.float32),
("y", numpy.float32), ("z", numpy.float32),
("M", numpy.float32), ("subfrac", numpy.float32)])
out["ID"] = clumparr["index"][mask_main]
# Because for these index == parent
for p in ('x', 'y', 'z'):
out[p] = clumparr[p][mask_main]
# We want a total mass for each halo in ID_main
for i in range(nmain):
# Should include the main halo itself, i.e. its own ultimate parent
out["M"][i] = numpy.sum(
clumparr["mass_cl"][ultimate_parent == out["ID"][i]])
out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"]
return out, ultimate_parent
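# Illustrative sketch of generating and storing the catalogue; the `.npz`
# keys match what `HaloCatalogue` and `ClumpsCatalogue` load back.
import numpy
reader = MmainReader(paths)                 # `paths` assumed configured
mmain, ultimate_parent = reader.make_mmain(nsim, verbose=True)
nsnap = max(paths.get_snapshots(nsim))
numpy.savez(paths.mmain_path(nsnap, nsim),
            mmain=mmain, ultimate_parent=ultimate_parent)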
###############################################################################
# Supplementary reading functions #
###############################################################################
def read_initcm(nsim, srcdir, fname="clump_{}_cm.npy"):
@ -704,7 +502,8 @@ def read_initcm(nsim, srcdir, fname="clump_{}_cm.npy"):
try:
return numpy.load(fpath)
except FileNotFoundError:
warn("File {} does not exist.".format(fpath))
warn("File {} does not exist.".format(fpath), UserWarning,
stacklevel=1)
return None

View File

@ -13,17 +13,42 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""2PCF reader."""
from os.path import join
from glob import glob
import numpy
import joblib
import numpy
from .paths import CSiBORGPaths
class TPCFReader:
"""
Shortcut object to read in the 2PCF data.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
"""
def read(self, run, folder):
_paths = None
def __init__(self, paths):
self.paths = paths
@property
def paths(self):
"""
Paths manager.
Returns
-------
paths : :py:class:`csiborgtools.read.CSiBORGPaths`
"""
return self._paths
@paths.setter
def paths(self, paths):
assert isinstance(paths, CSiBORGPaths)
self._paths = paths
def read(self, run):
"""
Read the auto-correlation 2PCF data for a given run.
@ -32,8 +57,6 @@ class TPCFReader:
----------
run : str
Run ID to read in.
folder : str
Path to the folder where the auto-2PCF is stored.
Returns
-------
@ -42,8 +65,7 @@ class TPCFReader:
out : 2-dimensional array of shape `(len(files), len(rp))`
Array of 2PCFs.
"""
run += ".p"
files = [f for f in glob(join(folder, "*")) if run in f]
files = self.paths.tpcfauto_path(run)
if len(files) == 0:
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))

View File

@ -13,10 +13,68 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Utility functions for manipulating structured arrays.
Various coordinate transformations.
"""
import numpy
###############################################################################
# Coordinate transforms #
###############################################################################
def cartesian_to_radec(x, y, z):
"""
Calculate the radial distance, right ascension in [0, 360) degrees and
declination [-90, 90] degrees. Note, the observer should be placed in the
middle of the box.
Parameters
----------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
Returns
-------
dist, ra, dec : 1-dimensional arrays
Radial distance, right ascension and declination.
"""
dist = numpy.sqrt(x**2 + y**2 + z**2)
dec = numpy.rad2deg(numpy.arcsin(z/dist))
ra = numpy.rad2deg(numpy.arctan2(y, x))
# Make sure RA in the correct range
ra[ra < 0] += 360
return dist, ra, dec
def radec_to_cartesian(dist, ra, dec, isdeg=True):
"""
Convert distance, right ascension and declination to Cartesian coordinates.
Parameters
----------
dist, ra, dec : 1-dimensional arrays
Spherical coordinates.
isdeg : bool, optional
Whether `ra` and `dec` are in degrees. By default `True`.
Returns
-------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
"""
if isdeg:
ra = numpy.deg2rad(ra)
dec = numpy.deg2rad(dec)
x = dist * numpy.cos(dec) * numpy.cos(ra)
y = dist * numpy.cos(dec) * numpy.sin(ra)
z = dist * numpy.sin(dec)
return x, y, z
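# Round-trip consistency check of the two transforms (random points,
# observer at the origin):
rng = numpy.random.default_rng(42)
x, y, z = rng.normal(size=(3, 100))
assert numpy.allclose(
    radec_to_cartesian(*cartesian_to_radec(x, y, z)), [x, y, z])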
###############################################################################
# Array manipulation #
###############################################################################
def cols_to_structured(N, cols):
"""
@ -108,7 +166,7 @@ def rm_columns(arr, cols):
# Get a new dtype without the cols to be deleted
new_dtype = []
for dtype, name in zip(arr.dtype.descr, arr.dtype.names):
for dtype, name in zip(arr.dtype.descr, arr.dtype.names, strict=True):
if name not in cols:
new_dtype.append(dtype)

View File

@ -1,17 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .transforms import cartesian_to_radec, radec_to_cartesian # noqa
from .box_units import BoxUnits # noqa

View File

@ -1,66 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Various coordinate transformations.
"""
import numpy
def cartesian_to_radec(x, y, z):
"""
Calculate the radial distance, right ascension in [0, 360) degrees and
declination [-90, 90] degrees. Note, the observer should be placed in the
middle of the box.
Parameters
----------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
Returns
-------
dist, ra, dec : 1-dimensional arrays
Radial distance, right ascension and declination.
"""
dist = numpy.sqrt(x**2 + y**2 + z**2)
dec = numpy.rad2deg(numpy.arcsin(z/dist))
ra = numpy.rad2deg(numpy.arctan2(y, x))
# Make sure RA in the correct range
ra[ra < 0] += 360
return dist, ra, dec
def radec_to_cartesian(dist, ra, dec, isdeg=True):
"""
Convert distance, right ascension and declination to Cartesian coordinates.
Parameters
----------
dist, ra, dec : 1-dimensional arrays
Spherical coordinates.
isdeg : bool, optional
Whether `ra` and `dec` are in degrees. By default `True`.
Returns
-------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
"""
if isdeg:
ra = numpy.deg2rad(ra)
dec = numpy.deg2rad(dec)
x = dist * numpy.cos(dec) * numpy.cos(ra)
y = dist * numpy.cos(dec) * numpy.sin(ra)
z = dist * numpy.sin(dec)
return x, y, z

View File

@ -1,24 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from datetime import datetime
from .recarray_manip import (cols_to_structured, add_columns, rm_columns, # noqa
list_to_ndarray, array_to_structured, # noqa
flip_cols, extract_from_structured) # noqa
def now(tz=None):
"""Shortcut to `datetime.datetime.now`."""
return datetime.now(tz=tz)

View File

@ -51,8 +51,8 @@
},
"outputs": [],
"source": [
"cat0 = csiborgtools.read.HaloCatalogue(7468)\n",
"catxs = [csiborgtools.read.HaloCatalogue(nsim) for nsim in (7588, 8020, 8452, 8836)]\n",
"cat0 = csiborgtools.read.ClumpsCatalogue(7468)\n",
"catxs = [csiborgtools.read.ClumpsCatalogue(nsim) for nsim in (7588, 8020, 8452, 8836)]\n",
"reader = csiborgtools.read.NPairsOverlap(cat0, catxs, max_dist=150 / 0.705)"
]
},

View File

@ -75,6 +75,17 @@
"wp3 = reader.mean_wp(wp3)"
]
},
{
"cell_type": "code",
"execution_count": 33,
@ -657,7 +668,7 @@
},
"outputs": [],
"source": [
"cat = csiborgtools.read.HaloCatalogue(7444, paths, min_mass=1e12, max_dist=155/0.705)"
"cat = csiborgtools.read.ClumpsCatalogue(7444, paths, min_mass=1e12, max_dist=155/0.705)"
]
},
{
@ -813,7 +824,7 @@
},
"outputs": [],
"source": [
"cat = csiborgtools.read.HaloCatalogue(7444, paths)"
"cat = csiborgtools.read.ClumpsCatalogue(7444, paths)"
]
},
{
@ -831,7 +842,7 @@
"from tqdm import trange\n",
"x = np.full((len(ics), 3), np.nan)\n",
"for i in trange(len(ics)):\n",
" cat = csiborgtools.read.HaloCatalogue(ics[i], paths, max_dist=155 / 0.705)\n",
" cat = csiborgtools.read.ClumpsCatalogue(ics[i], paths, max_dist=155 / 0.705)\n",
" for j, th in enumerate([1e12, 1e13, 1e14]):\n",
" mask = cat[\"totpartmass\"] > th\n",
" x[i, j] = np.nanmedian(cat[\"lambda200c\"][mask])"
@ -1149,8 +1160,8 @@
},
"outputs": [],
"source": [
"cat1 = csiborgtools.read.HaloCatalogue(7444, min_mass=1e13, max_dist=155 / 0.705)\n",
"cat2 = csiborgtools.read.HaloCatalogue(7468, min_mass=1e13, max_dist=155 / 0.705)"
"cat1 = csiborgtools.read.ClumpsCatalogue(7444, min_mass=1e13, max_dist=155 / 0.705)\n",
"cat2 = csiborgtools.read.ClumpsCatalogue(7468, min_mass=1e13, max_dist=155 / 0.705)"
]
},
{

View File

@ -62,8 +62,8 @@
},
"outputs": [],
"source": [
"cat0 = csiborgtools.read.HaloCatalogue(7468)\n",
"catx = csiborgtools.read.HaloCatalogue(7588)"
"cat0 = csiborgtools.read.ClumpsCatalogue(7468)\n",
"catx = csiborgtools.read.ClumpsCatalogue(7588)"
]
},
{

View File

@ -78,7 +78,7 @@
},
"outputs": [],
"source": [
"pkreader = csiborgtools.read.PKReader(paths.ic_ids, hw)\n",
"pkreader = csiborgtools.read.PKReader(paths.get_ics, hw)\n",
"\n",
"autoks, pks = pkreader.read_autos()\n",
"\n",
@ -1134,7 +1134,7 @@
"axs[0].set_title(\"hw = {}\".format(hw))\n",
"m = autoks < 40\n",
"mu = np.mean(pks, axis=0)\n",
"for i in range(len(paths.ic_ids)):\n",
"for i in range(len(paths.get_ics)):\n",
" axs[0].plot(autoks[m], pks[i, m], c=\"k\", lw=0.1)\n",
" axs[1].plot(autoks[m], pks[i, m] / mu[m], c=\"k\", lw=0.1)\n",
"axs[0].plot(autoks[m], mu[m], c=\"red\", lw=1, label=r\"$\\langle P(k) \\rangle$\")\n",
@ -2156,7 +2156,7 @@
"axs[0].set_title(r\"$\\mathrm{{hw}} = {}$\".format(hw))\n",
"m = autoks < 22\n",
"mu = np.mean(pks, axis=0)\n",
"for i in range(len(paths.ic_ids)):\n",
"for i in range(len(paths.get_ics)):\n",
" axs[0].plot(autoks[m], pks[i, m], c=\"k\", lw=0.1)\n",
" axs[1].plot(autoks[m], pks[i, m] / mu[m], c=\"k\", lw=0.1)\n",
"axs[0].plot(autoks[m], mu[m], c=\"red\", lw=1, label=r\"$\\langle P(k) \\rangle$\")\n",
@ -3242,13 +3242,13 @@
"fskel = \"/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p\"\n",
"\n",
"autoks, autopks = None, None\n",
"for i, nsim in enumerate(paths.ic_ids):\n",
"for i, nsim in enumerate(paths.get_ics):\n",
" pk = joblib.load(fskel.format(nsim, nsim, hw))\n",
" x = pk.k3D\n",
" y = pk.Pk[:, 0, 0]\n",
" sel = x < 20\n",
" if i == 0:\n",
" autoks = np.full((len(paths.ic_ids), np.sum(sel)), np.nan)\n",
" autoks = np.full((len(paths.get_ics), np.sum(sel)), np.nan)\n",
" autopks = np.full_like(autoks, np.nan)\n",
" \n",
" autoks[i, :] = x[sel]\n",
@ -3268,7 +3268,7 @@
"outputs": [],
"source": [
"plt.figure()\n",
"for i in range(len(paths.ic_ids)):\n",
"for i in range(len(paths.get_ics)):\n",
" plt.plot(autoks[i, :], autopks[i, :], c=\"k\", lw=0.1)\n",
"plt.plot(np.mean(autoks, axis=0), np.mean(autopks, axis=0), label=\"CSiBORG\", c=\"k\", lw=1)\n",
" \n",
@ -3308,10 +3308,10 @@
"source": [
"fskel = \"/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p\"\n",
"\n",
"ic0 = paths.ic_ids[25]\n",
"ic0 = paths.get_ics[25]\n",
"crossks, crosspks = None, None\n",
"i = 0\n",
"for ic in paths.ic_ids:\n",
"for ic in paths.get_ics:\n",
" if ic == ic0:\n",
" continue\n",
" ics = (ic0, ic)\n",
@ -3700,7 +3700,7 @@
"outputs": [],
"source": [
"paths = csiborgtools.read.CSiBORGPaths()\n",
"cat = csiborgtools.read.CombinedHaloCatalogue(paths, min_m500=1e13, max_dist=210)"
"cat = csiborgtools.read.CombinedCatalogue(paths, min_m500=1e13, max_dist=210)"
]
},
{
@ -4144,7 +4144,7 @@
},
"outputs": [],
"source": [
"cat = csiborgtools.io.HaloCatalogue(9844, 1016, minimum_m500=0)"
"cat = csiborgtools.io.Catalogue(9844, 1016, minimum_m500=0)"
]
},
{

View File

@ -515,7 +515,7 @@
}
],
"source": [
"len(paths.ic_ids(tonew=True))"
"len(paths.get_ics(tonew=True))"
]
},
{
@ -542,8 +542,8 @@
},
"outputs": [],
"source": [
"cat0 = csiborgtools.read.HaloCatalogue(7468)\n",
"catx = csiborgtools.read.HaloCatalogue(7588)"
"cat0 = csiborgtools.read.ClumpsCatalogue(7468)\n",
"catx = csiborgtools.read.ClumpsCatalogue(7588)"
]
},
{

322
notebooks/test_mmain.ipynb Normal file
View File

@ -0,0 +1,322 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "5a38ed25",
"metadata": {
"ExecuteTime": {
"end_time": "2022-12-31T17:12:28.663839Z",
"start_time": "2022-12-31T17:12:25.134607Z"
}
},
"outputs": [],
"source": [
"import sys\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import joblib\n",
"import scienceplots\n",
"sys.path.append(\"../\")\n",
"import csiborgtools\n",
"\n",
"plt.style.use([\"science\", \"notebook\"])\n",
"%matplotlib widget\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "22130d0b",
"metadata": {},
"outputs": [],
"source": [
"d = np.load(\"/mnt/extraspace/rstiskalek/csiborg/split/clumps_07444_00951.npz\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "4d9d9d11",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"772 µs ± 4.01 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
"source": [
"%timeit d[\"232\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66f32cef",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8a24c0fa",
"metadata": {},
"outputs": [],
"source": [
"paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)\n",
"# cat = csiborgtools.read.ClumpsCatalogue(7444, paths)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6e3ba9f4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/mnt/extraspace/rstiskalek/csiborg/knn/auto/knncdf_07444_la.npz'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"paths.knn_path(7444, \"auto\", \"la\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "2f4793b9",
"metadata": {},
"outputs": [],
"source": [
"np.savez(\"test.npz\", a=np.random.rand(510, 510, 510), b=np.random.rand(510, 510, 510))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "041d80d8",
"metadata": {},
"outputs": [],
"source": [
"d = np.load(\"test.npz\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "dc320130",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['a', 'b']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.files"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "11231e20",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['a', 'b']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.files"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b6d02f8",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 26,
"id": "4ae2a2a8",
"metadata": {},
"outputs": [],
"source": [
"np.save(\"test.npy\", np.array([]))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "b675510f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([], dtype=float64)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.load(\"test.npy\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f8f96b7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4b63c20",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 21,
"id": "1cdcf448",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/mnt/extraspace/rstiskalek/csiborg/split/ic_00952/out_07444_123.npz'"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# paths.split_path(123, 7444, 952)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d07431f5",
"metadata": {},
"outputs": [],
"source": [
"nsim = 7444\n",
"nsnap = max(paths.get_snapshots(7444))\n",
"reader = csiborgtools.read.ParticleReader(paths)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "def3c21f",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# clumpind = reader.read_clumps(nsnap, nsim, cols=\"index\")[\"index\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "832e82ce",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa69261b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 17,
"id": "105dd2e2",
"metadata": {},
"outputs": [],
"source": [
"parts = np.load(\"/mnt/extraspace/rstiskalek/csiborg/initmatch/clump_7468_particles.npy\", allow_pickle=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c49f174b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "38e9490d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv_galomatch",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
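The `%timeit d["232"]` cell in the notebook above makes the point that `numpy.savez` archives are read lazily: each key access re-reads that member from the zip file (~1 ms here), so hot loops should materialise the members they need once. A small demonstration (file name hypothetical):

```python
import numpy

# savez writes a zip archive; numpy.load maps it lazily, one member per key.
numpy.savez("clumps_demo.npz",
            **{str(i): numpy.random.rand(1000, 7) for i in range(100)})
d = numpy.load("clumps_demo.npz")
print(d.files[:5])                        # keys, e.g. clump IDs as strings
part = d["42"]                            # only this member is read from disk
cache = {key: d[key] for key in d.files}  # pay the I/O cost once if reused
```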

View File

@ -16,16 +16,18 @@
MPI script to calculate the matter cross power spectrum between CSiBORG
IC realisations. Units are Mpc/h.
"""
from gc import collect
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from itertools import combinations
from os import remove
from os.path import join
from itertools import combinations
from datetime import datetime
import numpy
import joblib
from mpi4py import MPI
import numpy
import Pk_library as PKL
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
@ -47,9 +49,9 @@ nproc = comm.Get_size()
MAS = "CIC" # mass asignment scheme
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
box = csiborgtools.units.BoxUnits(paths)
box = csiborgtools.read.BoxUnits(paths)
reader = csiborgtools.read.ParticleReader(paths)
ics = paths.ic_ids(tonew=False)
ics = paths.get_ics(tonew=False)
nsims = len(ics)
# File paths
@ -59,7 +61,7 @@ fout = join(dumpdir, "crosspk",
"out_{}_{}" + "_{}.p".format(args.halfwidth))
jobs = csiborgtools.fits.split_jobs(nsims, nproc)[rank]
jobs = csiborgtools.utils.split_jobs(nsims, nproc)[rank]
for n in jobs:
print("Rank {}@{}: saving {}th delta.".format(rank, datetime.now(), n))
nsim = ics[n]
@ -99,7 +101,7 @@ for i in range(nsims):
combs.append((i, i))
prev_delta = [-1, None, None, None] # i, delta, aexp, length
jobs = csiborgtools.fits.split_jobs(len(combs), nproc)[rank]
jobs = csiborgtools.utils.split_jobs(len(combs), nproc)[rank]
for n in jobs:
i, j = combs[n]
print("Rank {}@{}: combination {}.".format(rank, datetime.now(), (i, j)))
@ -153,4 +155,4 @@ if rank == 0:
remove(ftemp.format(ic, "delta") + ".npy")
remove(ftemp.format(ic, "lengths") + ".p")
print("All finished!")
print("All finished!")

View File

@ -13,17 +13,18 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
from os.path import join
from warnings import warn
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
import numpy
from sklearn.neighbors import NearestNeighbors
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from TaskmasterMPI import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
@ -58,8 +59,6 @@ ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
fout = join(dumpdir, "auto", "knncdf_{}_{}.p")
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
knncdf = csiborgtools.clustering.kNN_CDF()
@ -67,6 +66,7 @@ knncdf = csiborgtools.clustering.kNN_CDF()
# Analysis #
###############################################################################
def read_single(selection, cat):
"""Positions for single catalogue auto-correlation."""
mmask = numpy.ones(len(cat), dtype=bool)
@ -101,11 +101,13 @@ def read_single(selection, cat):
return pos[smask, ...]
def do_auto(run, cat, ic):
"""Calculate the kNN-CDF single catalgoue autocorrelation."""
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -119,13 +121,15 @@ def do_auto(run, cat, ic):
batch_size=int(config["batch_size"]), random_state=config["seed"])
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
fout.format(str(ic).zfill(5), run))
paths.knnauto_path(run, ic))
def do_cross_rand(run, cat, ic):
"""Calculate the kNN-CDF cross catalogue random correlation."""
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -143,14 +147,11 @@ def do_cross_rand(run, cat, ic):
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, fout.format(str(ic).zfill(5), run))
joblib.dump({"rs": rs, "corr": corr}, paths.knnauto_path(run, ic))
def do_runs(ic):
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax,
min_mass=minmass)
cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
for run in args.runs:
if "random" in run:
do_cross_rand(run, cat, ic)
@ -179,4 +180,4 @@ comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script
quit() # Force quit the script
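For orientation, a self-contained sketch of the kNN-CDF measurement this script drives: query volume-filling randoms against the tracer set and take the empirical CDF of the k-th nearest-neighbour distance, which equals P(>= k tracers within r). The package class additionally handles batching, radial grids and normalisation; everything below is illustrative:

```python
import numpy
from sklearn.neighbors import NearestNeighbors

rng = numpy.random.default_rng(0)
pos = rng.uniform(0, 100, size=(5000, 3))       # tracers (e.g. clumps)
queries = rng.uniform(0, 100, size=(20000, 3))  # volume-filling randoms

knn = NearestNeighbors().fit(pos)
dist, _ = knn.kneighbors(queries, n_neighbors=3)  # 1st-3rd NN distances

rs = numpy.linspace(0.5, 20, 50)
# Empirical CDF of the k-th NN distance = P(>= k tracers within r).
cdf_k1 = numpy.mean(dist[:, 0][:, None] < rs[None, :], axis=0)
cdf_k3 = numpy.mean(dist[:, 2][:, None] < rs[None, :], axis=0)
```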

View File

@ -13,18 +13,19 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
from warnings import warn
from os.path import join
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from itertools import combinations
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
import numpy
from sklearn.neighbors import NearestNeighbors
from os.path import join
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from TaskmasterMPI import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
@ -67,6 +68,7 @@ knncdf = csiborgtools.clustering.kNN_CDF()
# Analysis #
###############################################################################
def read_single(selection, cat):
mmask = numpy.ones(len(cat), dtype=bool)
pos = cat.positions(False)
@ -79,19 +81,20 @@ def read_single(selection, cat):
mmask &= (cat[psel["name"]] < pmax)
return pos[mmask, ...]
def do_cross(run, ics):
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
knn1, knn2 = NearestNeighbors(), NearestNeighbors()
cat1 = csiborgtools.read.HaloCatalogue(ics[0], paths, max_dist=Rmax)
cat1 = csiborgtools.read.ClumpsCatalogue(ics[0], paths, max_dist=Rmax)
pos1 = read_single(_config, cat1)
knn1.fit(pos1)
cat2 = csiborgtools.read.HaloCatalogue(ics[1], paths, max_dist=Rmax)
cat2 = csiborgtools.read.ClumpsCatalogue(ics[1], paths, max_dist=Rmax)
pos2 = read_single(_config, cat2)
knn2.fit(pos2)
@ -102,9 +105,8 @@ def do_cross(run, ics):
batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knncross_path(run, ics))
joblib.dump({"rs": rs, "corr": corr},
fout.format(str(ics[0]).zfill(5), str(ics[1]).zfill(5), run))
def do_runs(ics):
print(ics)
@ -133,4 +135,4 @@ comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script
quit() # Force quit the script

View File

@ -13,16 +13,18 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the auto-2PCF of CSiBORG catalogues."""
from os.path import join
from warnings import warn
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from os.path import join
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
import numpy
import joblib
import yaml
try:
import csiborgtools
except ModuleNotFoundError:
@ -65,6 +67,7 @@ tpcf = csiborgtools.clustering.Mock2PCF()
# Analysis #
###############################################################################
def read_single(selection, cat):
"""Positions for single catalogue auto-correlation."""
mmask = numpy.ones(len(cat), dtype=bool)
@ -99,10 +102,11 @@ def read_single(selection, cat):
return pos[smask, ...]
def do_auto(run, cat, ic):
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -112,12 +116,11 @@ def do_auto(run, cat, ic):
nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
joblib.dump({"rp": rp, "wp": wp}, fout.format(str(ic).zfill(5), run))
joblib.dump({"rp": rp, "wp": wp}, paths.tpcfauto_path(run, ic))
def do_runs(ic):
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax,
min_mass=minmass)
cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
for run in args.runs:
do_auto(run, cat, ic)
@ -143,4 +146,4 @@ comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script
quit() # Force quit the script
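`Mock2PCF` is used here as a black box; for intuition, a self-contained pair-count estimator in the same spirit (natural/Peebles-Hauser estimator — the package may well use a different one, e.g. Landy-Szalay):

```python
import numpy
from scipy.spatial import cKDTree

def paircount(pos, bins):
    # Cumulative pair counts at each bin edge; diff gives per-bin counts.
    tree = cKDTree(pos)
    return numpy.diff(tree.count_neighbors(tree, bins))

rng = numpy.random.default_rng(1)
data = rng.uniform(0, 100, size=(2000, 3))
rand = rng.uniform(0, 100, size=(10000, 3))
bins = numpy.logspace(0, 1.5, 15)

dd, rr = paircount(data, bins), paircount(rand, bins)
xi = dd / rr * (len(rand) / len(data))**2 - 1  # ~0 for uniform "data"
```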

View File

@ -16,17 +16,20 @@
MPI script to evaluate field properties at the galaxy positions.
"""
from argparse import ArgumentParser
from os.path import join
from os import remove
from datetime import datetime
from os import remove
from os.path import join
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/"
@ -61,16 +64,16 @@ dtype = {"names": ["delta", "phi"], "formats": [numpy.float32] * 2}
# CSiBORG simulation paths
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
ics = paths.ic_ids(tonew=False)
ics = paths.get_ics(tonew=False)
nsims = len(ics)
for n in csiborgtools.fits.split_jobs(nsims, nproc)[rank]:
for n in csiborgtools.utils.split_jobs(nsims, nproc)[rank]:
print("Rank {}@{}: working on {}th IC.".format(rank, datetime.now(), n),
flush=True)
nsim = ics[n]
nsnap = max(paths.get_snapshots(nsim))
reader = csiborgtools.read.ParticleReader(paths)
box = csiborgtools.units.BoxUnits(nsnap, nsim, paths)
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
# Read particles and select a subset of them
particles = reader.read_particle(nsnap, nsim, ["x", "y", "z", "M"],
@ -121,4 +124,4 @@ if rank == 0:
print("Saving results to `{}`.".format(fperm), flush=True)
with open(fperm, "wb") as f:
numpy.save(f, out)
numpy.save(f, out)
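The heavy lifting (building the density and potential grids) happens in `csiborgtools.field`; evaluating the result at galaxy positions then reduces to indexing the grid. A toy nearest-grid-point version, with the box size an assumption (CSiBORG-like, in Mpc/h) rather than read from the box units:

```python
import numpy

boxsize, ngrid = 677.7, 256                     # assumed box [Mpc/h] and grid
field = numpy.random.rand(ngrid, ngrid, ngrid)  # stand-in for delta/phi
pos = numpy.random.uniform(0, boxsize, size=(100, 3))

cells = numpy.floor(pos / boxsize * ngrid).astype(int) % ngrid
values = field[cells[:, 0], cells[:, 1], cells[:, 2]]
```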

View File

@ -13,17 +13,20 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate overlap between two CSiBORG realisations."""
from os.path import join
from argparse import ArgumentParser
from datetime import datetime
from os.path import join
import numpy
from scipy.ndimage import gaussian_filter
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Argument parser
@ -44,18 +47,19 @@ overlapper = csiborgtools.match.ParticleOverlap()
# Load catalogues
print("{}: loading catalogues {} and {}."
.format(datetime.now(), args.nsim0, args.nsimx), flush=True)
cat0 = csiborgtools.read.HaloCatalogue(args.nsim0, paths)
catx = csiborgtools.read.HaloCatalogue(args.nsimx, paths)
cat0 = csiborgtools.read.ClumpsCatalogue(args.nsim0, paths)
catx = csiborgtools.read.ClumpsCatalogue(args.nsimx, paths)
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsim0), flush=True)
with open(paths.clump0_path(args.nsim0), "rb") as f:
with open(paths.initmatch_path(args.nsim0, "particles"), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumps0)
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsimx), flush=True)
with open(paths.clump0_path(args.nsimx), 'rb') as f:
with open(paths.initmatch_path(args.nsimx, "particles"), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumpsx)
@ -87,4 +91,4 @@ with open(fout, "wb") as f:
numpy.savez(fout, ref_indxs=ref_indxs, cross_indxs=cross_indxs,
match_indxs=match_indxs, ngp_overlap=ngp_overlap,
smoothed_overlap=smoothed_overlap, sigma=args.sigma)
print("All finished.", flush=True)
print("All finished.", flush=True)

View File

@ -16,17 +16,17 @@
A script to fit halos (concentration, ...). The particle array of each CSiBORG
realisation must have been split in advance by `runsplit_halos`.
"""
from os.path import join
from datetime import datetime
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Get MPI things
@ -35,8 +35,8 @@ rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/"
loaddir = join(dumpdir, "temp")
partreader = csiborgtools.read.ParticleReader(paths)
cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64),
("Rs", numpy.float64), ("vx", numpy.float64),
("vy", numpy.float64), ("vz", numpy.float64),
@ -47,14 +47,48 @@ cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64),
("r500", numpy.float64), ("m200", numpy.float64),
("m500", numpy.float64), ("lambda200c", numpy.float64)]
def fit_clump(particles, clump, box):
for i, nsim in enumerate(paths.ic_ids(tonew=False)):
out["npart"][n] = clump.Npart
out["rmin"][n] = clump.rmin
out["rmax"][n] = clump.rmax
out["totpartmass"][n] = clump.total_particle_mass
out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m)
out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m)
out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m)
out["Lx"][n], out["Ly"][n], out["Lz"][n] = clump.angular_momentum
for i, nsim in enumerate(paths.get_ics(tonew=False)):
if rank == 0:
print("{}: calculating {}th simulation.".format(datetime.now(), i))
print("{}: calculating {}th simulation `{}`."
.format(datetime.now(), i, nsim), flush=True)
nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.units.BoxUnits(nsnap, nsim, paths)
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
jobs = csiborgtools.fits.split_jobs(utils.Nsplits, nproc)[rank]
# Archive of clumps, keywords are their clump IDs
particle_archive = paths.split_path(nsnap, nsim)
clumpsarr = partreader.read_clumps(nsnap, nsim,
cols=["index", 'x', 'y', 'z'])
clumpid2arrpos = {ind: ii for ii, ind in enumerate(clumpsarr["index"])}
nclumps = len(particle_archive.files)
# Fit 5000 clumps at a time, then dump results
batchsize = 5000
# This rank does these `batchsize` clumps/halos
jobs = csiborgtools.utils.split_jobs(nclumps, nclumps // batchsize)[rank]
for clumpid in jobs:
... = fit_clump(particle_archive[str(clumpid)], clumpsarr[clumpid2arrpos[clumpid]])
jobs = csiborgtools.utils.split_jobs(nclumps, nproc)[rank]
for nsplit in jobs:
parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
nsplit, nsnap, nsim, paths, remove_split=False)
@ -111,18 +145,18 @@ for i, nsim in enumerate(paths.ic_ids(tonew=False)):
# Wait until all jobs finished before moving to another simulation
comm.Barrier()
# Use the rank 0 to combine outputs for this CSiBORG realisation
if rank == 0:
print("Collecting results!")
partreader = csiborgtools.read.ParticleReader(paths)
out_collected = csiborgtools.read.combine_splits(
utils.Nsplits, nsnap, nsim, partreader, cols_collect,
remove_splits=True, verbose=False)
fname = paths.hcat_path(nsim)
print("Saving results to `{}`.".format(fname))
numpy.save(fname, out_collected)
comm.Barrier()
if rank == 0:
print("All finished! See ya!")
# # Use the rank 0 to combine outputs for this CSiBORG realisation
# if rank == 0:
# print("Collecting results!")
# partreader = csiborgtools.read.ParticleReader(paths)
# out_collected = csiborgtools.read.combine_splits(
# utils.Nsplits, nsnap, nsim, partreader, cols_collect,
# remove_splits=True, verbose=False)
# fname = paths.hcat_path(nsim)
# print("Saving results to `{}`.".format(fname))
# numpy.save(fname, out_collected)
#
# comm.Barrier()
#
# if rank == 0:
# print("All finished! See ya!")

View File

@ -19,14 +19,16 @@ are grouped in a clump at present redshift.
Optionally also dumps the clump information; watch out, as this will
eat up a lot of memory.
"""
from gc import collect
from os.path import join
from os import remove
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
from gc import collect
from os import remove
from os.path import join
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
@ -45,12 +47,10 @@ parser.add_argument("--dump_clumps", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
nsims = paths.ic_ids(tonew=True)
nsims = paths.get_ics(tonew=True)
# Output files
ftemp = join(paths.dumpdir, "temp_initmatch", "temp_{}_{}_{}.npy")
fpermcm = join(paths.dumpdir, "initmatch", "clump_{}_cm.npy")
fpermpart = join(paths.dumpdir, "initmatch", "clump_{}_particles.npy")
# Temporary output file
ftemp = join(paths.dumpdir, "temp", "initmatch_{}_{}_{}.npy")
for nsim in nsims:
if rank == 0:
@ -87,7 +87,7 @@ for nsim in nsims:
unique_clumpids = numpy.unique(clump_ids)
njobs = unique_clumpids.size
jobs = csiborgtools.fits.split_jobs(njobs, nproc)[rank]
jobs = csiborgtools.utils.split_jobs(njobs, nproc)[rank]
for i in jobs:
n = unique_clumpids[i]
x0 = part0[clump_ids == n]
@ -139,8 +139,8 @@ for nsim in nsims:
out["ID"][i] = n
print("{}: dumping to .. `{}`.".format(
datetime.now(), fpermcm.format(nsim)), flush=True)
with open(fpermcm.format(nsim), 'wb') as f:
datetime.now(), paths.initmatch_path(nsim, "cm")), flush=True)
with open(paths.initmatch_path(nsim, "cm"), 'wb') as f:
numpy.save(f, out)
if args.dump_clumps:
@ -157,10 +157,12 @@ for nsim in nsims:
out["clump"][i] = fin
out["ID"][i] = n
remove(fpath)
print("{}: dumping to .. `{}`.".format(
datetime.now(), fpermpart.format(nsim)), flush=True)
with open(fpermpart.format(nsim), "wb") as f:
fout = paths.initmatch_path(nsim, "particles")
print("{}: dumping to .. `{}`.".format(datetime.now(), fout),
flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)
del out
collect()
collect()

64
scripts/pre_mmain.py Normal file
View File

@ -0,0 +1,64 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to generate the mmain files, i.e. sum up the substructure of children.
"""
from datetime import datetime
import numpy
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
mmain_reader = csiborgtools.read.MmainReader(paths)
def do_mmain(nsim):
nsnap = max(paths.get_snapshots(nsim))
# NOTE: currently works for highest snapshot anyway
mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
numpy.savez(paths.mmain_path(nsnap, nsim),
mmain=mmain, ultimate_parent=ultimate_parent)
###############################################################################
# MPI task delegation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = list(paths.get_ics(tonew=False))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_mmain, comm, verbose=False)
else:
tasks = paths.get_ics(tonew=False)
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_mmain(task)
comm.Barrier()
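The core of `make_mmain` — summing substructure onto ultimate parents — reduces to a grouped sum once every clump knows its ultimate parent. A toy illustration of that reduction (arrays hypothetical; the reader derives them from the clump catalogue):

```python
import numpy

index = numpy.array([1, 2, 3, 4, 5])    # clump IDs
parent = numpy.array([1, 1, 3, 3, 3])   # ultimate parent of each clump
mass = numpy.array([5., 1., 7., 2., 1.])

parents, inv = numpy.unique(parent, return_inverse=True)
summed = numpy.zeros(parents.size)
numpy.add.at(summed, inv, mass)         # sum children onto their parents
print(dict(zip(parents, summed)))       # {1: 6.0, 3: 10.0}
```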

115
scripts/pre_splithalos.py Normal file
View File

@ -0,0 +1,115 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Script to split particles to indivudual files according to their clump."""
from datetime import datetime
from gc import collect
from glob import glob
from os import remove
from os.path import join
import numpy
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
verbose = nproc == 1
partcols = ['x', 'y', 'z', "vx", "vy", "vz", 'M']
def do_split(nsim):
nsnap = max(paths.get_snapshots(nsim))
reader = csiborgtools.read.ParticleReader(paths)
ftemp_base = join(
paths.temp_dumpdir,
"split_{}_{}".format(str(nsim).zfill(5), str(nsnap).zfill(5))
)
ftemp = ftemp_base + "_{}.npz"
# Load the particles and their clump IDs
particles = reader.read_particle(nsnap, nsim, partcols, verbose=verbose)
particle_clumps = reader.read_clumpid(nsnap, nsim, verbose=verbose)
# Drop all particles whose clump index is 0 (not assigned to any clump)
assigned_mask = particle_clumps != 0
particle_clumps = particle_clumps[assigned_mask]
particles = particles[assigned_mask]
del assigned_mask
collect()
# Load the clump indices
clumpinds = reader.read_clumps(nsnap, nsim, cols="index")["index"]
# Some of the clumps have no particles, so we do not loop over them
clumpinds = clumpinds[numpy.isin(clumpinds, particle_clumps)]
# Loop over the clump indices and save the particles to a temporary file
# every 10000 clumps. We will later read this back and combine into a
# single file.
out = {}
for i, clind in enumerate(tqdm(clumpinds) if verbose else clumpinds):
key = str(clind)
out.update({key: particles[particle_clumps == clind]})
# REMOVE bump this back up
if i % 10000 == 0 or i == clumpinds.size - 1:
numpy.savez(ftemp.format(i), **out)
out = {}
# Clear up memory because we will be loading everything back
del particles, particle_clumps, clumpinds
collect()
# Now load back in every temporary file, combine them into a single
# dictionary and save as a single .npz file.
out = {}
for file in glob(ftemp_base + '*'):
inp = numpy.load(file)
for key in inp.files:
out.update({key: inp[key]})
remove(file)
numpy.savez(paths.split_path(nsnap, nsim), **out)
###############################################################################
# MPI task delegation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = list(paths.get_ics(tonew=False))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_split, comm, verbose=False)
else:
tasks = paths.get_ics(tonew=False)
tasks = [tasks[0]] # REMOVE
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_split(task)
comm.Barrier()
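The per-clump boolean mask in `do_split` costs O(nclumps x nparticles); if that ever becomes the bottleneck, a single argsort gives the same grouping in one pass. A sketch producing the same output layout as the script's `out` dict, under that assumption:

```python
import numpy

clump_ids = numpy.random.randint(1, 50, size=100_000)
particles = numpy.random.rand(100_000, 7)

order = numpy.argsort(clump_ids)
sorted_ids = clump_ids[order]
uniq, starts = numpy.unique(sorted_ids, return_index=True)
chunks = numpy.split(particles[order], starts[1:])  # one block per clump
out = {str(u): chunk for u, chunk in zip(uniq, chunks)}
```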

View File

@ -1,58 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to split particles into smaller files according to their clump
membership for faster manipulation. Currently does this for the maximum
snapshot of each simulation. Running this requires a lot of memory.
"""
from mpi4py import MPI
from datetime import datetime
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
sims = paths.ic_ids(False)
partcols = ["x", "y", "z", "vx", "vy", "vz", "M", "level"]
jobs = csiborgtools.fits.split_jobs(len(sims), nproc)[rank]
for icount, sim_index in enumerate(jobs):
print("{}: rank {} working {} / {} jobs."
.format(datetime.now(), rank, icount + 1, len(jobs)), flush=True)
nsim = sims[sim_index]
nsnap = max(paths.get_snapshots(nsim))
partreader = csiborgtools.read.ParticleReader(paths)
# Load the clumps, particles' clump IDs and particles.
clumps = partreader.read_clumps(nsnap, nsim)
particle_clumps = partreader.read_clumpid(nsnap, nsim, verbose=False)
particles = partreader.read_particle(nsnap, nsim, partcols, verbose=False)
# Drop all particles whose clump index is 0 (not assigned to any halo)
particle_clumps, particles = partreader.drop_zero_indx(
particle_clumps, particles)
# Dump it!
csiborgtools.fits.dump_split_particles(particles, particle_clumps, clumps,
utils.Nsplits, nsnap, nsim, paths,
verbose=False)
print("All finished!", flush=True)