diff --git a/csiborgtools/clustering/knn.py b/csiborgtools/clustering/knn.py
index 8ae9a18..ef77284 100644
--- a/csiborgtools/clustering/knn.py
+++ b/csiborgtools/clustering/knn.py
@@ -241,7 +241,7 @@ class kNN_1DCDF:
     def __call__(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
                  batch_size=None, random_state=42, dtype=numpy.float32):
         """
-        Calculate the CDF for a set of kNNs of CSiBORG halo catalogues.
+        Calculate the CDF for a set of kNNs of halo catalogues.
 
         Parameters
         ----------
diff --git a/csiborgtools/match/__init__.py b/csiborgtools/match/__init__.py
index 27478de..1f65bca 100644
--- a/csiborgtools/match/__init__.py
+++ b/csiborgtools/match/__init__.py
@@ -15,5 +15,6 @@
 from .match import (ParticleOverlap, RealisationsMatcher,  # noqa
                     calculate_overlap, calculate_overlap_indxs,
                     cosine_similarity)
+from .nearest_neighbour import find_neighbour  # noqa
 from .num_density import binned_counts, number_density  # noqa
 from .utils import concatenate_parts  # noqa
diff --git a/csiborgtools/match/nearest_neighbour.py b/csiborgtools/match/nearest_neighbour.py
new file mode 100644
index 0000000..edf677c
--- /dev/null
+++ b/csiborgtools/match/nearest_neighbour.py
@@ -0,0 +1,56 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Tools for finding the nearest neighbours of reference simulation haloes from
+cross simulations.
+"""
+import numpy
+
+
+def find_neighbour(nsim0, cats):
+    """
+    Find the nearest neighbour of halos in `cat0` in `catx`.
+
+    Parameters
+    ----------
+    nsim0 : int
+        Index of the reference simulation.
+    cats : dict
+        Dictionary of halo catalogues. Keys must be the simulation indices.
+
+    Returns
+    -------
+    dists : 2-dimensional array of shape `(nhalos, len(cats) - 1)`
+        Distances to the nearest neighbour.
+    cross_hindxs : 2-dimensional array of shape `(nhalos, len(cats) - 1)`
+        Halo indices of the nearest neighbour.
+    """
+    cat0 = cats[nsim0]
+    X = cat0.position(in_initial=False)
+    shape = (X.shape[0], len(cats) - 1)
+    dists = numpy.full(shape, numpy.nan, dtype=numpy.float32)
+    cross_hindxs = numpy.full(shape, numpy.nan, dtype=numpy.int32)
+
+    i = 0
+    for nsimx, catx in cats.items():
+        if nsimx == nsim0:
+            continue
+        dist, ind = catx.nearest_neighbours(X, radius=1, in_initial=False,
+                                            knearest=True)
+        dists[:, i] = dist.reshape(-1,)
+        cross_hindxs[:, i] = catx["index"][ind.reshape(-1,)]
+        i += 1
+
+    return dists, cross_hindxs
diff --git a/csiborgtools/read/__init__.py b/csiborgtools/read/__init__.py
index 4e2af73..c3f7696 100644
--- a/csiborgtools/read/__init__.py
+++ b/csiborgtools/read/__init__.py
@@ -16,6 +16,7 @@ from .box_units import CSiBORGBox, QuijoteBox  # noqa
 from .halo_cat import (ClumpsCatalogue, HaloCatalogue,  # noqa
                        QuijoteHaloCatalogue, fiducial_observers)
 from .knn_summary import kNNCDFReader  # noqa
+from .nearest_neighbour_summary import NearestNeighbourReader  # noqa
 from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies,  # noqa
                   TwoMPPGroups)
 from .overlap_summary import (NPairsOverlap, PairOverlap,  # noqa
diff --git a/csiborgtools/read/box_units.py b/csiborgtools/read/box_units.py
index c0c1dff..4dad8a2 100644
--- a/csiborgtools/read/box_units.py
+++ b/csiborgtools/read/box_units.py
@@ -15,7 +15,7 @@
 """
 Simulation box unit transformations.
 """
-from abc import ABC
+from abc import ABC, abstractproperty
 
 import numpy
 from astropy import constants, units
@@ -93,6 +93,17 @@ class BaseBox(ABC):
         """
         return self.cosmo.Om0
 
+    @abstractproperty
+    def boxsize(self):
+        """
+        Box size in cMpc.
+
+        Returns
+        -------
+        boxsize : float
+        """
+        pass
+
 
 ###############################################################################
 #                              CSiBORG box                                    #
@@ -383,6 +394,10 @@ class CSiBORGBox(BaseBox):
 
         return data
 
+    @property
+    def boxsize(self):
+        return self.box2mpc(1.)
+
 
 ###############################################################################
 #                      Quijote fiducial cosmology box                         #
@@ -408,3 +423,7 @@ class QuijoteBox(BaseBox):
 
         self._cosmo = LambdaCDM(H0=67.11, Om0=0.3175, Ode0=0.6825,
                                 Tcmb0=2.725 * units.K, Ob0=0.049)
+
+    @property
+    def boxsize(self):
+        return 1000. / (self._cosmo.H0.value / 100)
diff --git a/csiborgtools/read/halo_cat.py b/csiborgtools/read/halo_cat.py
index 54732d2..4440902 100644
--- a/csiborgtools/read/halo_cat.py
+++ b/csiborgtools/read/halo_cat.py
@@ -439,11 +439,11 @@ class ClumpsCatalogue(BaseCSiBORG):
         cols = ["index", "parent", "x", "y", "z", "mass_cl"]
         self._data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols)
         # Overwrite the parent with the ultimate parent
-        mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
+        mmain = numpy.load(self.paths.mmain(self.nsnap, self.nsim))
         self._data["parent"] = mmain["ultimate_parent"]
 
         if load_fitted:
-            fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "clumps"))
+            fits = numpy.load(paths.structfit(self.nsnap, nsim, "clumps"))
             cols = [col for col in fits.dtype.names if col != "index"]
             X = [fits[col] for col in cols]
             self._data = add_columns(self._data, X, cols)
@@ -512,20 +512,20 @@ class HaloCatalogue(BaseCSiBORG):
         self.nsim = nsim
         self.paths = paths
         # Read in the mmain catalogue of summed substructure
-        mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
+        mmain = numpy.load(self.paths.mmain(self.nsnap, self.nsim))
         self._data = mmain["mmain"]
         # We will also need the clumps catalogue
         if load_clumps_cat:
             self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True,
                                                load_fitted=False)
         if load_fitted:
-            fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos"))
+            fits = numpy.load(paths.structfit(self.nsnap, nsim, "halos"))
             cols = [col for col in fits.dtype.names if col != "index"]
             X = [fits[col] for col in cols]
             self._data = add_columns(self._data, X, cols)
 
         if load_initial:
-            fits = numpy.load(paths.initmatch_path(nsim, "fit"))
+            fits = numpy.load(paths.initmatch(nsim, "fit"))
             X, cols = [], []
             for col in fits.dtype.names:
                 if col == "index":
@@ -590,8 +590,7 @@ class QuijoteHaloCatalogue(BaseCatalogue):
         Snapshot index.
     origin : len-3 tuple, optional
         Where to place the origin of the box. By default the centre of the box.
-        In units of :math:`cMpc`. Optionally can be an integer between 0 and 8,
-        inclusive to correspond to CSiBORG boxes.
+        In units of :math:`cMpc`.
     bounds : dict
         Parameter bounds to apply to the catalogue. The keys are the parameter
         names and the items are a len-2 tuple of (min, max) values. In case of
@@ -601,12 +600,16 @@ class QuijoteHaloCatalogue(BaseCatalogue):
         Keyword arguments for backward compatibility.
     """
     _nsnap = None
+    _origin = None
 
     def __init__(self, nsim, paths, nsnap,
                  origin=[500 / 0.6711, 500 / 0.6711, 500 / 0.6711],
                  bounds=None, **kwargs):
         self.paths = paths
         self.nsnap = nsnap
+        self.origin = origin
+        self._boxwidth = 1000 / 0.6711
+
         fpath = join(self.paths.quijote_dir, "halos", str(nsim))
         fof = FoF_catalog(fpath, self.nsnap, long_ids=False, swap=False,
                           SFR=False, read_IDs=False)
@@ -614,21 +617,18 @@ class QuijoteHaloCatalogue(BaseCatalogue):
         cols = [("x", numpy.float32), ("y", numpy.float32),
                 ("z", numpy.float32), ("vx", numpy.float32),
                 ("vy", numpy.float32), ("vz", numpy.float32),
-                ("group_mass", numpy.float32), ("npart", numpy.int32)]
+                ("group_mass", numpy.float32), ("npart", numpy.int32),
+                ("index", numpy.int32)]
         data = cols_to_structured(fof.GroupLen.size, cols)
 
-        if isinstance(origin, int):
-            origin = fiducial_observers(1000 / 0.6711, 155.5 / 0.6711)[origin]
-
         pos = fof.GroupPos / 1e3 / self.box.h
-        for i in range(3):
-            pos[:, i] -= origin[i]
         vel = fof.GroupVel * (1 + self.redshift)
         for i, p in enumerate(["x", "y", "z"]):
-            data[p] = pos[:, i]
+            data[p] = pos[:, i] - self.origin[i]
             data["v" + p] = vel[:, i]
         data["group_mass"] = fof.GroupMass * 1e10 / self.box.h
         data["npart"] = fof.GroupLen
+        data["index"] = numpy.arange(data.size, dtype=numpy.int32)
 
         self._data = data
         if bounds is not None:
@@ -673,6 +673,53 @@ class QuijoteHaloCatalogue(BaseCatalogue):
         """
         return QuijoteBox(self.nsnap)
 
+    @property
+    def origin(self):
+        """
+        Origin of the box with respect to the initial box units.
+
+        Returns
+        -------
+        origin : len-3 tuple
+        """
+        if self._origin is None:
+            raise ValueError("`origin` is not set.")
+        return self._origin
+
+    @origin.setter
+    def origin(self, origin):
+        if isinstance(origin, (list, tuple)):
+            origin = numpy.asanyarray(origin)
+        assert origin.ndim == 1 and origin.size == 3
+        self._origin = origin
+
+    def pick_fiducial_observer(self, n, rmax):
+        r"""
+        Return a copy of itself, storing only halos within `rmax` of the new
+        fiducial observer.
+
+        Parameters
+        ----------
+        n : int
+            Fiducial observer index.
+        rmax : float
+            Maximum distance from the fiducial observer in :math:`cMpc`.
+
+        Returns
+        -------
+        cat : instance of csiborgtools.read.QuijoteHaloCatalogue
+        """
+        new_origin = fiducial_observers(self.box.boxsize, rmax)[n]
+        # We make a copy of the catalogue to avoid modifying the original.
+        # Then, we shift coordinates back to the original box frame and then to
+        # the new origin.
+        cat = deepcopy(self)
+        for i, p in enumerate(('x', 'y', 'z')):
+            cat._data[p] += self.origin[i]
+            cat._data[p] -= new_origin[i]
+
+        cat.apply_bounds({"dist": (0, rmax)})
+        return cat
 
 ###############################################################################
 #                     Utility functions for halo catalogues                   #
diff --git a/csiborgtools/read/knn_summary.py b/csiborgtools/read/knn_summary.py
index 07b1e11..4dcc77e 100644
--- a/csiborgtools/read/knn_summary.py
+++ b/csiborgtools/read/knn_summary.py
@@ -46,13 +46,15 @@ class kNNCDFReader:
     def paths(self, paths):
         self._paths = paths
 
-    def read(self, run, kind, rmin=None, rmax=None, to_clip=True):
+    def read(self, simname, run, kind, rmin=None, rmax=None, to_clip=True):
         """
         Read the auto- or cross-correlation kNN-CDF data. Infers the type from
         the data files.
 
         Parameters
         ----------
+        simname : str
+            Simulation name. Must be either `csiborg` or `quijote`.
         run : str
             Run ID to read in.
         kind : str
@@ -71,14 +73,17 @@ class kNNCDFReader:
             Separations where the CDF is evaluated.
         out : 3-dimensional array of shape `(len(files), len(ks), neval)`
             Array of CDFs or cross-correlations.
+        ndensity : 1-dimensional array of shape `(len(files), )`
+            Number density of halos in the simulation.
         """
         assert kind in ["auto", "cross"]
+        assert simname in ["csiborg", "quijote"]
         if kind == "auto":
-            files = self.paths.knnauto_path(run)
+            files = self.paths.knnauto(simname, run)
         else:
-            files = self.paths.knncross_path(run)
+            files = self.paths.knncross(simname, run)
         if len(files) == 0:
-            raise RuntimeError("No files found for run `{}`.".format(run))
+            raise RuntimeError(f"No files found for run `{run}`.")
 
         for i, file in enumerate(files):
             data = joblib.load(file)
@@ -91,8 +96,11 @@ class kNNCDFReader:
                     isauto = True
                 out = numpy.full((len(files), *data[kind].shape), numpy.nan,
                                  dtype=numpy.float32)
+                ndensity = numpy.full(len(files), numpy.nan,
+                                      dtype=numpy.float32)
                 rs = data["rs"]
             out[i, ...] = data[kind]
+            ndensity[i] = data["ndensity"]
 
             if isauto and to_clip:
                 out[i, ...] = self.clipped_cdf(out[i, ...])
@@ -103,7 +111,7 @@ class kNNCDFReader:
         rs = rs[mask]
         out = out[..., mask]
 
-        return rs, out
+        return rs, out, ndensity
 
     @staticmethod
     def peaked_cdf(cdf, make_copy=True):
@@ -191,6 +199,7 @@ class kNNCDFReader:
         ----------
         cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
             Array of CDFs
+
         Returns
         -------
         out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
@@ -212,16 +221,33 @@ class kNNCDFReader:
         ----------
         rs : 1-dimensional array
             Array of separations.
-        k : int
+        k : int or 1-dimensional array
             Number of objects.
-        ndensity : float
+        ndensity : float or 1-dimensional array
             Number density of objects.
 
         Returns
         -------
-        pk : 1-dimensional array
+        pk : 1-dimensional array or 3-dimensional array
             The PDF that a spherical volume of radius :math:`r` contains
-            :math:`k` objects.
+            :math:`k` objects. If `k` and `ndensity` are both arrays, the shape
+            is `(len(ndensity), len(k), len(rs))`.
         """
         V = 4 * numpy.pi / 3 * rs**3
-        return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
+
+        def probk(k, ndensity):
+            return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
+
+        if isinstance(k, int) and isinstance(ndensity, float):
+            return probk(k, ndensity)
+
+        # If either k or ndensity is an array, make sure the other is too.
+        assert isinstance(k, numpy.ndarray) and k.ndim == 1
+        assert isinstance(ndensity, numpy.ndarray) and ndensity.ndim == 1
+
+        out = numpy.full((ndensity.size, k.size, rs.size), numpy.nan,
+                         dtype=numpy.float32)
+        for i in range(ndensity.size):
+            for j in range(k.size):
+                out[i, j, :] = probk(k[j], ndensity[i])
+        return out
diff --git a/csiborgtools/read/nearest_neighbour_summary.py b/csiborgtools/read/nearest_neighbour_summary.py
new file mode 100644
index 0000000..9ae442b
--- /dev/null
+++ b/csiborgtools/read/nearest_neighbour_summary.py
@@ -0,0 +1,287 @@
+# Copyright (C) 2023 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Nearest neighbour summary for assessing goodness-of-reconstruction of a halo in
+the final snapshot.
+"""
+from math import floor
+
+import numpy
+from numba import jit
+from tqdm import tqdm
+
+
+class NearestNeighbourReader:
+    """
+    Shortcut object to read in nearest neighbour data for assessing the
+    goodness-of-reconstruction of a halo in the final snapshot.
+
+    Parameters
+    ----------
+    rmax_radial : float
+        Radius of the high-resolution region.
+    paths : py:class`csiborgtools.read.Paths`
+        Paths object.
+
+    TODO: docs
+    """
+    _paths = None
+    _rmax_radial = None
+    _nbins_radial = None
+    _rmax_neighbour = None
+    _nbins_neighbour = None
+
+    def __init__(self, rmax_radial, nbins_radial, rmax_neighbour,
+                 nbins_neighbour, paths, **kwargs):
+        self.paths = paths
+        self.rmax_radial = rmax_radial
+        self.nbins_radial = nbins_radial
+        self.rmax_neighbour = rmax_neighbour
+        self.nbins_neighbour = nbins_neighbour
+
+    @property
+    def rmax_radial_radial(self):
+        """
+        Radius of the high-resolution region.
+
+        Parameters
+        ----------
+        rmax_radial_radial : float
+        """
+        return self._rmax_radial_radial
+
+    @rmax_radial_radial.setter
+    def rmax_radial_radial(self, rmax_radial_radial):
+        assert isinstance(rmax_radial_radial, float)
+        self._rmax_radial_radial = rmax_radial_radial
+
+    @property
+    def paths(self):
+        """
+        Paths manager.
+
+        Parameters
+        ----------
+        paths : py:class`csiborgtools.read.Paths`
+        """
+        return self._paths
+
+    @property
+    def nbins_radial(self):
+        """
+        Number radial of bins.
+
+        Returns
+        -------
+        nbins_radial : int
+        """
+        return self._nbins_radial
+
+    @nbins_radial.setter
+    def nbins_radial(self, nbins_radial):
+        assert isinstance(nbins_radial, int)
+        self._nbins_radial = nbins_radial
+
+    @property
+    def nbins_neighbour(self):
+        """
+        Number of neighbour bins.
+
+        Returns
+        -------
+        nbins_neighbour : int
+        """
+        return self._nbins_neighbour
+
+    @nbins_neighbour.setter
+    def nbins_neighbour(self, nbins_neighbour):
+        assert isinstance(nbins_neighbour, int)
+        self._nbins_neighbour = nbins_neighbour
+
+    @property
+    def rmax_neighbour(self):
+        """
+        Maximum neighbour distance.
+
+        Returns
+        -------
+        rmax_neighbour : float
+        """
+        return self._rmax_neighbour
+
+    @rmax_neighbour.setter
+    def rmax_neighbour(self, rmax_neighbour):
+        assert isinstance(rmax_neighbour, float)
+        self._rmax_neighbour = rmax_neighbour
+
+    @paths.setter
+    def paths(self, paths):
+        self._paths = paths
+
+    @property
+    def radial_bin_edges(self):
+        """
+        Radial bins.
+
+        Returns
+        -------
+        radial_bins : 1-dimensional array
+        """
+        nbins = self.nbins_radial + 1
+        return self.rmax_radial * numpy.linspace(0, 1, nbins)**(1./3)
+
+    @property
+    def neighbour_bin_edges(self):
+        """
+        Neighbour bins edges
+
+        Returns
+        -------
+        neighbour_bins : 1-dimensional array
+        """
+        nbins = self.nbins_neighbour + 1
+        return numpy.linspace(0, self.rmax_neighbour, nbins)
+
+    def bin_centres(self, kind):
+        """
+        Bin centres. Either for `radial` or `neighbour` bins.
+
+        Parameters
+        ----------
+        kind : str
+            Bin kind. Either `radial` or `neighbour`.
+
+        Returns
+        -------
+        bin_centres : 1-dimensional array
+        """
+        assert kind in ["radial", "neighbour"]
+        if kind == "radial":
+            edges = self.radial_bin_edges
+        else:
+            edges = self.neighbour_bin_edges
+        return 0.5 * (edges[1:] + edges[:-1])
+
+    def read_single(self, simname, run, nsim, nobs=None):
+        """
+        Read in the nearest neighbour distances for halos from a single
+        simulation.
+
+        Parameters
+        ----------
+        simname : str
+            Simulation name. Must be either `csiborg` or `quijote`.
+        run : str
+            Run name.
+        nsim : int
+            Simulation index.
+        nobs : int, optional
+            Fiducial Quijote observer index.
+
+        Returns
+        -------
+        data : numpy archive
+            Archive with keys `ndist`, `rdist`, `mass`, `cross_hindxs``
+        """
+        assert simname in ["csiborg", "quijote"]
+        fpath = self.paths.cross_nearest(simname, run, nsim, nobs)
+        return numpy.load(fpath)
+
+    def build_cdf(self, simname, run, verbose=True):
+        """
+        Build the CDF for the nearest neighbour distribution. Counts the binned
+        number of neighbour for each halo as a funtion of its radial distance
+        from the centre of the high-resolution region and converts it to a CDF.
+
+        Parameters
+        ----------
+        simname : str
+            Simulation name. Must be either `csiborg` or `quijote`.
+        run : str
+            Run name.
+        verbose : bool, optional
+            Verbosity flag.
+
+        Returns
+        -------
+        cdf : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
+        """
+        assert simname in ["csiborg", "quijote"]
+        rbin_edges = self.radial_bin_edges
+        # We first bin the distances as a function of each reference halo
+        # radial distance and then its nearest neighbour distance.
+        fpaths = self.paths.cross_nearest(simname, run)
+        out = numpy.zeros((self.nbins_radial, self.nbins_neighbour),
+                          dtype=numpy.float32)
+        for fpath in tqdm(fpaths) if verbose else fpaths:
+            data = numpy.load(fpath)
+            out = count_neighbour(
+                out, data["ndist"], data["rdist"], rbin_edges,
+                self.rmax_neighbour, self.nbins_neighbour)
+
+        # We then build up a CDF for each radial bin.
+        out = numpy.cumsum(out, axis=1, out=out)
+        out /= out[:, -1].reshape(-1, 1)
+        return out
+
+
+
+
+###############################################################################
+#                           Support functions                                 #
+###############################################################################
+
+
+@jit(nopython=True)
+def count_neighbour(counts, ndist, rdist, rbin_edges, rmax_neighbour,
+                    nbins_neighbour):
+    """
+    Count the number of neighbour in neighbours bins for each halo as a funtion
+    of its radial distance from the centre of the high-resolution region.
+
+    Parameters
+    ----------
+    counts : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
+        Array to store the counts.
+    ndist : 2-dimensional array of shape `(nhalos, ncross_simulations)`
+        Distance of each halo to its nearest neighbour from a cross simulation.
+    rdist : 1-dimensional array of shape `(nhalos, )`
+        Distance of each halo to the centre of the high-resolution region.
+    rbin_edges : 1-dimensional array of shape `(nbins_radial + 1, )`
+        Edges of the radial bins.
+    rmax_neighbour : float
+        Maximum neighbour distance.
+    nbins_neighbour : int
+        Number of neighbour bins.
+
+    Returns
+    -------
+    counts : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
+    """
+    ncross = ndist.shape[1]
+    # We normalise the neighbour distance by the maximum neighbour distance and
+    # multiply by the number of bins. This way, the floor of each distance is
+    # the bin number.
+    ndist /= rmax_neighbour
+    ndist *= nbins_neighbour
+    # We loop over each halo, assign it to a radial bin and then assign its
+    # neighbours to bins.
+    for i, radial_cell in enumerate(numpy.digitize(rdist, rbin_edges) - 1):
+        for j in range(ncross):
+            neighbour_cell = floor(ndist[i, j])
+            if neighbour_cell < nbins_neighbour:
+                counts[radial_cell, neighbour_cell] += 1
+
+    return counts
diff --git a/csiborgtools/read/overlap_summary.py b/csiborgtools/read/overlap_summary.py
index ddabd37..fb82bde 100644
--- a/csiborgtools/read/overlap_summary.py
+++ b/csiborgtools/read/overlap_summary.py
@@ -71,8 +71,8 @@ class PairOverlap:
 
         # We first load in the output files. We need to find the right
         # combination of the reference and cross simulation.
-        fname = paths.overlap_path(nsim0, nsimx, smoothed=False)
-        fname_inv = paths.overlap_path(nsimx, nsim0, smoothed=False)
+        fname = paths.overlap(nsim0, nsimx, smoothed=False)
+        fname_inv = paths.overlap(nsimx, nsim0, smoothed=False)
         if isfile(fname):
             data_ngp = numpy.load(fname, allow_pickle=True)
             to_invert = False
@@ -83,7 +83,7 @@ class PairOverlap:
         else:
             raise FileNotFoundError(f"No file found for {nsim0} and {nsimx}.")
 
-        fname_smooth = paths.overlap_path(cat0.nsim, catx.nsim, smoothed=True)
+        fname_smooth = paths.overlap(cat0.nsim, catx.nsim, smoothed=True)
         data_smooth = numpy.load(fname_smooth, allow_pickle=True)
 
         # Create mapping from halo indices to array positions in the catalogue.
@@ -628,11 +628,11 @@ def get_cross_sims(nsim0, paths, smoothed):
         Whether to use the smoothed overlap or not.
     """
     nsimxs = []
-    for nsimx in paths.get_ics():
+    for nsimx in paths.get_ics("csiborg"):
         if nsimx == nsim0:
             continue
-        f1 = paths.overlap_path(nsim0, nsimx, smoothed)
-        f2 = paths.overlap_path(nsimx, nsim0, smoothed)
+        f1 = paths.overlap(nsim0, nsimx, smoothed)
+        f2 = paths.overlap(nsimx, nsim0, smoothed)
         if isfile(f1) or isfile(f2):
             nsimxs.append(nsimx)
     return nsimxs
diff --git a/csiborgtools/read/paths.py b/csiborgtools/read/paths.py
index 3f86c03..9ddff66 100644
--- a/csiborgtools/read/paths.py
+++ b/csiborgtools/read/paths.py
@@ -88,13 +88,6 @@ class Paths:
         self._check_directory(path)
         self._quijote_dir = path
 
-    @staticmethod
-    def get_quijote_ics():
-        """
-        Quijote IC realisation IDs.
-        """
-        return numpy.arange(100, dtype=int)
-
     @property
     def postdir(self):
         """
@@ -130,9 +123,32 @@ class Paths:
             warn(f"Created directory `{fpath}`.", UserWarning, stacklevel=1)
         return fpath
 
-    def mmain_path(self, nsnap, nsim):
+    @staticmethod
+    def quijote_fiducial_nsim(nsim, nobs=None):
         """
-        Path to the `mmain` files summed substructure files.
+        Fiducial Quijote simulation ID. Combines the IC realisation and
+        observer placement.
+
+        Parameters
+        ----------
+        nsim : int
+            IC realisation index.
+        nobs : int, optional
+            Fiducial observer index.
+
+        Returns
+        -------
+        id : str
+        """
+        if nobs is None:
+            assert isinstance(nsim, str)
+            assert len(nsim) == 5
+            return nsim
+        return f"{str(nobs).zfill(2)}{str(nsim).zfill(3)}"
+
+    def mmain(self, nsnap, nsim):
+        """
+        Path to the `mmain` CSiBORG files of summed substructure.
 
         Parameters
         ----------
@@ -152,10 +168,10 @@ class Paths:
         return join(fdir,
                     f"mmain_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz")
 
-    def initmatch_path(self, nsim, kind):
+    def initmatch(self, nsim, kind):
         """
-        Path to the `initmatch` files where the clump match between the
-        initial and final snapshot is stored.
+        Path to the `initmatch` files where the halo match between the
+        initial and final snapshot of a CSiBORG realisaiton is stored.
 
         Parameters
         ----------
@@ -176,26 +192,35 @@ class Paths:
             warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
         return join(fdir, f"{kind}_{str(nsim).zfill(5)}.{ftype}")
 
-    def get_ics(self):
+    def get_ics(self, simname):
         """
-        Get CSiBORG IC realisation IDs from the list of folders in
-        `self.srcdir`.
+        Get available IC realisation IDs for either the CSiBORG or Quijote
+        simulation suite.
+
+        Parameters
+        ----------
+        simname : str
+            Simulation name. Must be one of `["csiborg", "quijote"]`.
 
         Returns
         -------
         ids : 1-dimensional array
         """
-        files = glob(join(self.srcdir, "ramses_out*"))
-        files = [f.split("/")[-1] for f in files]      # Select only file names
-        files = [f for f in files if "_inv" not in f]  # Remove inv. ICs
-        files = [f for f in files if "_new" not in f]  # Remove _new
-        files = [f for f in files if "OLD" not in f]   # Remove _old
-        ids = [int(f.split("_")[-1]) for f in files]
-        try:
-            ids.remove(5511)
-        except ValueError:
-            pass
-        return numpy.sort(ids)
+        assert simname in ["csiborg", "quijote"]
+        if simname == "csiborg":
+            files = glob(join(self.srcdir, "ramses_out*"))
+            files = [f.split("/")[-1] for f in files]      # Only file names
+            files = [f for f in files if "_inv" not in f]  # Remove inv. ICs
+            files = [f for f in files if "_new" not in f]  # Remove _new
+            files = [f for f in files if "OLD" not in f]   # Remove _old
+            ids = [int(f.split("_")[-1]) for f in files]
+            try:
+                ids.remove(5511)
+            except ValueError:
+                pass
+            return numpy.sort(ids)
+        else:
+            return numpy.arange(100, dtype=int)
 
     def ic_path(self, nsim, tonew=False):
         """
@@ -239,7 +264,7 @@ class Paths:
         snaps = [int(snap.split("_")[-1].lstrip("0")) for snap in snaps]
         return numpy.sort(snaps)
 
-    def snapshot_path(self, nsnap, nsim):
+    def snapshot(self, nsnap, nsim):
         """
         Path to a CSiBORG IC realisation snapshot.
 
@@ -258,9 +283,10 @@ class Paths:
         simpath = self.ic_path(nsim, tonew=tonew)
         return join(simpath, f"output_{str(nsnap).zfill(5)}")
 
-    def structfit_path(self, nsnap, nsim, kind):
+    def structfit(self, nsnap, nsim, kind):
         """
-        Path to the clump or halo catalogue from `fit_halos.py`.
+        Path to the clump or halo catalogue from `fit_halos.py`. Only CSiBORG
+        is supported.
 
         Parameters
         ----------
@@ -283,9 +309,9 @@ class Paths:
         fname = f"{kind}_out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
         return join(fdir, fname)
 
-    def overlap_path(self, nsim0, nsimx, smoothed):
+    def overlap(self, nsim0, nsimx, smoothed):
         """
-        Path to the overlap files between two simulations.
+        Path to the overlap files between two CSiBORG simulations.
 
         Parameters
         ----------
@@ -309,32 +335,10 @@ class Paths:
             fname = fname.replace("overlap", "overlap_smoothed")
         return join(fdir, fname)
 
-    def radpos_path(self, nsnap, nsim):
+    def particles(self, nsim):
         """
-        Path to the files containing radial positions of halo particles (with
-        summed substructure).
-
-        Parameters
-        ----------
-        nsnap : int
-            Snapshot index.
-        nsim : int
-            IC realisation index.
-
-        Returns
-        -------
-        path : str
-        """
-        fdir = join(self.postdir, "radpos")
-        if not isdir(fdir):
-            mkdir(fdir)
-            warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
-        fname = f"radpos_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz"
-        return join(fdir, fname)
-
-    def particles_path(self, nsim):
-        """
-        Path to the files containing all particles.
+        Path to the files containing all particles of a CSiBORG realisation at
+        :math:`z = 0`.
 
         Parameters
         ----------
@@ -352,9 +356,9 @@ class Paths:
         fname = f"parts_{str(nsim).zfill(5)}.h5"
         return join(fdir, fname)
 
-    def field_path(self, kind, MAS, grid, nsim, in_rsp):
+    def field(self, kind, MAS, grid, nsim, in_rsp):
         """
-        Path to the files containing the calculated density fields.
+        Path to the files containing the calculated density fields in CSiBORG.
 
         Parameters
         ----------
@@ -383,7 +387,43 @@ class Paths:
         fname = f"{kind}_{MAS}_{str(nsim).zfill(5)}_grid{grid}.npy"
         return join(fdir, fname)
 
-    def knnauto_path(self, simname, run, nsim=None, nobs=None):
+    def cross_nearest(self, simname, run, nsim=None, nobs=None):
+        """
+        Path to the files containing distance from a halo in a reference
+        simulation to the nearest halo from a cross simulation.
+
+        Parameters
+        ----------
+        simname : str
+            Simulation name. Must be one of: `csiborg`, `quijote`.
+        run : str
+            Run name.
+        nsim : int, optional
+            IC realisation index.
+        nobs : int, optional
+            Fiducial observer index in Quijote simulations.
+
+        Returns
+        -------
+        path : str
+        """
+        assert simname in ["csiborg", "quijote"]
+        fdir = join(self.postdir, "nearest_neighbour")
+        if not isdir(fdir):
+            makedirs(fdir)
+            warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
+        if nsim is not None:
+            if simname == "csiborg":
+                nsim = str(nsim).zfill(5)
+            else:
+                nsim = self.quijote_fiducial_nsim(nsim, nobs)
+            return join(fdir, f"{simname}_nn_{nsim}_{run}.npz")
+
+        files = glob(join(fdir, f"{simname}_nn_*"))
+        run = "_" + run
+        return [f for f in files if run in f]
+
+    def knnauto(self, simname, run, nsim=None, nobs=None):
         """
         Path to the `knn` auto-correlation files. If `nsim` is not specified
         returns a list of files for this run for all available simulations.
@@ -393,7 +433,7 @@ class Paths:
         simname : str
             Simulation name. Must be either `csiborg` or `quijote`.
         run : str
-            Type of run.
+            Run name.
         nsim : int, optional
             IC realisation index.
         nobs : int, optional
@@ -412,15 +452,14 @@ class Paths:
             if simname == "csiborg":
                 nsim = str(nsim).zfill(5)
             else:
-                assert nobs is not None
-                nsim = f"{str(nobs).zfill(2)}{str(nsim).zfill(3)}"
+                nsim = self.quijote_fiducial_nsim(nsim, nobs)
             return join(fdir, f"{simname}_knncdf_{nsim}_{run}.p")
 
         files = glob(join(fdir, f"{simname}_knncdf*"))
-        run = "__" + run
+        run = "_" + run
         return [f for f in files if run in f]
 
-    def knncross_path(self, simname, run, nsims=None):
+    def knncross(self, simname, run, nsims=None):
         """
         Path to the `knn` cross-correlation files. If `nsims` is not specified
         returns a list of files for this run for all available simulations.
@@ -449,10 +488,10 @@ class Paths:
             return join(fdir, f"{simname}_knncdf_{nsim0}_{nsimx}__{run}.p")
 
         files = glob(join(fdir, f"{simname}_knncdf*"))
-        run = "__" + run
+        run = "_" + run
         return [f for f in files if run in f]
 
-    def tpcfauto_path(self, simname, run, nsim=None):
+    def tpcfauto(self, simname, run, nsim=None):
         """
         Path to the `tpcf` auto-correlation files. If `nsim` is not specified
         returns a list of files for this run for all available simulations.
diff --git a/csiborgtools/read/readsim.py b/csiborgtools/read/readsim.py
index 9d5d7a7..0c2519c 100644
--- a/csiborgtools/read/readsim.py
+++ b/csiborgtools/read/readsim.py
@@ -76,7 +76,7 @@ class ParticleReader:
             Dictionary of information paramaters. Note that both keys and
             values are strings.
         """
-        snappath = self.paths.snapshot_path(nsnap, nsim)
+        snappath = self.paths.snapshot(nsnap, nsim)
         filename = join(snappath, "info_{}.txt".format(str(nsnap).zfill(5)))
         with open(filename, "r") as f:
             info = f.read().split()
@@ -87,7 +87,6 @@ class ParticleReader:
 
         keys = info[eqs - 1]
         vals = info[eqs + 1]
-        # trunk-ignore(ruff/B905)
         return {key: val for key, val in zip(keys, vals)}
 
     def open_particle(self, nsnap, nsim, verbose=True):
@@ -110,7 +109,7 @@ class ParticleReader:
         partfiles : list of `scipy.io.FortranFile`
             Opened part files.
         """
-        snappath = self.paths.snapshot_path(nsnap, nsim)
+        snappath = self.paths.snapshot(nsnap, nsim)
         ncpu = int(self.read_info(nsnap, nsim)["ncpu"])
         nsnap = str(nsnap).zfill(5)
         if verbose:
diff --git a/csiborgtools/read/tpcf_summary.py b/csiborgtools/read/tpcf_summary.py
index 602cf05..50b6466 100644
--- a/csiborgtools/read/tpcf_summary.py
+++ b/csiborgtools/read/tpcf_summary.py
@@ -65,7 +65,7 @@ class TPCFReader:
         out : 2-dimensional array of shape `(len(files), len(rp))`
             Array of 2PCFs.
         """
-        files = self.paths.tpcfauto_path(run)
+        files = self.paths.tpcfauto(run)
         if len(files) == 0:
             raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
 
diff --git a/scripts/cluster_crosspk.py b/scripts/cluster_crosspk.py
index 38c8387..291681f 100644
--- a/scripts/cluster_crosspk.py
+++ b/scripts/cluster_crosspk.py
@@ -16,6 +16,7 @@
 MPI script to calculate the matter cross power spectrum between CSiBORG
 IC realisations. Units are Mpc/h.
 """
+raise NotImplementedError("This script is currently not working.")
 from argparse import ArgumentParser
 from datetime import datetime
 from gc import collect
@@ -51,7 +52,7 @@ MAS = "CIC"  # mass asignment scheme
 paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 box = csiborgtools.read.CSiBORGBox(paths)
 reader = csiborgtools.read.ParticleReader(paths)
-ics = paths.get_ics()
+ics = paths.get_ics("csiborg")
 nsims = len(ics)
 
 # File paths
diff --git a/scripts/cluster_knn_auto.py b/scripts/cluster_knn_auto.py
index 52dd910..ea02ed5 100644
--- a/scripts/cluster_knn_auto.py
+++ b/scripts/cluster_knn_auto.py
@@ -12,18 +12,19 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
+"""
+A script to calculate the KNN-CDF for a set of halo catalogues.
+"""
 from argparse import ArgumentParser
-from copy import deepcopy
 from datetime import datetime
-from warnings import warn
+from distutils.util import strtobool
 
 import joblib
 import numpy
 import yaml
 from mpi4py import MPI
 from sklearn.neighbors import NearestNeighbors
-from taskmaster import master_process, worker_process
+from taskmaster import work_delegation
 
 try:
     import csiborgtools
@@ -33,161 +34,122 @@ except ModuleNotFoundError:
     sys.path.append("../")
     import csiborgtools
 
-
-###############################################################################
-#                            MPI and arguments                                #
-###############################################################################
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-nproc = comm.Get_size()
-
-parser = ArgumentParser()
-parser.add_argument("--runs", type=str, nargs="+")
-parser.add_argument("--ics", type=int, nargs="+", default=None,
-                    help="IC realisations. If `-1` processes all simulations.")
-parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
-args = parser.parse_args()
-with open("../scripts/cluster_knn_auto.yml", "r") as file:
-    config = yaml.safe_load(file)
-
-Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
-totvol = 4 * numpy.pi * Rmax**3 / 3
-paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-knncdf = csiborgtools.clustering.kNN_1DCDF()
-
-if args.ics is None or args.ics[0] == -1:
-    if args.simname == "csiborg":
-        ics = paths.get_ics()
-    else:
-        ics = paths.get_quijote_ics()
-else:
-    ics = args.ics
+from utils import open_catalogues
 
 
-###############################################################################
-#                                 Analysis                                    #
-###############################################################################
+def do_auto(args, config, cats, nsim, paths):
+    """
+    Calculate the kNN-CDF single catalogue auto-correlation.
 
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Command line arguments.
+    config : dict
+        Configuration dictionary.
+    cats : dict
+        Dictionary of halo catalogues. Keys are simulation indices, values are
+        the catalogues.
+    nsim : int
+        Simulation index.
+    paths : csiborgtools.paths.Paths
+        Paths object.
 
-def read_single(nsim, selection, nobs=None):
-    # We first read the full catalogue without applying any bounds.
-    if args.simname == "csiborg":
-        cat = csiborgtools.read.HaloCatalogue(nsim, paths)
-    else:
-        cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4,
-                                                     origin=nobs)
-
-    cat.apply_bounds({"dist": (0, Rmax)})
-    # We then first read off the primary selection bounds.
-    sel = selection["primary"]
-    pname = None
-    xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
-    for _name in xs:
-        if _name in cat.keys:
-            pname = _name
-    if pname is None:
-        raise KeyError(f"Invalid names `{sel['name']}`.")
-
-    cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})
-
-    # Now the secondary selection bounds. If needed transfrom the secondary
-    # property before applying the bounds.
-    if "secondary" in selection:
-        sel = selection["secondary"]
-        sname = None
-        xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
-        for _name in xs:
-            if _name in cat.keys:
-                sname = _name
-        if sname is None:
-            raise KeyError(f"Invalid names `{sel['name']}`.")
-
-        if sel.get("toperm", False):
-            cat[sname] = numpy.random.permutation(cat[sname])
-
-        if sel.get("marked", False):
-            cat[sname] = csiborgtools.clustering.normalised_marks(
-                cat[pname], cat[sname], nbins=config["nbins_marks"])
-        cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})
-    return cat
-
-
-def do_auto(run, nsim, nobs=None):
-    """Calculate the kNN-CDF single catalgoue autocorrelation."""
-    _config = config.get(run, None)
-    if _config is None:
-        warn(f"No configuration for run {run}.", UserWarning, stacklevel=1)
-        return
-
-    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
-    cat = read_single(nsim, _config, nobs=nobs)
+    Returns
+    -------
+    None
+    """
+    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
+    knncdf = csiborgtools.clustering.kNN_1DCDF()
+    cat = cats[nsim]
     knn = cat.knn(in_initial=False)
     rs, cdf = knncdf(
         knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
         rmin=config["rmin"], rmax=config["rmax"],
         nsamples=int(config["nsamples"]), neval=int(config["neval"]),
         batch_size=int(config["batch_size"]), random_state=config["seed"])
-
-    fout = paths.knnauto_path(args.simname, run, nsim, nobs)
-    print(f"Saving output to `{fout}`.")
+    totvol = (4 / 3) * numpy.pi * args.Rmax ** 3
+    fout = paths.knnauto(args.simname, args.run, nsim)
+    if args.verbose:
+        print(f"Saving output to `{fout}`.")
     joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout)
 
 
-def do_cross_rand(run, nsim, nobs=None):
-    """Calculate the kNN-CDF cross catalogue random correlation."""
-    _config = config.get(run, None)
-    if _config is None:
-        warn(f"No configuration for run {run}.", UserWarning, stacklevel=1)
-        return
+def do_cross_rand(args, config, cats, nsim, paths):
+    """
+    Calculate the kNN-CDF cross catalogue random correlation.
 
-    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
-    cat = read_single(nsim, _config)
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Command line arguments.
+    config : dict
+        Configuration dictionary.
+    cats : dict
+        Dictionary of halo catalogues. Keys are simulation indices, values are
+        the catalogues.
+    nsim : int
+        Simulation index.
+    paths : csiborgtools.paths.Paths
+        Paths object.
+
+    Returns
+    -------
+    None
+    """
+    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
+    cat = cats[nsim]
     knn1 = cat.knn(in_initial=False)
 
     knn2 = NearestNeighbors()
     pos2 = rvs_gen(len(cat).shape[0])
     knn2.fit(pos2)
 
+    knncdf = csiborgtools.clustering.kNN_1DCDF()
     rs, cdf0, cdf1, joint_cdf = knncdf.joint(
         knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
         rmin=config["rmin"], rmax=config["rmax"],
         nsamples=int(config["nsamples"]), neval=int(config["neval"]),
         batch_size=int(config["batch_size"]), random_state=config["seed"])
     corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
-    fout = paths.knnauto_path(args.simname, run, nsim, nobs)
-    print(f"Saving output to `{fout}`.")
+
+    fout = paths.knnauto(args.simname, args.run, nsim)
+    if args.verbose:
+        print(f"Saving output to `{fout}`.", flush=True)
     joblib.dump({"rs": rs, "corr": corr}, fout)
 
 
-def do_runs(nsim):
-    for run in args.runs:
-        iters = range(27) if args.simname == "quijote" else [None]
-        for nobs in iters:
-            if "random" in run:
-                do_cross_rand(run, nsim, nobs)
-            else:
-                do_auto(run, nsim, nobs)
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("--run", type=str, help="Run name.")
+    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
+                        help="Simulation name")
+    parser.add_argument("--nsims", type=int, nargs="+", default=None,
+                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
+    parser.add_argument("--Rmax", type=float, default=155/0.705,
+                        help="High-resolution region radius")  # noqa
+    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
+                        default=False)
+    args = parser.parse_args()
 
+    with open("./cluster_knn_auto.yml", "r") as file:
+        config = yaml.safe_load(file)
+    comm = MPI.COMM_WORLD
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    cats = open_catalogues(args, config, paths, comm)
 
-###############################################################################
-#                             MPI task delegation                             #
-###############################################################################
+    if args.verbose and comm.Get_rank() == 0:
+        print(f"{datetime.now()}: starting to calculate the kNN statistic.")
 
+    def do_work(nsim):
+        if "random" in args.run:
+            do_cross_rand(args, config, cats, nsim, paths)
+        else:
+            do_auto(args, config, cats, nsim, paths)
 
-if nproc > 1:
-    if rank == 0:
-        tasks = deepcopy(ics)
-        master_process(tasks, comm, verbose=True)
-    else:
-        worker_process(do_runs, comm, verbose=False)
-else:
-    tasks = deepcopy(ics)
-    for task in tasks:
-        print("{}: completing task `{}`.".format(datetime.now(), task))
-        do_runs(task)
-comm.Barrier()
+    nsims = list(cats.keys())
+    work_delegation(do_work, nsims, comm, master_verbose=args.verbose)
 
-
-if rank == 0:
-    print("{}: all finished.".format(datetime.now()))
-quit()  # Force quit the script
+    comm.Barrier()
+    if comm.Get_rank() == 0:
+        print(f"{datetime.now()}: all finished. Quitting.")
diff --git a/scripts/cluster_knn_auto.yml b/scripts/cluster_knn_auto.yml
index 6bbc4f4..c2556e6 100644
--- a/scripts/cluster_knn_auto.yml
+++ b/scripts/cluster_knn_auto.yml
@@ -1,8 +1,8 @@
 rmin: 0.1
 rmax: 100
 nneighbours: 8
-nsamples: 1.e+5
-batch_size: 5.e+4
+nsamples: 1.e+7
+batch_size: 1.e+6
 neval: 10000
 seed: 42
 nbins_marks: 10
@@ -16,7 +16,7 @@ nbins_marks: 10
 "mass001":
   primary:
     name:
-    - totpartmass,
+    - totpartmass
     - group_mass
     min: 1.e+12
     max: 1.e+13
@@ -24,7 +24,7 @@ nbins_marks: 10
 "mass002":
   primary:
     name:
-    - totpartmass,
+    - totpartmass
     - group_mass
     min: 1.e+13
     max: 1.e+14
@@ -32,7 +32,15 @@ nbins_marks: 10
 "mass003":
   primary:
     name:
-    - totpartmass,
+    - totpartmass
+    - group_mass
+    min: 1.e+14
+
+"mass003_poisson":
+  poisson: true
+  primary:
+    name:
+    - totpartmass
     - group_mass
     min: 1.e+14
 
diff --git a/scripts/cluster_knn_cross.py b/scripts/cluster_knn_cross.py
index 60cea0a..392fd9d 100644
--- a/scripts/cluster_knn_cross.py
+++ b/scripts/cluster_knn_cross.py
@@ -16,11 +16,13 @@
 A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.
 
 TODO:
+    - [ ] Add support for new catalogue readers. Currently will not work.
     - [ ] Update catalogue readers.
     - [ ] Update paths.
     - [ ] Update to cross-correlate different mass populations from different
     simulations.
 """
+raise NotImplementedError("This script is currently not working.")
 from argparse import ArgumentParser
 from datetime import datetime
 from itertools import combinations
@@ -58,7 +60,7 @@ with open("../scripts/knn_cross.yml", "r") as file:
 
 Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
 paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-ics = paths.get_ics()
+ics = paths.get_ics("csiborg")
 knncdf = csiborgtools.clustering.kNN_1DCDF()
 
 ###############################################################################
@@ -109,7 +111,7 @@ def do_cross(run, ics):
     )
 
     corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
-    fout = paths.knncross_path(args.simname, run, ics)
+    fout = paths.knncross(args.simname, run, ics)
     joblib.dump({"rs": rs, "corr": corr}, fout)
 
 
diff --git a/scripts/cluster_tpcf_auto.py b/scripts/cluster_tpcf_auto.py
index 9418840..ec9242d 100644
--- a/scripts/cluster_tpcf_auto.py
+++ b/scripts/cluster_tpcf_auto.py
@@ -16,18 +16,16 @@
 A script to calculate the auto-2PCF of CSiBORG catalogues.
 """
 from argparse import ArgumentParser
-from copy import deepcopy
 from datetime import datetime
-from warnings import warn
+from distutils.util import strtobool
 
 import joblib
 import numpy
 import yaml
 from mpi4py import MPI
 
-from taskmaster import master_process, worker_process
-
-from .cluster_knn_auto import read_single
+from taskmaster import work_delegation
+from utils import open_catalogues
 
 try:
     import csiborgtools
@@ -38,84 +36,51 @@ except ModuleNotFoundError:
     import csiborgtools
 
 
-###############################################################################
-#                            MPI and arguments                                #
-###############################################################################
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-nproc = comm.Get_size()
-
-parser = ArgumentParser()
-parser.add_argument("--runs", type=str, nargs="+")
-parser.add_argument("--ics", type=int, nargs="+", default=None,
-                    help="IC realisations. If `-1` processes all simulations.")
-parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
-args = parser.parse_args()
-with open("../scripts/tpcf_auto.yml", "r") as file:
-    config = yaml.safe_load(file)
-
-Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
-paths = csiborgtools.read.Paths()
-tpcf = csiborgtools.clustering.Mock2PCF()
-
-if args.ics is None or args.ics[0] == -1:
-    if args.simname == "csiborg":
-        ics = paths.get_ics()
-    else:
-        ics = paths.get_quijote_ics()
-else:
-    ics = args.ics
-
-###############################################################################
-#                                 Analysis                                    #
-###############################################################################
-
-
-def do_auto(run, nsim):
-    _config = config.get(run, None)
-    if _config is None:
-        warn("No configuration for run {}.".format(run), stacklevel=1)
-        return
-
-    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
+def do_auto(args, config, cats, nsim, paths):
+    tpcf = csiborgtools.clustering.Mock2PCF()
+    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
     bins = numpy.logspace(
-        numpy.log10(config["rpmin"]),
-        numpy.log10(config["rpmax"]),
-        config["nrpbins"] + 1,
-    )
-    cat = read_single(nsim, _config)
+        numpy.log10(config["rpmin"]), numpy.log10(config["rpmax"]),
+        config["nrpbins"] + 1,)
+    cat = cats[nsim]
+
     pos = cat.position(in_initial=False, cartesian=True)
     nrandom = int(config["randmult"] * pos.shape[0])
     rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
 
-    fout = paths.tpcfauto_path(args.simname, run, nsim)
+    fout = paths.knnauto(args.simname, args.run, nsim)
     joblib.dump({"rp": rp, "wp": wp}, fout)
 
 
-def do_runs(nsim):
-    for run in args.runs:
-        do_auto(run, nsim)
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("--run", type=str, help="Run name.")
+    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
+                        help="Simulation name")
+    parser.add_argument("--nsims", type=int, nargs="+", default=None,
+                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
+    parser.add_argument("--Rmax", type=float, default=155/0.705,
+                        help="High-resolution region radius")  # noqa
+    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
+                        default=False)
+    args = parser.parse_args()
 
+    with open("./cluster_tpcf_auto.yml", "r") as file:
+        config = yaml.safe_load(file)
 
-###############################################################################
-#                             MPI task delegation                             #
-###############################################################################
+    comm = MPI.COMM_WORLD
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    cats = open_catalogues(args, config, paths, comm)
 
+    if args.verbose and comm.Get_rank() == 0:
+        print(f"{datetime.now()}: starting to calculate the 2PCF statistic.")
 
-if nproc > 1:
-    if rank == 0:
-        tasks = deepcopy(ics)
-        master_process(tasks, comm, verbose=True)
-    else:
-        worker_process(do_runs, comm, verbose=False)
-else:
-    tasks = deepcopy(ics)
-    for task in tasks:
-        print("{}: completing task `{}`.".format(datetime.now(), task))
-        do_runs(task)
-comm.Barrier()
+    def do_work(nsim):
+        return do_auto(args, config, cats, nsim, paths)
 
+    nsims = list(cats.keys())
+    work_delegation(do_work, nsims, comm, master_verbose=args.verbose)
 
-if rank == 0:
-    print("{}: all finished.".format(datetime.now()))
-quit()  # Force quit the script
+    comm.Barrier()
+    if comm.Get_rank() == 0:
+        print(f"{datetime.now()}: all finished. Quitting.")
diff --git a/scripts/field_derived.py b/scripts/field_derived.py
index a018993..6185172 100644
--- a/scripts/field_derived.py
+++ b/scripts/field_derived.py
@@ -48,7 +48,7 @@ args = parser.parse_args()
 paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
     ics = args.ics
 
@@ -62,7 +62,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
     box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
     density_gen = csiborgtools.field.DensityField(box, args.MAS)
 
-    rho = numpy.load(paths.field_path("density", args.MAS, args.grid, nsim,
+    rho = numpy.load(paths.field("density", args.MAS, args.grid, nsim,
                                       args.in_rsp))
     rho = density_gen.overdensity_field(rho)
 
@@ -72,7 +72,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
         raise RuntimeError(f"Field {args.kind} is not implemented yet.")
 
     field = gen(rho)
-    fout = paths.field_path("potential", args.MAS, args.grid, nsim,
+    fout = paths.field("potential", args.MAS, args.grid, nsim,
                             args.in_rsp)
     print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.")
     numpy.save(fout, field)
diff --git a/scripts/field_fromparts.py b/scripts/field_fromparts.py
index 5aabd7c..02ea934 100644
--- a/scripts/field_fromparts.py
+++ b/scripts/field_fromparts.py
@@ -50,7 +50,7 @@ paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 mpart = 1.1641532e-10  # Particle mass in CSiBORG simulations.
 
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
     ics = args.ics
 
@@ -62,7 +62,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
 
     nsnap = max(paths.get_snapshots(nsim))
     box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
-    parts = csiborgtools.read.read_h5(paths.particles_path(nsim))["particles"]
+    parts = csiborgtools.read.read_h5(paths.particles(nsim))["particles"]
 
     if args.kind == "density":
         gen = csiborgtools.field.DensityField(box, args.MAS)
@@ -71,6 +71,6 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
         gen = csiborgtools.field.VelocityField(box, args.MAS)
         field = gen(parts, args.grid, mpart, verbose=verbose)
 
-    fout = paths.field_path(args.kind, args.MAS, args.grid, nsim, args.in_rsp)
+    fout = paths.field(args.kind, args.MAS, args.grid, nsim, args.in_rsp)
     print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.")
     numpy.save(fout, field)
diff --git a/scripts/fit_halos.py b/scripts/fit_halos.py
index a4a5169..8cb8eb6 100644
--- a/scripts/fit_halos.py
+++ b/scripts/fit_halos.py
@@ -47,7 +47,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
 nfwpost = csiborgtools.fits.NFWPosterior()
 
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
     ics = args.ics
 
@@ -108,7 +108,7 @@ for nsim in [ics[i] for i in jobs]:
     box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
 
     # Particle archive
-    f = csiborgtools.read.read_h5(paths.particles_path(nsim))
+    f = csiborgtools.read.read_h5(paths.particles(nsim))
     particles = f["particles"]
     clump_map = f["clumpmap"]
     clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
@@ -153,6 +153,6 @@ for nsim in [ics[i] for i in jobs]:
     if args.kind == "halos":
         out = out[ismain]
 
-    fout = paths.structfit_path(nsnap, nsim, args.kind)
+    fout = paths.structfit(nsnap, nsim, args.kind)
     print(f"Saving to `{fout}`.", flush=True)
     numpy.save(fout, out)
diff --git a/scripts/fit_init.py b/scripts/fit_init.py
index 365a82f..c31adf6 100644
--- a/scripts/fit_init.py
+++ b/scripts/fit_init.py
@@ -48,7 +48,7 @@ paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 partreader = csiborgtools.read.ParticleReader(paths)
 
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
     ics = args.ics
 
@@ -66,9 +66,9 @@ for nsim in [ics[i] for i in jobs]:
     print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
           flush=True)
 
-    parts = csiborgtools.read.read_h5(paths.initmatch_path(nsim, "particles"))
+    parts = csiborgtools.read.read_h5(paths.initmatch(nsim, "particles"))
     parts = parts['particles']
-    clump_map = csiborgtools.read.read_h5(paths.particles_path(nsim))
+    clump_map = csiborgtools.read.read_h5(paths.particles(nsim))
     clump_map = clump_map["clumpmap"]
     clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
                                                    load_fitted=False)
@@ -96,7 +96,7 @@ for nsim in [ics[i] for i in jobs]:
 
     out = out[ismain]
     # Now save it
-    fout = paths.initmatch_path(nsim, "fit")
+    fout = paths.initmatch(nsim, "fit")
     print(f"{datetime.now()}: dumping fits to .. `{fout}`.",
           flush=True)
     with open(fout, "wb") as f:
diff --git a/scripts/match_all.py b/scripts/match_all.py
index 6ed27a3..7d54e25 100644
--- a/scripts/match_all.py
+++ b/scripts/match_all.py
@@ -55,7 +55,7 @@ def get_combs():
     seed to minimise loading the same files simultaneously.
     """
     paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
     combs = list(combinations(ics, 2))
     Random(42).shuffle(combs)
     return combs
diff --git a/scripts/match_finsnap.py b/scripts/match_finsnap.py
new file mode 100644
index 0000000..9944869
--- /dev/null
+++ b/scripts/match_finsnap.py
@@ -0,0 +1,102 @@
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Script to find the nearest neighbour of each halo in a given halo catalogue
+from the remaining catalogues in the suite (CSIBORG or Quijote). The script is
+MPI parallelized over the reference simulations.
+"""
+from argparse import ArgumentParser
+from datetime import datetime
+from distutils.util import strtobool
+
+import numpy
+import yaml
+from mpi4py import MPI
+
+from taskmaster import work_delegation
+from utils import open_catalogues
+
+try:
+    import csiborgtools
+except ModuleNotFoundError:
+    import sys
+
+    sys.path.append("../")
+    import csiborgtools
+
+
+def find_neighbour(args, nsim, cats, paths, comm):
+    """
+    Find the nearest neighbour of each halo in the given catalogue.
+
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Command line arguments.
+    nsim : int
+        Simulation index.
+    cats : dict
+        Dictionary of halo catalogues. Keys are simulation indices, values are
+        the catalogues.
+    paths : csiborgtools.paths.Paths
+        Paths object.
+    comm : mpi4py.MPI.Comm
+        MPI communicator.
+
+    Returns
+    -------
+    None
+    """
+    ndist, cross_hindxs = csiborgtools.match.find_neighbour(nsim, cats)
+
+    mass_key = "totpartmass" if args.simname == "csiborg" else "group_mass"
+    cat0 = cats[nsim]
+    mass = cat0[mass_key]
+    rdist = cat0.radial_distance(in_initial=False)
+
+    fout = paths.cross_nearest(args.simname, args.run, nsim)
+    if args.verbose:
+        print(f"Rank {comm.Get_rank()} writing to `{fout}`.", flush=True)
+    numpy.savez(fout, ndist=ndist, cross_hindxs=cross_hindxs, mass=mass,
+                rdist=rdist)
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("--run", type=str, help="Run name")
+    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
+                        help="Simulation name")
+    parser.add_argument("--nsims", type=int, nargs="+", default=None,
+                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
+    parser.add_argument("--Rmax", type=float, default=155/0.705,
+                        help="High-resolution region radius")
+    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
+                        default=False)
+    args = parser.parse_args()
+    with open("./match_finsnap.yml", "r") as file:
+        config = yaml.safe_load(file)
+
+    comm = MPI.COMM_WORLD
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    cats = open_catalogues(args, config, paths, comm)
+
+    def do_work(nsim):
+        return find_neighbour(args, nsim, cats, paths, comm)
+
+    work_delegation(do_work, list(cats.keys()), comm,
+                    master_verbose=args.verbose)
+
+    comm.Barrier()
+    if comm.Get_rank() == 0:
+        print(f"{datetime.now()}: all finished. Quitting.")
diff --git a/scripts/match_finsnap.yml b/scripts/match_finsnap.yml
new file mode 100644
index 0000000..c5441b5
--- /dev/null
+++ b/scripts/match_finsnap.yml
@@ -0,0 +1,37 @@
+rmin: 0.1
+rmax: 100
+nneighbours: 8
+nsamples: 1.e+7
+batch_size: 1.e+6
+neval: 10000
+seed: 42
+nbins_marks: 10
+
+
+################################################################################
+#                                 totpartmass                                 #
+################################################################################
+
+
+"mass001":
+  primary:
+    name:
+    - totpartmass
+    - group_mass
+    min: 1.e+12
+    max: 1.e+13
+
+"mass002":
+  primary:
+    name:
+    - totpartmass
+    - group_mass
+    min: 1.e+13
+    max: 1.e+14
+
+"mass003":
+  primary:
+    name:
+    - totpartmass
+    - group_mass
+    min: 1.e+14
diff --git a/scripts/match_singlematch.py b/scripts/match_singlematch.py
index fc68201..a11c683 100644
--- a/scripts/match_singlematch.py
+++ b/scripts/match_singlematch.py
@@ -45,12 +45,12 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
     catx = HaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
                          with_lagpatch=True, load_clumps_cat=True)
 
-    clumpmap0 = read_h5(paths.particles_path(nsim0))["clumpmap"]
-    parts0 = read_h5(paths.initmatch_path(nsim0, "particles"))["particles"]
+    clumpmap0 = read_h5(paths.particles(nsim0))["clumpmap"]
+    parts0 = read_h5(paths.initmatch(nsim0, "particles"))["particles"]
     clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}
 
-    clumpmapx = read_h5(paths.particles_path(nsimx))["clumpmap"]
-    partsx = read_h5(paths.initmatch_path(nsimx, "particles"))["particles"]
+    clumpmapx = read_h5(paths.particles(nsimx))["clumpmap"]
+    partsx = read_h5(paths.initmatch(nsimx, "particles"))["particles"]
     clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}
 
     # We generate the background density fields. Loads halos's particles one by
@@ -77,7 +77,7 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
         for j, match in enumerate(matches):
             match_hids[i][j] = catx["index"][match]
 
-    fout = paths.overlap_path(nsim0, nsimx, smoothed=False)
+    fout = paths.overlap(nsim0, nsimx, smoothed=False)
     numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
                 ngp_overlap=ngp_overlap)
     if verbose:
@@ -99,7 +99,7 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
                                               match_indxs, smooth_kwargs,
                                               verbose=verbose)
 
-    fout = paths.overlap_path(nsim0, nsimx, smoothed=True)
+    fout = paths.overlap(nsim0, nsimx, smoothed=True)
     numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=sigma)
     if verbose:
         print(f"{datetime.now()}: calculated smoothing, saved to {fout}.",
diff --git a/scripts/fit_profiles.py b/scripts/old/fit_profiles.py
similarity index 97%
rename from scripts/fit_profiles.py
rename to scripts/old/fit_profiles.py
index 0c9d539..c942253 100644
--- a/scripts/fit_profiles.py
+++ b/scripts/old/fit_profiles.py
@@ -48,7 +48,7 @@ if nproc > 1:
 paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 cols_collect = [("r", numpy.float32), ("M", numpy.float32)]
 if args.ics is None or args.ics == -1:
-    nsims = paths.get_ics()
+    nsims = paths.get_ics("csiborg")
 else:
     nsims = args.ics
 
@@ -61,7 +61,7 @@ for i, nsim in enumerate(nsims):
     nsnap = max(paths.get_snapshots(nsim))
     box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
 
-    f = csiborgtools.read.read_h5(paths.particles_path(nsim))
+    f = csiborgtools.read.read_h5(paths.particles(nsim))
     particles = f["particles"]
     clump_map = f["clumpmap"]
     clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
diff --git a/scripts/pre_dumppart.py b/scripts/pre_dumppart.py
index 9908efe..7360afb 100644
--- a/scripts/pre_dumppart.py
+++ b/scripts/pre_dumppart.py
@@ -55,7 +55,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
 pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
 
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
     ics = args.ics
 
@@ -87,7 +87,7 @@ jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
 for i in jobs:
     nsim = ics[i]
     nsnap = max(paths.get_snapshots(nsim))
-    fname = paths.particles_path(nsim)
+    fname = paths.particles(nsim)
     # We first read in the clump IDs of the particles and infer the sorting.
     # Right away we dump the clump IDs to a HDF5 file and clear up memory.
     print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
@@ -146,7 +146,7 @@ for i in jobs:
         start_loop = kf
 
     # We save the mapping to a HDF5 file
-    with h5py.File(paths.particles_path(nsim), "r+") as f:
+    with h5py.File(paths.particles(nsim), "r+") as f:
         f.create_dataset("clumpmap", data=clump_map)
         f.close()
 
diff --git a/scripts/pre_mmain.py b/scripts/pre_mmain.py
index e2ca54e..f9b6ccd 100644
--- a/scripts/pre_mmain.py
+++ b/scripts/pre_mmain.py
@@ -41,7 +41,7 @@ def do_mmain(nsim):
     nsnap = max(paths.get_snapshots(nsim))
     # NOTE: currently works for highest snapshot anyway
     mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
-    numpy.savez(paths.mmain_path(nsnap, nsim),
+    numpy.savez(paths.mmain(nsnap, nsim),
                 mmain=mmain, ultimate_parent=ultimate_parent)
 
 ###############################################################################
@@ -51,12 +51,12 @@ def do_mmain(nsim):
 
 if nproc > 1:
     if rank == 0:
-        tasks = list(paths.get_ics())
+        tasks = list(paths.get_ics("csiborg"))
         master_process(tasks, comm, verbose=True)
     else:
         worker_process(do_mmain, comm, verbose=False)
 else:
-    tasks = paths.get_ics()
+    tasks = paths.get_ics("csiborg")
     for task in tasks:
         print(f"{datetime.now()}: completing task `{task}`.", flush=True)
         do_mmain(task)
diff --git a/scripts/pre_sortinit.py b/scripts/pre_sortinit.py
index 93a63a3..9c4a387 100644
--- a/scripts/pre_sortinit.py
+++ b/scripts/pre_sortinit.py
@@ -50,7 +50,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
 pars_extract = ["x", "y", "z", "M", "ID"]
 
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
     ics = args.ics
 
@@ -64,7 +64,7 @@ for i in jobs:
     print(f"{datetime.now()}: reading and processing simulation {nsim}.",
           flush=True)
     # We first load the particle IDs in the final snapshot.
-    pidf = csiborgtools.read.read_h5(paths.particles_path(nsim))
+    pidf = csiborgtools.read.read_h5(paths.particles(nsim))
     pidf = pidf["particle_ids"]
     # Then we load the particles in the initil snapshot and make sure that
     # their particle IDs are sorted as in the final snapshot.
@@ -78,5 +78,5 @@ for i in jobs:
     collect()
     part0 = part0[numpy.argsort(numpy.argsort(pidf))]
     print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
-    with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
+    with h5py.File(paths.initmatch(nsim, "particles"), "w") as f:
         f.create_dataset("particles", data=part0)
diff --git a/scripts/utils.py b/scripts/utils.py
index 52c688f..4f8f761 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -13,23 +13,184 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-Notebook utility functions.
+Utility functions for scripts.
 """
+from datetime import datetime
 
-# from os.path import join
+import numpy
+
+from tqdm import tqdm
 
 try:
     import csiborgtools
 except ModuleNotFoundError:
     import sys
     sys.path.append("../")
+    import csiborgtools
 
 
-Nsplits = 200
-dumpdir = "/mnt/extraspace/rstiskalek/CSiBORG/"
+###############################################################################
+#                           Reading functions                                 #
+###############################################################################
 
 
-# Some chosen clusters
+def get_nsims(args, paths):
+    """
+    Get simulation indices from the command line arguments.
+
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Command line arguments. Must include `nsims` and `simname`. If `nsims`
+        is `None` or `-1`, all simulations in `simname` are used.
+    paths : :py:class`csiborgtools.paths.Paths`
+        Paths object.
+
+    Returns
+    -------
+    nsims : list of int
+        Simulation indices.
+    """
+    if args.nsims is None or args.nsims[0] == -1:
+        nsims = paths.get_ics(args.simname)
+    else:
+        nsims = args.nsims
+    return list(nsims)
+
+
+def read_single_catalogue(args, config, nsim, run, rmax, paths, nobs=None):
+    """
+    Read a single halo catalogue and apply selection criteria to it.
+
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Command line arguments. Must include `simname`.
+    config : dict
+        Configuration dictionary.
+    nsim : int
+        Simulation index.
+    run : str
+        Run name.
+    rmax : float
+        Maximum radial distance of the halo catalogue.
+    paths : csiborgtools.paths.Paths
+        Paths object.
+    nobs : int, optional
+        Fiducial Quijote observer index.
+
+    Returns
+    -------
+    cat : csiborgtools.read.HaloCatalogue or csiborgtools.read.QuijoteHaloCatalogue  # noqa
+        Halo catalogue with selection criteria applied.
+    """
+    selection = config.get(run, None)
+    if selection is None:
+        raise KeyError(f"No configuration for run {run}.")
+    # We first read the full catalogue without applying any bounds.
+    if args.simname == "csiborg":
+        cat = csiborgtools.read.HaloCatalogue(nsim, paths)
+    else:
+        cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
+        if nobs is not None:
+            # We may optionally already here pick a fiducial observer.
+            cat = cat.pick_fiducial_observer(nobs, args.Rmax)
+
+    cat.apply_bounds({"dist": (0, rmax)})
+    # We then first read off the primary selection bounds.
+    sel = selection["primary"]
+    pname = None
+    xs = sel["name"] if isinstance(sel["name"], list) else [sel["name"]]
+    for _name in xs:
+        if _name in cat.keys:
+            pname = _name
+    if pname is None:
+        raise KeyError(f"Invalid names `{sel['name']}`.")
+
+    cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})
+
+    # Now the secondary selection bounds. If needed transfrom the secondary
+    # property before applying the bounds.
+    if "secondary" in selection:
+        sel = selection["secondary"]
+        sname = None
+        xs = sel["name"] if isinstance(sel["name"], list) else [sel["name"]]
+        for _name in xs:
+            if _name in cat.keys:
+                sname = _name
+        if sname is None:
+            raise KeyError(f"Invalid names `{sel['name']}`.")
+
+        if sel.get("toperm", False):
+            cat[sname] = numpy.random.permutation(cat[sname])
+
+        if sel.get("marked", False):
+            cat[sname] = csiborgtools.clustering.normalised_marks(
+                cat[pname], cat[sname], nbins=config["nbins_marks"])
+        cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})
+
+    return cat
+
+
+def open_catalogues(args, config, paths, comm):
+    """
+    Read all halo catalogues on the zeroth rank and broadcast them to all
+    higher ranks.
+
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Command line arguments.
+    config : dict
+        Configuration dictionary.
+    paths : csiborgtools.paths.Paths
+        Paths object.
+    comm : mpi4py.MPI.Comm
+        MPI communicator.
+
+    Returns
+    -------
+    cats : dict
+        Dictionary of halo catalogues. Keys are simulation indices, values are
+        the catalogues.
+    """
+    nsims = get_nsims(args, paths)
+    rank = comm.Get_rank()
+    nproc = comm.Get_size()
+
+    if args.verbose and rank == 0:
+        print(f"{datetime.now()}: opening catalogues.", flush=True)
+
+    if rank == 0:
+        cats = {}
+        if args.simname == "csiborg":
+            for nsim in tqdm(nsims) if args.verbose else nsims:
+                cat = read_single_catalogue(args, config, nsim, args.run,
+                                            rmax=args.Rmax, paths=paths)
+                cats.update({nsim: cat})
+        else:
+            for nsim in tqdm(nsims) if args.verbose else nsims:
+                ref_cat = read_single_catalogue(args, config, nsim, args.run,
+                                                rmax=None, paths=paths)
+
+                nmax = int(ref_cat.box.boxsize // (2 * args.Rmax))**3
+                for nobs in range(nmax):
+                    name = paths.quijote_fiducial_nsim(nsim, nobs)
+                    cat = ref_cat.pick_fiducial_observer(nobs, rmax=args.Rmax)
+                    cats.update({name: cat})
+
+        if nproc > 1:
+            for i in range(1, nproc):
+                comm.send(cats, dest=i, tag=nproc + i)
+    else:
+        cats = comm.recv(source=0, tag=nproc + rank)
+    return cats
+
+
+###############################################################################
+#                               Clusters                                      #
+###############################################################################
+
 _coma = {"RA": (12 + 59 / 60 + 48.7 / 60**2) * 15,
          "DEC": 27 + 58 / 60 + 50 / 60**2,
          "COMDIST": 102.975}
@@ -40,7 +201,6 @@ _virgo = {"RA": (12 + 27 / 60) * 15,
 
 specific_clusters = {"Coma": _coma, "Virgo": _virgo}
 
-
 ###############################################################################
 #                                 Surveys                                     #
 ###############################################################################
@@ -56,6 +216,3 @@ class SDSS:
 
     def __call__(self):
         return csiborgtools.read.SDSS(h=1, sel_steps=self.steps)
-
-
-surveys = {"SDSS": SDSS}
diff --git a/scripts_plots/plot_knn.py b/scripts_plots/plot_knn.py
new file mode 100644
index 0000000..f5baa09
--- /dev/null
+++ b/scripts_plots/plot_knn.py
@@ -0,0 +1,103 @@
+# Copyright (C) 2023 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+from os.path import join
+
+import matplotlib.pyplot as plt
+import numpy
+
+import scienceplots  # noqa
+import utils
+
+try:
+    import csiborgtools
+except ModuleNotFoundError:
+    import sys
+    sys.path.append("../")
+    import csiborgtools
+
+
+###############################################################################
+#                Probability of matching a reference simulation halo          #
+###############################################################################
+
+
+def plot_knn(runname):
+    print(f"Plotting kNN CDF for {runname}.")
+    cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    reader = csiborgtools.read.kNNCDFReader(paths)
+
+    with plt.style.context(utils.mplstyle):
+        plt.figure()
+
+        # Quijote kNN
+        rs, cdf, ndensity = reader.read("quijote", runname, kind="auto")
+        pk = reader.prob_k(cdf)
+        pk_poisson = reader.poisson_prob_k(rs, numpy.arange(pk.shape[1]),
+                                           ndensity)
+
+        for k in range(3):
+            mu = numpy.mean(pk[:, k, :], axis=0)
+            std = numpy.std(pk[:, k, :], axis=0)
+            plt.plot(rs, mu, label=r"$k = {}$, Quijote".format(k + 1),
+                     c=cols[k % len(cols)])
+            # plt.fill_between(rs, mu - std, mu + std, alpha=0.15,
+            #                  color=cols[k % len(cols)], zorder=0)
+
+            mu = numpy.mean(pk_poisson[:, k, :], axis=0)
+            std = numpy.std(pk_poisson[:, k, :], axis=0)
+            plt.plot(rs, mu, c=cols[k % len(cols)], ls="dashed",
+                     label=r"$k = {}$, Poisson analytical".format(k + 1))
+            # plt.fill_between(rs, mu - std, mu + std, alpha=0.15,
+            #                  color=cols[k % len(cols)], zorder=0, hatch="\\")
+
+        # Quijote poisson kNN
+        rs, cdf, ndensity = reader.read("quijote", "mass003_poisson",
+                                        kind="auto")
+        pk = reader.prob_k(cdf)
+
+        for k in range(3):
+            mu = numpy.mean(pk[:, k, :], axis=0)
+            std = numpy.std(pk[:, k, :], axis=0)
+            plt.plot(rs, mu, label=r"$k = {}$, Poisson Quijote".format(k + 1),
+                     c=cols[k % len(cols)], ls="dotted")
+            # plt.fill_between(rs, mu - std, mu + std, alpha=0.15,
+            #                  color=cols[k % len(cols)], zorder=0)
+
+#         # CSiBORG kNN
+#         rs, cdf, ndensity = reader.read("csiborg", runname, kind="auto")
+#         pk = reader.mean_prob_k(cdf)
+#         for k in range(2):
+#             mu = pk[k, :, 0]
+#             std = pk[k, :, 1]
+#             plt.plot(rs, mu, ls="--", c=cols[k % len(cols)])
+#             plt.fill_between(rs, mu - std, mu + std, alpha=0.15, hatch="\\",
+#                              color=cols[k % len(cols)], zorder=0)
+
+        plt.legend()
+        plt.xlabel(r"$r~[\mathrm{Mpc}]$")
+        plt.ylabel(r"$P(k | V = 4 \pi r^3 / 3)$")
+
+        for ext in ["png"]:
+            fout = join(utils.fout, f"knn_{runname}.{ext}")
+            print("Saving to `{fout}`.".format(fout=fout))
+            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+        plt.close()
+
+
+if __name__ == "__main__":
+
+    plot_knn("mass003")
diff --git a/scripts_plots/plot_nearest.py b/scripts_plots/plot_nearest.py
new file mode 100644
index 0000000..e769fc8
--- /dev/null
+++ b/scripts_plots/plot_nearest.py
@@ -0,0 +1,91 @@
+# Copyright (C) 2023 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+from argparse import ArgumentParser
+
+import matplotlib.pyplot as plt
+import numpy
+import scienceplots  # noqa
+from cache_to_disk import cache_to_disk, delete_disk_caches_for_function
+
+import utils
+
+try:
+    import csiborgtools
+except ModuleNotFoundError:
+    import sys
+    sys.path.append("../")
+    import csiborgtools
+
+
+@cache_to_disk(7)
+def read_cdf(simname, run, kwargs):
+    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
+    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
+    return reader.build_cdf(simname, run, verbose=True)
+
+
+def plot_cdf(kwargs):
+    print("Plotting the CDFs.", flush=True)
+    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
+    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
+    x = reader.bin_centres("neighbour")
+
+    y_quijote = read_cdf("quijote", "mass003", kwargs)
+    y_csiborg = read_cdf("csiborg", "mass003", kwargs)
+    ncdf = y_quijote.shape[0]
+
+    with plt.style.context(utils.mplstyle):
+        plt.figure()
+        for i in range(ncdf):
+            if i == 0:
+                label1 = "Quijote"
+                label2 = "CSiBORG"
+            else:
+                label1 = None
+                label2 = None
+            plt.plot(x, y_quijote[i], c="C0", label=label1)
+            plt.plot(x, y_csiborg[i], c="C1", label=label2)
+        plt.xlim(0, 75)
+        plt.ylim(0, 1)
+        plt.xlabel(r"$r_{\rm neighbour}~[\mathrm{Mpc}]$")
+        plt.ylabel(r"$\mathrm{CDF}(r_{\rm neighbour})$")
+        plt.legend()
+
+        plt.tight_layout()
+        plt.savefig("../plots/nearest_neighbour_cdf.png", dpi=450,
+                    bbox_inches="tight")
+        plt.close()
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument('-c', '--clean', action='store_true')
+    args = parser.parse_args()
+
+    kwargs = {"rmax_radial": 155 / 0.705,
+              "nbins_radial": 20,
+              "rmax_neighbour": 100.,
+              "nbins_neighbour": 150,
+              "paths_kind": csiborgtools.paths_glamdring}
+
+    cached_funcs = ["read_cdf"]
+    if args.clean:
+        for func in cached_funcs:
+            print(f"Cleaning cache for function {func}.")
+            delete_disk_caches_for_function(func)
+
+
+    plot_cdf(kwargs)
diff --git a/scripts_plots/overlap.py b/scripts_plots/plot_overlap.py
similarity index 100%
rename from scripts_plots/overlap.py
rename to scripts_plots/plot_overlap.py
diff --git a/scripts_plots/utils.py b/scripts_plots/utils.py
index cf19da0..eb3e139 100644
--- a/scripts_plots/utils.py
+++ b/scripts_plots/utils.py
@@ -15,4 +15,4 @@
 
 dpi = 450
 fout = "../plots/"
-mplstyle = ["notebook"]
+mplstyle = ["science", "notebook"]