CDF for nearest neighbour (#63)

* Updat ebounds * fix mistake * add plot script * fix which sims * Add Poisson * Just docs * Hide things to __main__ * Rename paths * Move old script * Remove radpos * Paths renaming * Paths renaming * Remove trunk stuff * Add import * Add nearest neighbour search * Add Quijote fiducial indices * Add final snapshot matching * Add fiducial observer selection * add boxsizes * Add reading functions * Little stuff * Bring back the fiducial observer * Add arguments * Add quijote paths * Add notes * Get this running * Add yaml * Remove Poisson stuff * Get the 2PCF script running * Add not finished htings * Remove comment * Verbosity only on 0th rank! * Update plotting style * Add nearest neighbour CDF * Save radial distance too * Add centres * Add basic plotting
2025-06-09 02:11:12 +00:00 · 2023-05-21 22:46:28 +01:00 · 2023-05-21 22:46:28 +01:00 · 2185846e90
commit 2185846e90
parent 369438f881
34 changed files with 1254 additions and 351 deletions
--- a/csiborgtools/clustering/knn.py
+++ b/csiborgtools/clustering/knn.py
@ -241,7 +241,7 @@ class kNN_1DCDF:
    def __call__(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
                 batch_size=None, random_state=42, dtype=numpy.float32):
        """
-        Calculate the CDF for a set of kNNs of CSiBORG halo catalogues.
+        Calculate the CDF for a set of kNNs of halo catalogues.
        Parameters
        ----------
--- a/csiborgtools/match/init.py
+++ b/csiborgtools/match/init.py
@ -15,5 +15,6 @@
 from .match import (ParticleOverlap, RealisationsMatcher,  # noqa
                    calculate_overlap, calculate_overlap_indxs,
                    cosine_similarity)
 from .nearest_neighbour import find_neighbour  # noqa
 from .num_density import binned_counts, number_density  # noqa
 from .utils import concatenate_parts  # noqa
--- a/csiborgtools/match/nearest_neighbour.py
+++ b/csiborgtools/match/nearest_neighbour.py
@ -0,0 +1,56 @@
 # Copyright (C) 2022 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 Tools for finding the nearest neighbours of reference simulation haloes from
 cross simulations.
 """
 import numpy
 def find_neighbour(nsim0, cats):
    """
    Find the nearest neighbour of halos in `cat0` in `catx`.
    Parameters
    ----------
    nsim0 : int
        Index of the reference simulation.
    cats : dict
        Dictionary of halo catalogues. Keys must be the simulation indices.
    Returns
    -------
    dists : 2-dimensional array of shape `(nhalos, len(cats) - 1)`
        Distances to the nearest neighbour.
    cross_hindxs : 2-dimensional array of shape `(nhalos, len(cats) - 1)`
        Halo indices of the nearest neighbour.
    """
    cat0 = cats[nsim0]
    X = cat0.position(in_initial=False)
    shape = (X.shape[0], len(cats) - 1)
    dists = numpy.full(shape, numpy.nan, dtype=numpy.float32)
    cross_hindxs = numpy.full(shape, numpy.nan, dtype=numpy.int32)
    i = 0
    for nsimx, catx in cats.items():
        if nsimx == nsim0:
            continue
        dist, ind = catx.nearest_neighbours(X, radius=1, in_initial=False,
                                            knearest=True)
        dists[:, i] = dist.reshape(-1,)
        cross_hindxs[:, i] = catx["index"][ind.reshape(-1,)]
        i += 1
    return dists, cross_hindxs
--- a/csiborgtools/read/init.py
+++ b/csiborgtools/read/init.py
@ -16,6 +16,7 @@ from .box_units import CSiBORGBox, QuijoteBox  # noqa
 from .halo_cat import (ClumpsCatalogue, HaloCatalogue,  # noqa
                       QuijoteHaloCatalogue, fiducial_observers)
 from .knn_summary import kNNCDFReader  # noqa
 from .nearest_neighbour_summary import NearestNeighbourReader  # noqa
 from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies,  # noqa
                  TwoMPPGroups)
 from .overlap_summary import (NPairsOverlap, PairOverlap,  # noqa
--- a/csiborgtools/read/box_units.py
+++ b/csiborgtools/read/box_units.py
@ -15,7 +15,7 @@
 """
 Simulation box unit transformations.
 """
-from abc import ABC
+from abc import ABC, abstractproperty
 import numpy
 from astropy import constants, units
@ -93,6 +93,17 @@ class BaseBox(ABC):
        """
        return self.cosmo.Om0
    @abstractproperty
    def boxsize(self):
        """
        Box size in cMpc.
        Returns
        -------
        boxsize : float
        """
        pass
 ###############################################################################
 #                              CSiBORG box                                    #
@ -383,6 +394,10 @@ class CSiBORGBox(BaseBox):
        return data
    @property
    def boxsize(self):
        return self.box2mpc(1.)
 ###############################################################################
 #                      Quijote fiducial cosmology box                         #
@ -408,3 +423,7 @@ class QuijoteBox(BaseBox):
        self._cosmo = LambdaCDM(H0=67.11, Om0=0.3175, Ode0=0.6825,
                                Tcmb0=2.725 * units.K, Ob0=0.049)
    @property
    def boxsize(self):
        return 1000. / (self._cosmo.H0.value / 100)
--- a/csiborgtools/read/halo_cat.py
+++ b/csiborgtools/read/halo_cat.py
@ -439,11 +439,11 @@ class ClumpsCatalogue(BaseCSiBORG):
        cols = ["index", "parent", "x", "y", "z", "mass_cl"]
        self._data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols)
        # Overwrite the parent with the ultimate parent
-        mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
+        mmain = numpy.load(self.paths.mmain(self.nsnap, self.nsim))
        self._data["parent"] = mmain["ultimate_parent"]
        if load_fitted:
-            fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "clumps"))
+            fits = numpy.load(paths.structfit(self.nsnap, nsim, "clumps"))
            cols = [col for col in fits.dtype.names if col != "index"]
            X = [fits[col] for col in cols]
            self._data = add_columns(self._data, X, cols)
@ -512,20 +512,20 @@ class HaloCatalogue(BaseCSiBORG):
        self.nsim = nsim
        self.paths = paths
        # Read in the mmain catalogue of summed substructure
-        mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
+        mmain = numpy.load(self.paths.mmain(self.nsnap, self.nsim))
        self._data = mmain["mmain"]
        # We will also need the clumps catalogue
        if load_clumps_cat:
            self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True,
                                               load_fitted=False)
        if load_fitted:
-            fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos"))
+            fits = numpy.load(paths.structfit(self.nsnap, nsim, "halos"))
            cols = [col for col in fits.dtype.names if col != "index"]
            X = [fits[col] for col in cols]
            self._data = add_columns(self._data, X, cols)
        if load_initial:
-            fits = numpy.load(paths.initmatch_path(nsim, "fit"))
+            fits = numpy.load(paths.initmatch(nsim, "fit"))
            X, cols = [], []
            for col in fits.dtype.names:
                if col == "index":
@ -590,8 +590,7 @@ class QuijoteHaloCatalogue(BaseCatalogue):
        Snapshot index.
    origin : len-3 tuple, optional
        Where to place the origin of the box. By default the centre of the box.
-        In units of :math:`cMpc`. Optionally can be an integer between 0 and 8,
+        In units of :math:`cMpc`.
        inclusive to correspond to CSiBORG boxes.
    bounds : dict
        Parameter bounds to apply to the catalogue. The keys are the parameter
        names and the items are a len-2 tuple of (min, max) values. In case of
@ -601,12 +600,16 @@ class QuijoteHaloCatalogue(BaseCatalogue):
        Keyword arguments for backward compatibility.
    """
    _nsnap = None
    _origin = None
    def __init__(self, nsim, paths, nsnap,
                 origin=[500 / 0.6711, 500 / 0.6711, 500 / 0.6711],
                 bounds=None, **kwargs):
        self.paths = paths
        self.nsnap = nsnap
        self.origin = origin
        self._boxwidth = 1000 / 0.6711
        fpath = join(self.paths.quijote_dir, "halos", str(nsim))
        fof = FoF_catalog(fpath, self.nsnap, long_ids=False, swap=False,
                          SFR=False, read_IDs=False)
@ -614,21 +617,18 @@ class QuijoteHaloCatalogue(BaseCatalogue):
        cols = [("x", numpy.float32), ("y", numpy.float32),
                ("z", numpy.float32), ("vx", numpy.float32),
                ("vy", numpy.float32), ("vz", numpy.float32),
-                ("group_mass", numpy.float32), ("npart", numpy.int32)]
+                ("group_mass", numpy.float32), ("npart", numpy.int32),
                ("index", numpy.int32)]
        data = cols_to_structured(fof.GroupLen.size, cols)
        if isinstance(origin, int):
            origin = fiducial_observers(1000 / 0.6711, 155.5 / 0.6711)[origin]
        pos = fof.GroupPos / 1e3 / self.box.h
        for i in range(3):
            pos[:, i] -= origin[i]
        vel = fof.GroupVel * (1 + self.redshift)
        for i, p in enumerate(["x", "y", "z"]):
-            data[p] = pos[:, i]
+            data[p] = pos[:, i] - self.origin[i]
            data["v" + p] = vel[:, i]
        data["group_mass"] = fof.GroupMass * 1e10 / self.box.h
        data["npart"] = fof.GroupLen
        data["index"] = numpy.arange(data.size, dtype=numpy.int32)
        self._data = data
        if bounds is not None:
@ -673,6 +673,53 @@ class QuijoteHaloCatalogue(BaseCatalogue):
        """
        return QuijoteBox(self.nsnap)
    @property
    def origin(self):
        """
        Origin of the box with respect to the initial box units.
        Returns
        -------
        origin : len-3 tuple
        """
        if self._origin is None:
            raise ValueError("`origin` is not set.")
        return self._origin
    @origin.setter
    def origin(self, origin):
        if isinstance(origin, (list, tuple)):
            origin = numpy.asanyarray(origin)
        assert origin.ndim == 1 and origin.size == 3
        self._origin = origin
    def pick_fiducial_observer(self, n, rmax):
        r"""
        Return a copy of itself, storing only halos within `rmax` of the new
        fiducial observer.
        Parameters
        ----------
        n : int
            Fiducial observer index.
        rmax : float
            Maximum distance from the fiducial observer in :math:`cMpc`.
        Returns
        -------
        cat : instance of csiborgtools.read.QuijoteHaloCatalogue
        """
        new_origin = fiducial_observers(self.box.boxsize, rmax)[n]
        # We make a copy of the catalogue to avoid modifying the original.
        # Then, we shift coordinates back to the original box frame and then to
        # the new origin.
        cat = deepcopy(self)
        for i, p in enumerate(('x', 'y', 'z')):
            cat._data[p] += self.origin[i]
            cat._data[p] -= new_origin[i]
        cat.apply_bounds({"dist": (0, rmax)})
        return cat
 ###############################################################################
 #                     Utility functions for halo catalogues                   #
--- a/csiborgtools/read/knn_summary.py
+++ b/csiborgtools/read/knn_summary.py
@ -46,13 +46,15 @@ class kNNCDFReader:
    def paths(self, paths):
        self._paths = paths
-    def read(self, run, kind, rmin=None, rmax=None, to_clip=True):
+    def read(self, simname, run, kind, rmin=None, rmax=None, to_clip=True):
        """
        Read the auto- or cross-correlation kNN-CDF data. Infers the type from
        the data files.
        Parameters
        ----------
        simname : str
            Simulation name. Must be either `csiborg` or `quijote`.
        run : str
            Run ID to read in.
        kind : str
@ -71,14 +73,17 @@ class kNNCDFReader:
            Separations where the CDF is evaluated.
        out : 3-dimensional array of shape `(len(files), len(ks), neval)`
            Array of CDFs or cross-correlations.
        ndensity : 1-dimensional array of shape `(len(files), )`
            Number density of halos in the simulation.
        """
        assert kind in ["auto", "cross"]
        assert simname in ["csiborg", "quijote"]
        if kind == "auto":
-            files = self.paths.knnauto_path(run)
+            files = self.paths.knnauto(simname, run)
        else:
-            files = self.paths.knncross_path(run)
+            files = self.paths.knncross(simname, run)
        if len(files) == 0:
-            raise RuntimeError("No files found for run `{}`.".format(run))
+            raise RuntimeError(f"No files found for run `{run}`.")
        for i, file in enumerate(files):
            data = joblib.load(file)
@ -91,8 +96,11 @@ class kNNCDFReader:
                    isauto = True
                out = numpy.full((len(files), *data[kind].shape), numpy.nan,
                                 dtype=numpy.float32)
                ndensity = numpy.full(len(files), numpy.nan,
                                      dtype=numpy.float32)
                rs = data["rs"]
            out[i, ...] = data[kind]
            ndensity[i] = data["ndensity"]
            if isauto and to_clip:
                out[i, ...] = self.clipped_cdf(out[i, ...])
@ -103,7 +111,7 @@ class kNNCDFReader:
        rs = rs[mask]
        out = out[..., mask]
-        return rs, out
+        return rs, out, ndensity
    @staticmethod
    def peaked_cdf(cdf, make_copy=True):
@ -191,6 +199,7 @@ class kNNCDFReader:
        ----------
        cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
            Array of CDFs
        Returns
        -------
        out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
@ -212,16 +221,33 @@ class kNNCDFReader:
        ----------
        rs : 1-dimensional array
            Array of separations.
-        k : int
+        k : int or 1-dimensional array
            Number of objects.
-        ndensity : float
+        ndensity : float or 1-dimensional array
            Number density of objects.
        Returns
        -------
-        pk : 1-dimensional array
+        pk : 1-dimensional array or 3-dimensional array
            The PDF that a spherical volume of radius :math:`r` contains
-            :math:`k` objects.
+            :math:`k` objects. If `k` and `ndensity` are both arrays, the shape
            is `(len(ndensity), len(k), len(rs))`.
        """
        V = 4 * numpy.pi / 3 * rs**3
-        return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
+
        def probk(k, ndensity):
            return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
        if isinstance(k, int) and isinstance(ndensity, float):
            return probk(k, ndensity)
        # If either k or ndensity is an array, make sure the other is too.
        assert isinstance(k, numpy.ndarray) and k.ndim == 1
        assert isinstance(ndensity, numpy.ndarray) and ndensity.ndim == 1
        out = numpy.full((ndensity.size, k.size, rs.size), numpy.nan,
                         dtype=numpy.float32)
        for i in range(ndensity.size):
            for j in range(k.size):
                out[i, j, :] = probk(k[j], ndensity[i])
        return out
--- a/csiborgtools/read/nearest_neighbour_summary.py
+++ b/csiborgtools/read/nearest_neighbour_summary.py
@ -0,0 +1,287 @@
 # Copyright (C) 2023 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 Nearest neighbour summary for assessing goodness-of-reconstruction of a halo in
 the final snapshot.
 """
 from math import floor
 import numpy
 from numba import jit
 from tqdm import tqdm
 class NearestNeighbourReader:
    """
    Shortcut object to read in nearest neighbour data for assessing the
    goodness-of-reconstruction of a halo in the final snapshot.
    Parameters
    ----------
    rmax_radial : float
        Radius of the high-resolution region.
    paths : py:class`csiborgtools.read.Paths`
        Paths object.
    TODO: docs
    """
    _paths = None
    _rmax_radial = None
    _nbins_radial = None
    _rmax_neighbour = None
    _nbins_neighbour = None
    def __init__(self, rmax_radial, nbins_radial, rmax_neighbour,
                 nbins_neighbour, paths, **kwargs):
        self.paths = paths
        self.rmax_radial = rmax_radial
        self.nbins_radial = nbins_radial
        self.rmax_neighbour = rmax_neighbour
        self.nbins_neighbour = nbins_neighbour
    @property
    def rmax_radial_radial(self):
        """
        Radius of the high-resolution region.
        Parameters
        ----------
        rmax_radial_radial : float
        """
        return self._rmax_radial_radial
    @rmax_radial_radial.setter
    def rmax_radial_radial(self, rmax_radial_radial):
        assert isinstance(rmax_radial_radial, float)
        self._rmax_radial_radial = rmax_radial_radial
    @property
    def paths(self):
        """
        Paths manager.
        Parameters
        ----------
        paths : py:class`csiborgtools.read.Paths`
        """
        return self._paths
    @property
    def nbins_radial(self):
        """
        Number radial of bins.
        Returns
        -------
        nbins_radial : int
        """
        return self._nbins_radial
    @nbins_radial.setter
    def nbins_radial(self, nbins_radial):
        assert isinstance(nbins_radial, int)
        self._nbins_radial = nbins_radial
    @property
    def nbins_neighbour(self):
        """
        Number of neighbour bins.
        Returns
        -------
        nbins_neighbour : int
        """
        return self._nbins_neighbour
    @nbins_neighbour.setter
    def nbins_neighbour(self, nbins_neighbour):
        assert isinstance(nbins_neighbour, int)
        self._nbins_neighbour = nbins_neighbour
    @property
    def rmax_neighbour(self):
        """
        Maximum neighbour distance.
        Returns
        -------
        rmax_neighbour : float
        """
        return self._rmax_neighbour
    @rmax_neighbour.setter
    def rmax_neighbour(self, rmax_neighbour):
        assert isinstance(rmax_neighbour, float)
        self._rmax_neighbour = rmax_neighbour
    @paths.setter
    def paths(self, paths):
        self._paths = paths
    @property
    def radial_bin_edges(self):
        """
        Radial bins.
        Returns
        -------
        radial_bins : 1-dimensional array
        """
        nbins = self.nbins_radial + 1
        return self.rmax_radial * numpy.linspace(0, 1, nbins)**(1./3)
    @property
    def neighbour_bin_edges(self):
        """
        Neighbour bins edges
        Returns
        -------
        neighbour_bins : 1-dimensional array
        """
        nbins = self.nbins_neighbour + 1
        return numpy.linspace(0, self.rmax_neighbour, nbins)
    def bin_centres(self, kind):
        """
        Bin centres. Either for `radial` or `neighbour` bins.
        Parameters
        ----------
        kind : str
            Bin kind. Either `radial` or `neighbour`.
        Returns
        -------
        bin_centres : 1-dimensional array
        """
        assert kind in ["radial", "neighbour"]
        if kind == "radial":
            edges = self.radial_bin_edges
        else:
            edges = self.neighbour_bin_edges
        return 0.5 * (edges[1:] + edges[:-1])
    def read_single(self, simname, run, nsim, nobs=None):
        """
        Read in the nearest neighbour distances for halos from a single
        simulation.
        Parameters
        ----------
        simname : str
            Simulation name. Must be either `csiborg` or `quijote`.
        run : str
            Run name.
        nsim : int
            Simulation index.
        nobs : int, optional
            Fiducial Quijote observer index.
        Returns
        -------
        data : numpy archive
            Archive with keys `ndist`, `rdist`, `mass`, `cross_hindxs``
        """
        assert simname in ["csiborg", "quijote"]
        fpath = self.paths.cross_nearest(simname, run, nsim, nobs)
        return numpy.load(fpath)
    def build_cdf(self, simname, run, verbose=True):
        """
        Build the CDF for the nearest neighbour distribution. Counts the binned
        number of neighbour for each halo as a funtion of its radial distance
        from the centre of the high-resolution region and converts it to a CDF.
        Parameters
        ----------
        simname : str
            Simulation name. Must be either `csiborg` or `quijote`.
        run : str
            Run name.
        verbose : bool, optional
            Verbosity flag.
        Returns
        -------
        cdf : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
        """
        assert simname in ["csiborg", "quijote"]
        rbin_edges = self.radial_bin_edges
        # We first bin the distances as a function of each reference halo
        # radial distance and then its nearest neighbour distance.
        fpaths = self.paths.cross_nearest(simname, run)
        out = numpy.zeros((self.nbins_radial, self.nbins_neighbour),
                          dtype=numpy.float32)
        for fpath in tqdm(fpaths) if verbose else fpaths:
            data = numpy.load(fpath)
            out = count_neighbour(
                out, data["ndist"], data["rdist"], rbin_edges,
                self.rmax_neighbour, self.nbins_neighbour)
        # We then build up a CDF for each radial bin.
        out = numpy.cumsum(out, axis=1, out=out)
        out /= out[:, -1].reshape(-1, 1)
        return out
 ###############################################################################
 #                           Support functions                                 #
 ###############################################################################
@jit(nopython=True)
 def count_neighbour(counts, ndist, rdist, rbin_edges, rmax_neighbour,
                    nbins_neighbour):
    """
    Count the number of neighbour in neighbours bins for each halo as a funtion
    of its radial distance from the centre of the high-resolution region.
    Parameters
    ----------
    counts : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
        Array to store the counts.
    ndist : 2-dimensional array of shape `(nhalos, ncross_simulations)`
        Distance of each halo to its nearest neighbour from a cross simulation.
    rdist : 1-dimensional array of shape `(nhalos, )`
        Distance of each halo to the centre of the high-resolution region.
    rbin_edges : 1-dimensional array of shape `(nbins_radial + 1, )`
        Edges of the radial bins.
    rmax_neighbour : float
        Maximum neighbour distance.
    nbins_neighbour : int
        Number of neighbour bins.
    Returns
    -------
    counts : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
    """
    ncross = ndist.shape[1]
    # We normalise the neighbour distance by the maximum neighbour distance and
    # multiply by the number of bins. This way, the floor of each distance is
    # the bin number.
    ndist /= rmax_neighbour
    ndist *= nbins_neighbour
    # We loop over each halo, assign it to a radial bin and then assign its
    # neighbours to bins.
    for i, radial_cell in enumerate(numpy.digitize(rdist, rbin_edges) - 1):
        for j in range(ncross):
            neighbour_cell = floor(ndist[i, j])
            if neighbour_cell < nbins_neighbour:
                counts[radial_cell, neighbour_cell] += 1
    return counts
--- a/csiborgtools/read/overlap_summary.py
+++ b/csiborgtools/read/overlap_summary.py
@ -71,8 +71,8 @@ class PairOverlap:
        # We first load in the output files. We need to find the right
        # combination of the reference and cross simulation.
-        fname = paths.overlap_path(nsim0, nsimx, smoothed=False)
+        fname = paths.overlap(nsim0, nsimx, smoothed=False)
-        fname_inv = paths.overlap_path(nsimx, nsim0, smoothed=False)
+        fname_inv = paths.overlap(nsimx, nsim0, smoothed=False)
        if isfile(fname):
            data_ngp = numpy.load(fname, allow_pickle=True)
            to_invert = False
@ -83,7 +83,7 @@ class PairOverlap:
        else:
            raise FileNotFoundError(f"No file found for {nsim0} and {nsimx}.")
-        fname_smooth = paths.overlap_path(cat0.nsim, catx.nsim, smoothed=True)
+        fname_smooth = paths.overlap(cat0.nsim, catx.nsim, smoothed=True)
        data_smooth = numpy.load(fname_smooth, allow_pickle=True)
        # Create mapping from halo indices to array positions in the catalogue.
@ -628,11 +628,11 @@ def get_cross_sims(nsim0, paths, smoothed):
        Whether to use the smoothed overlap or not.
    """
    nsimxs = []
-    for nsimx in paths.get_ics():
+    for nsimx in paths.get_ics("csiborg"):
        if nsimx == nsim0:
            continue
-        f1 = paths.overlap_path(nsim0, nsimx, smoothed)
+        f1 = paths.overlap(nsim0, nsimx, smoothed)
-        f2 = paths.overlap_path(nsimx, nsim0, smoothed)
+        f2 = paths.overlap(nsimx, nsim0, smoothed)
        if isfile(f1) or isfile(f2):
            nsimxs.append(nsimx)
    return nsimxs
--- a/csiborgtools/read/paths.py
+++ b/csiborgtools/read/paths.py
@ -88,13 +88,6 @@ class Paths:
        self._check_directory(path)
        self._quijote_dir = path
    @staticmethod
    def get_quijote_ics():
        """
        Quijote IC realisation IDs.
        """
        return numpy.arange(100, dtype=int)
    @property
    def postdir(self):
        """
@ -130,9 +123,32 @@ class Paths:
            warn(f"Created directory `{fpath}`.", UserWarning, stacklevel=1)
        return fpath
-    def mmain_path(self, nsnap, nsim):
+    @staticmethod
    def quijote_fiducial_nsim(nsim, nobs=None):
        """
-        Path to the `mmain` files summed substructure files.
+        Fiducial Quijote simulation ID. Combines the IC realisation and
        observer placement.
        Parameters
        ----------
        nsim : int
            IC realisation index.
        nobs : int, optional
            Fiducial observer index.
        Returns
        -------
        id : str
        """
        if nobs is None:
            assert isinstance(nsim, str)
            assert len(nsim) == 5
            return nsim
        return f"{str(nobs).zfill(2)}{str(nsim).zfill(3)}"
    def mmain(self, nsnap, nsim):
        """
        Path to the `mmain` CSiBORG files of summed substructure.
        Parameters
        ----------
@ -152,10 +168,10 @@ class Paths:
        return join(fdir,
                    f"mmain_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz")
-    def initmatch_path(self, nsim, kind):
+    def initmatch(self, nsim, kind):
        """
-        Path to the `initmatch` files where the clump match between the
+        Path to the `initmatch` files where the halo match between the
-        initial and final snapshot is stored.
+        initial and final snapshot of a CSiBORG realisaiton is stored.
        Parameters
        ----------
@ -176,26 +192,35 @@ class Paths:
            warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
        return join(fdir, f"{kind}_{str(nsim).zfill(5)}.{ftype}")
-    def get_ics(self):
+    def get_ics(self, simname):
        """
-        Get CSiBORG IC realisation IDs from the list of folders in
+        Get available IC realisation IDs for either the CSiBORG or Quijote
-        `self.srcdir`.
+        simulation suite.
        Parameters
        ----------
        simname : str
            Simulation name. Must be one of `["csiborg", "quijote"]`.
        Returns
        -------
        ids : 1-dimensional array
        """
-        files = glob(join(self.srcdir, "ramses_out*"))
+        assert simname in ["csiborg", "quijote"]
-        files = [f.split("/")[-1] for f in files]      # Select only file names
+        if simname == "csiborg":
-        files = [f for f in files if "_inv" not in f]  # Remove inv. ICs
+            files = glob(join(self.srcdir, "ramses_out*"))
-        files = [f for f in files if "_new" not in f]  # Remove _new
+            files = [f.split("/")[-1] for f in files]      # Only file names
-        files = [f for f in files if "OLD" not in f]   # Remove _old
+            files = [f for f in files if "_inv" not in f]  # Remove inv. ICs
-        ids = [int(f.split("_")[-1]) for f in files]
+            files = [f for f in files if "_new" not in f]  # Remove _new
-        try:
+            files = [f for f in files if "OLD" not in f]   # Remove _old
-            ids.remove(5511)
+            ids = [int(f.split("_")[-1]) for f in files]
-        except ValueError:
+            try:
-            pass
+                ids.remove(5511)
-        return numpy.sort(ids)
+            except ValueError:
                pass
            return numpy.sort(ids)
        else:
            return numpy.arange(100, dtype=int)
    def ic_path(self, nsim, tonew=False):
        """
@ -239,7 +264,7 @@ class Paths:
        snaps = [int(snap.split("_")[-1].lstrip("0")) for snap in snaps]
        return numpy.sort(snaps)
-    def snapshot_path(self, nsnap, nsim):
+    def snapshot(self, nsnap, nsim):
        """
        Path to a CSiBORG IC realisation snapshot.
@ -258,9 +283,10 @@ class Paths:
        simpath = self.ic_path(nsim, tonew=tonew)
        return join(simpath, f"output_{str(nsnap).zfill(5)}")
-    def structfit_path(self, nsnap, nsim, kind):
+    def structfit(self, nsnap, nsim, kind):
        """
-        Path to the clump or halo catalogue from `fit_halos.py`.
+        Path to the clump or halo catalogue from `fit_halos.py`. Only CSiBORG
        is supported.
        Parameters
        ----------
@ -283,9 +309,9 @@ class Paths:
        fname = f"{kind}_out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
        return join(fdir, fname)
-    def overlap_path(self, nsim0, nsimx, smoothed):
+    def overlap(self, nsim0, nsimx, smoothed):
        """
-        Path to the overlap files between two simulations.
+        Path to the overlap files between two CSiBORG simulations.
        Parameters
        ----------
@ -309,32 +335,10 @@ class Paths:
            fname = fname.replace("overlap", "overlap_smoothed")
        return join(fdir, fname)
-    def radpos_path(self, nsnap, nsim):
+    def particles(self, nsim):
        """
-        Path to the files containing radial positions of halo particles (with
+        Path to the files containing all particles of a CSiBORG realisation at
-        summed substructure).
+        :math:`z = 0`.
        Parameters
        ----------
        nsnap : int
            Snapshot index.
        nsim : int
            IC realisation index.
        Returns
        -------
        path : str
        """
        fdir = join(self.postdir, "radpos")
        if not isdir(fdir):
            mkdir(fdir)
            warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
        fname = f"radpos_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz"
        return join(fdir, fname)
    def particles_path(self, nsim):
        """
        Path to the files containing all particles.
        Parameters
        ----------
@ -352,9 +356,9 @@ class Paths:
        fname = f"parts_{str(nsim).zfill(5)}.h5"
        return join(fdir, fname)
-    def field_path(self, kind, MAS, grid, nsim, in_rsp):
+    def field(self, kind, MAS, grid, nsim, in_rsp):
        """
-        Path to the files containing the calculated density fields.
+        Path to the files containing the calculated density fields in CSiBORG.
        Parameters
        ----------
@ -383,7 +387,43 @@ class Paths:
        fname = f"{kind}_{MAS}_{str(nsim).zfill(5)}_grid{grid}.npy"
        return join(fdir, fname)
-    def knnauto_path(self, simname, run, nsim=None, nobs=None):
+    def cross_nearest(self, simname, run, nsim=None, nobs=None):
        """
        Path to the files containing distance from a halo in a reference
        simulation to the nearest halo from a cross simulation.
        Parameters
        ----------
        simname : str
            Simulation name. Must be one of: `csiborg`, `quijote`.
        run : str
            Run name.
        nsim : int, optional
            IC realisation index.
        nobs : int, optional
            Fiducial observer index in Quijote simulations.
        Returns
        -------
        path : str
        """
        assert simname in ["csiborg", "quijote"]
        fdir = join(self.postdir, "nearest_neighbour")
        if not isdir(fdir):
            makedirs(fdir)
            warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
        if nsim is not None:
            if simname == "csiborg":
                nsim = str(nsim).zfill(5)
            else:
                nsim = self.quijote_fiducial_nsim(nsim, nobs)
            return join(fdir, f"{simname}_nn_{nsim}_{run}.npz")
        files = glob(join(fdir, f"{simname}_nn_*"))
        run = "_" + run
        return [f for f in files if run in f]
    def knnauto(self, simname, run, nsim=None, nobs=None):
        """
        Path to the `knn` auto-correlation files. If `nsim` is not specified
        returns a list of files for this run for all available simulations.
@ -393,7 +433,7 @@ class Paths:
        simname : str
            Simulation name. Must be either `csiborg` or `quijote`.
        run : str
-            Type of run.
+            Run name.
        nsim : int, optional
            IC realisation index.
        nobs : int, optional
@ -412,15 +452,14 @@ class Paths:
            if simname == "csiborg":
                nsim = str(nsim).zfill(5)
            else:
-                assert nobs is not None
+                nsim = self.quijote_fiducial_nsim(nsim, nobs)
                nsim = f"{str(nobs).zfill(2)}{str(nsim).zfill(3)}"
            return join(fdir, f"{simname}_knncdf_{nsim}_{run}.p")
        files = glob(join(fdir, f"{simname}_knncdf*"))
-        run = "__" + run
+        run = "_" + run
        return [f for f in files if run in f]
-    def knncross_path(self, simname, run, nsims=None):
+    def knncross(self, simname, run, nsims=None):
        """
        Path to the `knn` cross-correlation files. If `nsims` is not specified
        returns a list of files for this run for all available simulations.
@ -449,10 +488,10 @@ class Paths:
            return join(fdir, f"{simname}_knncdf_{nsim0}_{nsimx}__{run}.p")
        files = glob(join(fdir, f"{simname}_knncdf*"))
-        run = "__" + run
+        run = "_" + run
        return [f for f in files if run in f]
-    def tpcfauto_path(self, simname, run, nsim=None):
+    def tpcfauto(self, simname, run, nsim=None):
        """
        Path to the `tpcf` auto-correlation files. If `nsim` is not specified
        returns a list of files for this run for all available simulations.
--- a/csiborgtools/read/readsim.py
+++ b/csiborgtools/read/readsim.py
@ -76,7 +76,7 @@ class ParticleReader:
            Dictionary of information paramaters. Note that both keys and
            values are strings.
        """
-        snappath = self.paths.snapshot_path(nsnap, nsim)
+        snappath = self.paths.snapshot(nsnap, nsim)
        filename = join(snappath, "info_{}.txt".format(str(nsnap).zfill(5)))
        with open(filename, "r") as f:
            info = f.read().split()
@ -87,7 +87,6 @@ class ParticleReader:
        keys = info[eqs - 1]
        vals = info[eqs + 1]
        # trunk-ignore(ruff/B905)
        return {key: val for key, val in zip(keys, vals)}
    def open_particle(self, nsnap, nsim, verbose=True):
@ -110,7 +109,7 @@ class ParticleReader:
        partfiles : list of `scipy.io.FortranFile`
            Opened part files.
        """
-        snappath = self.paths.snapshot_path(nsnap, nsim)
+        snappath = self.paths.snapshot(nsnap, nsim)
        ncpu = int(self.read_info(nsnap, nsim)["ncpu"])
        nsnap = str(nsnap).zfill(5)
        if verbose:
--- a/csiborgtools/read/tpcf_summary.py
+++ b/csiborgtools/read/tpcf_summary.py
@ -65,7 +65,7 @@ class TPCFReader:
        out : 2-dimensional array of shape `(len(files), len(rp))`
            Array of 2PCFs.
        """
-        files = self.paths.tpcfauto_path(run)
+        files = self.paths.tpcfauto(run)
        if len(files) == 0:
            raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
--- a/scripts/cluster_crosspk.py
+++ b/scripts/cluster_crosspk.py
@ -16,6 +16,7 @@
 MPI script to calculate the matter cross power spectrum between CSiBORG
 IC realisations. Units are Mpc/h.
 """
 raise NotImplementedError("This script is currently not working.")
 from argparse import ArgumentParser
 from datetime import datetime
 from gc import collect
@ -51,7 +52,7 @@ MAS = "CIC"  # mass asignment scheme
 paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 box = csiborgtools.read.CSiBORGBox(paths)
 reader = csiborgtools.read.ParticleReader(paths)
-ics = paths.get_ics()
+ics = paths.get_ics("csiborg")
 nsims = len(ics)
 # File paths
--- a/scripts/cluster_knn_auto.py
+++ b/scripts/cluster_knn_auto.py
@ -12,18 +12,19 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
+"""
 A script to calculate the KNN-CDF for a set of halo catalogues.
 """
 from argparse import ArgumentParser
 from copy import deepcopy
 from datetime import datetime
-from warnings import warn
+from distutils.util import strtobool
 import joblib
 import numpy
 import yaml
 from mpi4py import MPI
 from sklearn.neighbors import NearestNeighbors
-from taskmaster import master_process, worker_process
+from taskmaster import work_delegation
 try:
    import csiborgtools
@ -33,161 +34,122 @@ except ModuleNotFoundError:
    sys.path.append("../")
    import csiborgtools
-
+from utils import open_catalogues
 ###############################################################################
 #                            MPI and arguments                                #
 ###############################################################################
 comm = MPI.COMM_WORLD
 rank = comm.Get_rank()
 nproc = comm.Get_size()
 parser = ArgumentParser()
 parser.add_argument("--runs", type=str, nargs="+")
 parser.add_argument("--ics", type=int, nargs="+", default=None,
                    help="IC realisations. If `-1` processes all simulations.")
 parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
 args = parser.parse_args()
 with open("../scripts/cluster_knn_auto.yml", "r") as file:
    config = yaml.safe_load(file)
 Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
 totvol = 4 * numpy.pi * Rmax**3 / 3
 paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 knncdf = csiborgtools.clustering.kNN_1DCDF()
 if args.ics is None or args.ics[0] == -1:
    if args.simname == "csiborg":
        ics = paths.get_ics()
    else:
        ics = paths.get_quijote_ics()
 else:
    ics = args.ics
-###############################################################################
+def do_auto(args, config, cats, nsim, paths):
-#                                 Analysis                                    #
+    """
-###############################################################################
+    Calculate the kNN-CDF single catalogue auto-correlation.
    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments.
    config : dict
        Configuration dictionary.
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    nsim : int
        Simulation index.
    paths : csiborgtools.paths.Paths
        Paths object.
-def read_single(nsim, selection, nobs=None):
+    Returns
-    # We first read the full catalogue without applying any bounds.
+    -------
-    if args.simname == "csiborg":
+    None
-        cat = csiborgtools.read.HaloCatalogue(nsim, paths)
+    """
-    else:
+    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
-        cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4,
+    knncdf = csiborgtools.clustering.kNN_1DCDF()
-                                                     origin=nobs)
+    cat = cats[nsim]
    cat.apply_bounds({"dist": (0, Rmax)})
    # We then first read off the primary selection bounds.
    sel = selection["primary"]
    pname = None
    xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
    for _name in xs:
        if _name in cat.keys:
            pname = _name
    if pname is None:
        raise KeyError(f"Invalid names `{sel['name']}`.")
    cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})
    # Now the secondary selection bounds. If needed transfrom the secondary
    # property before applying the bounds.
    if "secondary" in selection:
        sel = selection["secondary"]
        sname = None
        xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
        for _name in xs:
            if _name in cat.keys:
                sname = _name
        if sname is None:
            raise KeyError(f"Invalid names `{sel['name']}`.")
        if sel.get("toperm", False):
            cat[sname] = numpy.random.permutation(cat[sname])
        if sel.get("marked", False):
            cat[sname] = csiborgtools.clustering.normalised_marks(
                cat[pname], cat[sname], nbins=config["nbins_marks"])
        cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})
    return cat
 def do_auto(run, nsim, nobs=None):
    """Calculate the kNN-CDF single catalgoue autocorrelation."""
    _config = config.get(run, None)
    if _config is None:
        warn(f"No configuration for run {run}.", UserWarning, stacklevel=1)
        return
    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
    cat = read_single(nsim, _config, nobs=nobs)
    knn = cat.knn(in_initial=False)
    rs, cdf = knncdf(
        knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])
-
+    totvol = (4 / 3) * numpy.pi * args.Rmax ** 3
-    fout = paths.knnauto_path(args.simname, run, nsim, nobs)
+    fout = paths.knnauto(args.simname, args.run, nsim)
-    print(f"Saving output to `{fout}`.")
+    if args.verbose:
        print(f"Saving output to `{fout}`.")
    joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout)
-def do_cross_rand(run, nsim, nobs=None):
+def do_cross_rand(args, config, cats, nsim, paths):
-    """Calculate the kNN-CDF cross catalogue random correlation."""
+    """
-    _config = config.get(run, None)
+    Calculate the kNN-CDF cross catalogue random correlation.
    if _config is None:
        warn(f"No configuration for run {run}.", UserWarning, stacklevel=1)
        return
-    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
+    Parameters
-    cat = read_single(nsim, _config)
+    ----------
    args : argparse.Namespace
        Command line arguments.
    config : dict
        Configuration dictionary.
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    nsim : int
        Simulation index.
    paths : csiborgtools.paths.Paths
        Paths object.
    Returns
    -------
    None
    """
    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
    cat = cats[nsim]
    knn1 = cat.knn(in_initial=False)
    knn2 = NearestNeighbors()
    pos2 = rvs_gen(len(cat).shape[0])
    knn2.fit(pos2)
    knncdf = csiborgtools.clustering.kNN_1DCDF()
    rs, cdf0, cdf1, joint_cdf = knncdf.joint(
        knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])
    corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
-    fout = paths.knnauto_path(args.simname, run, nsim, nobs)
+
-    print(f"Saving output to `{fout}`.")
+    fout = paths.knnauto(args.simname, args.run, nsim)
    if args.verbose:
        print(f"Saving output to `{fout}`.", flush=True)
    joblib.dump({"rs": rs, "corr": corr}, fout)
-def do_runs(nsim):
+if __name__ == "__main__":
-    for run in args.runs:
+    parser = ArgumentParser()
-        iters = range(27) if args.simname == "quijote" else [None]
+    parser.add_argument("--run", type=str, help="Run name.")
-        for nobs in iters:
+    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
-            if "random" in run:
+                        help="Simulation name")
-                do_cross_rand(run, nsim, nobs)
+    parser.add_argument("--nsims", type=int, nargs="+", default=None,
-            else:
+                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
-                do_auto(run, nsim, nobs)
+    parser.add_argument("--Rmax", type=float, default=155/0.705,
                        help="High-resolution region radius")  # noqa
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                        default=False)
    args = parser.parse_args()
    with open("./cluster_knn_auto.yml", "r") as file:
        config = yaml.safe_load(file)
    comm = MPI.COMM_WORLD
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    cats = open_catalogues(args, config, paths, comm)
-###############################################################################
+    if args.verbose and comm.Get_rank() == 0:
-#                             MPI task delegation                             #
+        print(f"{datetime.now()}: starting to calculate the kNN statistic.")
 ###############################################################################
    def do_work(nsim):
        if "random" in args.run:
            do_cross_rand(args, config, cats, nsim, paths)
        else:
            do_auto(args, config, cats, nsim, paths)
-if nproc > 1:
+    nsims = list(cats.keys())
-    if rank == 0:
+    work_delegation(do_work, nsims, comm, master_verbose=args.verbose)
        tasks = deepcopy(ics)
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_runs, comm, verbose=False)
 else:
    tasks = deepcopy(ics)
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_runs(task)
 comm.Barrier()
-
+    comm.Barrier()
-if rank == 0:
+    if comm.Get_rank() == 0:
-    print("{}: all finished.".format(datetime.now()))
+        print(f"{datetime.now()}: all finished. Quitting.")
 quit()  # Force quit the script
--- a/scripts/cluster_knn_auto.yml
+++ b/scripts/cluster_knn_auto.yml
@ -1,8 +1,8 @@
 rmin: 0.1
 rmax: 100
 nneighbours: 8
-nsamples: 1.e+5
+nsamples: 1.e+7
-batch_size: 5.e+4
+batch_size: 1.e+6
 neval: 10000
 seed: 42
 nbins_marks: 10
@ -16,7 +16,7 @@ nbins_marks: 10
 "mass001":
  primary:
    name:
-    - totpartmass,
+    - totpartmass
    - group_mass
    min: 1.e+12
    max: 1.e+13
@ -24,7 +24,7 @@ nbins_marks: 10
 "mass002":
  primary:
    name:
-    - totpartmass,
+    - totpartmass
    - group_mass
    min: 1.e+13
    max: 1.e+14
@ -32,7 +32,15 @@ nbins_marks: 10
 "mass003":
  primary:
    name:
-    - totpartmass,
+    - totpartmass
    - group_mass
    min: 1.e+14
 "mass003_poisson":
  poisson: true
  primary:
    name:
    - totpartmass
    - group_mass
    min: 1.e+14
--- a/scripts/cluster_knn_cross.py
+++ b/scripts/cluster_knn_cross.py
@ -16,11 +16,13 @@
 A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.
 TODO:
    - [ ] Add support for new catalogue readers. Currently will not work.
    - [ ] Update catalogue readers.
    - [ ] Update paths.
    - [ ] Update to cross-correlate different mass populations from different
    simulations.
 """
 raise NotImplementedError("This script is currently not working.")
 from argparse import ArgumentParser
 from datetime import datetime
 from itertools import combinations
@ -58,7 +60,7 @@ with open("../scripts/knn_cross.yml", "r") as file:
 Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
 paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-ics = paths.get_ics()
+ics = paths.get_ics("csiborg")
 knncdf = csiborgtools.clustering.kNN_1DCDF()
 ###############################################################################
@ -109,7 +111,7 @@ def do_cross(run, ics):
    )
    corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
-    fout = paths.knncross_path(args.simname, run, ics)
+    fout = paths.knncross(args.simname, run, ics)
    joblib.dump({"rs": rs, "corr": corr}, fout)
--- a/scripts/cluster_tpcf_auto.py
+++ b/scripts/cluster_tpcf_auto.py
@ -16,18 +16,16 @@
 A script to calculate the auto-2PCF of CSiBORG catalogues.
 """
 from argparse import ArgumentParser
 from copy import deepcopy
 from datetime import datetime
-from warnings import warn
+from distutils.util import strtobool
 import joblib
 import numpy
 import yaml
 from mpi4py import MPI
-from taskmaster import master_process, worker_process
+from taskmaster import work_delegation
-
+from utils import open_catalogues
 from .cluster_knn_auto import read_single
 try:
    import csiborgtools
@ -38,84 +36,51 @@ except ModuleNotFoundError:
    import csiborgtools
-###############################################################################
+def do_auto(args, config, cats, nsim, paths):
-#                            MPI and arguments                                #
+    tpcf = csiborgtools.clustering.Mock2PCF()
-###############################################################################
+    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
 comm = MPI.COMM_WORLD
 rank = comm.Get_rank()
 nproc = comm.Get_size()
 parser = ArgumentParser()
 parser.add_argument("--runs", type=str, nargs="+")
 parser.add_argument("--ics", type=int, nargs="+", default=None,
                    help="IC realisations. If `-1` processes all simulations.")
 parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
 args = parser.parse_args()
 with open("../scripts/tpcf_auto.yml", "r") as file:
    config = yaml.safe_load(file)
 Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
 paths = csiborgtools.read.Paths()
 tpcf = csiborgtools.clustering.Mock2PCF()
 if args.ics is None or args.ics[0] == -1:
    if args.simname == "csiborg":
        ics = paths.get_ics()
    else:
        ics = paths.get_quijote_ics()
 else:
    ics = args.ics
 ###############################################################################
 #                                 Analysis                                    #
 ###############################################################################
 def do_auto(run, nsim):
    _config = config.get(run, None)
    if _config is None:
        warn("No configuration for run {}.".format(run), stacklevel=1)
        return
    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
    bins = numpy.logspace(
-        numpy.log10(config["rpmin"]),
+        numpy.log10(config["rpmin"]), numpy.log10(config["rpmax"]),
-        numpy.log10(config["rpmax"]),
+        config["nrpbins"] + 1,)
-        config["nrpbins"] + 1,
+    cat = cats[nsim]
-    )
+
    cat = read_single(nsim, _config)
    pos = cat.position(in_initial=False, cartesian=True)
    nrandom = int(config["randmult"] * pos.shape[0])
    rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
-    fout = paths.tpcfauto_path(args.simname, run, nsim)
+    fout = paths.knnauto(args.simname, args.run, nsim)
    joblib.dump({"rp": rp, "wp": wp}, fout)
-def do_runs(nsim):
+if __name__ == "__main__":
-    for run in args.runs:
+    parser = ArgumentParser()
-        do_auto(run, nsim)
+    parser.add_argument("--run", type=str, help="Run name.")
    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
    parser.add_argument("--Rmax", type=float, default=155/0.705,
                        help="High-resolution region radius")  # noqa
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                        default=False)
    args = parser.parse_args()
    with open("./cluster_tpcf_auto.yml", "r") as file:
        config = yaml.safe_load(file)
-###############################################################################
+    comm = MPI.COMM_WORLD
-#                             MPI task delegation                             #
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-###############################################################################
+    cats = open_catalogues(args, config, paths, comm)
    if args.verbose and comm.Get_rank() == 0:
        print(f"{datetime.now()}: starting to calculate the 2PCF statistic.")
-if nproc > 1:
+    def do_work(nsim):
-    if rank == 0:
+        return do_auto(args, config, cats, nsim, paths)
        tasks = deepcopy(ics)
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_runs, comm, verbose=False)
 else:
    tasks = deepcopy(ics)
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_runs(task)
 comm.Barrier()
    nsims = list(cats.keys())
    work_delegation(do_work, nsims, comm, master_verbose=args.verbose)
-if rank == 0:
+    comm.Barrier()
-    print("{}: all finished.".format(datetime.now()))
+    if comm.Get_rank() == 0:
-quit()  # Force quit the script
+        print(f"{datetime.now()}: all finished. Quitting.")
--- a/scripts/field_derived.py
+++ b/scripts/field_derived.py
@ -48,7 +48,7 @@ args = parser.parse_args()
 paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
    ics = args.ics
@ -62,7 +62,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
    box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
    density_gen = csiborgtools.field.DensityField(box, args.MAS)
-    rho = numpy.load(paths.field_path("density", args.MAS, args.grid, nsim,
+    rho = numpy.load(paths.field("density", args.MAS, args.grid, nsim,
                                      args.in_rsp))
    rho = density_gen.overdensity_field(rho)
@ -72,7 +72,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
        raise RuntimeError(f"Field {args.kind} is not implemented yet.")
    field = gen(rho)
-    fout = paths.field_path("potential", args.MAS, args.grid, nsim,
+    fout = paths.field("potential", args.MAS, args.grid, nsim,
                            args.in_rsp)
    print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.")
    numpy.save(fout, field)
--- a/scripts/field_fromparts.py
+++ b/scripts/field_fromparts.py
@ -50,7 +50,7 @@ paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 mpart = 1.1641532e-10  # Particle mass in CSiBORG simulations.
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
    ics = args.ics
@ -62,7 +62,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
    nsnap = max(paths.get_snapshots(nsim))
    box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
-    parts = csiborgtools.read.read_h5(paths.particles_path(nsim))["particles"]
+    parts = csiborgtools.read.read_h5(paths.particles(nsim))["particles"]
    if args.kind == "density":
        gen = csiborgtools.field.DensityField(box, args.MAS)
@ -71,6 +71,6 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
        gen = csiborgtools.field.VelocityField(box, args.MAS)
        field = gen(parts, args.grid, mpart, verbose=verbose)
-    fout = paths.field_path(args.kind, args.MAS, args.grid, nsim, args.in_rsp)
+    fout = paths.field(args.kind, args.MAS, args.grid, nsim, args.in_rsp)
    print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.")
    numpy.save(fout, field)
--- a/scripts/fit_halos.py
+++ b/scripts/fit_halos.py
@ -47,7 +47,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
 nfwpost = csiborgtools.fits.NFWPosterior()
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
    ics = args.ics
@ -108,7 +108,7 @@ for nsim in [ics[i] for i in jobs]:
    box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
    # Particle archive
-    f = csiborgtools.read.read_h5(paths.particles_path(nsim))
+    f = csiborgtools.read.read_h5(paths.particles(nsim))
    particles = f["particles"]
    clump_map = f["clumpmap"]
    clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
@ -153,6 +153,6 @@ for nsim in [ics[i] for i in jobs]:
    if args.kind == "halos":
        out = out[ismain]
-    fout = paths.structfit_path(nsnap, nsim, args.kind)
+    fout = paths.structfit(nsnap, nsim, args.kind)
    print(f"Saving to `{fout}`.", flush=True)
    numpy.save(fout, out)
--- a/scripts/fit_init.py
+++ b/scripts/fit_init.py
@ -48,7 +48,7 @@ paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 partreader = csiborgtools.read.ParticleReader(paths)
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
    ics = args.ics
@ -66,9 +66,9 @@ for nsim in [ics[i] for i in jobs]:
    print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
          flush=True)
-    parts = csiborgtools.read.read_h5(paths.initmatch_path(nsim, "particles"))
+    parts = csiborgtools.read.read_h5(paths.initmatch(nsim, "particles"))
    parts = parts['particles']
-    clump_map = csiborgtools.read.read_h5(paths.particles_path(nsim))
+    clump_map = csiborgtools.read.read_h5(paths.particles(nsim))
    clump_map = clump_map["clumpmap"]
    clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
                                                   load_fitted=False)
@ -96,7 +96,7 @@ for nsim in [ics[i] for i in jobs]:
    out = out[ismain]
    # Now save it
-    fout = paths.initmatch_path(nsim, "fit")
+    fout = paths.initmatch(nsim, "fit")
    print(f"{datetime.now()}: dumping fits to .. `{fout}`.",
          flush=True)
    with open(fout, "wb") as f:
--- a/scripts/match_all.py
+++ b/scripts/match_all.py
@ -55,7 +55,7 @@ def get_combs():
    seed to minimise loading the same files simultaneously.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
    combs = list(combinations(ics, 2))
    Random(42).shuffle(combs)
    return combs
--- a/scripts/match_finsnap.py
+++ b/scripts/match_finsnap.py
@ -0,0 +1,102 @@
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 Script to find the nearest neighbour of each halo in a given halo catalogue
 from the remaining catalogues in the suite (CSIBORG or Quijote). The script is
 MPI parallelized over the reference simulations.
 """
 from argparse import ArgumentParser
 from datetime import datetime
 from distutils.util import strtobool
 import numpy
 import yaml
 from mpi4py import MPI
 from taskmaster import work_delegation
 from utils import open_catalogues
 try:
    import csiborgtools
 except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools
 def find_neighbour(args, nsim, cats, paths, comm):
    """
    Find the nearest neighbour of each halo in the given catalogue.
    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments.
    nsim : int
        Simulation index.
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    paths : csiborgtools.paths.Paths
        Paths object.
    comm : mpi4py.MPI.Comm
        MPI communicator.
    Returns
    -------
    None
    """
    ndist, cross_hindxs = csiborgtools.match.find_neighbour(nsim, cats)
    mass_key = "totpartmass" if args.simname == "csiborg" else "group_mass"
    cat0 = cats[nsim]
    mass = cat0[mass_key]
    rdist = cat0.radial_distance(in_initial=False)
    fout = paths.cross_nearest(args.simname, args.run, nsim)
    if args.verbose:
        print(f"Rank {comm.Get_rank()} writing to `{fout}`.", flush=True)
    numpy.savez(fout, ndist=ndist, cross_hindxs=cross_hindxs, mass=mass,
                rdist=rdist)
 if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--run", type=str, help="Run name")
    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
    parser.add_argument("--Rmax", type=float, default=155/0.705,
                        help="High-resolution region radius")
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                        default=False)
    args = parser.parse_args()
    with open("./match_finsnap.yml", "r") as file:
        config = yaml.safe_load(file)
    comm = MPI.COMM_WORLD
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    cats = open_catalogues(args, config, paths, comm)
    def do_work(nsim):
        return find_neighbour(args, nsim, cats, paths, comm)
    work_delegation(do_work, list(cats.keys()), comm,
                    master_verbose=args.verbose)
    comm.Barrier()
    if comm.Get_rank() == 0:
        print(f"{datetime.now()}: all finished. Quitting.")
--- a/scripts/match_finsnap.yml
+++ b/scripts/match_finsnap.yml
@ -0,0 +1,37 @@
 rmin: 0.1
 rmax: 100
 nneighbours: 8
 nsamples: 1.e+7
 batch_size: 1.e+6
 neval: 10000
 seed: 42
 nbins_marks: 10
 ################################################################################
 #                                 totpartmass                                 #
 ################################################################################
 "mass001":
  primary:
    name:
    - totpartmass
    - group_mass
    min: 1.e+12
    max: 1.e+13
 "mass002":
  primary:
    name:
    - totpartmass
    - group_mass
    min: 1.e+13
    max: 1.e+14
 "mass003":
  primary:
    name:
    - totpartmass
    - group_mass
    min: 1.e+14
--- a/scripts/match_singlematch.py
+++ b/scripts/match_singlematch.py
@ -45,12 +45,12 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
    catx = HaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
                         with_lagpatch=True, load_clumps_cat=True)
-    clumpmap0 = read_h5(paths.particles_path(nsim0))["clumpmap"]
+    clumpmap0 = read_h5(paths.particles(nsim0))["clumpmap"]
-    parts0 = read_h5(paths.initmatch_path(nsim0, "particles"))["particles"]
+    parts0 = read_h5(paths.initmatch(nsim0, "particles"))["particles"]
    clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}
-    clumpmapx = read_h5(paths.particles_path(nsimx))["clumpmap"]
+    clumpmapx = read_h5(paths.particles(nsimx))["clumpmap"]
-    partsx = read_h5(paths.initmatch_path(nsimx, "particles"))["particles"]
+    partsx = read_h5(paths.initmatch(nsimx, "particles"))["particles"]
    clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}
    # We generate the background density fields. Loads halos's particles one by
@ -77,7 +77,7 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
        for j, match in enumerate(matches):
            match_hids[i][j] = catx["index"][match]
-    fout = paths.overlap_path(nsim0, nsimx, smoothed=False)
+    fout = paths.overlap(nsim0, nsimx, smoothed=False)
    numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
                ngp_overlap=ngp_overlap)
    if verbose:
@ -99,7 +99,7 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
                                              match_indxs, smooth_kwargs,
                                              verbose=verbose)
-    fout = paths.overlap_path(nsim0, nsimx, smoothed=True)
+    fout = paths.overlap(nsim0, nsimx, smoothed=True)
    numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=sigma)
    if verbose:
        print(f"{datetime.now()}: calculated smoothing, saved to {fout}.",
--- a/scripts/old/fit_profiles.py
+++ b/scripts/old/fit_profiles.py
@ -48,7 +48,7 @@ if nproc > 1:
 paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
 cols_collect = [("r", numpy.float32), ("M", numpy.float32)]
 if args.ics is None or args.ics == -1:
-    nsims = paths.get_ics()
+    nsims = paths.get_ics("csiborg")
 else:
    nsims = args.ics
@ -61,7 +61,7 @@ for i, nsim in enumerate(nsims):
    nsnap = max(paths.get_snapshots(nsim))
    box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
-    f = csiborgtools.read.read_h5(paths.particles_path(nsim))
+    f = csiborgtools.read.read_h5(paths.particles(nsim))
    particles = f["particles"]
    clump_map = f["clumpmap"]
    clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
--- a/scripts/pre_dumppart.py
+++ b/scripts/pre_dumppart.py
@ -55,7 +55,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
 pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
    ics = args.ics
@ -87,7 +87,7 @@ jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
 for i in jobs:
    nsim = ics[i]
    nsnap = max(paths.get_snapshots(nsim))
-    fname = paths.particles_path(nsim)
+    fname = paths.particles(nsim)
    # We first read in the clump IDs of the particles and infer the sorting.
    # Right away we dump the clump IDs to a HDF5 file and clear up memory.
    print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
@ -146,7 +146,7 @@ for i in jobs:
        start_loop = kf
    # We save the mapping to a HDF5 file
-    with h5py.File(paths.particles_path(nsim), "r+") as f:
+    with h5py.File(paths.particles(nsim), "r+") as f:
        f.create_dataset("clumpmap", data=clump_map)
        f.close()
--- a/scripts/pre_mmain.py
+++ b/scripts/pre_mmain.py
@ -41,7 +41,7 @@ def do_mmain(nsim):
    nsnap = max(paths.get_snapshots(nsim))
    # NOTE: currently works for highest snapshot anyway
    mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
-    numpy.savez(paths.mmain_path(nsnap, nsim),
+    numpy.savez(paths.mmain(nsnap, nsim),
                mmain=mmain, ultimate_parent=ultimate_parent)
 ###############################################################################
@ -51,12 +51,12 @@ def do_mmain(nsim):
 if nproc > 1:
    if rank == 0:
-        tasks = list(paths.get_ics())
+        tasks = list(paths.get_ics("csiborg"))
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_mmain, comm, verbose=False)
 else:
-    tasks = paths.get_ics()
+    tasks = paths.get_ics("csiborg")
    for task in tasks:
        print(f"{datetime.now()}: completing task `{task}`.", flush=True)
        do_mmain(task)
--- a/scripts/pre_sortinit.py
+++ b/scripts/pre_sortinit.py
@ -50,7 +50,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
 pars_extract = ["x", "y", "z", "M", "ID"]
 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics()
+    ics = paths.get_ics("csiborg")
 else:
    ics = args.ics
@ -64,7 +64,7 @@ for i in jobs:
    print(f"{datetime.now()}: reading and processing simulation {nsim}.",
          flush=True)
    # We first load the particle IDs in the final snapshot.
-    pidf = csiborgtools.read.read_h5(paths.particles_path(nsim))
+    pidf = csiborgtools.read.read_h5(paths.particles(nsim))
    pidf = pidf["particle_ids"]
    # Then we load the particles in the initil snapshot and make sure that
    # their particle IDs are sorted as in the final snapshot.
@ -78,5 +78,5 @@ for i in jobs:
    collect()
    part0 = part0[numpy.argsort(numpy.argsort(pidf))]
    print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
-    with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
+    with h5py.File(paths.initmatch(nsim, "particles"), "w") as f:
        f.create_dataset("particles", data=part0)
--- a/scripts/utils.py
+++ b/scripts/utils.py
@ -13,23 +13,184 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-Notebook utility functions.
+Utility functions for scripts.
 """
 from datetime import datetime
-# from os.path import join
+import numpy
 from tqdm import tqdm
 try:
    import csiborgtools
 except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools
-Nsplits = 200
+###############################################################################
-dumpdir = "/mnt/extraspace/rstiskalek/CSiBORG/"
+#                           Reading functions                                 #
 ###############################################################################
-# Some chosen clusters
+def get_nsims(args, paths):
    """
    Get simulation indices from the command line arguments.
    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments. Must include `nsims` and `simname`. If `nsims`
        is `None` or `-1`, all simulations in `simname` are used.
    paths : :py:class`csiborgtools.paths.Paths`
        Paths object.
    Returns
    -------
    nsims : list of int
        Simulation indices.
    """
    if args.nsims is None or args.nsims[0] == -1:
        nsims = paths.get_ics(args.simname)
    else:
        nsims = args.nsims
    return list(nsims)
 def read_single_catalogue(args, config, nsim, run, rmax, paths, nobs=None):
    """
    Read a single halo catalogue and apply selection criteria to it.
    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments. Must include `simname`.
    config : dict
        Configuration dictionary.
    nsim : int
        Simulation index.
    run : str
        Run name.
    rmax : float
        Maximum radial distance of the halo catalogue.
    paths : csiborgtools.paths.Paths
        Paths object.
    nobs : int, optional
        Fiducial Quijote observer index.
    Returns
    -------
    cat : csiborgtools.read.HaloCatalogue or csiborgtools.read.QuijoteHaloCatalogue  # noqa
        Halo catalogue with selection criteria applied.
    """
    selection = config.get(run, None)
    if selection is None:
        raise KeyError(f"No configuration for run {run}.")
    # We first read the full catalogue without applying any bounds.
    if args.simname == "csiborg":
        cat = csiborgtools.read.HaloCatalogue(nsim, paths)
    else:
        cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
        if nobs is not None:
            # We may optionally already here pick a fiducial observer.
            cat = cat.pick_fiducial_observer(nobs, args.Rmax)
    cat.apply_bounds({"dist": (0, rmax)})
    # We then first read off the primary selection bounds.
    sel = selection["primary"]
    pname = None
    xs = sel["name"] if isinstance(sel["name"], list) else [sel["name"]]
    for _name in xs:
        if _name in cat.keys:
            pname = _name
    if pname is None:
        raise KeyError(f"Invalid names `{sel['name']}`.")
    cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})
    # Now the secondary selection bounds. If needed transfrom the secondary
    # property before applying the bounds.
    if "secondary" in selection:
        sel = selection["secondary"]
        sname = None
        xs = sel["name"] if isinstance(sel["name"], list) else [sel["name"]]
        for _name in xs:
            if _name in cat.keys:
                sname = _name
        if sname is None:
            raise KeyError(f"Invalid names `{sel['name']}`.")
        if sel.get("toperm", False):
            cat[sname] = numpy.random.permutation(cat[sname])
        if sel.get("marked", False):
            cat[sname] = csiborgtools.clustering.normalised_marks(
                cat[pname], cat[sname], nbins=config["nbins_marks"])
        cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})
    return cat
 def open_catalogues(args, config, paths, comm):
    """
    Read all halo catalogues on the zeroth rank and broadcast them to all
    higher ranks.
    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments.
    config : dict
        Configuration dictionary.
    paths : csiborgtools.paths.Paths
        Paths object.
    comm : mpi4py.MPI.Comm
        MPI communicator.
    Returns
    -------
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    """
    nsims = get_nsims(args, paths)
    rank = comm.Get_rank()
    nproc = comm.Get_size()
    if args.verbose and rank == 0:
        print(f"{datetime.now()}: opening catalogues.", flush=True)
    if rank == 0:
        cats = {}
        if args.simname == "csiborg":
            for nsim in tqdm(nsims) if args.verbose else nsims:
                cat = read_single_catalogue(args, config, nsim, args.run,
                                            rmax=args.Rmax, paths=paths)
                cats.update({nsim: cat})
        else:
            for nsim in tqdm(nsims) if args.verbose else nsims:
                ref_cat = read_single_catalogue(args, config, nsim, args.run,
                                                rmax=None, paths=paths)
                nmax = int(ref_cat.box.boxsize // (2 * args.Rmax))**3
                for nobs in range(nmax):
                    name = paths.quijote_fiducial_nsim(nsim, nobs)
                    cat = ref_cat.pick_fiducial_observer(nobs, rmax=args.Rmax)
                    cats.update({name: cat})
        if nproc > 1:
            for i in range(1, nproc):
                comm.send(cats, dest=i, tag=nproc + i)
    else:
        cats = comm.recv(source=0, tag=nproc + rank)
    return cats
 ###############################################################################
 #                               Clusters                                      #
 ###############################################################################
 _coma = {"RA": (12 + 59 / 60 + 48.7 / 60**2) * 15,
         "DEC": 27 + 58 / 60 + 50 / 60**2,
         "COMDIST": 102.975}
@ -40,7 +201,6 @@ _virgo = {"RA": (12 + 27 / 60) * 15,
 specific_clusters = {"Coma": _coma, "Virgo": _virgo}
 ###############################################################################
 #                                 Surveys                                     #
 ###############################################################################
@ -56,6 +216,3 @@ class SDSS:
    def __call__(self):
        return csiborgtools.read.SDSS(h=1, sel_steps=self.steps)
 surveys = {"SDSS": SDSS}
--- a/scripts_plots/plot_knn.py
+++ b/scripts_plots/plot_knn.py
@ -0,0 +1,103 @@
 # Copyright (C) 2023 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 from os.path import join
 import matplotlib.pyplot as plt
 import numpy
 import scienceplots  # noqa
 import utils
 try:
    import csiborgtools
 except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools
 ###############################################################################
 #                Probability of matching a reference simulation halo          #
 ###############################################################################
 def plot_knn(runname):
    print(f"Plotting kNN CDF for {runname}.")
    cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    reader = csiborgtools.read.kNNCDFReader(paths)
    with plt.style.context(utils.mplstyle):
        plt.figure()
        # Quijote kNN
        rs, cdf, ndensity = reader.read("quijote", runname, kind="auto")
        pk = reader.prob_k(cdf)
        pk_poisson = reader.poisson_prob_k(rs, numpy.arange(pk.shape[1]),
                                           ndensity)
        for k in range(3):
            mu = numpy.mean(pk[:, k, :], axis=0)
            std = numpy.std(pk[:, k, :], axis=0)
            plt.plot(rs, mu, label=r"$k = {}$, Quijote".format(k + 1),
                     c=cols[k % len(cols)])
            # plt.fill_between(rs, mu - std, mu + std, alpha=0.15,
            #                  color=cols[k % len(cols)], zorder=0)
            mu = numpy.mean(pk_poisson[:, k, :], axis=0)
            std = numpy.std(pk_poisson[:, k, :], axis=0)
            plt.plot(rs, mu, c=cols[k % len(cols)], ls="dashed",
                     label=r"$k = {}$, Poisson analytical".format(k + 1))
            # plt.fill_between(rs, mu - std, mu + std, alpha=0.15,
            #                  color=cols[k % len(cols)], zorder=0, hatch="\\")
        # Quijote poisson kNN
        rs, cdf, ndensity = reader.read("quijote", "mass003_poisson",
                                        kind="auto")
        pk = reader.prob_k(cdf)
        for k in range(3):
            mu = numpy.mean(pk[:, k, :], axis=0)
            std = numpy.std(pk[:, k, :], axis=0)
            plt.plot(rs, mu, label=r"$k = {}$, Poisson Quijote".format(k + 1),
                     c=cols[k % len(cols)], ls="dotted")
            # plt.fill_between(rs, mu - std, mu + std, alpha=0.15,
            #                  color=cols[k % len(cols)], zorder=0)
 #         # CSiBORG kNN
 #         rs, cdf, ndensity = reader.read("csiborg", runname, kind="auto")
 #         pk = reader.mean_prob_k(cdf)
 #         for k in range(2):
 #             mu = pk[k, :, 0]
 #             std = pk[k, :, 1]
 #             plt.plot(rs, mu, ls="--", c=cols[k % len(cols)])
 #             plt.fill_between(rs, mu - std, mu + std, alpha=0.15, hatch="\\",
 #                              color=cols[k % len(cols)], zorder=0)
        plt.legend()
        plt.xlabel(r"$r~[\mathrm{Mpc}]$")
        plt.ylabel(r"$P(k | V = 4 \pi r^3 / 3)$")
        for ext in ["png"]:
            fout = join(utils.fout, f"knn_{runname}.{ext}")
            print("Saving to `{fout}`.".format(fout=fout))
            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
        plt.close()
 if __name__ == "__main__":
    plot_knn("mass003")
--- a/scripts_plots/plot_nearest.py
+++ b/scripts_plots/plot_nearest.py
@ -0,0 +1,91 @@
 # Copyright (C) 2023 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 from argparse import ArgumentParser
 import matplotlib.pyplot as plt
 import numpy
 import scienceplots  # noqa
 from cache_to_disk import cache_to_disk, delete_disk_caches_for_function
 import utils
 try:
    import csiborgtools
 except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools
@cache_to_disk(7)
 def read_cdf(simname, run, kwargs):
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
    return reader.build_cdf(simname, run, verbose=True)
 def plot_cdf(kwargs):
    print("Plotting the CDFs.", flush=True)
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
    x = reader.bin_centres("neighbour")
    y_quijote = read_cdf("quijote", "mass003", kwargs)
    y_csiborg = read_cdf("csiborg", "mass003", kwargs)
    ncdf = y_quijote.shape[0]
    with plt.style.context(utils.mplstyle):
        plt.figure()
        for i in range(ncdf):
            if i == 0:
                label1 = "Quijote"
                label2 = "CSiBORG"
            else:
                label1 = None
                label2 = None
            plt.plot(x, y_quijote[i], c="C0", label=label1)
            plt.plot(x, y_csiborg[i], c="C1", label=label2)
        plt.xlim(0, 75)
        plt.ylim(0, 1)
        plt.xlabel(r"$r_{\rm neighbour}~[\mathrm{Mpc}]$")
        plt.ylabel(r"$\mathrm{CDF}(r_{\rm neighbour})$")
        plt.legend()
        plt.tight_layout()
        plt.savefig("../plots/nearest_neighbour_cdf.png", dpi=450,
                    bbox_inches="tight")
        plt.close()
 if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument('-c', '--clean', action='store_true')
    args = parser.parse_args()
    kwargs = {"rmax_radial": 155 / 0.705,
              "nbins_radial": 20,
              "rmax_neighbour": 100.,
              "nbins_neighbour": 150,
              "paths_kind": csiborgtools.paths_glamdring}
    cached_funcs = ["read_cdf"]
    if args.clean:
        for func in cached_funcs:
            print(f"Cleaning cache for function {func}.")
            delete_disk_caches_for_function(func)
    plot_cdf(kwargs)
--- a/scripts_plots/plot_overlap.py
+++ b/scripts_plots/plot_overlap.py
--- a/scripts_plots/utils.py
+++ b/scripts_plots/utils.py
@ -15,4 +15,4 @@
 dpi = 450
 fout = "../plots/"
-mplstyle = ["notebook"]
+mplstyle = ["science", "notebook"]