Add better diagnostics & plotting (#67)

* Add caching functions * Add limits * Add new mass runs * Update .gitignore * Edit which CDFs are loaded * Stop saving cross hindxs * Change dist to half precision * New nearest path * Add neighbour counting * Add neighbour kwargs * Update work in progress * Add new counting * Add how dist is built * Collect dist to 1 file * Update reading routine * Delete Quijote files * Remove file * Back to float32 * Fix bugs * Rename utils * Remove neighbour kwargs * Rename file * Fix bug * Rename plt utils * Change where nghb kwargs from * line length * Remove old notebooks * Move survey * Add white space * Update TODO * Update CDF calculation * Update temporarily plotting * Merge branch 'add_diagnostics' of github.com:Richard-Sti/csiborgtools into add_diagnostics * Start adding documentation to plotting * Remove comments * Better code documentation * Some work on tidal tensor * Better plotting * Add comment * Remove nb * Remove comment * Add documentation * Update plotting * Update submission * Update KL vs KS plots * Update the plotting routine * Update plotting * Update plotting routines
2025-06-08 18:01:11 +00:00 · 2023-06-16 14:33:27 +01:00 · 2023-06-16 14:33:27 +01:00 · ccbbbd24b4
commit ccbbbd24b4
parent 004d9629a2
20 changed files with 1075 additions and 32121 deletions
--- a/.gitignore
+++ b/.gitignore
@ -22,3 +22,4 @@ scripts_test/
 scripts_plots/python.sh
 scripts_plots/submit.sh
 scripts_plots/*.out
+scripts_plots/*.sh
--- a/csiborgtools/init.py
+++ b/csiborgtools/init.py
@ -19,3 +19,26 @@ paths_glamdring = {"srcdir": "/mnt/extraspace/hdesmond/",
                   "postdir": "/mnt/extraspace/rstiskalek/CSiBORG/",
                   "quijote_dir": "/mnt/extraspace/rstiskalek/Quijote",
                   }
+
+
+neighbour_kwargs = {"rmax_radial": 155 / 0.705,
+                    "nbins_radial": 50,
+                    "rmax_neighbour": 100.,
+                    "nbins_neighbour": 150,
+                    "paths_kind": paths_glamdring}
+
+
+###############################################################################
+#                             Surveys                                         #
+###############################################################################
+
+class SDSS:
+    """
+    Pre-defined SDSS survey selection. Calling an instance returns
+    `read.SDSS(h=1, sel_steps=self.steps)`.
+    """
+    @staticmethod
+    def steps(cls):
+        """
+        Return the selection steps as a list of `(function, (column, ))`
+        pairs. Each function takes a column name and indexes `cls` with it:
+        `IN_DR7_LSS` is used as is, `ELPETRO_APPMAG_r` must be below 17.6 and
+        `DIST` below 155.
+
+        NOTE(review): despite its name, `cls` is an ordinary argument of this
+        static method; presumably it is the survey object being filtered,
+        supplied by `read.SDSS` - confirm. Units of the cuts are not stated
+        here.
+        """
+        return [(lambda x: cls[x], ("IN_DR7_LSS",)),
+                (lambda x: cls[x] < 17.6, ("ELPETRO_APPMAG_r", )),
+                (lambda x: cls[x] < 155, ("DIST", ))
+                ]
+
+    def __call__(self):
+        """Return `read.SDSS` built with `h=1` and the steps defined above."""
+        return read.SDSS(h=1, sel_steps=self.steps)
--- a/csiborgtools/field/density.py
+++ b/csiborgtools/field/density.py
@ -14,9 +14,6 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 Density field and cross-correlation calculations.
-
-TODO:
-    - [ ] Project the velocity field along the line of sight.
 """
 from abc import ABC

@ -370,9 +367,9 @@ class TidalTensorField(BaseField):
    box : :py:class:`csiborgtools.read.CSiBORGBox`
        The simulation box information and transformations.
    MAS : str
-        Mass assignment scheme. Options are Options are: 'NGP' (nearest grid
-        point), 'CIC' (cloud-in-cell), 'TSC' (triangular-shape cloud), 'PCS'
-        (piecewise cubic spline).
+        Mass assignment scheme used to calculate the density field. Options
+        are: 'NGP' (nearest grid point), 'CIC' (cloud-in-cell), 'TSC'
+        (triangular-shape cloud), 'PCS' (piecewise cubic spline).
    """
    def __init__(self, box, MAS):
        self.box = box
@ -384,8 +381,6 @@ class TidalTensorField(BaseField):
        Calculate eigenvalues of the tidal tensor field, sorted in increasing
        order.

-        TODO: evaluate this on a grid instead.
-
        Parameters
        ----------
        tidal_tensor : :py:class:`MAS_library.tidal_tensor`
@ -396,20 +391,14 @@ class TidalTensorField(BaseField):
        -------
        eigvals : 3-dimensional array of shape `(grid, grid, grid)`
        """
-        n_samples = tidal_tensor.T00.size
-        # We create a array and then calculate the eigenvalues.
-        Teval = numpy.full((n_samples, 3, 3), numpy.nan, dtype=numpy.float32)
-        Teval[:, 0, 0] = tidal_tensor.T00
-        Teval[:, 0, 1] = tidal_tensor.T01
-        Teval[:, 0, 2] = tidal_tensor.T02
-        Teval[:, 1, 1] = tidal_tensor.T11
-        Teval[:, 1, 2] = tidal_tensor.T12
-        Teval[:, 2, 2] = tidal_tensor.T22
+        # TODO needs to be checked further
+        grid = tidal_tensor.T00.shape[0]
+        eigvals = numpy.full((grid, grid, grid, 3), numpy.nan,
+                             dtype=numpy.float32)
+        dummy = numpy.full((3, 3), numpy.nan, dtype=numpy.float32)

-        eigvals = numpy.full((n_samples, 3), numpy.nan, dtype=numpy.float32)
-        for i in range(n_samples):
-            eigvals[i, :] = numpy.linalg.eigvalsh(Teval[i, ...], 'U')
-            eigvals[i, :] = numpy.sort(eigvals[i, :])
+        # TODO: fill in these arguments (T00, T01, T02, T11, T12, T22)
+        tidal_tensor_to_eigenvalues(eigvals, dummy, ...)

        return eigvals

@ -430,3 +419,23 @@ class TidalTensorField(BaseField):
        """
        return MASL.tidal_tensor(overdensity_field, self.box._omega_m,
                                 self.box._aexp, self.MAS)
+
+
+@jit(nopython=True)
+def tidal_tensor_to_eigenvalues(eigvals, dummy, T00, T01, T02, T11, T12, T22):
+    """
+    TODO: needs to be checked further.
+    """
+    grid = T00.shape[0]
+    for i in range(grid):
+        for j in range(grid):
+            for k in range(grid):
+                dummy[0, 0] = T00[i, j, k]
+                dummy[0, 1] = T01[i, j, k]
+                dummy[0, 2] = T02[i, j, k]
+                dummy[1, 1] = T11[i, j, k]
+                dummy[1, 2] = T12[i, j, k]
+                dummy[2, 2] = T22[i, j, k]
+                eigvals[i, j, k, :] = numpy.linalg.eigvalsh(dummy, 'U')
+                eigvals[i, j, k, :] = numpy.sort(eigvals[i, j, k, :])
+    return eigvals
--- a/csiborgtools/field/interp.py
+++ b/csiborgtools/field/interp.py
@ -260,7 +260,6 @@ def fill_outside(field, fill_value, rmax, boxsize):
    N = imax
    # Squared radial distance from the center of the box in box units.
    rmax_box2 = (N * rmax / boxsize)**2
-    # print("Box ", rmax_box2)

    for i in range(N):
        idist2 = (i - 0.5 * (N - 1))**2
@ -268,7 +267,6 @@ def fill_outside(field, fill_value, rmax, boxsize):
            jdist2 = (j - 0.5 * (N - 1))**2
            for k in range(N):
                kdist2 = (k - 0.5 * (N - 1))**2
-                # print(idist2 + jdist2 + kdist2 > rmax_box2)
                if idist2 + jdist2 + kdist2 > rmax_box2:
                    field[i, j, k] = fill_value
    return field
--- a/csiborgtools/fits/halo.py
+++ b/csiborgtools/fits/halo.py
@ -58,7 +58,6 @@ class BaseStructure(ABC):

    @info.setter
    def info(self, info):
-        # TODO turn this into a structured array and add some checks
        self._info = info

    @property
--- a/csiborgtools/read/nearest_neighbour_summary.py
+++ b/csiborgtools/read/nearest_neighbour_summary.py
@ -19,10 +19,11 @@ the final snapshot.
 from math import floor

 import numpy
-from numba import jit
-from scipy.integrate import quad
+from scipy.integrate import cumulative_trapezoid, quad
 from scipy.interpolate import interp1d
 from scipy.stats import gaussian_kde, kstest
+
+from numba import jit
 from tqdm import tqdm


@ -205,54 +206,57 @@ class NearestNeighbourReader:
            Archive with keys `ndist`, `rdist`, `mass`, `cross_hindxs``
        """
        assert simname in ["csiborg", "quijote"]
-        fpath = self.paths.cross_nearest(simname, run, nsim, nobs)
+        fpath = self.paths.cross_nearest(simname, run, "dist", nsim, nobs)
        return numpy.load(fpath)

-    def build_dist(self, simname, run, kind, verbose=True):
+    def count_neighbour(self, out, ndist, rdist):
        """
-        Build the a PDF or a CDF for the nearest neighbour distribution.
-        Counts the binned number of neighbour for each halo as a funtion of its
-        radial distance from the centre of the high-resolution region.
+        Count the number of neighbours for each halo as a function of its
+        radial distance.

        Parameters
        ----------
-        simname : str
-            Simulation name. Must be either `csiborg` or `quijote`.
-        run : str
-            Run name.
-        kind : str
-            Distribution kind. Either `pdf` or `cdf`.
-        verbose : bool, optional
-            Verbosity flag.
+        out : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
+            Output array to write to. Results are added to this array.
+        ndist : 2-dimensional array of shape `(nhalos, ncross_simulations)`
+            Distance of each halo to its nearest neighbour from a cross
+            simulation.
+        rdist : 1-dimensional array of shape `(nhalos, )`
+            Distance of each halo to the centre of the high-resolution region.
+
+        Returns
+        -------
+        out : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
+        """
+        return count_neighbour(out, ndist, rdist, self.radial_bin_edges,
+                               self.rmax_neighbour, self.nbins_neighbour)
+
+    def build_dist(self, counts, kind):
+        """
+        Build a PDF or a CDF for the nearest neighbour distribution from
+        binned counts as a function of radial distance from the centre of the
+        high-resolution region.
+
+        Parameters
+        ----------
+        counts : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
+            Binned counts of the number of neighbours as a function of
+            radial distance.

        Returns
        -------
        dist : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
        """
-        assert simname in ["csiborg", "quijote"]
        assert kind in ["pdf", "cdf"]
-        rbin_edges = self.radial_bin_edges
-        # We first bin the distances as a function of each reference halo
-        # radial distance and then its nearest neighbour distance.
-        fpaths = self.paths.cross_nearest(simname, run)
-        if simname == "quijote":
-            fpaths = fpaths
-        out = numpy.zeros((self.nbins_radial, self.nbins_neighbour),
-                          dtype=numpy.float32)
-        for fpath in tqdm(fpaths) if verbose else fpaths:
-            data = numpy.load(fpath)
-            out = count_neighbour(
-                out, data["ndist"], data["rdist"], rbin_edges,
-                self.rmax_neighbour, self.nbins_neighbour)
-
        if kind == "pdf":
            neighbour_bin_edges = self.neighbour_bin_edges
            dx = neighbour_bin_edges[1] - neighbour_bin_edges[0]
-            out /= numpy.sum(dx * out, axis=1).reshape(-1, 1)
+            counts /= numpy.sum(dx * counts, axis=1).reshape(-1, 1)
        else:
-            out = numpy.cumsum(out, axis=1, out=out)
-            out /= out[:, -1].reshape(-1, 1)
-        return out
+            x = self.bin_centres("neighbour")
+            counts = cumulative_trapezoid(counts, x, axis=1, initial=0)
+            counts /= counts[:, -1].reshape(-1, 1)
+        return counts

    def kl_divergence(self, simname, run, nsim, pdf, nobs=None, verbose=True):
        r"""
--- a/csiborgtools/read/paths.py
+++ b/csiborgtools/read/paths.py
@ -410,7 +410,7 @@ class Paths:
        fname = f"halo_counts_{simname}_{str(nsim).zfill(5)}.npz"
        return join(fdir, fname)

-    def cross_nearest(self, simname, run, nsim=None, nobs=None):
+    def cross_nearest(self, simname, run, kind, nsim=None, nobs=None):
        """
        Path to the files containing distance from a halo in a reference
        simulation to the nearest halo from a cross simulation.
@ -421,6 +421,9 @@ class Paths:
            Simulation name. Must be one of: `csiborg`, `quijote`.
        run : str
            Run name.
+        kind : str
+            Whether raw distances or counts in bins. Must be one of `dist`,
+            `bin_dist` or `tot_counts`.
        nsim : int, optional
            IC realisation index.
        nobs : int, optional
@ -431,6 +434,7 @@ class Paths:
        path : str
        """
        assert simname in ["csiborg", "quijote"]
+        assert kind in ["dist", "bin_dist", "tot_counts"]
        fdir = join(self.postdir, "nearest_neighbour")
        if not isdir(fdir):
            makedirs(fdir)
@ -440,9 +444,9 @@ class Paths:
                nsim = str(nsim).zfill(5)
            else:
                nsim = self.quijote_fiducial_nsim(nsim, nobs)
-            return join(fdir, f"{simname}_nn_{nsim}_{run}.npz")
+            return join(fdir, f"{simname}_nn_{kind}_{nsim}_{run}.npz")

-        files = glob(join(fdir, f"{simname}_nn_*"))
+        files = glob(join(fdir, f"{simname}_nn_{kind}_*"))
        run = "_" + run
        return [f for f in files if run in f]

--- a/csiborgtools/read/pk_summary.py
+++ b/csiborgtools/read/pk_summary.py
@ -36,7 +36,7 @@ class PKReader:
        Output precision. By default `numpy.float32`.
    """
    def __init__(self, ics, hw, fskel=None, dtype=numpy.float32):
-        self.ics= ics
+        self.ics = ics
        self.hw = hw
        if fskel is None:
            fskel = "/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p"
--- a/notebooks/fits.ipynb
+++ b/notebooks/fits.ipynb
--- a/notebooks/matching.ipynb
+++ b/notebooks/matching.ipynb
--- a/notebooks/playground_field.ipynb
+++ b/notebooks/playground_field.ipynb
--- a/notebooks/plot_galaxy_distribution.ipynb
+++ b/notebooks/plot_galaxy_distribution.ipynb
--- a/notebooks/plot_mass_function.ipynb
+++ b/notebooks/plot_mass_function.ipynb
--- a/scripts/match_finsnap.py
+++ b/scripts/match_finsnap.py
@ -19,12 +19,14 @@ MPI parallelized over the reference simulations.
 from argparse import ArgumentParser
 from datetime import datetime
 from distutils.util import strtobool
+from os import remove

 import numpy
 import yaml
 from mpi4py import MPI
-
 from taskmaster import work_delegation
+from tqdm import trange
+
 from utils import open_catalogues

 try:
@ -36,7 +38,7 @@ except ModuleNotFoundError:
    import csiborgtools


-def find_neighbour(args, nsim, cats, paths, comm):
+def find_neighbour(args, nsim, cats, paths, comm, save_kind):
    """
    Find the nearest neighbour of each halo in the given catalogue.

@ -53,23 +55,78 @@ def find_neighbour(args, nsim, cats, paths, comm):
        Paths object.
    comm : mpi4py.MPI.Comm
        MPI communicator.
+    save_kind : str
+        Kind of data to save. Must be either `dist` or `bin_dist`.

    Returns
    -------
    None
    """
+    assert save_kind in ["dist", "bin_dist"]
    ndist, cross_hindxs = csiborgtools.match.find_neighbour(nsim, cats)
-
    mass_key = "totpartmass" if args.simname == "csiborg" else "group_mass"
    cat0 = cats[nsim]
-    mass = cat0[mass_key]
    rdist = cat0.radial_distance(in_initial=False)

-    fout = paths.cross_nearest(args.simname, args.run, nsim)
+    # Distance is saved optionally, whereas binned distance is always saved.
+    if save_kind == "dist":
+        out = {"ndist": ndist,
+               "cross_hindxs": cross_hindxs,
+               "mass": cat0[mass_key],
+               "ref_hindxs": cat0["index"],
+               "rdist": rdist}
+        fout = paths.cross_nearest(args.simname, args.run, "dist", nsim)
        if args.verbose:
            print(f"Rank {comm.Get_rank()} writing to `{fout}`.", flush=True)
-    numpy.savez(fout, ndist=ndist, cross_hindxs=cross_hindxs, mass=mass,
-                ref_hindxs=cat0["index"], rdist=rdist)
+        numpy.savez(fout, **out)
+
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    reader = csiborgtools.read.NearestNeighbourReader(
+        paths=paths, **csiborgtools.neighbour_kwargs)
+    counts = numpy.zeros((reader.nbins_radial, reader.nbins_neighbour),
+                         dtype=numpy.float32)
+    counts = reader.count_neighbour(counts, ndist, rdist)
+    out = {"counts": counts}
+    fout = paths.cross_nearest(args.simname, args.run, "bin_dist", nsim)
+    if args.verbose:
+        print(f"Rank {comm.Get_rank()} writing to `{fout}`.", flush=True)
+    numpy.savez(fout, **out)
+
+
+def collect_dist(args, paths):
+    """
+    Collect the binned nearest neighbour distances into a single file.
+
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Command line arguments.
+    paths : csiborgtools.paths.Paths
+        Paths object.
+
+    Returns
+    -------
+    """
+    fnames = paths.cross_nearest(args.simname, args.run, "bin_dist")
+
+    if args.verbose:
+        print("Collecting counts into a single file.", flush=True)
+
+    for i in trange(len(fnames)) if args.verbose else range(len(fnames)):
+        fname = fnames[i]
+        data = numpy.load(fname)
+        if i == 0:
+            out = data["counts"]
+        else:
+            out += data["counts"]
+
+        remove(fname)
+
+    fout = paths.cross_nearest(args.simname, args.run, "tot_counts",
+                               nsim=0, nobs=0)
+    if args.verbose:
+        print(f"Writing the summed counts to `{fout}`.", flush=True)
+    numpy.savez(fout, tot_counts=out)


 if __name__ == "__main__":
@ -87,16 +144,23 @@ if __name__ == "__main__":
    with open("./match_finsnap.yml", "r") as file:
        config = yaml.safe_load(file)

+    if args.simname == "csiborg":
+        save_kind = "dist"
+    else:
+        save_kind = "bin_dist"
+
    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    cats = open_catalogues(args, config, paths, comm)

    def do_work(nsim):
-        return find_neighbour(args, nsim, cats, paths, comm)
+        return find_neighbour(args, nsim, cats, paths, comm, save_kind)

    work_delegation(do_work, list(cats.keys()), comm,
                    master_verbose=args.verbose)

    comm.Barrier()
-    if comm.Get_rank() == 0:
+    if rank == 0:
+        collect_dist(args, paths)
        print(f"{datetime.now()}: all finished. Quitting.")
--- a/scripts/match_finsnap.yml
+++ b/scripts/match_finsnap.yml
@ -18,20 +18,77 @@ nbins_marks: 10
    name:
    - totpartmass
    - group_mass
-    min: 1.e+12
-    max: 1.e+13
+    min: 12.4
+    max: 12.8
+    islog: true

 "mass002":
  primary:
    name:
    - totpartmass
    - group_mass
-    min: 1.e+13
-    max: 1.e+14
+    min: 12.6
+    max: 13.0
+    islog: true

 "mass003":
  primary:
    name:
    - totpartmass
    - group_mass
-    min: 1.e+14
+    min: 12.8
+    max: 13.2
+    islog: true
+
+"mass004":
+  primary:
+    name:
+    - totpartmass
+    - group_mass
+    min: 13.0
+    max: 13.4
+    islog: true
+
+"mass005":
+  primary:
+    name:
+    - totpartmass
+    - group_mass
+    min: 13.2
+    max: 13.6
+    islog: true
+
+"mass006":
+  primary:
+    name:
+    - totpartmass
+    - group_mass
+    min: 13.4
+    max: 13.8
+    islog: true
+
+"mass007":
+  primary:
+    name:
+    - totpartmass
+    - group_mass
+    min: 13.6
+    max: 14.0
+    islog: true
+
+"mass008":
+  primary:
+    name:
+    - totpartmass
+    - group_mass
+    min: 13.8
+    max: 14.2
+    islog: true
+
+"mass009":
+  primary:
+    name:
+    - totpartmass
+    - group_mass
+    min: 14.0
+    islog: true
--- a/scripts/utils.py
+++ b/scripts/utils.py
@ -106,8 +106,12 @@ def read_single_catalogue(args, config, nsim, run, rmax, paths, nobs=None):
            pname = _name
    if pname is None:
        raise KeyError(f"Invalid names `{sel['name']}`.")
-
-    cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})
+    xmin = sel.get("min", None)
+    xmax = sel.get("max", None)
+    if sel.get("islog", False):
+        xmin = 10**xmin if xmin is not None else None
+        xmax = 10**xmax if xmax is not None else None
+    cat.apply_bounds({pname: (xmin, xmax)})

    # Now the secondary selection bounds. If needed transfrom the secondary
    # property before applying the bounds.
--- a/scripts_plots/plot_data.py
+++ b/scripts_plots/plot_data.py
@ -21,7 +21,7 @@ import numpy
 import healpy

 import scienceplots  # noqa
-import utils
+import plt_utils
 from cache_to_disk import cache_to_disk, delete_disk_caches_for_function  # noqa
 from tqdm import tqdm

@ -35,7 +35,16 @@ except ModuleNotFoundError:

 def open_csiborg(nsim):
    """
-    Open a CSiBORG halo catalogue.
+    Open a CSiBORG halo catalogue. Applies mass and distance selection.
+
+    Parameters
+    ----------
+    nsim : int
+        Simulation index.
+
+    Returns
+    -------
+    cat : csiborgtools.read.HaloCatalogue
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    bounds = {"totpartmass": (None, None), "dist": (0, 155/0.705)}
@ -43,6 +52,20 @@ def open_csiborg(nsim):


 def open_quijote(nsim, nobs=None):
+    """
+    Open a Quijote halo catalogue. Applies mass and distance selection.
+
+    Parameters
+    ----------
+    nsim : int
+        Simulation index.
+    nobs : int, optional
+        Fiducial observer index.
+
+    Returns
+    -------
+    cat : csiborgtools.read.QuijoteHaloCatalogue
+    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
    if nobs is not None:
@ -51,10 +74,24 @@ def open_quijote(nsim, nobs=None):


 def plot_mass_vs_ncells(nsim, pdf=False):
+    """
+    Plot the halo mass vs. number of occupied cells in the initial snapshot.
+
+    Parameters
+    ----------
+    nsim : int
+        Simulation index.
+    pdf : bool, optional
+        Whether to save the figure as a PDF file.
+
+    Returns
+    -------
+    None
+    """
    cat = open_csiborg(nsim)
    mpart = 4.38304044e+09

-    with plt.style.context(utils.mplstyle):
+    with plt.style.context(plt_utils.mplstyle):
        plt.figure()
        plt.scatter(cat["totpartmass"], cat["lagpatch_ncells"], s=0.25,
                    rasterized=True)
@ -66,9 +103,9 @@ def plot_mass_vs_ncells(nsim, pdf=False):
        plt.ylabel(r"$N_{\rm cells}$")

        for ext in ["png"] if pdf is False else ["png", "pdf"]:
-            fout = join(utils.fout, f"init_mass_vs_ncells_{nsim}.{ext}")
+            fout = join(plt_utils.fout, f"init_mass_vs_ncells_{nsim}.{ext}")
            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()


@ -77,13 +114,19 @@ def plot_mass_vs_ncells(nsim, pdf=False):
 ###############################################################################


-def process_counts(counts):
-    mean = numpy.mean(counts, axis=0)
-    std = numpy.std(counts, axis=0)
-    return mean, std
-
-
 def plot_hmf(pdf=False):
+    """
+    Plot the (ultimate parent) halo mass function of CSiBORG and Quijote.
+
+    Parameters
+    ----------
+    pdf : bool, optional
+        Whether to save the figure as a PDF file.
+
+    Returns
+    -------
+    None
+    """
    print("Plotting the HMF...", flush=True)
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)

@ -114,35 +157,39 @@ def plot_hmf(pdf=False):
    x = 10**(0.5 * (bins[1:] + bins[:-1]))
    # Edit lower limits
    csiborg_counts[:, x < 1e12] = numpy.nan
-    quijote_counts[:, x < 8e12] = numpy.nan
+    quijote_counts[:, x < 10**(12.4)] = numpy.nan
    # Edit upper limits
    csiborg_counts[:, x > 4e15] = numpy.nan
    quijote_counts[:, x > 4e15] = numpy.nan

-    with plt.style.context(utils.mplstyle):
+    with plt.style.context(plt_utils.mplstyle):
        cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]
        fig, ax = plt.subplots(nrows=2, sharex=True,
                               figsize=(3.5, 2.625 * 1.25),
                               gridspec_kw={"height_ratios": [1, 0.65]})
        fig.subplots_adjust(hspace=0, wspace=0)

-        mean_csiborg, std_csiborg = process_counts(csiborg_counts)
+        # Upper panel data
+        mean_csiborg = numpy.mean(csiborg_counts, axis=0)
+        std_csiborg = numpy.std(csiborg_counts, axis=0)
        ax[0].plot(x, mean_csiborg, label="CSiBORG")
        ax[0].fill_between(x, mean_csiborg - std_csiborg,
                           mean_csiborg + std_csiborg, alpha=0.5)

-        mean_quijote, std_quijote = process_counts(quijote_counts)
+        mean_quijote = numpy.mean(quijote_counts, axis=0)
+        std_quijote = numpy.std(quijote_counts, axis=0)
        ax[0].plot(x, mean_quijote, label="Quijote")
        ax[0].fill_between(x, mean_quijote - std_quijote,
                           mean_quijote + std_quijote, alpha=0.5)
-
+        # Lower panel data
        log_y = numpy.log10(mean_csiborg / mean_quijote)
        err = numpy.sqrt((std_csiborg / mean_csiborg / numpy.log(10))**2
                         + (std_quijote / mean_quijote / numpy.log(10))**2)
-
        ax[1].plot(x, 10**log_y, c=cols[2])
        ax[1].fill_between(x, 10**(log_y - err), 10**(log_y + err), alpha=0.5,
                           color=cols[2])
+
+        # Labels and accessories
        ax[1].axhline(1, color="k", ls=plt.rcParams["lines.linestyle"],
                      lw=0.5 * plt.rcParams["lines.linewidth"], zorder=0)
        ax[0].set_ylabel(r"$\frac{\mathrm{d} n}{\mathrm{d}\log M_{\rm h}}~\mathrm{dex}^{-1}$")  # noqa
@ -156,14 +203,33 @@ def plot_hmf(pdf=False):

        fig.tight_layout(h_pad=0, w_pad=0)
        for ext in ["png"] if pdf is False else ["png", "pdf"]:
-            fout = join(utils.fout, f"hmf_comparison.{ext}")
+            fout = join(plt_utils.fout, f"hmf_comparison.{ext}")
            print(f"Saving to `{fout}`.")
-            fig.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()


-@cache_to_disk(7)
 def load_field(kind, nsim, grid, MAS, in_rsp=False):
+    """
+    Load a single field.
+
+    Parameters
+    ----------
+    kind : str
+        Field kind.
+    nsim : int
+        Simulation index.
+    grid : int
+        Grid size.
+    MAS : str
+        Mass assignment scheme.
+    in_rsp : bool, optional
+        Whether to load the field in redshift space.
+
+    Returns
+    -------
+    field : n-dimensional array
+    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    return numpy.load(paths.field(kind, MAS, grid, nsim, in_rsp=in_rsp))

@ -175,6 +241,30 @@ def load_field(kind, nsim, grid, MAS, in_rsp=False):

 def plot_projected_field(kind, nsim, grid, in_rsp, MAS="PCS",
                         highres_only=True, pdf=False):
+    """
+    Plot the mean projected field.
+
+    Parameters
+    ----------
+    kind : str
+        Field kind.
+    nsim : int
+        Simulation index.
+    grid : int
+        Grid size.
+    in_rsp : bool
+        Whether to load the field in redshift space.
+    MAS : str, optional
+        Mass assignment scheme.
+    highres_only : bool, optional
+        Whether to only plot the high-resolution region.
+    pdf : bool, optional
+        Whether to save the figure as a PDF.
+
+    Returns
+    -------
+    None
+    """
    print(f"Plotting projected field `{kind}`. ", flush=True)
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsnap = max(paths.get_snapshots(nsim))
@ -190,14 +280,12 @@ def plot_projected_field(kind, nsim, grid, in_rsp, MAS="PCS",
    if highres_only:
        csiborgtools.field.fill_outside(field, numpy.nan, rmax=155.5,
                                        boxsize=677.7)
-        # start = field.shape[0] // 4
        start = round(field.shape[0] * 0.27)
        end = round(field.shape[0] * 0.73)
-        # end = field.shape[0] - start
        field = field[start:end, start:end, start:end]

    labels = [r"$y-z$", r"$x-z$", r"$x-y$"]
-    with plt.style.context(utils.mplstyle):
+    with plt.style.context(plt_utils.mplstyle):
        fig, ax = plt.subplots(figsize=(3.5 * 2, 2.625), ncols=3, sharey=True,
                               sharex=True)
        fig.subplots_adjust(hspace=0, wspace=0)
@ -216,9 +304,10 @@ def plot_projected_field(kind, nsim, grid, in_rsp, MAS="PCS",

        fig.tight_layout(h_pad=0, w_pad=0)
        for ext in ["png"] if pdf is False else ["png", "pdf"]:
-            fout = join(utils.fout, f"field_{kind}_{nsim}_rsp{in_rsp}.{ext}")
+            fout = join(plt_utils.fout,
+                        f"field_{kind}_{nsim}_rsp{in_rsp}.{ext}")
            print(f"Saving to `{fout}`.")
-            fig.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()

 ###############################################################################
@ -227,6 +316,20 @@ def plot_projected_field(kind, nsim, grid, in_rsp, MAS="PCS",


 def get_sky_label(kind, volume_weight):
+    """
+    Get the sky label for a given field kind.
+
+    Parameters
+    ----------
+    kind : str
+        Field kind.
+    volume_weight : bool
+        Whether to volume weight the field.
+
+    Returns
+    -------
+    label : str
+    """
    if volume_weight:
        if kind == "density":
            label = r"$\log \int_{0}^{R} r^2 \rho(r, \mathrm{RA}, \mathrm{dec}) \mathrm{d} r$"  # noqa
@ -255,8 +358,38 @@ def get_sky_label(kind, volume_weight):
 def plot_sky_distribution(kind, nsim, grid, nside, MAS="PCS", plot_groups=True,
                          dmin=0, dmax=220, plot_halos=None,
                          volume_weight=True, pdf=False):
-    """
-    NOTE: add distance for groups.
+    r"""
+    Plot the sky distribution of a given field kind on the sky along with halos
+    and selected observations.
+
+    TODO
+    ----
+    - Add distance for groups.
+
+    Parameters
+    ----------
+    kind : str
+        Field kind.
+    nsim : int
+        Simulation index.
+    grid : int
+        Grid size.
+    nside : int
+        Healpix nside of the sky projection.
+    MAS : str, optional
+        Mass assignment scheme.
+    plot_groups : bool, optional
+        Whether to plot the 2M++ groups.
+    dmin : float, optional
+        Minimum projection distance in :math:`\mathrm{Mpc}/h`.
+    dmax : float, optional
+        Maximum projection distance in :math:`\mathrm{Mpc}/h`.
+    plot_halos : list, optional
+        Minimum halo mass to plot in :math:`M_\odot`.
+    volume_weight : bool, optional
+        Whether to volume weight the field.
+    pdf : bool, optional
+        Whether to save the figure as a pdf.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsnap = max(paths.get_snapshots(nsim))
@ -274,7 +407,7 @@ def plot_sky_distribution(kind, nsim, grid, nside, MAS="PCS", plot_groups=True,
    out = csiborgtools.field.make_sky(field, angpos=angpos, dist=dist, box=box,
                                      volume_weight=volume_weight)

-    with plt.style.context(utils.mplstyle):
+    with plt.style.context(plt_utils.mplstyle):
        label = get_sky_label(kind, volume_weight)
        if kind in ["density", "overdensity"]:
            out = numpy.log10(out)
@ -299,9 +432,9 @@ def plot_sky_distribution(kind, nsim, grid, nside, MAS="PCS", plot_groups=True,
            plt.legend(markerscale=10)

        for ext in ["png"] if pdf is False else ["png", "pdf"]:
-            fout = join(utils.fout, f"sky_{kind}_{nsim}_from_{dmin}_to_{dmax}_vol{volume_weight}.{ext}")  # noqa
+            fout = join(plt_utils.fout, f"sky_{kind}_{nsim}_from_{dmin}_to_{dmax}_vol{volume_weight}.{ext}")  # noqa
            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()


@ -321,12 +454,19 @@ if __name__ == "__main__":
            print(f"Cleaning cache for function {func}.")
            delete_disk_caches_for_function(func)

-    # plot_mass_vs_occupancy(7444)
-    # plot_mass_vs_normcells(7444 + 24 * 4, pdf=False)
-    # plot_mass_vs_ncells(7444, pdf=True)
-    # plot_hmf(pdf=True)
-    # plot_sky_distribution("radvel", 7444, 256, nside=64,
-    #                       plot_groups=False, dmin=50, dmax=100,
-    #                       plot_halos=5e13, volume_weight=False)
+    if False:
+        plot_mass_vs_ncells(7444, pdf=False)

-    plot_projected_field("potential", 7444, 256, in_rsp=True)
+    if False:
+        plot_hmf(pdf=False)
+
+    if False:
+        plot_sky_distribution("radvel", 7444, 256, nside=64,
+                              plot_groups=False, dmin=50, dmax=100,
+                              plot_halos=5e13, volume_weight=False)
+
+    if True:
+        plot_projected_field("overdensity", 7444, 1024, in_rsp=True,
+                             highres_only=False)
+        plot_projected_field("overdensity", 7444, 1024, in_rsp=False,
+                             highres_only=False)
--- a/scripts_plots/plot_knn.py
+++ b/scripts_plots/plot_knn.py
@ -19,7 +19,7 @@ import matplotlib.pyplot as plt
 import numpy

 import scienceplots  # noqa
-import utils
+import plt_utils

 try:
    import csiborgtools
@ -40,7 +40,7 @@ def plot_knn(runname):
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    reader = csiborgtools.read.kNNCDFReader(paths)

-    with plt.style.context(utils.mplstyle):
+    with plt.style.context(plt_utils.mplstyle):
        plt.figure()

        # Quijote kNN
@ -92,9 +92,9 @@ def plot_knn(runname):
        plt.ylabel(r"$P(k | V = 4 \pi r^3 / 3)$")

        for ext in ["png"]:
-            fout = join(utils.fout, f"knn_{runname}.{ext}")
+            fout = join(plt_utils.fout, f"knn_{runname}.{ext}")
            print("Saving to `{fout}`.".format(fout=fout))
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()


--- a/scripts_plots/plot_match.py
+++ b/scripts_plots/plot_match.py
@ -13,17 +13,18 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

-from os.path import join
 from argparse import ArgumentParser
+from os.path import join

+import matplotlib as mpl
 import matplotlib.pyplot as plt
 import numpy
-
 import scienceplots  # noqa
-import utils
 from cache_to_disk import cache_to_disk, delete_disk_caches_for_function
 from tqdm import tqdm

+import plt_utils
+
 try:
    import csiborgtools
 except ModuleNotFoundError:
@ -38,7 +39,16 @@ except ModuleNotFoundError:

 def open_cat(nsim):
    """
-    Open a CSiBORG halo catalogue.
+    Open a CSiBORG halo catalogue. Applies only mass selection.
+
+    Parameters
+    ----------
+    nsim : int
+        Simulation index.
+
+    Returns
+    -------
+    cat : csiborgtools.read.HaloCatalogue
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    bounds = {"totpartmass": (1e12, None)}
@ -50,6 +60,22 @@ def get_overlap(nsim0):
    """
    Calculate the summed overlap and probability of no match for a single
    reference simulation.
+
+    Parameters
+    ----------
+    nsim0 : int
+        Simulation index.
+
+    Returns
+    -------
+    mass : 1-dimensional array
+        Mass of halos in the reference simulation.
+    hindxs : 1-dimensional array
+        Halo indices in the reference simulation.
+    summed_overlap : 1-dimensional array
+        Summed overlap for each halo in the reference simulation.
+    prob_nomatch : 1-dimensional array
+        Probability of no match for each halo in the reference simulation.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsimxs = csiborgtools.read.get_cross_sims(nsim0, paths, smoothed=True)
@ -68,10 +94,20 @@ def get_overlap(nsim0):
    return mass, hindxs, summed_overlap, prob_nomatch


-def plot_summed_overlap(nsim0):
+def plot_summed_overlap_vs_mass(nsim0):
    """
-    Plot the summed overlap and probability of no matching for a single
-    reference simulation as a function of the reference halo mass.
+    Plot the summed overlap and probability of no match for a single
+    reference simulation as a function of the reference halo mass, along with
+    their comparison.
+
+    Parameters
+    ----------
+    nsim0 : int
+        Simulation index.
+
+    Returns
+    -------
+    None
    """
    x, __, summed_overlap, prob_nomatch = get_overlap(nsim0)

@ -79,7 +115,6 @@ def plot_summed_overlap(nsim0):
    std_overlap = numpy.std(summed_overlap, axis=1)

    mean_prob_nomatch = numpy.mean(prob_nomatch, axis=1)
-    # std_prob_nomatch = numpy.std(prob_nomatch, axis=1)

    mask = mean_overlap > 0
    x = x[mask]
@ -88,7 +123,7 @@ def plot_summed_overlap(nsim0):
    mean_prob_nomatch = mean_prob_nomatch[mask]

    # Mean summed overlap
-    with plt.style.context(utils.mplstyle):
+    with plt.style.context(plt_utils.mplstyle):
        plt.figure()
        plt.hexbin(x, mean_overlap, mincnt=1, xscale="log", bins="log",
                   gridsize=50)
@ -99,13 +134,13 @@ def plot_summed_overlap(nsim0):

        plt.tight_layout()
        for ext in ["png", "pdf"]:
-            fout = join(utils.fout, f"overlap_mean_{nsim0}.{ext}")
+            fout = join(plt_utils.fout, f"overlap_mean_{nsim0}.{ext}")
            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()

    # Std summed overlap
-    with plt.style.context(utils.mplstyle):
+    with plt.style.context(plt_utils.mplstyle):
        plt.figure()
        plt.hexbin(x, std_overlap, mincnt=1, xscale="log", bins="log",
                   gridsize=50)
@ -116,13 +151,13 @@ def plot_summed_overlap(nsim0):
        plt.tight_layout()

        for ext in ["png", "pdf"]:
-            fout = join(utils.fout, f"overlap_std_{nsim0}.{ext}")
+            fout = join(plt_utils.fout, f"overlap_std_{nsim0}.{ext}")
            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()

    # 1 - mean summed overlap vs mean prob nomatch
-    with plt.style.context(utils.mplstyle):
+    with plt.style.context(plt_utils.mplstyle):
        plt.figure()
        plt.scatter(1 - mean_overlap, mean_prob_nomatch, c=numpy.log10(x), s=2,
                    rasterized=True)
@ -136,9 +171,10 @@ def plot_summed_overlap(nsim0):
        plt.tight_layout()

        for ext in ["png", "pdf"]:
-            fout = join(utils.fout, f"overlap_vs_prob_nomatch_{nsim0}.{ext}")
+            fout = join(plt_utils.fout,
+                        f"overlap_vs_prob_nomatch_{nsim0}.{ext}")
            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()


@ -147,223 +183,667 @@ def plot_summed_overlap(nsim0):
 ###############################################################################


-@cache_to_disk(7)
 def read_dist(simname, run, kind, kwargs):
+    """
+    Read PDF/CDF of a nearest neighbour distribution.
+
+    Parameters
+    ----------
+    simname : str
+        Simulation name. Must be either `csiborg` or `quijote`.
+    run : str
+        Run name.
+    kind : str
+        Kind of distribution. Must be either `pdf` or `cdf`.
+    kwargs : dict
+        Nearest neighbour reader keyword arguments.
+
+    Returns
+    -------
+    dist : 2-dimensional array
+        Distribution of distances in radial and neighbour bins.
+    """
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
-    return reader.build_dist(simname, run, kind, verbose=True)
+
+    fpath = paths.cross_nearest(simname, run, "tot_counts", nsim=0, nobs=0)
+    counts = numpy.load(fpath)["tot_counts"]
+    return reader.build_dist(counts, kind)
+
+
+def pull_cdf(x, fid_cdf, test_cdf):
+    """
+    Pull a CDF so that it matches the fiducial CDF at 0.5. Rescales the x-axis,
+    while keeping the corresponding CDF values fixed.
+
+    Parameters
+    ----------
+    x : 1-dimensional array
+        The x-axis of the CDF.
+    fid_cdf : 1-dimensional array
+        The fiducial CDF.
+    test_cdf : 1-dimensional array
+        The test CDF to be pulled.
+
+    Returns
+    -------
+    xnew : 1-dimensional array
+        The new x-axis of the test CDF.
+    test_cdf : 1-dimensional array
+        The test CDF values, returned unchanged.
+    """
+    xnew = x * numpy.interp(0.5, fid_cdf, x) / numpy.interp(0.5, test_cdf, x)
+    return xnew, test_cdf
+
+
+def plot_dist(run, kind, kwargs, runs_to_mass, pulled_cdf=False, r200=None):
+    r"""
+    Plot the PDF or CDF of the nearest neighbour distance for CSiBORG and
+    Quijote.
+
+    Parameters
+    ----------
+    run : str
+        Run name.
+    kind : str
+        Kind of distribution. Must be either `pdf` or `cdf`.
+    kwargs : dict
+        Nearest neighbour reader keyword arguments.
+    runs_to_mass : dict
+        Dictionary mapping run names to halo mass range.
+    pulled_cdf : bool, optional
+        Whether to pull the CDFs of CSiBORG and Quijote so that they match
+        (individually) at 0.5. Default is `False`.
+    r200 : float, optional
+        Halo radial size :math:`R_{200}`. If set, the x-axis will be scaled by
+        it.
+
+    Returns
+    -------
+    None
+    """
+    assert kind in ["pdf", "cdf"]
+    print(f"Plotting the {kind} for {run}...", flush=True)
+    reader = csiborgtools.read.NearestNeighbourReader(
+        **kwargs, paths=csiborgtools.read.Paths(**kwargs["paths_kind"]))
+    raddist = reader.bin_centres("radial")
+    r = reader.bin_centres("neighbour")
+    r = r / r200 if r200 is not None else r
+
+    y_csiborg = read_dist("csiborg", run, kind, kwargs)
+    y_quijote = read_dist("quijote", run, kind, kwargs)
+
+    with plt.style.context(plt_utils.mplstyle):
+        norm = mpl.colors.Normalize(vmin=numpy.min(raddist),
+                                    vmax=numpy.max(raddist))
+        cmap = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.viridis)
+        cmap.set_array([])
+
+        fig, ax = plt.subplots()
+        if run != "mass009":
+            ax.set_title(r"${} \leq \log M_{{\rm tot}} / M_\odot < {}$"
+                         .format(*runs_to_mass[run]), fontsize="small")
+        else:
+            ax.set_title(r"$\log M_{{\rm tot}} / M_\odot \geq {}$"
+                         .format(runs_to_mass[run][0]), fontsize="small")
+        # Plot data
+        nrad = y_csiborg.shape[0]
+        for i in range(nrad):
+            if pulled_cdf:
+                x1, y1 = pull_cdf(r, y_csiborg[0], y_csiborg[i])
+                x2, y2 = pull_cdf(r, y_quijote[0], y_quijote[i])
+            else:
+                x1, y1 = r, y_csiborg[i]
+                x2, y2 = r, y_quijote[i]
+
+            ax.plot(x1, y1, c=cmap.to_rgba(raddist[i]),
+                    label="CSiBORG" if i == 0 else None)
+            ax.plot(x2, y2, c="gray", ls="--",
+                    label="Quijote" if i == 0 else None)
+
+        fig.colorbar(cmap, ax=ax, label=r"$R_{\rm dist}~[\mathrm{Mpc}]$")
+        ax.grid(alpha=0.5, lw=0.4)
+        # Plot labels
+        if pulled_cdf:
+            if r200 is None:
+                ax.set_xlabel(r"$\tilde{r}_{1\mathrm{NN}}~[\mathrm{Mpc}]$")
+                if kind == "pdf":
+                    ax.set_ylabel(r"$p(\tilde{r}_{1\mathrm{NN}})$")
+                else:
+                    ax.set_ylabel(r"$\mathrm{CDF}(\tilde{r}_{1\mathrm{NN}})$")
+            else:
+                ax.set_xlabel(r"$\tilde{r}_{1\mathrm{NN}} / R_{200c}$")
+                if kind == "pdf":
+                    ax.set_ylabel(r"$p(\tilde{r}_{1\mathrm{NN}} / R_{200c})$")
+                else:
+                    ax.set_ylabel(r"$\mathrm{CDF}(\tilde{r}_{1\mathrm{NN}} / R_{200c})$")  # noqa
+        else:
+            if r200 is None:
+                ax.set_xlabel(r"$r_{1\mathrm{NN}}~[\mathrm{Mpc}]$")
+                if kind == "pdf":
+                    ax.set_ylabel(r"$p(r_{1\mathrm{NN}})$")
+                else:
+                    ax.set_ylabel(r"$\mathrm{CDF}(r_{1\mathrm{NN}})$")
+            else:
+                ax.set_xlabel(r"$r_{1\mathrm{NN}} / R_{200c}$")
+                if kind == "pdf":
+                    ax.set_ylabel(r"$p(r_{1\mathrm{NN}} / R_{200c})$")
+                else:
+                    ax.set_ylabel(r"$\mathrm{CDF}(r_{1\mathrm{NN}} / R_{200c})$")  # noqa
+
+        if kind == "cdf":
+            xmax = numpy.min(r[numpy.isclose(y_quijote[-1, :], 1.)])
+            if xmax > 0:
+                ax.set_xlim(0, xmax)
+            ax.set_ylim(0, 1)
+
+        ax.legend(fontsize="small")
+        fig.tight_layout()
+        for ext in ["png"]:
+            if pulled_cdf:
+                fout = join(plt_utils.fout, f"1nn_{kind}_{run}_pulled.{ext}")
+            else:
+                fout = join(plt_utils.fout, f"1nn_{kind}_{run}.{ext}")
+            print(f"Saving to `{fout}`.")
+            fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
+        plt.close()
+
+
+def get_cdf_diff(x, y_csiborg, y_quijote, pulled_cdf):
+    """
+    Get difference between the two CDFs as a function of radial distance.
+
+    Parameters
+    ----------
+    x : 1-dimensional array
+        The x-axis of the CDFs.
+    y_csiborg : 2-dimensional array
+        The CDFs of CSiBORG.
+    y_quijote : 2-dimensional array
+        The CDFs of Quijote.
+    pulled_cdf : bool
+        Whether to pull the CDFs of CSiBORG and Quijote.
+
+    Returns
+    -------
+    dy : 2-dimensional array
+        The difference between the two CDFs.
+    """
+    dy = numpy.full_like(y_csiborg, numpy.nan)
+    for i in range(y_csiborg.shape[0]):
+        if pulled_cdf:
+            x1, y1 = pull_cdf(x, y_csiborg[0], y_csiborg[i])
+            y1 = numpy.interp(x, x1, y1, left=0., right=1.)
+            x2, y2 = pull_cdf(x, y_quijote[0], y_quijote[i])
+            y2 = numpy.interp(x, x2, y2, left=0., right=1.)
+            dy[i] = y1 - y2
+        else:
+            dy[i] = y_csiborg[i] - y_quijote[i]
+    return dy
+
+
+def plot_cdf_diff(runs, kwargs, pulled_cdf, runs_to_mass):
+    """
+    Plot the CDF difference between CSiBORG and Quijote (CSiBORG - Quijote).
+
+    Parameters
+    ----------
+    runs : list of str
+        Run names.
+    kwargs : dict
+        Nearest neighbour reader keyword arguments.
+    pulled_cdf : bool
+        Whether to pull the CDFs of CSiBORG and Quijote.
+    runs_to_mass : dict
+        Dictionary mapping run names to halo mass range.
+
+    Returns
+    -------
+    None
+    """
+    print("Plotting the CDF difference...", flush=True)
+    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
+    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
+    r = reader.bin_centres("neighbour")
+    runs_to_mass = [numpy.mean(runs_to_mass[run]) for run in runs]
+
+    with plt.style.context(plt_utils.mplstyle):
+        norm = mpl.colors.Normalize(vmin=min(runs_to_mass),
+                                    vmax=max(runs_to_mass))
+        cmap = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.viridis)
+        cmap.set_array([])
+
+        fig, ax = plt.subplots()
+        for i, run in enumerate(runs):
+            y_quijote = read_dist("quijote", run, "cdf", kwargs)
+            y_csiborg = read_dist("csiborg", run, "cdf", kwargs)
+
+            dy = get_cdf_diff(r, y_csiborg, y_quijote, pulled_cdf)
+            ax.plot(r, numpy.median(dy, axis=0),
+                    c=cmap.to_rgba(runs_to_mass[i]))
+            ax.fill_between(r, *numpy.percentile(dy, [16, 84], axis=0),
+                            alpha=0.5, color=cmap.to_rgba(runs_to_mass[i]))
+        fig.colorbar(cmap, ax=ax, ticks=runs_to_mass,
+                     label=r"$\log M_{\rm tot} / M_\odot$")
+        ax.set_xlim(0.0, 55)
+        ax.set_ylim(0)
+
+        ax.grid(alpha=1/3, lw=0.4)
+
+        # Plot labels
+        if pulled_cdf:
+            ax.set_xlabel(r"$\tilde{r}_{1\mathrm{NN}}~[\mathrm{Mpc}]$")
+        else:
+            ax.set_xlabel(r"$r_{1\mathrm{NN}}~[\mathrm{Mpc}]$")
+        ax.set_ylabel(r"$\Delta \mathrm{CDF}(r_{1\mathrm{NN}})$")
+
+        # Plot labels
+        if pulled_cdf:
+            ax.set_xlabel(r"$\tilde{r}_{1\mathrm{NN}}~[\mathrm{Mpc}]$")
+            ax.set_ylabel(r"$\Delta \mathrm{CDF}(\tilde{r}_{1\mathrm{NN}})$")
+        else:
+            ax.set_xlabel(r"$r_{1\mathrm{NN}}~[\mathrm{Mpc}]$")
+            ax.set_ylabel(r"$\Delta \mathrm{CDF}(r_{1\mathrm{NN}})$")
+
+        fig.tight_layout()
+        for ext in ["png"]:
+            if pulled_cdf:
+                fout = join(plt_utils.fout, f"1nn_diff_pulled.{ext}")
+            else:
+                fout = join(plt_utils.fout, f"1nn_diff.{ext}")
+            print(f"Saving to `{fout}`.")
+            fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
+        plt.close()


@cache_to_disk(7)
 def make_kl(simname, run, nsim, nobs, kwargs):
+    """
+    Calculate the KL divergence between the distribution of nearest neighbour
+    distances of haloes in a reference simulation with respect to Quijote.
+
+    Parameters
+    ----------
+    simname : str
+        Simulation name. Must be either `csiborg` or `quijote`.
+    run : str
+        Run name.
+    nsim : int
+        Simulation index.
+    nobs : int
+        Fiducial Quijote observer index. For CSiBORG must be set to `None`.
+    kwargs : dict
+        Nearest neighbour reader keyword arguments.
+
+    Returns
+    -------
+    kl : 1-dimensional array
+        KL divergence of the distribution of nearest neighbour distances
+        of each halo in the reference simulation.
+    """
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
-
+    # This is the reference PDF. Must be Quijote!
    pdf = read_dist("quijote", run, "pdf", kwargs)
    return reader.kl_divergence(simname, run, nsim, pdf, nobs=nobs)


@cache_to_disk(7)
 def make_ks(simname, run, nsim, nobs, kwargs):
+    """
+    Calculate the KS significance between the distribution of nearest neighbour
+    distances of haloes in a reference simulation with respect to Quijote.
+
+    Parameters
+    ----------
+    simname : str
+        Simulation name. Must be either `csiborg` or `quijote`.
+    run : str
+        Run name.
+    nsim : int
+        Simulation index.
+    nobs : int
+        Fiducial Quijote observer index. For CSiBORG must be set to `None`.
+    kwargs : dict
+        Nearest neighbour reader keyword arguments.
+
+    Returns
+    -------
+    ks : 1-dimensional array
+        KS significance of the distribution of nearest neighbour distances of
+        each halo in the reference simulation.
+    """
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
-
+    # This is the reference CDF. Must be Quijote!
    cdf = read_dist("quijote", run, "cdf", kwargs)
    return reader.ks_significance(simname, run, nsim, cdf, nobs=nobs)


-def plot_dist(run, kind, kwargs, r200):
+def get_cumulative_significance(simname, runs, nsim, nobs, kind, kwargs):
    """
-    Plot the PDF/CDF of the nearest neighbour distance for Quijote and CSiBORG.
+    Calculate the cumulative significance of the distribution of nearest
+    neighbours and evaluate it at the same points for all runs.
+
+    Parameters
+    ----------
+    simname : str
+        Simulation name. Must be either `csiborg` or `quijote`.
+    runs : list of str
+        Run names.
+    nsim : int
+        Simulation index.
+    nobs : int
+        Fiducial Quijote observer index. For CSiBORG must be set to `None`.
+    kind : str
+        Must be either `kl` (Kullback-Leibler divergence) or `ks`
+        (Kolmogorov-Smirnov p-value).
+    kwargs : dict
+        Nearest neighbour reader keyword arguments.
+
+    Returns
+    -------
+    z : 1-dimensional array
+        Points where the cumulative significance is evaluated.
+    cumsum : 2-dimensional array of shape `(len(runs), len(z))`
+        Cumulative significance of the distribution of nearest neighbours.
    """
-    assert kind in ["pdf", "cdf"]
-    print(f"Plotting the {kind}.", flush=True)
-    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
-    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
-    x = reader.bin_centres("neighbour")
-    if r200 is not None:
-        x /= r200
-
-    y_quijote = read_dist("quijote", run, kind, kwargs)
-    y_csiborg = read_dist("csiborg", run, kind, kwargs)
-    ncdf = y_csiborg.shape[0]
-
-    with plt.style.context(utils.mplstyle):
-        plt.figure()
-        for i in range(ncdf):
-            if i == 0:
-                label1 = "Quijote"
-                label2 = "CSiBORG"
-            else:
-                label1 = None
-                label2 = None
-            plt.plot(x, y_quijote[i], c="C0", label=label1)
-            plt.plot(x, y_csiborg[i], c="C1", label=label2)
-        plt.xlim(0, 75)
-        if r200 is None:
-            plt.xlabel(r"$r_{1\mathrm{NN}}~[\mathrm{Mpc}]$")
-        else:
-            plt.xlabel(r"$r_{1\mathrm{NN}} / R_{200c}$")
-        if kind == "pdf":
-            plt.ylabel(r"$p(r_{1\mathrm{NN}})$")
-        else:
-            plt.ylabel(r"$\mathrm{CDF}(r_{1\mathrm{NN}})$")
-            plt.ylim(0, 1)
-        plt.legend()
-        plt.tight_layout()
-        for ext in ["png"]:
-            fout = join(utils.fout, f"1nn_{kind}_{run}.{ext}")
-            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
-        plt.close()
-
-
-def plot_significance_hist(simname, run, nsim, nobs, kind, kwargs):
-    """Plot a histogram of the significance of the 1NN distance."""
-    assert kind in ["kl", "ks"]
-    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
+    significances = []
+    for run in runs:
        if kind == "kl":
            x = make_kl(simname, run, nsim, nobs, kwargs)
        else:
            x = make_ks(simname, run, nsim, nobs, kwargs)
            x = numpy.log10(x)
        x = x[numpy.isfinite(x)]
-
-    with plt.style.context(utils.mplstyle):
-        plt.figure()
-        plt.hist(x, bins="auto")
+        x = numpy.sort(x)
+        significances.append(x)
+    z = numpy.hstack(significances).reshape(-1, )

    if kind == "ks":
-            plt.xlabel(r"$\log p$-value of $r_{1\mathrm{NN}}$ distribution")
+        zmin, zmax = numpy.percentile(z, [1, 100])
    else:
-            plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
-        plt.ylabel(r"Counts")
-        plt.tight_layout()
+        zmin, zmax = numpy.percentile(z, [0.0, 99.9])
+    z = numpy.linspace(zmin, zmax, 1000, dtype=numpy.float32)

+    cumsum = numpy.full((len(runs), z.size), numpy.nan, dtype=numpy.float32)
+    for i, run in enumerate(runs):
+        x = significances[i]
+        y = numpy.linspace(0, 1, x.size)
+        cumsum[i, :] = numpy.interp(z, x, y, left=0, right=1)
+
+    return z, cumsum
+
+
+def plot_significance(simname, runs, nsim, nobs, kind, kwargs, runs_to_mass):
+    """
+    Plot cumulative significance of the 1NN distribution.
+
+    Parameters
+    ----------
+    simname : str
+        Simulation name. Must be either `csiborg` or `quijote`.
+    runs : list of str
+        Run names.
+    nsim : int
+        Simulation index.
+    nobs : int
+        Fiducial Quijote observer index. For CSiBORG must be set to `None`.
+    kind : str
+        Must be either `kl` (Kullback-Leibler divergence) or `ks`
+        (Kolmogorov-Smirnov p-value).
+    kwargs : dict
+        Nearest neighbour reader keyword arguments.
+    runs_to_mass : dict
+        Dictionary mapping run names to total halo mass range.
+
+    Returns
+    -------
+    None
+    """
+    assert kind in ["kl", "ks"]
+    runs_to_mass = [numpy.mean(runs_to_mass[run]) for run in runs]
+
+    with plt.style.context(plt_utils.mplstyle):
+        norm = mpl.colors.Normalize(vmin=min(runs_to_mass),
+                                    vmax=max(runs_to_mass))
+        cmap = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.viridis)
+        cmap.set_array([])
+
+        fig, ax = plt.subplots(figsize=(3.5, 2.625 * 1.2), nrows=2,
+                               sharex=True, height_ratios=[1, 0.5])
+        fig.subplots_adjust(hspace=0, wspace=0)
+        z, cumsum = get_cumulative_significance(simname, runs, nsim, nobs,
+                                                kind, kwargs)
+
+        for i in range(len(runs)):
+            ax[0].plot(z, cumsum[i, :], color=cmap.to_rgba(runs_to_mass[i]))
+
+            dy = cumsum[-1, :] - cumsum[i, :]
+            if kind == "kl":
+                dy *= -1
+            ax[1].plot(z, dy, color=cmap.to_rgba(runs_to_mass[i]))
+
+        cbar_ax = fig.add_axes([1.0, 0.125, 0.035, 0.85])
+        fig.colorbar(cmap, cax=cbar_ax, ticks=runs_to_mass,
+                     label=r"$\log M_{\rm tot} / M_\odot$")
+
+        ax[0].set_xlim(z[0], z[-1])
+        ax[0].set_ylim(1e-5, 1.)
+        if kind == "ks":
+            ax[1].set_xlabel(r"$\log p$-value of $r_{1\mathrm{NN}}$ distribution")  # noqa
+        else:
+            ax[1].set_xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")  # noqa
+        ax[0].set_ylabel(r"Cumulative norm. counts")
+        ax[1].set_ylabel(r"$\Delta f$")
+
+        fig.tight_layout(h_pad=0, w_pad=0)
        for ext in ["png"]:
            if simname == "quijote":
+                paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
                nsim = paths.quijote_fiducial_nsim(nsim, nobs)
-            fout = join(utils.fout, f"significance_{kind}_{simname}_{run}_{str(nsim).zfill(5)}.{ext}")  # noqa
+            fout = join(plt_utils.fout, f"significance_{kind}_{simname}_{str(nsim).zfill(5)}.{ext}")  # noqa
            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()


-def plot_significance_mass(simname, run, nsim, nobs, kind, kwargs):
+def plot_significance_vs_mass(simname, runs, nsim, nobs, kind, kwargs):
    """
    Plot significance of the 1NN distance as a function of the total mass.
+
+    Parameters
+    ----------
+    simname : str
+        Simulation name. Must be either `csiborg` or `quijote`.
+    runs : list of str
+        Run names.
+    nsim : int
+        Simulation index.
+    nobs : int
+        Fiducial Quijote observer index. For CSiBORG must be set to `None`.
+    kind : str
+        Must be either `kl` (Kullback-Leibler divergence) or `ks`
+        (Kolmogorov-Smirnov p-value).
+    kwargs : dict
+        Nearest neighbour reader keyword arguments.
+
+    Returns
+    -------
+    None
    """
+    print(f"Plotting {kind} significance vs mass.")
    assert kind in ["kl", "ks"]
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)

+    with plt.style.context(plt_utils.mplstyle):
+        plt.figure()
+        xs, ys = [], []
+        for run in runs:
            x = reader.read_single(simname, run, nsim, nobs)["mass"]
            if kind == "kl":
                y = make_kl(simname, run, nsim, nobs, kwargs)
            else:
-        y = make_ks(simname, run, nsim, nobs, kwargs)
+                y = numpy.log10(make_ks(simname, run, nsim, nobs, kwargs))
+            xs.append(x)
+            ys.append(y)
+        xs = numpy.concatenate(xs)
+        ys = numpy.concatenate(ys)

-    with plt.style.context(utils.mplstyle):
-        plt.figure()
-        plt.scatter(x, y)
+        plt.hexbin(xs, ys, gridsize=75, mincnt=1, xscale="log", bins="log")

-        plt.xscale("log")
        plt.xlabel(r"$M_{\rm tot} / M_\odot$")
+        plt.xlim(numpy.min(xs))
        if kind == "ks":
-            plt.ylabel(r"$p$-value of $r_{1\mathrm{NN}}$ distribution")
-            plt.yscale("log")
+            plt.ylabel(r"$\log p$-value of $r_{1\mathrm{NN}}$ distribution")
+            plt.ylim(top=0)
        else:
            plt.ylabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
+            plt.ylim(bottom=0)
+        plt.colorbar(label="Bin counts")

        plt.tight_layout()
        for ext in ["png"]:
            if simname == "quijote":
                nsim = paths.quijote_fiducial_nsim(nsim, nobs)
-            fout = join(utils.fout, f"significance_vs_mass_{kind}_{simname}_{run}_{str(nsim).zfill(5)}.{ext}")  # noqa
+            fout = (f"significance_vs_mass_{kind}_{simname}"
+                    + f"_{str(nsim).zfill(5)}.{ext}")
+            fout = join(plt_utils.fout, fout)
            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()


-def plot_kl_vs_ks(simname, run, nsim, nobs, kwargs):
+def plot_kl_vs_ks(simname, runs, nsim, nobs, kwargs):
    """
    Plot Kullback-Leibler divergence vs Kolmogorov-Smirnov statistic p-value.
+
+    Parameters
+    ----------
+    simname : str
+        Simulation name. Must be either `csiborg` or `quijote`.
+    runs : list of str
+        Run names.
+    nsim : int
+        Simulation index.
+    nobs : int
+        Fiducial Quijote observer index. For CSiBORG must be set to `None`.
+    kwargs : dict
+        Nearest neighbour reader keyword arguments.
+
+    Returns
+    -------
+    None
    """
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)

-    x = reader.read_single(simname, run, nsim, nobs)["mass"]
-    y_kl = make_kl(simname, run, nsim, nobs, kwargs)
-    y_ks = make_ks(simname, run, nsim, nobs, kwargs)
+    xs, ys, cs = [], [], []
+    for run in runs:
+        cs.append(reader.read_single(simname, run, nsim, nobs)["mass"])
+        xs.append(make_kl(simname, run, nsim, nobs, kwargs))
+        ys.append(make_ks(simname, run, nsim, nobs, kwargs))
+    xs = numpy.concatenate(xs)
+    ys = numpy.log10(numpy.concatenate(ys))
+    cs = numpy.log10(numpy.concatenate(cs))

-    with plt.style.context(utils.mplstyle):
+    with plt.style.context(plt_utils.mplstyle):
        plt.figure()
-        plt.scatter(y_kl, y_ks, c=numpy.log10(x))
+        plt.hexbin(xs, ys, C=cs, gridsize=50, mincnt=0,
+                   reduce_C_function=numpy.median)
        plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")

        plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
-        plt.ylabel(r"$p$-value of $r_{1\mathrm{NN}}$ distribution")
-        plt.yscale("log")
+        plt.ylabel(r"$\log p$-value of $r_{1\mathrm{NN}}$ distribution")

        plt.tight_layout()
        for ext in ["png"]:
            if simname == "quijote":
                nsim = paths.quijote_fiducial_nsim(nsim, nobs)
-            fout = join(utils.fout, f"kl_vs_ks{simname}_{run}_{str(nsim).zfill(5)}.{ext}")  # noqa
+            fout = join(plt_utils.fout,
+                        f"kl_vs_ks_{simname}_{run}_{str(nsim).zfill(5)}.{ext}")
            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()


-def plot_kl_vs_overlap(run, nsim, kwargs):
+def plot_kl_vs_overlap(runs, nsim, kwargs):
    """
-    Plot KL divergence vs overlap.
+    Plot KL divergence vs overlap for CSiBORG.
+
+    Parameters
+    ----------
+    runs : list of str
+        Run names.
+    nsim : int
+        Simulation index.
+    kwargs : dict
+        Nearest neighbour reader keyword arguments.
+
+    Returns
+    -------
+    None
    """
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    nn_reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
+
+    xs, ys1, ys2, cs = [], [], [], []
+    for run in runs:
        nn_data = nn_reader.read_single("csiborg", run, nsim, nobs=None)
        nn_hindxs = nn_data["ref_hindxs"]
-
        mass, overlap_hindxs, summed_overlap, prob_nomatch = get_overlap(nsim)

        # We need to match the hindxs between the two.
        hind2overlap_array = {hind: i for i, hind in enumerate(overlap_hindxs)}
-    mask = numpy.asanyarray([hind2overlap_array[hind] for hind in nn_hindxs])
-
+        mask = numpy.asanyarray([hind2overlap_array[hind]
+                                 for hind in nn_hindxs])
        summed_overlap = summed_overlap[mask]
        prob_nomatch = prob_nomatch[mask]
        mass = mass[mask]

        kl = make_kl("csiborg", run, nsim, nobs=None, kwargs=kwargs)

-    with plt.style.context(utils.mplstyle):
+        xs.append(kl)
+        ys1.append(1 - numpy.mean(prob_nomatch, axis=1))
+        ys2.append(numpy.std(prob_nomatch, axis=1))
+        cs.append(numpy.log10(mass))
+
+    xs = numpy.concatenate(xs)
+    ys1 = numpy.concatenate(ys1)
+    ys2 = numpy.concatenate(ys2)
+    cs = numpy.concatenate(cs)
+
+    with plt.style.context(plt_utils.mplstyle):
        plt.figure()
-        mu = numpy.mean(prob_nomatch, axis=1)
-        plt.scatter(kl, 1 - mu, c=numpy.log10(mass))
+        plt.hexbin(xs, ys1, C=cs, gridsize=50, mincnt=0,
+                   reduce_C_function=numpy.median)
        plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")
        plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
        plt.ylabel(r"$1 - \langle \eta^{\mathcal{B}}_a \rangle_{\mathcal{B}}$")

        plt.tight_layout()
        for ext in ["png"]:
-            fout = join(utils.fout, f"kl_vs_overlap_mean_{run}_{str(nsim).zfill(5)}.{ext}")  # noqa
+            fout = join(plt_utils.fout,
+                        f"kl_vs_overlap_mean_{str(nsim).zfill(5)}.{ext}")
            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()

-    with plt.style.context(utils.mplstyle):
+    with plt.style.context(plt_utils.mplstyle):
        plt.figure()
-        std = numpy.std(prob_nomatch, axis=1)
-        plt.scatter(kl, std, c=numpy.log10(mass))
+        plt.hexbin(xs, ys2, C=cs, gridsize=50, mincnt=0,
+                   reduce_C_function=numpy.median)
        plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")
        plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
-        plt.ylabel(r"$\langle \left(\eta^{\mathcal{B}}_a - \langle \eta^{\mathcal{B}^\prime}_a \rangle_{\mathcal{B}^\prime}\right)^2\rangle_{\mathcal{B}}^{1/2}$")  # noqa
+        plt.ylabel(r"Ensemble std of summed overlap")

        plt.tight_layout()
        for ext in ["png"]:
-            fout = join(utils.fout, f"kl_vs_overlap_std_{run}_{str(nsim).zfill(5)}.{ext}")  # noqa
+            fout = join(plt_utils.fout,
+                        f"kl_vs_overlap_std_{str(nsim).zfill(5)}.{ext}")
            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
        plt.close()


@ -376,6 +856,19 @@ if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument('-c', '--clean', action='store_true')
    args = parser.parse_args()
+    neighbour_kwargs = csiborgtools.neighbour_kwargs
+
+    runs_to_mass = {
+        "mass001": (12.4, 12.8),
+        "mass002": (12.6, 13.0),
+        "mass003": (12.8, 13.2),
+        "mass004": (13.0, 13.4),
+        "mass005": (13.2, 13.6),
+        "mass006": (13.4, 13.8),
+        "mass007": (13.6, 14.0),
+        "mass008": (13.8, 14.2),
+        "mass009": (14.0, 14.4),  # This run has no upper mass limit.
+        }

    cached_funcs = ["get_overlap", "read_dist", "make_kl", "make_ks"]
    if args.clean:
@ -383,31 +876,38 @@ if __name__ == "__main__":
            print(f"Cleaning cache for function {func}.")
            delete_disk_caches_for_function(func)

-    neighbour_kwargs = {"rmax_radial": 155 / 0.705,
-                        "nbins_radial": 50,
-                        "rmax_neighbour": 100.,
-                        "nbins_neighbour": 150,
-                        "paths_kind": csiborgtools.paths_glamdring}
-    run = "mass003"
+    # Plot 1NN distance distributions.
+    if False:
+        for i in range(1, 10):
+            run = f"mass00{i}"
+            for pulled_cdf in [True, False]:
+                plot_dist(run, "cdf", neighbour_kwargs, runs_to_mass,
+                          pulled_cdf=pulled_cdf,)
+            plot_dist(run, "pdf", neighbour_kwargs, runs_to_mass)

-    # plot_dist("mass003", "pdf", neighbour_kwargs)
+    # Plot 1NN CDF differences.
+    if False:
+        runs = [f"mass00{i}" for i in range(1, 10)]
+        for pulled_cdf in [True, False]:
+            plot_cdf_diff(runs, neighbour_kwargs, pulled_cdf=pulled_cdf,
+                          runs_to_mass=runs_to_mass)
+    if False:
+        runs = [f"mass00{i}" for i in range(1, 9)]
+        for kind in ["kl", "ks"]:
+            plot_significance("csiborg", runs, 7444, nobs=None, kind=kind,
+                              kwargs=neighbour_kwargs,
+                              runs_to_mass=runs_to_mass)

-    paths = csiborgtools.read.Paths(**neighbour_kwargs["paths_kind"])
-    nn_reader = csiborgtools.read.NearestNeighbourReader(**neighbour_kwargs,
-                                                         paths=paths)
+    if True:
+        runs = [f"mass00{i}" for i in range(1, 10)]
+        for kind in ["kl", "ks"]:
+            plot_significance_vs_mass("csiborg", runs, 7444, nobs=None,
+                                      kind=kind, kwargs=neighbour_kwargs)

-    # sizes = numpy.full(2700, numpy.nan)
-    # from tqdm import trange
-    # k = 0
-    # for nsim in trange(100):
-    #     for nobs in range(27):
-    #         d = nn_reader.read_single("quijote", run, nsim, nobs)
-    #         sizes[k] = d["mass"].size
+    if False:
+        runs = [f"mass00{i}" for i in range(1, 10)]
+        plot_kl_vs_ks("csiborg", runs, 7444, None, kwargs=neighbour_kwargs)

-    #         k += 1
-    # print(sizes)
-    # print(numpy.mean(sizes), numpy.std(sizes))
-
-    # plot_kl_vs_overlap("mass003", 7444, neighbour_kwargs)
-
-    # plot_cdf_r200("mass003", neighbour_kwargs)
+    if False:
+        runs = [f"mass00{i}" for i in range(1, 10)]
+        plot_kl_vs_overlap(runs, 7444, neighbour_kwargs)
--- a/scripts_plots/plt_utils.py
+++ b/scripts_plots/plt_utils.py