From 28e93e917f6c04fc56cd44c754115f24b93bec1d Mon Sep 17 00:00:00 2001
From: Richard Stiskalek <richard.stiskalek@protonmail.com>
Date: Mon, 3 Jul 2023 15:35:10 +0100
Subject: [PATCH] More plotting (#74)

* Add a new plot

* Add a binned trend

* Fix bug

* Improve plot further

* Add new plotting

* add max overlap

* edit get_overlap

* Add max overlap plot

* Update plot

* Add max overlap key

* add max dist flag

* Improve plotting
---
 csiborgtools/read/overlap_summary.py | 123 +++++-
 scripts_plots/plot_match.py          | 622 +++++++++++++++++++++++----
 scripts_plots/plt_utils.py           |  40 ++
 3 files changed, 690 insertions(+), 95 deletions(-)

diff --git a/csiborgtools/read/overlap_summary.py b/csiborgtools/read/overlap_summary.py
index bf66efd..2a83a17 100644
--- a/csiborgtools/read/overlap_summary.py
+++ b/csiborgtools/read/overlap_summary.py
@@ -38,18 +38,21 @@ class PairOverlap:
         Halo catalogue corresponding to the cross simulation.
     paths : py:class`csiborgtools.read.Paths`
         CSiBORG paths object.
+    maxdist : float, optional
+        Maximum halo distance in :math:`\mathrm{Mpc} / h` from the centre of
+        the high-resolution region. Removes overlaps of haloes outside it.
     """
     _cat0 = None
     _catx = None
     _data = None
 
-    def __init__(self, cat0, catx, paths):
+    def __init__(self, cat0, catx, paths, maxdist=None):
         self._cat0 = cat0
         self._catx = catx
-        self.load(cat0, catx, paths)
+        self.load(cat0, catx, paths, maxdist)
 
-    def load(self, cat0, catx, paths):
-        """
+    def load(self, cat0, catx, paths, maxdist=None):
+        r"""
         Load overlap calculation results. Matches the results back to the two
         catalogues in question.
 
@@ -61,6 +64,9 @@ class PairOverlap:
             Halo catalogue corresponding to the cross simulation.
         paths : py:class`csiborgtools.read.Paths`
             CSiBORG paths object.
+        maxdist : float, optional
+            Maximum halo distance in :math:`\mathrm{Mpc} / h` from the centre
+            of the high-resolution region.
 
         Returns
         -------
@@ -125,6 +131,14 @@ class PairOverlap:
             match_indxs, ngp_overlap, smoothed_overlap = self._invert_match(
                 match_indxs, ngp_overlap, smoothed_overlap, len(catx),)
 
+        if maxdist is not None:
+            dist = cat0.radial_distance(in_initial=False)
+            for i in range(len(cat0)):
+                if dist[i] > maxdist:
+                    match_indxs[i] = numpy.array([], dtype=int)
+                    ngp_overlap[i] = numpy.array([], dtype=float)
+                    smoothed_overlap[i] = numpy.array([], dtype=float)
+
         self._data = {"match_indxs": match_indxs,
                       "ngp_overlap": ngp_overlap,
                       "smoothed_overlap": smoothed_overlap,
@@ -228,7 +242,11 @@ class PairOverlap:
         summed_overlap : 1-dimensional array of shape `(nhalos, )`
         """
         overlap = self.overlap(from_smoothed)
-        return numpy.array([numpy.sum(cross)for cross in overlap])
+        out = numpy.full(len(overlap), numpy.nan, dtype=numpy.float32)
+        for i in range(len(overlap)):
+            if len(overlap[i]) > 0:
+                out[i] = numpy.sum(overlap[i])
+        return out
 
     def prob_nomatch(self, from_smoothed):
         """
@@ -246,8 +264,11 @@ class PairOverlap:
         prob_nomatch : 1-dimensional array of shape `(nhalos, )`
         """
         overlap = self.overlap(from_smoothed)
-        return numpy.array([numpy.product(numpy.subtract(1, cross))
-                            for cross in overlap])
+        out = numpy.full(len(overlap), numpy.nan, dtype=numpy.float32)
+        for i in range(len(overlap)):
+            if len(overlap[i]) > 0:
+                out[i] = numpy.product(numpy.subtract(1, overlap[i]))
+        return out
 
     def dist(self, in_initial, norm_kind=None):
         """
@@ -326,6 +347,35 @@ class PairOverlap:
                 ratio[i] = numpy.abs(ratio[i])
         return numpy.array(ratio, dtype=object)
 
+    def max_overlap_key(self, key, from_smoothed):
+        """
+        Calculate the maximum overlap mass of each halo in the reference
+        simulation from the cross simulation.
+
+        Parameters
+        ----------
+        key : str
+            Key to the maximum overlap statistic to calculate.
+        from_smoothed : bool
+            Whether to use the smoothed overlap or not.
+        mass_kind : str, optional
+            The mass kind whose ratio is to be calculated.
+
+        Returns
+        -------
+        out : 1-dimensional array of shape `(nhalos, )`
+        """
+        out = numpy.full(len(self), numpy.nan, dtype=numpy.float32)
+        y = self.catx(key)
+        overlap = self.overlap(from_smoothed)
+
+        for i, match_ind in enumerate(self["match_indxs"]):
+            # Skip if no match
+            if len(match_ind) == 0:
+                continue
+            out[i] = y[match_ind][numpy.argmax(overlap[i])]
+        return out
+
     def counterpart_mass(self, from_smoothed, overlap_threshold=0.,
                          in_log=False, mass_kind="totpartmass"):
         """
@@ -359,7 +409,7 @@ class PairOverlap:
 
         for i, match_ind in enumerate(self["match_indxs"]):
             # Skip if no match
-            if match_ind.size == 0:
+            if len(match_ind) == 0:
                 continue
 
             massx_ = massx[match_ind]  # Again just create references
@@ -527,6 +577,63 @@ class NPairsOverlap:
 
         self._pairs = pairs
 
+    def max_overlap(self, from_smoothed, verbose=True):
+        """
+        Calculate maximum overlap of each halo in the reference simulation with
+        the cross simulations.
+
+        Parameters
+        ----------
+        from_smoothed : bool
+            Whether to use the smoothed overlap or not.
+        verbose : bool, optional
+            Verbosity flag.
+
+        Returns
+        -------
+        max_overlap : 2-dimensional array of shape `(nhalos, ncatxs)`
+        """
+        out = [None] * len(self)
+        if verbose:
+            print("Calculating maximum overlap...", flush=True)
+
+        def get_max(y_):
+            if len(y_) == 0:
+                return numpy.nan
+            return numpy.max(y_)
+
+        for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
+            out[i] = numpy.asanyarray([get_max(y_)
+                                       for y_ in pair.overlap(from_smoothed)])
+        return numpy.vstack(out).T
+
+    def max_overlap_key(self, key, from_smoothed, verbose=True):
+        """
+        Calculate maximum overlap mass of each halo in the reference
+        simulation with the cross simulations.
+
+        Parameters
+        ----------
+        key : str
+            Key to the maximum overlap statistic to calculate.
+        from_smoothed : bool
+            Whether to use the smoothed overlap or not.
+        verbose : bool, optional
+            Verbosity flag.
+
+        Returns
+        -------
+        out : 2-dimensional array of shape `(nhalos, ncatxs)`
+        """
+        out = [None] * len(self)
+        if verbose:
+            print(f"Calculating maximum overlap {key}...", flush=True)
+
+        for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
+            out[i] = pair.max_overlap_key(key, from_smoothed)
+
+        return numpy.vstack(out).T
+
     def summed_overlap(self, from_smoothed, verbose=True):
         """
         Calculate summed overlap of each halo in the reference simulation with
diff --git a/scripts_plots/plot_match.py b/scripts_plots/plot_match.py
index 05e94a7..16f5acd 100644
--- a/scripts_plots/plot_match.py
+++ b/scripts_plots/plot_match.py
@@ -19,11 +19,12 @@ from os.path import join
 
 import matplotlib as mpl
 import matplotlib.pyplot as plt
+from mpl_toolkits.axes_grid1.inset_locator import inset_axes
 import numpy
 import scienceplots  # noqa
 from cache_to_disk import cache_to_disk, delete_disk_caches_for_function
 from scipy.stats import kendalltau
-from tqdm import tqdm
+from tqdm import trange, tqdm
 
 import plt_utils
 
@@ -57,6 +58,90 @@ def open_cat(nsim):
     return csiborgtools.read.HaloCatalogue(nsim, paths, bounds=bounds)
 
 
+def plot_mass_vs_pairoverlap(nsim0, nsimx):
+    """
+    Plot the pair overlap of a reference simulation with a single cross
+    simulation as a function of the reference halo mass.
+
+    Parameters
+    ----------
+    nsim0 : int
+        Reference simulation index.
+    nsimx : int
+        Cross simulation index.
+    """
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    cat0 = open_cat(nsim0)
+    catx = open_cat(nsimx)
+    reader = csiborgtools.read.PairOverlap(cat0, catx, paths)
+
+    x = reader.copy_per_match("totpartmass")
+    y = reader.overlap(True)
+
+    x = numpy.log10(numpy.concatenate(x))
+    y = numpy.concatenate(y)
+
+    with plt.style.context(plt_utils.mplstyle):
+        plt.figure()
+        plt.hexbin(x, y, mincnt=1, bins="log",
+                   gridsize=50)
+        plt.colorbar(label="Counts in bins")
+        plt.xlabel(r"$\log M_{\rm tot} / M_\odot$")
+        plt.ylabel("Pair overlap")
+        plt.ylim(0., 1.)
+
+        plt.tight_layout()
+        for ext in ["png"]:
+            fout = join(plt_utils.fout, f"mass_vs_pair_overlap{nsim0}.{ext}")
+            print(f"Saving to `{fout}`.")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
+        plt.close()
+
+
+def plot_mass_vs_maxpairoverlap(nsim0, nsimx):
+    """
+    Plot the maximum pair overlap of a reference simulation haloes with a
+    single cross simulation.
+
+    Parameters
+    ----------
+    nsim0 : int
+        Reference simulation index.
+    nsimx : int
+        Cross simulation index.
+    """
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    cat0 = open_cat(nsim0)
+    catx = open_cat(nsimx)
+    reader = csiborgtools.read.PairOverlap(cat0, catx, paths)
+
+    x = numpy.log10(cat0["totpartmass"])
+    y = reader.overlap(True)
+
+    def get_max(y_):
+        if len(y_) == 0:
+            return numpy.nan
+        return numpy.max(y_)
+
+    y = numpy.array([get_max(y_) for y_ in y])
+
+    with plt.style.context(plt_utils.mplstyle):
+        plt.figure()
+        plt.hexbin(x, y, mincnt=1, bins="log",
+                   gridsize=50)
+        plt.colorbar(label="Counts in bins")
+        plt.xlabel(r"$\log M_{\rm tot} / M_\odot$")
+        plt.ylabel("Maximum pair overlap")
+        plt.ylim(0., 1.)
+
+        plt.tight_layout()
+        for ext in ["png"]:
+            fout = join(plt_utils.fout, f"mass_vs_maxpairoverlap{nsim0}.{ext}")
+            print(f"Saving to `{fout}`.")
+            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
+        plt.close()
+
+
 @cache_to_disk(7)
 def get_overlap(nsim0):
     """
@@ -74,9 +159,11 @@ def get_overlap(nsim0):
         Mass of halos in the reference simulation.
     hindxs : 1-dimensional array
         Halo indices in the reference simulation.
-    summed_overlap : 1-dimensional array
+    max_overlap : 2-dimensional array
+        Maximum overlap for each halo in the reference simulation.
+    summed_overlap : 2-dimensional array
         Summed overlap for each halo in the reference simulation.
-    prob_nomatch : 1-dimensional array
+    prob_nomatch : 2-dimensional array
         Probability of no match for each halo in the reference simulation.
     """
     paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
@@ -93,8 +180,64 @@ def get_overlap(nsim0):
 
     hindxs = reader.cat0("index")
     summed_overlap = reader.summed_overlap(True)
+    max_overlap = reader.max_overlap(True)
     prob_nomatch = reader.prob_nomatch(True)
-    return mass, hindxs, summed_overlap, prob_nomatch
+    return mass, hindxs, max_overlap, summed_overlap, prob_nomatch
+
+
+def plot_mass_vsmedmaxoverlap(nsim0):
+    """
+    Plot the mean maximum overlap of a reference simulation haloes with all the
+    cross simulations.
+
+    Parameters
+    ----------
+    nsim0 : int
+        Reference simulation index.
+    """
+    x, __, max_overlap, __, __ = get_overlap(nsim0)
+
+    for i in trange(max_overlap.shape[0]):
+        if numpy.sum(numpy.isnan(max_overlap[i, :])) > 0:
+            max_overlap[i, :] = numpy.nan
+
+    x = numpy.log10(x)
+
+    with plt.style.context(plt_utils.mplstyle):
+        fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625))
+        im1 = axs[0].hexbin(x, numpy.nanmean(max_overlap, axis=1), gridsize=30,
+                            mincnt=1, bins="log")
+
+        im2 = axs[1].hexbin(x, numpy.nanstd(max_overlap, axis=1), gridsize=30,
+                            mincnt=1, bins="log")
+        im3 = axs[2].hexbin(numpy.nanmean(max_overlap, axis=1),
+                            numpy.nanstd(max_overlap, axis=1), gridsize=30,
+                            C=x, reduce_C_function=numpy.nanmean)
+
+        axs[0].set_xlabel(r"$\log M_{\rm tot} / M_\odot$")
+        axs[0].set_ylabel(r"Mean max. pair overlap")
+        axs[1].set_xlabel(r"$\log M_{\rm tot} / M_\odot$")
+        axs[1].set_ylabel(r"Uncertainty of max. pair overlap")
+        axs[2].set_xlabel(r"Mean max. pair overlap")
+        axs[2].set_ylabel(r"Uncertainty of max. pair overlap")
+
+        label = ["Bin counts", "Bin counts", r"$\log M_{\rm tot} / M_\odot$"]
+        ims = [im1, im2, im3]
+        for i in range(3):
+            axins = inset_axes(axs[i], width="100%", height="5%",
+                               loc='upper center', borderpad=-0.75)
+            fig.colorbar(ims[i], cax=axins, orientation="horizontal",
+                         label=label[i])
+            axins.xaxis.tick_top()
+            axins.xaxis.set_tick_params(labeltop=True)
+            axins.xaxis.set_label_position("top")
+
+        fig.tight_layout()
+        for ext in ["png"]:
+            fout = join(plt_utils.fout, f"maxpairoverlap_{nsim0}.{ext}")
+            print(f"Saving to `{fout}`.")
+            fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
+        plt.close()
 
 
 def plot_summed_overlap_vs_mass(nsim0):
@@ -112,14 +255,19 @@ def plot_summed_overlap_vs_mass(nsim0):
     -------
     None
     """
-    x, __, summed_overlap, prob_nomatch = get_overlap(nsim0)
+    x, __, __, summed_overlap, prob_nomatch = get_overlap(nsim0)
     del __
     collect()
 
-    mean_overlap = numpy.mean(summed_overlap, axis=1)
-    std_overlap = numpy.std(summed_overlap, axis=1)
+    for i in trange(summed_overlap.shape[0]):
+        if numpy.sum(numpy.isnan(summed_overlap[i, :])) > 0:
+            summed_overlap[i, :] = numpy.nan
 
-    mean_prob_nomatch = numpy.mean(prob_nomatch, axis=1)
+    x = numpy.log10(x)
+
+    mean_overlap = numpy.nanmean(summed_overlap, axis=1)
+    std_overlap = numpy.nanstd(summed_overlap, axis=1)
+    mean_prob_nomatch = numpy.nanmean(prob_nomatch, axis=1)
 
     mask = mean_overlap > 0
     x = x[mask]
@@ -127,63 +275,45 @@ def plot_summed_overlap_vs_mass(nsim0):
     std_overlap = std_overlap[mask]
     mean_prob_nomatch = mean_prob_nomatch[mask]
 
-    # Mean summed overlap
     with plt.style.context(plt_utils.mplstyle):
-        plt.figure()
-        plt.hexbin(x, mean_overlap, mincnt=1, xscale="log", bins="log",
-                   gridsize=50)
-        plt.colorbar(label="Counts in bins")
-        plt.xlabel(r"$M_{\rm tot} / M_\odot$")
-        plt.ylabel(r"$\langle \mathcal{O}_{a}^{\mathcal{A} \mathcal{B}} \rangle_{\mathcal{B}}$")  # noqa
-        plt.ylim(0., 1.)
-
-        plt.tight_layout()
-        for ext in ["png", "pdf"]:
-            fout = join(plt_utils.fout, f"overlap_mean_{nsim0}.{ext}")
-            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
-        plt.close()
-
-    # Std summed overlap
-    with plt.style.context(plt_utils.mplstyle):
-        plt.figure()
-        plt.hexbin(x, std_overlap, mincnt=1, xscale="log", bins="log",
-                   gridsize=50)
-        plt.colorbar(label="Counts in bins")
-        plt.xlabel(r"$M_{\rm tot} / M_\odot$")
-        plt.ylabel(r"$\delta \left( \mathcal{O}_{a}^{\mathcal{A} \mathcal{B}} \right)_{\mathcal{B}}$")  # noqa
-        plt.ylim(0., 1.)
-        plt.tight_layout()
-
-        for ext in ["png", "pdf"]:
-            fout = join(plt_utils.fout, f"overlap_std_{nsim0}.{ext}")
-            print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
-        plt.close()
-
-    # 1 - mean summed overlap vs mean prob nomatch
-    with plt.style.context(plt_utils.mplstyle):
-        plt.figure()
-        plt.scatter(1 - mean_overlap, mean_prob_nomatch, c=numpy.log10(x), s=2,
-                    rasterized=True)
-        plt.colorbar(label=r"$\log_{10} M_{\rm halo} / M_\odot$")
-
+        fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625))
+        im1 = axs[0].hexbin(x, mean_overlap, mincnt=1, bins="log",
+                            gridsize=30)
+        im2 = axs[1].hexbin(x, std_overlap, mincnt=1, bins="log",
+                            gridsize=30)
+        im3 = axs[2].scatter(1 - mean_overlap, mean_prob_nomatch, c=x, s=2,
+                             rasterized=True)
         t = numpy.linspace(0.3, 1, 100)
-        plt.plot(t, t, color="red", linestyle="--")
+        axs[2].plot(t, t, color="red", linestyle="--")
+        axs[0].set_ylim(0.)
+        axs[1].set_ylim(0.)
+        axs[0].set_xlabel(r"$\log M_{\rm tot} / M_\odot$")
+        axs[0].set_ylabel("Mean summed overlap")
+        axs[1].set_xlabel(r"$\log M_{\rm tot} / M_\odot$")
+        axs[1].set_ylabel("Uncertainty of summed overlap")
+        axs[2].set_xlabel(r"$1 - $ mean summed overlap")
+        axs[2].set_ylabel("Mean prob. of no match")
 
-        plt.xlabel(r"$1 - \langle \mathcal{O}_a^{\mathcal{A} \mathcal{B}} \rangle_{\mathcal{B}}$")  # noqa
-        plt.ylabel(r"$\langle \eta_a^{\mathcal{A} \mathcal{B}} \rangle_{\mathcal{B}}$")  # noqa
-        plt.tight_layout()
+        label = ["Bin counts", "Bin counts", r"$\log M_{\rm tot} / M_\odot$"]
+        ims = [im1, im2, im3]
+        for i in range(3):
+            axins = inset_axes(axs[i], width="100%", height="5%",
+                               loc='upper center', borderpad=-0.75)
+            fig.colorbar(ims[i], cax=axins, orientation="horizontal",
+                         label=label[i])
+            axins.xaxis.tick_top()
+            axins.xaxis.set_tick_params(labeltop=True)
+            axins.xaxis.set_label_position("top")
 
-        for ext in ["png", "pdf"]:
-            fout = join(plt_utils.fout,
-                        f"overlap_vs_prob_nomatch_{nsim0}.{ext}")
+        fig.tight_layout()
+        for ext in ["png"]:
+            fout = join(plt_utils.fout, f"overlap_stat_{nsim0}.{ext}")
             print(f"Saving to `{fout}`.")
-            plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
+            fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
         plt.close()
 
 
-def plot_mass_vs_separation(nsim0, nsimx, min_overlap=0.0):
+def plot_mass_vs_separation(nsim0, nsimx, plot_std=False, min_overlap=0.0):
     """
     Plot the mass of a reference halo against the weighted separation of
     its counterparts.
@@ -194,6 +324,8 @@ def plot_mass_vs_separation(nsim0, nsimx, min_overlap=0.0):
         Reference simulation index.
     nsimx : int
         Cross simulation index.
+    plot_std : bool, optional
+        Whether to plot thestd instead of mean.
     min_overlap : float, optional
         Minimum overlap to consider.
 
@@ -205,7 +337,8 @@ def plot_mass_vs_separation(nsim0, nsimx, min_overlap=0.0):
     cat0 = open_cat(nsim0)
     catx = open_cat(nsimx)
 
-    reader = csiborgtools.read.PairOverlap(cat0, catx, paths)
+    reader = csiborgtools.read.PairOverlap(cat0, catx, paths,
+                                           maxdist=155 / 0.705)
     mass = numpy.log10(reader.cat0("totpartmass"))
     dist = reader.dist(in_initial=False, norm_kind="r200c")
     overlap = reader.overlap(True)
@@ -217,7 +350,11 @@ def plot_mass_vs_separation(nsim0, nsimx, min_overlap=0.0):
     dist = dist[mask, :]
     dist = numpy.log10(dist)
 
-    p = numpy.polyfit(mass, dist[:, 0], 1)
+    if not plot_std:
+        p = numpy.polyfit(mass, dist[:, 0], 1)
+    else:
+        p = numpy.polyfit(mass, dist[:, 1], 1)
+
     xrange = numpy.linspace(numpy.min(mass), numpy.max(mass), 1000)
     txt = r"$m = {}$, $c = {}$".format(*plt_utils.latex_float(*p, n=3))
 
@@ -225,7 +362,14 @@ def plot_mass_vs_separation(nsim0, nsimx, min_overlap=0.0):
         fig, ax = plt.subplots()
         ax.set_title(txt, fontsize="small")
 
-        cx = ax.hexbin(mass, dist[:, 0], mincnt=1, bins="log", gridsize=50)
+        if not plot_std:
+            cx = ax.hexbin(mass, dist[:, 0], mincnt=1, bins="log", gridsize=50)
+            ax.set_ylabel(r"$\log \langle \Delta R / R_{\rm 200c}\rangle$")
+        else:
+            cx = ax.hexbin(mass, dist[:, 1], mincnt=1, bins="log", gridsize=50)
+            ax.set_ylabel(
+                r"$\delta \log \langle \Delta R / R_{\rm 200c}\rangle$")
+
         ax.plot(xrange, numpy.polyval(p, xrange), color="red",
                 linestyle="--")
         fig.colorbar(cx, label="Bin counts")
@@ -234,7 +378,281 @@ def plot_mass_vs_separation(nsim0, nsimx, min_overlap=0.0):
 
         fig.tight_layout()
         for ext in ["png"]:
-            fout = join(plt_utils.fout, f"mass_vs_sep_{nsim0}_{nsimx}.{ext}")
+            fout = join(plt_utils.fout,
+                        f"mass_vs_sep_{nsim0}_{nsimx}_{min_overlap}.{ext}")
+            if plot_std:
+                fout = fout.replace(f".{ext}", f"_std.{ext}")
+            print(f"Saving to `{fout}`.")
+            fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
+        plt.close()
+
+
+@cache_to_disk(7)
+def get_max_key(nsim0, key):
+    """
+    Get the value of a maximum overlap halo's property.
+
+    Parameters
+    ----------
+    nsim0 : int
+        Reference simulation index.
+    key : str
+        Property to get.
+
+    Returns
+    -------
+    mass0 : 1-dimensional array
+        Mass of the reference haloes.
+    key_val : 1-dimensional array
+        Value of the property of the reference haloes.
+    max_overlap : 2-dimensional array
+        Maximum overlap of the reference haloes.
+    stat : 2-dimensional array
+        Value of the property of the maximum overlap halo.
+    """
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    nsimxs = csiborgtools.read.get_cross_sims(nsim0, paths, smoothed=True)
+    nsimxs = nsimxs
+    cat0 = open_cat(nsim0)
+
+    catxs = []
+    print("Opening catalogues...", flush=True)
+    for nsimx in tqdm(nsimxs):
+        catxs.append(open_cat(nsimx))
+
+    reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths)
+
+    mass0 = reader.cat0("totpartmass")
+    key_val = reader.cat0(key)
+    max_overlap = reader.max_overlap(True)
+    stat = reader.max_overlap_key(key, True)
+    return mass0, key_val, max_overlap, stat
+
+
+def plot_maxoverlap_mass(nsim0):
+    """
+    Plot the mass of the reference haloes against the mass of the maximum
+    overlap haloes.
+
+    Parameters
+    ----------
+    nsim0 : int
+        Reference simulation index.
+    """
+    mass0, __, __, stat = get_max_key(nsim0, "totpartmass")
+
+    mu = numpy.mean(stat, axis=1)
+    std = numpy.std(numpy.log10(stat), axis=1)
+    mu = numpy.log10(mu)
+
+    mass0 = numpy.log10(mass0)
+    with plt.style.context(plt_utils.mplstyle):
+        fig, axs = plt.subplots(ncols=2, figsize=(3.5 * 1.75, 2.625))
+
+        im0 = axs[0].hexbin(mass0, mu, mincnt=1, bins="log", gridsize=50)
+        im1 = axs[1].hexbin(mass0, std, mincnt=1, bins="log", gridsize=50)
+
+        m = numpy.isfinite(mass0) & numpy.isfinite(mu)
+        print("True to expectation corr: ", kendalltau(mass0[m], mu[m]))
+
+        t = numpy.linspace(*numpy.percentile(mass0, [0, 100]), 1000)
+        axs[0].plot(t, t, color="red", linestyle="--")
+        axs[0].plot(t, t + 0.2, color="red", linestyle="--", alpha=0.5)
+        axs[0].plot(t, t - 0.2, color="red", linestyle="--", alpha=0.5)
+
+        axs[0].set_xlabel(r"$\log M_{\rm tot} / M_\odot$")
+        axs[1].set_xlabel(r"$\log M_{\rm tot} / M_\odot$")
+        axs[0].set_ylabel(r"Max. overlap mean of $\log M_{\rm tot} / M_\odot$")
+        axs[1].set_ylabel(r"Max. overlap std. of $\log M_{\rm tot} / M_\odot$")
+
+        ims = [im0, im1]
+        for i in range(2):
+            axins = inset_axes(axs[i], width="100%", height="5%",
+                               loc='upper center', borderpad=-0.75)
+            fig.colorbar(ims[i], cax=axins, orientation="horizontal",
+                         label="Bin counts")
+            axins.xaxis.tick_top()
+            axins.xaxis.set_tick_params(labeltop=True)
+            axins.xaxis.set_label_position("top")
+
+        fig.tight_layout()
+        for ext in ["png"]:
+            fout = join(plt_utils.fout,
+                        f"max_totpartmass_{nsim0}.{ext}")
+            print(f"Saving to `{fout}`.")
+            fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
+        plt.close()
+
+
+def plot_maxoverlapstat(nsim0, key):
+    """
+    Plot the mass of the reference haloes against the value of the maximum
+    overlap statistic.
+
+    Parameters
+    ----------
+    nsim0 : int
+        Reference simulation index.
+    key : str
+        Property to get.
+    """
+    assert key != "totpartmass"
+    mass0, key_val, __, stat = get_max_key(nsim0, key)
+
+    xlabels = {"lambda200c": r"\log \lambda_{\rm 200c}"}
+    key_label = xlabels.get(key, key)
+
+    mass0 = numpy.log10(mass0)
+    key_val = numpy.log10(key_val)
+
+    mu = numpy.mean(stat, axis=1)
+    std = numpy.std(numpy.log10(stat), axis=1)
+    mu = numpy.log10(mu)
+
+    with plt.style.context(plt_utils.mplstyle):
+        fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625))
+
+        im0 = axs[0].hexbin(mass0, mu, mincnt=1, bins="log", gridsize=30)
+        im1 = axs[1].hexbin(mass0, std, mincnt=1, bins="log", gridsize=30)
+        im2 = axs[2].hexbin(key_val, mu, mincnt=1, bins="log", gridsize=30)
+        m = numpy.isfinite(key_val) & numpy.isfinite(mu)
+        print("True to expectation corr: ", kendalltau(key_val[m], mu[m]))
+
+        axs[0].set_xlabel(r"$\log M_{\rm tot} / M_\odot$")
+        axs[0].set_ylabel(r"Max. overlap mean of ${}$".format(key_label))
+        axs[1].set_xlabel(r"$\log M_{\rm tot} / M_\odot$")
+        axs[1].set_ylabel(r"Max. overlap std. of ${}$".format(key_label))
+        axs[2].set_xlabel(r"${}$".format(key_label))
+        axs[2].set_ylabel(r"Max. overlap mean of ${}$".format(key_label))
+
+        ims = [im0, im1, im2]
+        for i in range(3):
+            axins = inset_axes(axs[i], width="100%", height="5%",
+                               loc='upper center', borderpad=-0.75)
+            fig.colorbar(ims[i], cax=axins, orientation="horizontal",
+                         label="Bin counts")
+            axins.xaxis.tick_top()
+            axins.xaxis.set_tick_params(labeltop=True)
+            axins.xaxis.set_label_position("top")
+
+        fig.tight_layout()
+        for ext in ["png"]:
+            fout = join(plt_utils.fout,
+                        f"max_{key}_{nsim0}.{ext}")
+            print(f"Saving to `{fout}`.")
+            fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
+        plt.close()
+
+
+@cache_to_disk(7)
+def get_expected_mass(nsim0, min_overlap):
+    """
+    Get the expected mass of a reference halo given its overlap with halos
+    from other simulations.
+
+    Parameters
+    ----------
+    nsim0 : int
+        Reference simulation index.
+    min_overlap : float
+        Minimum overlap to consider between a pair of haloes.
+
+    Returns
+    -------
+    mass : 1-dimensional array
+        Mass of the reference haloes.
+    mu : 1-dimensional array
+        Expected mass of the matched haloes.
+    std : 1-dimensional array
+        Standard deviation of the expected mass of the matched haloes.
+    prob_nomatch : 2-dimensional array
+        Probability of not matching the reference halo.
+    """
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    nsimxs = csiborgtools.read.get_cross_sims(nsim0, paths, smoothed=True)
+    nsimxs = nsimxs
+    cat0 = open_cat(nsim0)
+
+    catxs = []
+    print("Opening catalogues...", flush=True)
+    for nsimx in tqdm(nsimxs):
+        catxs.append(open_cat(nsimx))
+
+    reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths)
+    mass = reader.cat0("totpartmass")
+
+    mu, std = reader.counterpart_mass(True, overlap_threshold=min_overlap,
+                                      in_log=False, return_full=False)
+    prob_nomatch = reader.prob_nomatch(True)
+    return mass, mu, std, prob_nomatch
+
+
+def plot_mass_vs_expected_mass(nsim0, min_overlap=0, max_prob_nomatch=1):
+    """
+    Plot the mass of a reference halo against the expected mass of its
+    counterparts.
+
+    Parameters
+    ----------
+    nsim0 : int
+        Reference simulation index.
+    min_overlap : float, optional
+        Minimum overlap between a pair of haloes to consider.
+    max_prob_nomatch : float, optional
+        Maximum probability of no match to consider.
+    """
+    mass, mu, std, prob_nomatch = get_expected_mass(nsim0, min_overlap)
+
+    std = std / mu / numpy.log(10)
+    mass = numpy.log10(mass)
+    mu = numpy.log10(mu)
+    prob_nomatch = numpy.nanmedian(prob_nomatch, axis=1)
+
+    # bins = numpy.arange(numpy.min(mass), numpy.max(mass), 0.2)
+    mask = numpy.isfinite(mass) & numpy.isfinite(mu)
+    mask &= (prob_nomatch < max_prob_nomatch)
+    # stat_x, stat_mu, stat_std = plt_utils.binned_trend(
+    #     mass[mask], mu[mask], 1 - prob_nomatch[mask], bins)
+
+    with plt.style.context(plt_utils.mplstyle):
+        fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625))
+
+        im0 = axs[0].hexbin(mass[mask], mu[mask], mincnt=1, bins="log",
+                            gridsize=50,)
+        im1 = axs[1].hexbin(mass[mask], std[mask], mincnt=1, bins="log",
+                            gridsize=50)
+        im2 = axs[2].hexbin(1 - prob_nomatch[mask], mass[mask] - mu[mask],
+                            gridsize=50, C=mass[mask],
+                            reduce_C_function=numpy.nanmedian)
+        axs[2].axhline(0, color="red", linestyle="--", alpha=0.5)
+
+        axs[0].set_xlabel(r"True $\log M_{\rm tot} / M_\odot$")
+        axs[0].set_ylabel(r"Expected $\log M_{\rm tot} / M_\odot$")
+        axs[1].set_xlabel(r"True $\log M_{\rm tot} / M_\odot$")
+        axs[1].set_ylabel(r"Std. of $\sigma_{\log M_{\rm tot}}$")
+        axs[2].set_xlabel(r"1 - median prob. of no match")
+        axs[2].set_ylabel(r"$\log M_{\rm tot} - \log M_{\rm tot, exp}$")
+
+        t = numpy.linspace(*numpy.percentile(mass[mask], [0, 100]), 1000)
+        axs[0].plot(t, t, color="red", linestyle="--")
+        axs[0].plot(t, t + 0.2, color="red", linestyle="--", alpha=0.5)
+        axs[0].plot(t, t - 0.2, color="red", linestyle="--", alpha=0.5)
+
+        ims = [im0, im1, im2]
+        labels = ["Bin counts", "Bin counts", r"$\log M_{\rm tot}$"]
+        for i in range(3):
+            axins = inset_axes(axs[i], width="100%", height="5%",
+                               loc='upper center', borderpad=-0.75)
+            fig.colorbar(ims[i], cax=axins, orientation="horizontal",
+                         label=labels[i])
+            axins.xaxis.tick_top()
+            axins.xaxis.set_tick_params(labeltop=True)
+            axins.xaxis.set_label_position("top")
+
+        fig.tight_layout()
+        for ext in ["png"]:
+            fout = join(plt_utils.fout,
+                        f"mass_vs_expmass_{nsim0}_{max_prob_nomatch}.{ext}")
             print(f"Saving to `{fout}`.")
             fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
         plt.close()
@@ -714,7 +1132,7 @@ def plot_significance(simname, runs, nsim, nobs, kind, kwargs, runs_to_mass):
         plt.close()
 
 
-def make_binlims(run, runs_to_mass):
+def make_binlims(run, runs_to_mass, upper_threshold=None):
     """
     Make bin limits for the 1NN distance runs, corresponding to the first half
     of the log-mass bin.
@@ -725,13 +1143,18 @@ def make_binlims(run, runs_to_mass):
         Run name.
     runs_to_mass : dict
         Dictionary mapping run names to total halo mass range.
+    upper_threshold : float, optional
+        Bin width in dex. If set to `None`, the bin width is taken from the
+        `runs_to_mass` dictionary.
 
     Returns
     -------
     xmin, xmax : floats
     """
     xmin, xmax = runs_to_mass[run]
-    xmax = xmin + (xmax - xmin) / 2
+    if upper_threshold is not None:
+        xmax = xmin + upper_threshold
+
     xmin, xmax = 10**xmin, 10**xmax
     if run == "mass009":
         xmax = numpy.infty
@@ -782,9 +1205,9 @@ def plot_significance_vs_mass(simname, runs, nsim, nobs, kind, kwargs,
             else:
                 y = numpy.log10(make_ks(simname, run, nsim, nobs, kwargs))
 
-            xmin, xmax = make_binlims(run, runs_to_mass)
+            xmin, xmax = make_binlims(run, runs_to_mass, upper_threshold)
 
-            mask = (x >= xmin) & (x < xmax if upper_threshold else True)
+            mask = (x >= xmin) & (x < xmax)
             xs.append(numpy.log10(x[mask]))
             ys.append(y[mask])
 
@@ -922,7 +1345,7 @@ def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass, plot_std=True,
     for run in runs:
         nn_data = nn_reader.read_single("csiborg", run, nsim, nobs=None)
         nn_hindxs = nn_data["ref_hindxs"]
-        mass, overlap_hindxs, summed_overlap, prob_nomatch = get_overlap(nsim)
+        mass, overlap_hindxs, __, summed_overlap, prob_nomatch = get_overlap(nsim)  # noqa
 
         # We need to match the hindxs between the two.
         hind2overlap_array = {hind: i for i, hind in enumerate(overlap_hindxs)}
@@ -935,7 +1358,7 @@ def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass, plot_std=True,
         x = make_kl("csiborg", run, nsim, nobs=None, kwargs=kwargs)
         y1 = 1 - numpy.mean(prob_nomatch, axis=1)
         y2 = numpy.std(prob_nomatch, axis=1)
-        cmin, cmax = make_binlims(run, runs_to_mass)
+        cmin, cmax = make_binlims(run, runs_to_mass, upper_threshold)
         mask = (mass >= cmin) & (mass < cmax if upper_threshold else True)
         xs.append(x[mask])
         ys1.append(y1[mask])
@@ -957,7 +1380,7 @@ def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass, plot_std=True,
 
         plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")
         plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
-        plt.ylabel(r"$1 - \langle \eta^{\mathcal{B}}_a \rangle_{\mathcal{B}}$")
+        plt.ylabel("1 - mean prob. of no match")
 
         plt.tight_layout()
         for ext in ["png"]:
@@ -1020,15 +1443,38 @@ if __name__ == "__main__":
         }
 
     # cached_funcs = ["get_overlap", "read_dist", "make_kl", "make_ks"]
-    cached_funcs = ["get_overlap"]
+    cached_funcs = ["get_max_key"]
     if args.clean:
         for func in cached_funcs:
             print(f"Cleaning cache for function {func}.")
             delete_disk_caches_for_function(func)
 
-    if True:
-        plot_mass_vs_separation(7444 + 24, 8956 + 24 * 3)
+    if False:
+        plot_mass_vs_pairoverlap(7444 + 24, 8956 + 24 * 3)
 
+    if False:
+        plot_mass_vs_maxpairoverlap(7444 + 24, 8956 + 24 * 3)
+
+    if False:
+        plot_mass_vsmedmaxoverlap(7444)
+
+    if False:
+        plot_summed_overlap_vs_mass(7444)
+
+    if True:
+        plot_mass_vs_separation(7444 + 24, 8956 + 24 * 3, min_overlap=0.0)
+
+    if False:
+        plot_maxoverlap_mass(7444)
+
+    if False:
+        plot_maxoverlapstat(7444, "lambda200c")
+
+    if False:
+        plot_maxoverlapstat(7444, "totpartmass")
+
+    if False:
+        plot_mass_vs_expected_mass(7444, max_prob_nomatch=1.0)
 
     # Plot 1NN distance distributions.
     if False:
@@ -1052,23 +1498,25 @@ if __name__ == "__main__":
                               kwargs=neighbour_kwargs,
                               runs_to_mass=runs_to_mass)
 
+    if False:
+        # runs = [[f"mass00{i}"] for i in range(1, 10)]
+        runs = [[f"mass00{i}"] for i in [4]]
+        for runs_ in runs:
+            # runs = ["mass007"]
+            for kind in ["kl"]:
+                plot_significance_vs_mass("csiborg", runs_, 7444, nobs=None,
+                                          kind=kind, kwargs=neighbour_kwargs,
+                                          runs_to_mass=runs_to_mass,
+                                          upper_threshold=100)
+
+    if False:
+        # runs = [f"mass00{i}" for i in range(1, 10)]
+        runs = ["mass004"]
+        plot_kl_vs_ks("csiborg", runs, 7444, None, kwargs=neighbour_kwargs,
+                      runs_to_mass=runs_to_mass, upper_threshold=100)
+
     if False:
         # runs = [f"mass00{i}" for i in range(1, 10)]
         runs = ["mass007"]
-        for kind in ["kl", "ks"]:
-            plot_significance_vs_mass("csiborg", runs, 7444, nobs=None,
-                                      kind=kind, kwargs=neighbour_kwargs,
-                                      runs_to_mass=runs_to_mass,
-                                      upper_threshold=True)
-
-    if False:
-        # runs = [f"mass00{i}" for i in range(1, 10)]
-        runs = ["mass006"]
-        plot_kl_vs_ks("csiborg", runs, 7444, None, kwargs=neighbour_kwargs,
-                      runs_to_mass=runs_to_mass, upper_threshold=True)
-
-    if False:
-        # runs = [f"mass00{i}" for i in range(1, 10)]
-        runs = ["mass006"]
         plot_kl_vs_overlap(runs, 7444, neighbour_kwargs, runs_to_mass,
-                           upper_threshold=True, plot_std=False)
+                           upper_threshold=100, plot_std=False)
diff --git a/scripts_plots/plt_utils.py b/scripts_plots/plt_utils.py
index e86c053..302ee1b 100644
--- a/scripts_plots/plt_utils.py
+++ b/scripts_plots/plt_utils.py
@@ -13,6 +13,9 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 
+import numpy
+from scipy.stats import binned_statistic
+
 dpi = 600
 fout = "../plots/"
 mplstyle = ["science"]
@@ -51,3 +54,40 @@ def latex_float(*floats, n=2):
     if len(floats) == 1:
         return latex_floats[0]
     return latex_floats
+
+
+def binned_trend(x, y, weights, bins):
+    """
+    Calculate the weighted mean and standard deviation of `y` in bins of `x`.
+
+    Parameters
+    ----------
+    x : 1-dimensional array
+        The x-coordinates of the data points.
+    y : 1-dimensional array
+        The y-coordinates of the data points.
+    weights : 1-dimensional array
+        The weights of the data points.
+    bins : 1-dimensional array
+        The bin edges.
+
+    Returns
+    -------
+    stat_x : 1-dimensional array
+        The x-coordinates of the binned data points.
+    stat_mu : 1-dimensional array
+        The weighted mean of `y` in bins of `x`.
+    stat_std : 1-dimensional array
+        The weighted standard deviation of `y` in bins of `x`.
+    """
+    stat_mu, __, __ = binned_statistic(x, y * weights, bins=bins,
+                                       statistic="sum")
+    stat_std, __, __ = binned_statistic(x, y * weights, bins=bins,
+                                        statistic=numpy.var)
+    stat_w, __, __ = binned_statistic(x, weights, bins=bins, statistic="sum")
+
+    stat_x = (bins[1:] + bins[:-1]) / 2
+    stat_mu /= stat_w
+    stat_std /= stat_w
+    stat_std = numpy.sqrt(stat_std)
+    return stat_x, stat_mu, stat_std