Lagrangian patch + HMF calculation (#65)

* Rename lagpatch * Fix old bug * Fix small bug * Add number of cells calculation * Fix a small bug * Rename column * Move file * Small changes * Edit style * Add plot script * Add delta2ncells * Add HMF calculation * Move definition around * Add HMF plot * pep8 * Update HMF plotting routine * Small edit
2025-07-12 00:53:02 +00:00 · 2023-06-01 14:45:52 +01:00 · 2023-06-01 14:45:52 +01:00 · dbf93b9416
commit dbf93b9416
parent f1dbe6f03f
13 changed files with 376 additions and 132 deletions
--- a/csiborgtools/fits/init.py
+++ b/csiborgtools/fits/init.py
@ -12,6 +12,7 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-from .halo import Clump, Halo, dist_centmass  # noqa
+from .halo import (Clump, Halo, delta2ncells, dist_centmass,  # noqa
+                   number_counts)
 from .haloprofile import NFWPosterior, NFWProfile  # noqa
 from .utils import split_jobs  # noqa
--- a/csiborgtools/fits/halo.py
+++ b/csiborgtools/fits/halo.py
@ -15,9 +15,10 @@
 """A clump object."""
 from abc import ABC

-from numba import jit
 import numpy

+from numba import jit
+

 class BaseStructure(ABC):
    """
@ -337,3 +338,51 @@ def dist_centmass(clump):
    cmx, cmy, cmz = [numpy.average(xi, weights=mass) for xi in (x, y, z)]
    dist = ((x - cmx)**2 + (y - cmy)**2 + (z - cmz)**2)**0.5
    return dist, [cmx, cmy, cmz]
+
+
+@jit(nopython=True)
+def delta2ncells(delta):
+    """
+    Calculate the number of cells in `delta` that are non-zero.
+
+    Parameters
+    ----------
+    delta : 3-dimensional array
+        Halo density field.
+
+    Returns
+    -------
+    ncells : int
+        Number of non-zero cells.
+    """
+    tot = 0
+    imax, jmax, kmax = delta.shape
+    for i in range(imax):
+        for j in range(jmax):
+            for k in range(kmax):
+                if delta[i, j, k] > 0:
+                    tot += 1
+    return tot
+
+
+@jit(nopython=True)
+def number_counts(x, bin_edges):
+    """
+    Calculate counts of samples in bins.
+
+    Parameters
+    ----------
+    x : 1-dimensional array
+        Samples' values.
+    bin_edges : 1-dimensional array
+        Bin edges.
+
+    Returns
+    -------
+    counts : 1-dimensional array
+        Bin counts.
+    """
+    out = numpy.full(bin_edges.size - 1, numpy.nan, dtype=numpy.float32)
+    for i in range(bin_edges.size - 1):
+        out[i] = numpy.sum((x >= bin_edges[i]) & (x < bin_edges[i + 1]))
+    return out
--- a/csiborgtools/match/init.py
+++ b/csiborgtools/match/init.py
@ -16,5 +16,4 @@ from .match import (ParticleOverlap, RealisationsMatcher,  # noqa
                    calculate_overlap, calculate_overlap_indxs,
                    cosine_similarity)
 from .nearest_neighbour import find_neighbour  # noqa
-from .num_density import binned_counts, number_density  # noqa
 from .utils import concatenate_parts  # noqa
--- a/csiborgtools/match/match.py
+++ b/csiborgtools/match/match.py
@ -163,7 +163,7 @@ class RealisationsMatcher:
            print(f"{datetime.now()}: querying the KNN.", flush=True)
        match_indxs = radius_neighbours(
            catx.knn(in_initial=True), cat0.position(in_initial=True),
-            radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
+            radiusX=cat0["lagpatch_size"], radiusKNN=catx["lagpatch_size"],
            nmult=self.nmult, enforce_int32=True, verbose=verbose)

        # We next remove neighbours whose mass is too large/small.
@ -419,7 +419,7 @@ class ParticleOverlap:
        delta : 3-dimensional array
        """
        nshift = read_nshift(smooth_kwargs)
-        cells = self.pos2cell(pos)
+        cells = pos2cell(pos, BOX_SIZE)
        # Check that minima and maxima are integers
        if not (mins is None and maxs is None):
            assert mins.dtype.char in numpy.typecodes["AllInteger"]
@ -432,10 +432,10 @@ class ParticleOverlap:
            ncells = maxs - mins + 1  # To get the number of cells
        else:
            mins = [0, 0, 0]
-            ncells = BOX_SIZE
+            ncells = (BOX_SIZE, ) * 3

        # Preallocate and fill the array
-        delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
+        delta = numpy.zeros(ncells, dtype=numpy.float32)
        fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
        if smooth_kwargs is not None:
            gaussian_filter(delta, output=delta, **smooth_kwargs)
--- a/csiborgtools/match/num_density.py
+++ b/csiborgtools/match/num_density.py
@ -1,116 +0,0 @@
-# Copyright (C) 2022 Richard Stiskalek
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the
-# Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
-# Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-"""
-Calculation of number density functions.
-"""
-import numpy
-
-
-def binned_counts(x, bins):
-    """
-    Calculate number of samples in bins.
-
-    Parameters
-    ----------
-    x : 1-dimensional array
-        Samples' values.
-    bins : 1-dimensional array
-        Bin edges of shape `(n_edges, )`.
-
-    Returns
-    -------
-    centres : 1-dimensional array
-        Bin centres of shape `(n_edges - 1, )`.
-    counts : 1-dimensional array
-        Bin counts of shape `(n_edges - 1, )`.
-    """
-    if not isinstance(bins, numpy.ndarray) and bins.ndim == 1:
-        raise TypeError("`bins` must a 1-dimensional array.")
-
-    n_bins = bins.size
-    # Bin centres
-    centres = numpy.asarray(
-        [0.5 * (bins[i + 1] + bins[i]) for i in range(n_bins - 1)])
-    # Bin counts
-    out = numpy.full(n_bins - 1, numpy.nan, dtype=int)
-    for i in range(n_bins - 1):
-        out[i] = numpy.sum((x >= bins[i]) & (x < bins[i + 1]))
-    return centres, out
-
-
-def number_density(data, feat, bins, max_dist, to_log10, return_counts=False):
-    """
-    Calculate volume-limited number density of a feature `feat` from array
-    `data`, normalised also by the bin width.
-
-    Parameters
-    ----------
-    data : structured array
-        Input array of halos.
-    feat : str
-        Parameter whose number density to calculate.
-    bins : 1-dimensional array
-        Bin edges. Note that if `to_log10` then the edges must be specified
-        in the logarithmic space, not linear.
-    max_dist : float
-        Maximum radial distance of the volume limited sample.
-    to_log10 : bool
-        Whether to take a logarithm of base 10 of the feature. If so, then the
-        bins must also be logarithmic.
-    return_counts : bool, optional
-        Whether to also return number counts in each bin. By default `False`.
-
-
-    Returns
-    -------
-    centres : 1-dimensional array
-        Bin centres of shape `(n_edges - 1, )`. If `to_log10` then converts the
-        bin centres back to linear space.
-    nd : 1-dimensional array
-        Number density of shape `(n_edges - 1, )`.
-    nd_err : 1-dimensional array
-        Poissonian uncertainty of `nd` of shape `(n_edges - 1, )`.
-    counts: 1-dimensional array, optional
-        Counts in each bin of shape `(n_edges - 1, )`. Returned only if
-        `return_counts`.
-    """
-    # Extract the param and optionally convert to log10
-    x = data[feat]
-    x = numpy.log10(x) if to_log10 else x
-    # Get only things within distance from the origin
-    rdist = (data["peak_x"]**2 + data["peak_y"]**2 + data["peak_z"]**2)**0.5
-    x = x[rdist < max_dist]
-
-    # Make sure bins equally spaced
-    dbins = numpy.diff(bins)
-    dbin = dbins[0]
-    if not numpy.alltrue(dbins == dbin):
-        raise ValueError("Bins must be equally spaced. Currently `{}`."
-                         .format(bins))
-
-    # Encompassed volume around the origin
-    volume = 4 * numpy.pi / 3 * max_dist**3
-    # Poissonian statistics
-    bin_centres, counts = binned_counts(x, bins)
-    nd = counts / volume / dbin
-    nd_err = counts**0.5 / volume / dbin
-    # Convert bins to linear space if log10
-    if to_log10:
-        bin_centres = 10**bin_centres
-
-    out = (bin_centres, nd, nd_err)
-    if return_counts:
-        out += counts
-    return out
--- a/csiborgtools/read/box_units.py
+++ b/csiborgtools/read/box_units.py
@ -27,7 +27,7 @@ from .readsim import ParticleReader
 CONV_NAME = {
    "length": ["x", "y", "z", "peak_x", "peak_y", "peak_z", "Rs", "rmin",
               "rmax", "r200c", "r500c", "r200m", "x0", "y0", "z0",
-               "lagpatch"],
+               "lagpatch_size"],
    "velocity": ["vx", "vy", "vz"],
    "mass": ["mass_cl", "totpartmass", "m200c", "m500c", "mass_mmain", "M",
             "m200m"],
--- a/csiborgtools/read/halo_cat.py
+++ b/csiborgtools/read/halo_cat.py
@ -540,7 +540,7 @@ class HaloCatalogue(BaseCSiBORG):

        if not rawdata:
            if with_lagpatch:
-                self._data = self._data[numpy.isfinite(self['lagpatch'])]
+                self._data = self._data[numpy.isfinite(self["lagpatch_size"])]
            # Flip positions and convert from code units to cMpc. Convert M too
            flip_cols(self._data, "x", "z")
            for p in ("x", "y", "z"):
@ -551,7 +551,7 @@ class HaloCatalogue(BaseCSiBORG):
            self._data = self.box.convert_from_box(self._data, names)

            if load_initial:
-                names = ["x0", "y0", "z0", "lagpatch"]
+                names = ["x0", "y0", "z0", "lagpatch_size"]
                self._data = self.box.convert_from_box(self._data, names)

            if bounds is not None:
--- a/csiborgtools/read/overlap_summary.py
+++ b/csiborgtools/read/overlap_summary.py
@ -276,10 +276,10 @@ class PairOverlap:
        if norm_kind == "r200c":
            norm = self.cat0("r200c")
        if norm_kind == "ref_patch":
-            norm = self.cat0("lagpatch")
+            norm = self.cat0("lagpatch_size")
        if norm_kind == "sum_patch":
-            patch0 = self.cat0("lagpatch")
-            patchx = self.catx("lagpatch")
+            patch0 = self.cat0("lagpatch_size")
+            patchx = self.catx("lagpatch_size")
            norm = [None] * len(self)
            for i, ind in enumerate(self["match_indxs"]):
                norm[i] = patch0[i] + patchx[ind]
--- a/csiborgtools/read/paths.py
+++ b/csiborgtools/read/paths.py
@ -387,6 +387,28 @@ class Paths:
        fname = f"{kind}_{MAS}_{str(nsim).zfill(5)}_grid{grid}.npy"
        return join(fdir, fname)

+    def halo_counts(self, simname, nsim):
+        """
+        Path to the files containing the binned halo counts.
+
+        Parameters
+        ----------
+        simname : str
+            Simulation name. Must be `csiborg` or `quijote`.
+        nsim : int
+            IC realisation index.
+
+        Returns
+        -------
+        path : str
+        """
+        fdir = join(self.postdir, "HMF")
+        if not isdir(fdir):
+            makedirs(fdir)
+            warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
+        fname = f"halo_counts_{simname}_{str(nsim).zfill(5)}.npz"
+        return join(fdir, fname)
+
    def cross_nearest(self, simname, run, nsim=None, nobs=None):
        """
        Path to the files containing distance from a halo in a reference
--- a/scripts/fit_hmf.py
+++ b/scripts/fit_hmf.py
@ -0,0 +1,104 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Script to calculate the HMF for CSIBORG and Quijote haloes.
+"""
+from argparse import ArgumentParser
+from datetime import datetime
+from distutils.util import strtobool
+
+import numpy
+from mpi4py import MPI
+
+from taskmaster import work_delegation
+from utils import get_nsims
+
+try:
+    import csiborgtools
+except ModuleNotFoundError:
+    import sys
+
+    sys.path.append("../")
+    import csiborgtools
+
+
+def get_counts(nsim, bins, paths, parser_args):
+    """
+    Calculate and save the number of haloes in each mass bin.
+
+    Parameters
+    ----------
+    nsim : int
+        Simulation index.
+    bins : 1-dimensional array
+        Array of bin edges (in log10 mass).
+    paths : csiborgtools.read.Paths
+        Paths object.
+    parser_args : argparse.Namespace
+        Parsed command-line arguments.
+
+    Returns
+    -------
+    None
+    """
+    simname = parser_args.simname
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    bounds = {"dist": (0, parser_args.Rmax)}
+
+    if simname == "csiborg":
+        cat = csiborgtools.read.HaloCatalogue(nsim, paths, bounds=bounds)
+        logmass = numpy.log10(cat["totpartmass"])
+        counts = csiborgtools.fits.number_counts(logmass, bins)
+    elif simname == "quijote":
+        cat0 = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
+        nmax = int(cat0.box.boxsize // (2 * parser_args.Rmax))**3
+        counts = numpy.full((nmax, len(bins) - 1), numpy.nan,
+                            dtype=numpy.float32)
+
+        for nobs in range(nmax):
+            cat = cat0.pick_fiducial_observer(nobs, rmax=parser_args.Rmax)
+            logmass = numpy.log10(cat["group_mass"])
+            counts[nobs, :] = csiborgtools.fits.number_counts(logmass, bins)
+
+    fout = paths.halo_counts(simname, nsim)
+    if parser_args.verbose:
+        print(f"{datetime.now()}: saving halo counts to `{fout}`.")
+    numpy.savez(fout, counts=counts, bins=bins, rmax=parser_args.Rmax)
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
+                        help="Simulation name")
+    parser.add_argument("--nsims", type=int, nargs="+", default=None,
+                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
+    parser.add_argument("--Rmax", type=float, default=155/0.705,
+                        help="High-resolution region radius")
+    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
+                        default=False)
+
+    parser_args = parser.parse_args()
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    nproc = comm.Get_size()
+    verbose = nproc == 1
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    nsims = get_nsims(parser_args, paths)
+    bins = numpy.arange(11., 16., 0.2, dtype=numpy.float32)
+
+    def do_work(nsim):
+        get_counts(nsim, bins, paths, parser_args)
+
+    work_delegation(do_work, nsims, comm, master_verbose=parser_args.verbose)
--- a/scripts/fit_init.py
+++ b/scripts/fit_init.py
@ -56,13 +56,15 @@ cols_collect = [("index", numpy.int32),
                ("x", numpy.float32),
                ("y", numpy.float32),
                ("z", numpy.float32),
-                ("lagpatch", numpy.float32),]
+                ("lagpatch_size", numpy.float32),
+                ("lagpatch_ncells", numpy.int32),]


 # MPI loop over simulations
 jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
 for nsim in [ics[i] for i in jobs]:
    nsnap = max(paths.get_snapshots(nsim))
+    overlapper = csiborgtools.match.ParticleOverlap()
    print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
          flush=True)

@ -88,11 +90,16 @@ for nsim in [ics[i] for i in jobs]:
        if part is None or part.size < 100:
            continue

+        # Calculate the centre of mass and the Lagrangian patch size.
        dist, cm = csiborgtools.fits.dist_centmass(part)
        # We enforce a maximum patchsize of 0.075 in box coordinates.
        patchsize = min(numpy.percentile(dist, 99), 0.075)
        out["x"][i], out["y"][i], out["z"][i] = cm
-        out["lagpatch"][i] = patchsize
+        out["lagpatch_size"][i] = patchsize
+
+        # Calculate the number of cells with > 0 density.
+        delta = overlapper.make_delta(part[:, :3], part[:, 3], subbox=True)
+        out["lagpatch_ncells"][i] = csiborgtools.fits.delta2ncells(delta)

    out = out[ismain]
    # Now save it
--- a/scripts_plots/plot_data.py
+++ b/scripts_plots/plot_data.py
@ -0,0 +1,178 @@
+# Copyright (C) 2023 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+from os.path import join
+from argparse import ArgumentParser
+
+import matplotlib.pyplot as plt
+import numpy
+
+import scienceplots  # noqa
+import utils
+from cache_to_disk import cache_to_disk, delete_disk_caches_for_function  # noqa
+from tqdm import tqdm
+
+try:
+    import csiborgtools
+except ModuleNotFoundError:
+    import sys
+    sys.path.append("../")
+    import csiborgtools
+
+
+def open_csiborg(nsim):
+    """
+    Open a CSiBORG halo catalogue.
+    """
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    bounds = {"totpartmass": (None, None), "dist": (0, 155/0.705)}
+    return csiborgtools.read.HaloCatalogue(nsim, paths, bounds=bounds)
+
+
+def open_quijote(nsim, nobs=None):
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
+    if nobs is not None:
+        cat = cat.pick_fiducial_observer(nobs, rmax=155.5 / 0.705)
+    return cat
+
+
+def plot_mass_vs_ncells(nsim, pdf=False):
+    cat = open_csiborg(nsim)
+    mpart = 4.38304044e+09
+
+    with plt.style.context(utils.mplstyle):
+        plt.figure()
+        plt.scatter(cat["totpartmass"], cat["lagpatch_ncells"], s=0.25,
+                    rasterized=True)
+        plt.xscale("log")
+        plt.yscale("log")
+        for n in [1, 10, 100]:
+            plt.axvline(n * 512 * mpart, c="black", ls="--", zorder=0, lw=0.8)
+        plt.xlabel(r"$M_{\rm tot} / M_\odot$")
+        plt.ylabel(r"$N_{\rm cells}$")
+
+        for ext in ["png"] if pdf is False else ["png", "pdf"]:
+            fout = join(utils.fout, f"init_mass_vs_ncells_{nsim}.{ext}")
+            print(f"Saving to `{fout}`.")
+            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+        plt.close()
+
+
+def process_counts(counts):
+    mean = numpy.mean(counts, axis=0)
+    std = numpy.std(counts, axis=0)
+    return mean, std
+
+
+def plot_hmf(pdf=False):
+    print("Plotting the HMF...", flush=True)
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+
+    csiborg_nsims = paths.get_ics("csiborg")
+    print("Loading CSiBORG halo counts.", flush=True)
+    for i, nsim in enumerate(tqdm(csiborg_nsims)):
+        data = numpy.load(paths.halo_counts("csiborg", nsim))
+        if i == 0:
+            bins = data["bins"]
+            csiborg_counts = numpy.full((len(csiborg_nsims), len(bins) - 1),
+                                        numpy.nan, dtype=numpy.float32)
+        csiborg_counts[i, :] = data["counts"]
+    csiborg_counts /= numpy.diff(bins).reshape(1, -1)
+
+    print("Loading Quijote halo counts.", flush=True)
+    quijote_nsims = paths.get_ics("quijote")
+    for i, nsim in enumerate(tqdm(quijote_nsims)):
+        data = numpy.load(paths.halo_counts("quijote", nsim))
+        if i == 0:
+            bins = data["bins"]
+            nmax = data["counts"].shape[0]
+            quijote_counts = numpy.full(
+                (len(quijote_nsims) * nmax, len(bins) - 1), numpy.nan,
+                dtype=numpy.float32)
+        quijote_counts[i * nmax:(i + 1) * nmax, :] = data["counts"]
+    quijote_counts /= numpy.diff(bins).reshape(1, -1)
+
+    x = 10**(0.5 * (bins[1:] + bins[:-1]))
+    # Edit lower limits
+    csiborg_counts[:, x < 1e12] = numpy.nan
+    quijote_counts[:, x < 8e12] = numpy.nan
+    # Edit upper limits
+    csiborg_counts[:, x > 4e15] = numpy.nan
+    quijote_counts[:, x > 4e15] = numpy.nan
+
+    with plt.style.context(utils.mplstyle):
+        cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]
+        fig, ax = plt.subplots(nrows=2, sharex=True,
+                               figsize=(3.5, 2.625 * 1.25),
+                               gridspec_kw={"height_ratios": [1, 0.65]})
+        fig.subplots_adjust(hspace=0, wspace=0)
+
+        mean_csiborg, std_csiborg = process_counts(csiborg_counts)
+        ax[0].plot(x, mean_csiborg, label="CSiBORG")
+        ax[0].fill_between(x, mean_csiborg - std_csiborg,
+                           mean_csiborg + std_csiborg, alpha=0.5)
+
+        mean_quijote, std_quijote = process_counts(quijote_counts)
+        ax[0].plot(x, mean_quijote, label="Quijote")
+        ax[0].fill_between(x, mean_quijote - std_quijote,
+                           mean_quijote + std_quijote, alpha=0.5)
+
+        log_y = numpy.log10(mean_csiborg / mean_quijote)
+        err = numpy.sqrt((std_csiborg / mean_csiborg / numpy.log(10))**2
+                         + (std_quijote / mean_quijote / numpy.log(10))**2)
+
+        ax[1].plot(x, 10**log_y, c=cols[2])
+        ax[1].fill_between(x, 10**(log_y - err), 10**(log_y + err), alpha=0.5,
+                           color=cols[2])
+        ax[1].axhline(1, color="k", ls=plt.rcParams["lines.linestyle"],
+                      lw=0.5 * plt.rcParams["lines.linewidth"], zorder=0)
+        ax[0].set_ylabel(r"$\frac{\mathrm{d} n}{\mathrm{d}\log M_{\rm h}}~\mathrm{dex}^{-1}$")  # noqa
+        ax[1].set_xlabel(r"$M_{\rm h}$ [$M_\odot$]")
+        ax[1].set_ylabel(r"$\mathrm{CSiBORG} / \mathrm{Quijote}$")
+
+        ax[0].set_xscale("log")
+        ax[0].set_yscale("log")
+        ax[1].set_yscale("log")
+        ax[0].legend()
+
+        fig.tight_layout(h_pad=0, w_pad=0)
+        for ext in ["png"] if pdf is False else ["png", "pdf"]:
+            fout = join(utils.fout, f"hmf_comparison.{ext}")
+            print(f"Saving to `{fout}`.")
+            fig.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+        plt.close()
+
+
+###############################################################################
+#                        Command line interface                               #
+###############################################################################
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument('-c', '--clean', action='store_true')
+    args = parser.parse_args()
+
+    cached_funcs = []
+    if args.clean:
+        for func in cached_funcs:
+            print(f"Cleaning cache for function {func}.")
+            delete_disk_caches_for_function(func)
+
+    # plot_mass_vs_occupancy(7444)
+    # plot_mass_vs_normcells(7444 + 24 * 4, pdf=False)
+    # plot_mass_vs_ncells(7444, pdf=True)
+    plot_hmf(pdf=True)
--- a/scripts_plots/utils.py
+++ b/scripts_plots/utils.py
@ -15,4 +15,4 @@

 dpi = 450
 fout = "../plots/"
-mplstyle = ["science", "notebook"]
+mplstyle = ["science"]