Add plotting (#64)

* Add verbosity statements * More verbosity * Save masses too * Add CDF new plot * Blank line * Fix RVS sampling bug * Add R200 conversion * Simplify plotting routines * Remove imoprt
2025-07-02 04:21:12 +00:00 · 2023-05-27 00:08:39 +01:00 · 2023-05-27 00:08:39 +01:00 · f1dbe6f03f
commit f1dbe6f03f
parent 7c2d7a86f5
6 changed files with 123 additions and 12 deletions
--- a/csiborgtools/clustering/utils.py
+++ b/csiborgtools/clustering/utils.py
@ -67,7 +67,7 @@ class RVSinsphere(BaseRVS):
        gen = numpy.random.default_rng(random_state)
        # Spherical
        r = gen.random(nsamples, dtype=dtype)**(1 / 3) * self.R
-        theta = 2 * numpy.arcsin(gen.random(nsamples, dtype=dtype))
+        theta = numpy.arccos(1 - 2 * gen.random(nsamples, dtype=dtype))
        phi = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
        # Cartesian
        x = r * numpy.sin(theta) * numpy.cos(phi)
--- a/csiborgtools/read/init.py
+++ b/csiborgtools/read/init.py
@ -27,4 +27,4 @@ from .readsim import (MmainReader, ParticleReader, halfwidth_mask,  # noqa
                      load_clump_particles, load_parent_particles, read_initcm)
 from .tpcf_summary import TPCFReader  # noqa
 from .utils import (cartesian_to_radec, cols_to_structured,  # noqa
-                    radec_to_cartesian, read_h5, real2redshift)
+                    radec_to_cartesian, read_h5, real2redshift, M200_to_R200)
--- a/csiborgtools/read/overlap_summary.py
+++ b/csiborgtools/read/overlap_summary.py
@ -484,6 +484,8 @@ class NPairsOverlap:

    def __init__(self, cat0, catxs, paths, verbose=True):
        pairs = [None] * len(catxs)
+        if verbose:
+            print("Loading individual overlap objects...", flush=True)
        for i, catx in enumerate(tqdm(catxs) if verbose else catxs):
            pairs[i] = PairOverlap(cat0, catx, paths)

@ -506,6 +508,8 @@ class NPairsOverlap:
        summed_overlap : 2-dimensional array of shape `(nhalos, ncatxs)`
        """
        out = [None] * len(self)
+        if verbose:
+            print("Calculating summed overlap...", flush=True)
        for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
            out[i] = pair.summed_overlap(from_smoothed)
        return numpy.vstack(out).T
@ -527,6 +531,8 @@ class NPairsOverlap:
        prob_nomatch : 2-dimensional array of shape `(nhalos, ncatxs)`
        """
        out = [None] * len(self)
+        if verbose:
+            print("Calculating probability of no match...", flush=True)
        for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
            out[i] = pair.prob_nomatch(from_smoothed)
        return numpy.vstack(out).T
@ -568,6 +574,8 @@ class NPairsOverlap:
            Returned only if `return_full` is `True`.
        """
        mus, stds = [None] * len(self), [None] * len(self)
+        if verbose:
+            print("Calculating counterpart masses...", flush=True)
        for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
            mus[i], stds[i] = pair.counterpart_mass(
                from_smoothed=from_smoothed,
--- a/csiborgtools/read/utils.py
+++ b/csiborgtools/read/utils.py
@ -18,6 +18,7 @@ Various coordinate transformations.
 from os.path import isfile

 import numpy
+from astropy import units
 from h5py import File

 ###############################################################################
@ -135,6 +136,29 @@ def real2redshift(pos, vel, origin, box, in_box_units, periodic_wrap=True,
    return pos


+def M200_to_R200(M200, cosmo):
+    r"""
+    Convert :math:M_{200} to :math:`R_{200}`.
+
+    Parameters
+    ----------
+    M200 : float
+        :math:`M_{200}` in :math:`M_{\odot}`.
+    cosmo : astropy cosmology object
+        Cosmology.
+
+    Returns
+    -------
+    R200 : float
+        :math:`R_{200}` in :math:`\mathrm{Mpc}`.
+    """
+    Msun = 1.98847e30
+    M200 = 1e14 * Msun * units.kg
+    rhoc = cosmo.critical_density0
+    R200 = (M200 / (4 * numpy.pi / 3 * 200 * rhoc))**(1. / 3)
+    return R200.to(units.Mpc).value
+
+
 ###############################################################################
 #                          Array manipulation                                 #
 ###############################################################################
--- a/scripts/match_finsnap.py
+++ b/scripts/match_finsnap.py
@ -69,7 +69,7 @@ def find_neighbour(args, nsim, cats, paths, comm):
    if args.verbose:
        print(f"Rank {comm.Get_rank()} writing to `{fout}`.", flush=True)
    numpy.savez(fout, ndist=ndist, cross_hindxs=cross_hindxs, mass=mass,
-                rdist=rdist)
+                ref_hindxs=cat0["index"], rdist=rdist)


 if __name__ == "__main__":
--- a/scripts_plots/plot_match.py
+++ b/scripts_plots/plot_match.py
@ -56,14 +56,16 @@ def get_overlap(nsim0):
    cat0 = open_cat(nsim0)

    catxs = []
+    print("Opening catalogues...", flush=True)
    for nsimx in tqdm(nsimxs):
        catxs.append(open_cat(nsimx))

    reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths)
-    x = reader.cat0("totpartmass")
+    mass = reader.cat0("totpartmass")
+    hindxs = reader.cat0("index")
    summed_overlap = reader.summed_overlap(True)
    prob_nomatch = reader.prob_nomatch(True)
-    return x, summed_overlap, prob_nomatch
+    return mass, hindxs, summed_overlap, prob_nomatch


 def plot_summed_overlap(nsim0):
@ -71,7 +73,7 @@ def plot_summed_overlap(nsim0):
    Plot the summed overlap and probability of no matching for a single
    reference simulation as a function of the reference halo mass.
    """
-    x, summed_overlap, prob_nomatch = get_overlap(nsim0)
+    x, __, summed_overlap, prob_nomatch = get_overlap(nsim0)

    mean_overlap = numpy.mean(summed_overlap, axis=1)
    std_overlap = numpy.std(summed_overlap, axis=1)
@ -170,7 +172,7 @@ def make_ks(simname, run, nsim, nobs, kwargs):
    return reader.ks_significance(simname, run, nsim, cdf, nobs=nobs)


-def plot_dist(run, kind, kwargs):
+def plot_dist(run, kind, kwargs, r200):
    """
    Plot the PDF/CDF of the nearest neighbour distance for Quijote and CSiBORG.
    """
@ -179,6 +181,8 @@ def plot_dist(run, kind, kwargs):
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
    x = reader.bin_centres("neighbour")
+    if r200 is not None:
+        x /= r200

    y_quijote = read_dist("quijote", run, kind, kwargs)
    y_csiborg = read_dist("csiborg", run, kind, kwargs)
@ -196,7 +200,10 @@ def plot_dist(run, kind, kwargs):
            plt.plot(x, y_quijote[i], c="C0", label=label1)
            plt.plot(x, y_csiborg[i], c="C1", label=label2)
        plt.xlim(0, 75)
-        plt.xlabel(r"$r_{1\mathrm{NN}}~[\mathrm{Mpc}]$")
+        if r200 is None:
+            plt.xlabel(r"$r_{1\mathrm{NN}}~[\mathrm{Mpc}]$")
+        else:
+            plt.xlabel(r"$r_{1\mathrm{NN}} / R_{200c}$")
        if kind == "pdf":
            plt.ylabel(r"$p(r_{1\mathrm{NN}})$")
        else:
@ -308,6 +315,63 @@ def plot_kl_vs_ks(simname, run, nsim, nobs, kwargs):
        plt.close()


+def plot_kl_vs_overlap(run, nsim, kwargs):
+    """
+    Plot KL divergence vs overlap.
+    """
+    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
+    nn_reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
+    nn_data = nn_reader.read_single("csiborg", run, nsim, nobs=None)
+    nn_hindxs = nn_data["ref_hindxs"]
+
+    mass, overlap_hindxs, summed_overlap, prob_nomatch = get_overlap(nsim)
+
+    # We need to match the hindxs between the two.
+    hind2overlap_array = {hind: i for i, hind in enumerate(overlap_hindxs)}
+    mask = numpy.asanyarray([hind2overlap_array[hind] for hind in nn_hindxs])
+
+    summed_overlap = summed_overlap[mask]
+    prob_nomatch = prob_nomatch[mask]
+    mass = mass[mask]
+
+    kl = make_kl("csiborg", run, nsim, nobs=None, kwargs=kwargs)
+
+    with plt.style.context(utils.mplstyle):
+        plt.figure()
+        mu = numpy.mean(prob_nomatch, axis=1)
+        plt.scatter(kl, 1 - mu, c=numpy.log10(mass))
+        plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")
+        plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
+        plt.ylabel(r"$1 - \langle \eta^{\mathcal{B}}_a \rangle_{\mathcal{B}}$")
+
+        plt.tight_layout()
+        for ext in ["png"]:
+            fout = join(utils.fout, f"kl_vs_overlap_mean_{run}_{str(nsim).zfill(5)}.{ext}")  # noqa
+            print(f"Saving to `{fout}`.")
+            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+        plt.close()
+
+    with plt.style.context(utils.mplstyle):
+        plt.figure()
+        std = numpy.std(prob_nomatch, axis=1)
+        plt.scatter(kl, std, c=numpy.log10(mass))
+        plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")
+        plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
+        plt.ylabel(r"$\langle \left(\eta^{\mathcal{B}}_a - \langle \eta^{\mathcal{B}^\prime}_a \rangle_{\mathcal{B}^\prime}\right)^2\rangle_{\mathcal{B}}^{1/2}$")  # noqa
+
+        plt.tight_layout()
+        for ext in ["png"]:
+            fout = join(utils.fout, f"kl_vs_overlap_std_{run}_{str(nsim).zfill(5)}.{ext}")  # noqa
+            print(f"Saving to `{fout}`.")
+            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
+        plt.close()
+
+
+###############################################################################
+#                        Command line interface                               #
+###############################################################################
+
+
 if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument('-c', '--clean', action='store_true')
@ -320,15 +384,30 @@ if __name__ == "__main__":
            delete_disk_caches_for_function(func)

    neighbour_kwargs = {"rmax_radial": 155 / 0.705,
-                        "nbins_radial": 20,
+                        "nbins_radial": 50,
                        "rmax_neighbour": 100.,
                        "nbins_neighbour": 150,
                        "paths_kind": csiborgtools.paths_glamdring}
+    run = "mass003"
+
+    # plot_dist("mass003", "pdf", neighbour_kwargs)

    paths = csiborgtools.read.Paths(**neighbour_kwargs["paths_kind"])
    nn_reader = csiborgtools.read.NearestNeighbourReader(**neighbour_kwargs,
                                                         paths=paths)
-    run = "mass003"

-    # for ic in [7444, 8812, 9700]:
-    #     plot_summed_overlap(ic)
+    # sizes = numpy.full(2700, numpy.nan)
+    # from tqdm import trange
+    # k = 0
+    # for nsim in trange(100):
+    #     for nobs in range(27):
+    #         d = nn_reader.read_single("quijote", run, nsim, nobs)
+    #         sizes[k] = d["mass"].size
+
+    #         k += 1
+    # print(sizes)
+    # print(numpy.mean(sizes), numpy.std(sizes))
+
+    # plot_kl_vs_overlap("mass003", 7444, neighbour_kwargs)
+
+    # plot_cdf_r200("mass003", neighbour_kwargs)