Add plotting (#64)

* Add verbosity statements * More verbosity * Save masses too * Add CDF new plot * Blank line * Fix RVS sampling bug * Add R200 conversion * Simplify plotting routines * Remove imoprt
2025-06-09 02:11:12 +00:00 · 2023-05-27 00:08:39 +01:00 · 2023-05-27 00:08:39 +01:00 · f1dbe6f03f
commit f1dbe6f03f
parent 7c2d7a86f5
6 changed files with 123 additions and 12 deletions
--- a/csiborgtools/clustering/utils.py
+++ b/csiborgtools/clustering/utils.py
@ -67,7 +67,7 @@ class RVSinsphere(BaseRVS):
        gen = numpy.random.default_rng(random_state)
        # Spherical
        r = gen.random(nsamples, dtype=dtype)**(1 / 3) * self.R
-        theta = 2 * numpy.arcsin(gen.random(nsamples, dtype=dtype))
+        theta = numpy.arccos(1 - 2 * gen.random(nsamples, dtype=dtype))
        phi = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
        # Cartesian
        x = r * numpy.sin(theta) * numpy.cos(phi)
--- a/csiborgtools/read/init.py
+++ b/csiborgtools/read/init.py
@ -27,4 +27,4 @@ from .readsim import (MmainReader, ParticleReader, halfwidth_mask,  # noqa
                      load_clump_particles, load_parent_particles, read_initcm)
 from .tpcf_summary import TPCFReader  # noqa
 from .utils import (cartesian_to_radec, cols_to_structured,  # noqa
-                    radec_to_cartesian, read_h5, real2redshift)
+                    radec_to_cartesian, read_h5, real2redshift, M200_to_R200)
--- a/csiborgtools/read/overlap_summary.py
+++ b/csiborgtools/read/overlap_summary.py
@ -484,6 +484,8 @@ class NPairsOverlap:
    def __init__(self, cat0, catxs, paths, verbose=True):
        pairs = [None] * len(catxs)
        if verbose:
            print("Loading individual overlap objects...", flush=True)
        for i, catx in enumerate(tqdm(catxs) if verbose else catxs):
            pairs[i] = PairOverlap(cat0, catx, paths)
@ -506,6 +508,8 @@ class NPairsOverlap:
        summed_overlap : 2-dimensional array of shape `(nhalos, ncatxs)`
        """
        out = [None] * len(self)
        if verbose:
            print("Calculating summed overlap...", flush=True)
        for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
            out[i] = pair.summed_overlap(from_smoothed)
        return numpy.vstack(out).T
@ -527,6 +531,8 @@ class NPairsOverlap:
        prob_nomatch : 2-dimensional array of shape `(nhalos, ncatxs)`
        """
        out = [None] * len(self)
        if verbose:
            print("Calculating probability of no match...", flush=True)
        for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
            out[i] = pair.prob_nomatch(from_smoothed)
        return numpy.vstack(out).T
@ -568,6 +574,8 @@ class NPairsOverlap:
            Returned only if `return_full` is `True`.
        """
        mus, stds = [None] * len(self), [None] * len(self)
        if verbose:
            print("Calculating counterpart masses...", flush=True)
        for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
            mus[i], stds[i] = pair.counterpart_mass(
                from_smoothed=from_smoothed,
--- a/csiborgtools/read/utils.py
+++ b/csiborgtools/read/utils.py
@ -18,6 +18,7 @@ Various coordinate transformations.
 from os.path import isfile
 import numpy
 from astropy import units
 from h5py import File
 ###############################################################################
@ -135,6 +136,29 @@ def real2redshift(pos, vel, origin, box, in_box_units, periodic_wrap=True,
    return pos
 def M200_to_R200(M200, cosmo):
    r"""
    Convert :math:M_{200} to :math:`R_{200}`.
    Parameters
    ----------
    M200 : float
        :math:`M_{200}` in :math:`M_{\odot}`.
    cosmo : astropy cosmology object
        Cosmology.
    Returns
    -------
    R200 : float
        :math:`R_{200}` in :math:`\mathrm{Mpc}`.
    """
    Msun = 1.98847e30
    M200 = 1e14 * Msun * units.kg
    rhoc = cosmo.critical_density0
    R200 = (M200 / (4 * numpy.pi / 3 * 200 * rhoc))**(1. / 3)
    return R200.to(units.Mpc).value
 ###############################################################################
 #                          Array manipulation                                 #
 ###############################################################################
--- a/scripts/match_finsnap.py
+++ b/scripts/match_finsnap.py
@ -69,7 +69,7 @@ def find_neighbour(args, nsim, cats, paths, comm):
    if args.verbose:
        print(f"Rank {comm.Get_rank()} writing to `{fout}`.", flush=True)
    numpy.savez(fout, ndist=ndist, cross_hindxs=cross_hindxs, mass=mass,
-                rdist=rdist)
+                ref_hindxs=cat0["index"], rdist=rdist)
 if __name__ == "__main__":
--- a/scripts_plots/plot_match.py
+++ b/scripts_plots/plot_match.py
@ -56,14 +56,16 @@ def get_overlap(nsim0):
    cat0 = open_cat(nsim0)
    catxs = []
    print("Opening catalogues...", flush=True)
    for nsimx in tqdm(nsimxs):
        catxs.append(open_cat(nsimx))
    reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths)
-    x = reader.cat0("totpartmass")
+    mass = reader.cat0("totpartmass")
    hindxs = reader.cat0("index")
    summed_overlap = reader.summed_overlap(True)
    prob_nomatch = reader.prob_nomatch(True)
-    return x, summed_overlap, prob_nomatch
+    return mass, hindxs, summed_overlap, prob_nomatch
 def plot_summed_overlap(nsim0):
@ -71,7 +73,7 @@ def plot_summed_overlap(nsim0):
    Plot the summed overlap and probability of no matching for a single
    reference simulation as a function of the reference halo mass.
    """
-    x, summed_overlap, prob_nomatch = get_overlap(nsim0)
+    x, __, summed_overlap, prob_nomatch = get_overlap(nsim0)
    mean_overlap = numpy.mean(summed_overlap, axis=1)
    std_overlap = numpy.std(summed_overlap, axis=1)
@ -170,7 +172,7 @@ def make_ks(simname, run, nsim, nobs, kwargs):
    return reader.ks_significance(simname, run, nsim, cdf, nobs=nobs)
-def plot_dist(run, kind, kwargs):
+def plot_dist(run, kind, kwargs, r200):
    """
    Plot the PDF/CDF of the nearest neighbour distance for Quijote and CSiBORG.
    """
@ -179,6 +181,8 @@ def plot_dist(run, kind, kwargs):
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
    x = reader.bin_centres("neighbour")
    if r200 is not None:
        x /= r200
    y_quijote = read_dist("quijote", run, kind, kwargs)
    y_csiborg = read_dist("csiborg", run, kind, kwargs)
@ -196,7 +200,10 @@ def plot_dist(run, kind, kwargs):
            plt.plot(x, y_quijote[i], c="C0", label=label1)
            plt.plot(x, y_csiborg[i], c="C1", label=label2)
        plt.xlim(0, 75)
-        plt.xlabel(r"$r_{1\mathrm{NN}}~[\mathrm{Mpc}]$")
+        if r200 is None:
            plt.xlabel(r"$r_{1\mathrm{NN}}~[\mathrm{Mpc}]$")
        else:
            plt.xlabel(r"$r_{1\mathrm{NN}} / R_{200c}$")
        if kind == "pdf":
            plt.ylabel(r"$p(r_{1\mathrm{NN}})$")
        else:
@ -308,6 +315,63 @@ def plot_kl_vs_ks(simname, run, nsim, nobs, kwargs):
        plt.close()
 def plot_kl_vs_overlap(run, nsim, kwargs):
    """
    Plot KL divergence vs overlap.
    """
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    nn_reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
    nn_data = nn_reader.read_single("csiborg", run, nsim, nobs=None)
    nn_hindxs = nn_data["ref_hindxs"]
    mass, overlap_hindxs, summed_overlap, prob_nomatch = get_overlap(nsim)
    # We need to match the hindxs between the two.
    hind2overlap_array = {hind: i for i, hind in enumerate(overlap_hindxs)}
    mask = numpy.asanyarray([hind2overlap_array[hind] for hind in nn_hindxs])
    summed_overlap = summed_overlap[mask]
    prob_nomatch = prob_nomatch[mask]
    mass = mass[mask]
    kl = make_kl("csiborg", run, nsim, nobs=None, kwargs=kwargs)
    with plt.style.context(utils.mplstyle):
        plt.figure()
        mu = numpy.mean(prob_nomatch, axis=1)
        plt.scatter(kl, 1 - mu, c=numpy.log10(mass))
        plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")
        plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
        plt.ylabel(r"$1 - \langle \eta^{\mathcal{B}}_a \rangle_{\mathcal{B}}$")
        plt.tight_layout()
        for ext in ["png"]:
            fout = join(utils.fout, f"kl_vs_overlap_mean_{run}_{str(nsim).zfill(5)}.{ext}")  # noqa
            print(f"Saving to `{fout}`.")
            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
        plt.close()
    with plt.style.context(utils.mplstyle):
        plt.figure()
        std = numpy.std(prob_nomatch, axis=1)
        plt.scatter(kl, std, c=numpy.log10(mass))
        plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")
        plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
        plt.ylabel(r"$\langle \left(\eta^{\mathcal{B}}_a - \langle \eta^{\mathcal{B}^\prime}_a \rangle_{\mathcal{B}^\prime}\right)^2\rangle_{\mathcal{B}}^{1/2}$")  # noqa
        plt.tight_layout()
        for ext in ["png"]:
            fout = join(utils.fout, f"kl_vs_overlap_std_{run}_{str(nsim).zfill(5)}.{ext}")  # noqa
            print(f"Saving to `{fout}`.")
            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
        plt.close()
 ###############################################################################
 #                        Command line interface                               #
 ###############################################################################
 if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument('-c', '--clean', action='store_true')
@ -320,15 +384,30 @@ if __name__ == "__main__":
            delete_disk_caches_for_function(func)
    neighbour_kwargs = {"rmax_radial": 155 / 0.705,
-                        "nbins_radial": 20,
+                        "nbins_radial": 50,
                        "rmax_neighbour": 100.,
                        "nbins_neighbour": 150,
                        "paths_kind": csiborgtools.paths_glamdring}
    run = "mass003"
    # plot_dist("mass003", "pdf", neighbour_kwargs)
    paths = csiborgtools.read.Paths(**neighbour_kwargs["paths_kind"])
    nn_reader = csiborgtools.read.NearestNeighbourReader(**neighbour_kwargs,
                                                         paths=paths)
    run = "mass003"
-    # for ic in [7444, 8812, 9700]:
+    # sizes = numpy.full(2700, numpy.nan)
-    #     plot_summed_overlap(ic)
+    # from tqdm import trange
    # k = 0
    # for nsim in trange(100):
    #     for nobs in range(27):
    #         d = nn_reader.read_single("quijote", run, nsim, nobs)
    #         sizes[k] = d["mass"].size
    #         k += 1
    # print(sizes)
    # print(numpy.mean(sizes), numpy.std(sizes))
    # plot_kl_vs_overlap("mass003", 7444, neighbour_kwargs)
    # plot_cdf_r200("mass003", neighbour_kwargs)