mirror of
https://github.com/Richard-Sti/csiborgtools.git
synced 2025-01-09 04:04:15 +00:00
f1dbe6f03f
* Add verbosity statements * More verbosity * Save masses too * Add CDF new plot * Blank line * Fix RVS sampling bug * Add R200 conversion * Simplify plotting routines * Remove imoprt
413 lines
15 KiB
Python
413 lines
15 KiB
Python
# Copyright (C) 2023 Richard Stiskalek
|
|
# This program is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by the
|
|
# Free Software Foundation; either version 3 of the License, or (at your
|
|
# option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
|
# Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
from os.path import join
|
|
from argparse import ArgumentParser
|
|
|
|
import matplotlib.pyplot as plt
|
|
import numpy
|
|
|
|
import scienceplots # noqa
|
|
import utils
|
|
from cache_to_disk import cache_to_disk, delete_disk_caches_for_function
|
|
from tqdm import tqdm
|
|
|
|
try:
|
|
import csiborgtools
|
|
except ModuleNotFoundError:
|
|
import sys
|
|
sys.path.append("../")
|
|
import csiborgtools
|
|
|
|
|
|
###############################################################################
|
|
# IC overlap plotting #
|
|
###############################################################################
|
|
|
|
def open_cat(nsim):
    """
    Open the CSiBORG halo catalogue of simulation `nsim`.

    The catalogue is read through the `paths_glamdring` paths and is restricted
    with the bound `"totpartmass": (1e12, None)`.
    """
    mass_bounds = {"totpartmass": (1e12, None)}
    glamdring_paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    return csiborgtools.read.HaloCatalogue(nsim, glamdring_paths,
                                           bounds=mass_bounds)
|
|
|
|
|
|
@cache_to_disk(7)
def get_overlap(nsim0):
    """
    Read the summed overlap and the probability of no match between the
    reference simulation `nsim0` and each of its cross simulations.

    Returns the tuple `(mass, hindxs, summed_overlap, prob_nomatch)`. The first
    two entries are the `totpartmass` and `index` fields of the reference
    catalogue. The result is memoised through the `cache_to_disk` decorator.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    cross_nsims = csiborgtools.read.get_cross_sims(nsim0, paths, smoothed=True)
    ref_cat = open_cat(nsim0)

    print("Opening catalogues...", flush=True)
    cross_cats = [open_cat(nsimx) for nsimx in tqdm(cross_nsims)]

    reader = csiborgtools.read.NPairsOverlap(ref_cat, cross_cats, paths)
    mass = reader.cat0("totpartmass")
    hindxs = reader.cat0("index")
    summed_overlap = reader.summed_overlap(True)
    prob_nomatch = reader.prob_nomatch(True)
    return mass, hindxs, summed_overlap, prob_nomatch
|
|
|
|
|
|
def plot_summed_overlap(nsim0):
    """
    Plot overlap statistics of the reference simulation `nsim0`.

    Three figures are produced: the mean and the standard deviation of the
    summed overlap against the reference halo mass, and the mean probability of
    no match against one minus the mean summed overlap. Statistics are taken
    along axis 1 of the arrays returned by `get_overlap`, and haloes whose mean
    summed overlap is not positive are dropped. Each figure is saved into
    `utils.fout` as both PNG and PDF.
    """
    mass, __, summed_overlap, prob_nomatch = get_overlap(nsim0)

    mean_overlap = numpy.mean(summed_overlap, axis=1)
    std_overlap = numpy.std(summed_overlap, axis=1)
    mean_prob_nomatch = numpy.mean(prob_nomatch, axis=1)
    # std_prob_nomatch = numpy.std(prob_nomatch, axis=1)

    # Keep only the haloes with a positive mean summed overlap.
    keep = mean_overlap > 0
    mass = mass[keep]
    mean_overlap = mean_overlap[keep]
    std_overlap = std_overlap[keep]
    mean_prob_nomatch = mean_prob_nomatch[keep]

    def save_and_close(stem):
        # Write the current figure as `<stem>_<nsim0>.<ext>` and close it.
        plt.tight_layout()
        for ext in ["png", "pdf"]:
            fout = join(utils.fout, f"{stem}_{nsim0}.{ext}")
            print(f"Saving to `{fout}`.")
            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
        plt.close()

    # Mean and standard deviation of the summed overlap share the same layout.
    hexbin_panels = [
        (mean_overlap,
         r"$\langle \mathcal{O}_{a}^{\mathcal{A} \mathcal{B}} \rangle_{\mathcal{B}}$",  # noqa
         "overlap_mean"),
        (std_overlap,
         r"$\delta \left( \mathcal{O}_{a}^{\mathcal{A} \mathcal{B}} \right)_{\mathcal{B}}$",  # noqa
         "overlap_std"),
    ]
    for values, ylabel, stem in hexbin_panels:
        with plt.style.context(utils.mplstyle):
            plt.figure()
            plt.hexbin(mass, values, mincnt=1, xscale="log", bins="log",
                       gridsize=50)
            plt.colorbar(label="Counts in bins")
            plt.xlabel(r"$M_{\rm tot} / M_\odot$")
            plt.ylabel(ylabel)
            plt.ylim(0., 1.)
            save_and_close(stem)

    # 1 - mean summed overlap vs mean prob nomatch
    with plt.style.context(utils.mplstyle):
        plt.figure()
        plt.scatter(1 - mean_overlap, mean_prob_nomatch,
                    c=numpy.log10(mass), s=2, rasterized=True)
        plt.colorbar(label=r"$\log_{10} M_{\rm halo} / M_\odot$")

        # Dashed one-to-one reference line.
        diagonal = numpy.linspace(0.3, 1, 100)
        plt.plot(diagonal, diagonal, color="red", linestyle="--")

        plt.xlabel(r"$1 - \langle \mathcal{O}_a^{\mathcal{A} \mathcal{B}} \rangle_{\mathcal{B}}$")  # noqa
        plt.ylabel(r"$\langle \eta_a^{\mathcal{A} \mathcal{B}} \rangle_{\mathcal{B}}$")  # noqa
        save_and_close("overlap_vs_prob_nomatch")
|
|
|
|
|
|
###############################################################################
|
|
# Nearest neighbour plotting #
|
|
###############################################################################
|
|
|
|
|
|
@cache_to_disk(7)
def read_dist(simname, run, kind, kwargs):
    """
    Build the `kind` nearest neighbour distribution of `simname` for `run`.

    `kwargs` is forwarded in full to `NearestNeighbourReader`, and its
    `paths_kind` entry is additionally used to construct the paths object. The
    result is memoised through the `cache_to_disk` decorator.
    """
    nn_paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    nn_reader = csiborgtools.read.NearestNeighbourReader(**kwargs,
                                                         paths=nn_paths)
    return nn_reader.build_dist(simname, run, kind, verbose=True)
|
|
|
|
|
|
@cache_to_disk(7)
def make_kl(simname, run, nsim, nobs, kwargs):
    """
    Evaluate `NearestNeighbourReader.kl_divergence` for a single simulation.

    The distribution passed to it is always the Quijote PDF of `run`,
    whichever `simname` is requested. The result is memoised through the
    `cache_to_disk` decorator.
    """
    nn_paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    nn_reader = csiborgtools.read.NearestNeighbourReader(**kwargs,
                                                         paths=nn_paths)

    quijote_pdf = read_dist("quijote", run, "pdf", kwargs)
    return nn_reader.kl_divergence(simname, run, nsim, quijote_pdf, nobs=nobs)
|
|
|
|
|
|
@cache_to_disk(7)
def make_ks(simname, run, nsim, nobs, kwargs):
    """
    Evaluate `NearestNeighbourReader.ks_significance` for a single simulation.

    The distribution passed to it is always the Quijote CDF of `run`,
    whichever `simname` is requested. The result is memoised through the
    `cache_to_disk` decorator.
    """
    nn_paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    nn_reader = csiborgtools.read.NearestNeighbourReader(**kwargs,
                                                         paths=nn_paths)

    quijote_cdf = read_dist("quijote", run, "cdf", kwargs)
    return nn_reader.ks_significance(simname, run, nsim, quijote_cdf,
                                     nobs=nobs)
|
|
|
|
|
|
def plot_dist(run, kind, kwargs, r200):
    """
    Plot the nearest neighbour distance PDF or CDF of Quijote and CSiBORG.

    `kind` must be either `"pdf"` or `"cdf"`. If `r200` is not `None` the bin
    centres are divided by it and the x-axis is labelled in units of R200c,
    otherwise it is labelled in Mpc. The figure is saved into `utils.fout` as
    `1nn_{kind}_{run}.png`.
    """
    assert kind in ["pdf", "cdf"]
    print(f"Plotting the {kind}.", flush=True)
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
    x = reader.bin_centres("neighbour")
    if r200 is not None:
        x /= r200

    y_quijote = read_dist("quijote", run, kind, kwargs)
    y_csiborg = read_dist("csiborg", run, kind, kwargs)
    ncurves = y_csiborg.shape[0]

    if r200 is None:
        xlabel = r"$r_{1\mathrm{NN}}~[\mathrm{Mpc}]$"
    else:
        xlabel = r"$r_{1\mathrm{NN}} / R_{200c}$"

    with plt.style.context(utils.mplstyle):
        plt.figure()
        for i in range(ncurves):
            # Only the first pair of curves carries the legend entries.
            first = i == 0
            plt.plot(x, y_quijote[i], c="C0",
                     label="Quijote" if first else None)
            plt.plot(x, y_csiborg[i], c="C1",
                     label="CSiBORG" if first else None)
        plt.xlim(0, 75)
        plt.xlabel(xlabel)

        if kind == "pdf":
            plt.ylabel(r"$p(r_{1\mathrm{NN}})$")
        else:
            plt.ylabel(r"$\mathrm{CDF}(r_{1\mathrm{NN}})$")
            plt.ylim(0, 1)

        plt.legend()
        plt.tight_layout()
        for ext in ["png"]:
            fout = join(utils.fout, f"1nn_{kind}_{run}.{ext}")
            print(f"Saving to `{fout}`.")
            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
        plt.close()
|
|
|
|
|
|
def plot_significance_hist(simname, run, nsim, nobs, kind, kwargs):
    """
    Plot a histogram of the significance of the 1NN distance.

    `kind` selects the statistic: `"kl"` histograms the output of `make_kl`
    and `"ks"` histograms the base-10 logarithm of the output of `make_ks`.
    Non-finite values are removed before plotting. The figure is saved into
    `utils.fout` as
    `significance_{kind}_{simname}_{run}_{nsim}.png`, with `nsim` zero-padded
    to five characters. For Quijote, `nsim` in the file name is the value
    returned by `paths.quijote_fiducial_nsim(nsim, nobs)`.
    """
    assert kind in ["kl", "ks"]
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    if kind == "kl":
        x = make_kl(simname, run, nsim, nobs, kwargs)
    else:
        x = make_ks(simname, run, nsim, nobs, kwargs)
        # The KS axis is labelled as a log p-value, so only it is logged.
        x = numpy.log10(x)
    x = x[numpy.isfinite(x)]

    # Resolve the index used in the file name once, outside of the extension
    # loop, so that it is never mapped a second time if more extensions are
    # added.
    if simname == "quijote":
        nsim_tag = paths.quijote_fiducial_nsim(nsim, nobs)
    else:
        nsim_tag = nsim

    with plt.style.context(utils.mplstyle):
        plt.figure()
        plt.hist(x, bins="auto")

        if kind == "ks":
            plt.xlabel(r"$\log p$-value of $r_{1\mathrm{NN}}$ distribution")
        else:
            plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
        plt.ylabel(r"Counts")
        plt.tight_layout()

        for ext in ["png"]:
            fout = join(utils.fout, f"significance_{kind}_{simname}_{run}_{str(nsim_tag).zfill(5)}.{ext}")  # noqa
            print(f"Saving to `{fout}`.")
            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
        plt.close()
|
|
|
|
|
|
def plot_significance_mass(simname, run, nsim, nobs, kind, kwargs):
    """
    Plot significance of the 1NN distance as a function of the total mass.

    The x-axis is the `"mass"` field read by
    `NearestNeighbourReader.read_single`. The y-axis is the output of
    `make_kl` for `kind="kl"` or of `make_ks` for `kind="ks"`; in the latter
    case the y-axis is logarithmic. The figure is saved into `utils.fout` as
    `significance_vs_mass_{kind}_{simname}_{run}_{nsim}.png`, with `nsim`
    zero-padded to five characters. For Quijote, `nsim` in the file name is
    the value returned by `paths.quijote_fiducial_nsim(nsim, nobs)`.
    """
    assert kind in ["kl", "ks"]
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)

    x = reader.read_single(simname, run, nsim, nobs)["mass"]
    if kind == "kl":
        y = make_kl(simname, run, nsim, nobs, kwargs)
    else:
        y = make_ks(simname, run, nsim, nobs, kwargs)

    # Resolve the index used in the file name once, outside of the extension
    # loop, so that it is never mapped a second time if more extensions are
    # added.
    if simname == "quijote":
        nsim_tag = paths.quijote_fiducial_nsim(nsim, nobs)
    else:
        nsim_tag = nsim

    with plt.style.context(utils.mplstyle):
        plt.figure()
        plt.scatter(x, y)

        plt.xscale("log")
        plt.xlabel(r"$M_{\rm tot} / M_\odot$")
        if kind == "ks":
            plt.ylabel(r"$p$-value of $r_{1\mathrm{NN}}$ distribution")
            plt.yscale("log")
        else:
            plt.ylabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")

        plt.tight_layout()
        for ext in ["png"]:
            fout = join(utils.fout, f"significance_vs_mass_{kind}_{simname}_{run}_{str(nsim_tag).zfill(5)}.{ext}")  # noqa
            print(f"Saving to `{fout}`.")
            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
        plt.close()
|
|
|
|
|
|
def plot_kl_vs_ks(simname, run, nsim, nobs, kwargs):
    """
    Plot Kullback-Leibler divergence vs Kolmogorov-Smirnov statistic p-value.

    The points are the outputs of `make_kl` (x-axis) and `make_ks`
    (logarithmic y-axis), coloured by the base-10 logarithm of the `"mass"`
    field read by `NearestNeighbourReader.read_single`. The figure is saved
    into `utils.fout` as a PNG. For Quijote, `nsim` in the file name is the
    value returned by `paths.quijote_fiducial_nsim(nsim, nobs)`.
    """
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)

    x = reader.read_single(simname, run, nsim, nobs)["mass"]
    y_kl = make_kl(simname, run, nsim, nobs, kwargs)
    y_ks = make_ks(simname, run, nsim, nobs, kwargs)

    # Resolve the index used in the file name once, outside of the extension
    # loop, so that it is never mapped a second time if more extensions are
    # added.
    if simname == "quijote":
        nsim_tag = paths.quijote_fiducial_nsim(nsim, nobs)
    else:
        nsim_tag = nsim

    with plt.style.context(utils.mplstyle):
        plt.figure()
        plt.scatter(y_kl, y_ks, c=numpy.log10(x))
        plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")

        plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
        plt.ylabel(r"$p$-value of $r_{1\mathrm{NN}}$ distribution")
        plt.yscale("log")

        plt.tight_layout()
        for ext in ["png"]:
            # NOTE(review): there is no separator between `kl_vs_ks` and the
            # simulation name, unlike the other output files. Kept as is so
            # that existing output names do not change -- confirm intent.
            fout = join(utils.fout, f"kl_vs_ks{simname}_{run}_{str(nsim_tag).zfill(5)}.{ext}")  # noqa
            print(f"Saving to `{fout}`.")
            plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
        plt.close()
|
|
|
|
|
|
def plot_kl_vs_overlap(run, nsim, kwargs):
    """
    Plot the CSiBORG KL divergence of `run` against overlap statistics.

    Two figures are saved into `utils.fout` as PNG: the KL divergence against
    one minus the mean probability of no match, and against the standard
    deviation of the probability of no match. Both statistics are taken along
    axis 1 and the points are coloured by the base-10 logarithm of the mass
    returned by `get_overlap`.
    """
    paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
    nn_reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
    nn_data = nn_reader.read_single("csiborg", run, nsim, nobs=None)
    nn_hindxs = nn_data["ref_hindxs"]

    mass, overlap_hindxs, summed_overlap, prob_nomatch = get_overlap(nsim)

    # Reorder the overlap arrays so that their rows follow the halo indices of
    # the nearest neighbour data.
    row_of_hind = {hind: row for row, hind in enumerate(overlap_hindxs)}
    rows = numpy.asanyarray([row_of_hind[hind] for hind in nn_hindxs])

    summed_overlap = summed_overlap[rows]
    prob_nomatch = prob_nomatch[rows]
    mass = mass[rows]

    kl = make_kl("csiborg", run, nsim, nobs=None, kwargs=kwargs)

    def scatter_against_kl(values, ylabel, stem):
        # Scatter `values` against the KL divergence and save the figure.
        with plt.style.context(utils.mplstyle):
            plt.figure()
            plt.scatter(kl, values, c=numpy.log10(mass))
            plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")
            plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
            plt.ylabel(ylabel)

            plt.tight_layout()
            for ext in ["png"]:
                fout = join(utils.fout, f"{stem}_{run}_{str(nsim).zfill(5)}.{ext}")  # noqa
                print(f"Saving to `{fout}`.")
                plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
            plt.close()

    scatter_against_kl(
        1 - numpy.mean(prob_nomatch, axis=1),
        r"$1 - \langle \eta^{\mathcal{B}}_a \rangle_{\mathcal{B}}$",
        "kl_vs_overlap_mean")

    scatter_against_kl(
        numpy.std(prob_nomatch, axis=1),
        r"$\langle \left(\eta^{\mathcal{B}}_a - \langle \eta^{\mathcal{B}^\prime}_a \rangle_{\mathcal{B}^\prime}\right)^2\rangle_{\mathcal{B}}^{1/2}$",  # noqa
        "kl_vs_overlap_std")
|
|
|
|
|
|
###############################################################################
|
|
# Command line interface #
|
|
###############################################################################
|
|
|
|
|
|
if __name__ == "__main__":
    # Command line: `-c/--clean` wipes the disk caches of the cached functions
    # before anything else is done.
    parser = ArgumentParser()
    parser.add_argument("-c", "--clean", action="store_true")
    args = parser.parse_args()

    cached_funcs = ["get_overlap", "read_dist", "make_kl", "make_ks"]
    if args.clean:
        for func in cached_funcs:
            print(f"Cleaning cache for function {func}.")
            delete_disk_caches_for_function(func)

    # Settings shared by the nearest neighbour readers.
    neighbour_kwargs = dict(
        rmax_radial=155 / 0.705,
        nbins_radial=50,
        rmax_neighbour=100.,
        nbins_neighbour=150,
        paths_kind=csiborgtools.paths_glamdring,
    )
    run = "mass003"

    # plot_dist("mass003", "pdf", neighbour_kwargs)

    paths = csiborgtools.read.Paths(**neighbour_kwargs["paths_kind"])
    nn_reader = csiborgtools.read.NearestNeighbourReader(
        **neighbour_kwargs, paths=paths)

    # sizes = numpy.full(2700, numpy.nan)
    # from tqdm import trange
    # k = 0
    # for nsim in trange(100):
    #     for nobs in range(27):
    #         d = nn_reader.read_single("quijote", run, nsim, nobs)
    #         sizes[k] = d["mass"].size
    #
    #         k += 1
    # print(sizes)
    # print(numpy.mean(sizes), numpy.std(sizes))

    # plot_kl_vs_overlap("mass003", 7444, neighbour_kwargs)

    # plot_cdf_r200("mass003", neighbour_kwargs)
|