Better plots (#73)

* Edits paths of saved files

* Add upper threshold options

* Add upper threshold options

* add latex_float option

* Add weighted stats

* add new plot
This commit is contained in:
Richard Stiskalek 2023-06-28 15:22:42 +01:00 committed by GitHub
parent de7def61d5
commit fbf9c2a4b7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 225 additions and 39 deletions

View file

@ -17,8 +17,7 @@ from csiborgtools import clustering, field, fits, match, read # noqa
# Arguments to csiborgtools.read.Paths.
paths_glamdring = {"srcdir": "/mnt/extraspace/hdesmond/",
"postdir": "/mnt/extraspace/rstiskalek/CSiBORG/",
"BORG_dir": "/mnt/extraspace/rstiskalek/BORG/",
"BORG_final_density": "/users/hdesmond/BORG_final/",
"borg_dir": "/users/hdesmond/BORG_final/",
"quijote_dir": "/mnt/extraspace/rstiskalek/Quijote",
}

View file

@ -19,12 +19,14 @@ from .knn_summary import kNNCDFReader # noqa
from .nearest_neighbour_summary import NearestNeighbourReader # noqa
from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups)
from .overlap_summary import weighted_stats # noqa
from .overlap_summary import (NPairsOverlap, PairOverlap, # noqa
binned_resample_mean, get_cross_sims)
binned_resample_mean, get_cross_sims) # noqa
from .paths import Paths # noqa
from .pk_summary import PKReader # noqa
from .readsim import (MmainReader, ParticleReader, halfwidth_mask, # noqa
load_clump_particles, load_parent_particles, read_initcm)
from .tpcf_summary import TPCFReader # noqa
from .utils import (cartesian_to_radec, cols_to_structured, # noqa
radec_to_cartesian, read_h5, real2redshift, M200_to_R200)
from .utils import (M200_to_R200, cartesian_to_radec, # noqa
cols_to_structured, radec_to_cartesian, read_h5,
real2redshift)

View file

@ -19,7 +19,7 @@ from functools import lru_cache
from os.path import isfile
import numpy
from tqdm import tqdm
from tqdm import tqdm, trange
###############################################################################
# Overlap of two simulations #
@ -293,7 +293,6 @@ class PairOverlap:
if norm_kind is not None:
dist[i] /= norm[i]
return numpy.array(dist, dtype=object)
def mass_ratio(self, mass_kind="totpartmass", in_log=True, in_abs=True):
@ -406,7 +405,7 @@ class PairOverlap:
vals = self.cat0(par)
out = [None] * len(self)
for i, ind in enumerate(self["match_indxs"]):
out[i] = numpy.ones(ind.size) * vals[i]
out[i] = numpy.ones(len(ind)) * vals[i]
return numpy.array(out, dtype=object)
def cat0(self, key=None, index=None):
@ -459,6 +458,43 @@ class PairOverlap:
return self["match_indxs"].size
def weighted_stats(x, weights, min_weight=0, verbose=False):
"""
Calculate the weighted mean and standard deviation of `x` using `weights`
for each array of `x`.
Parameters
----------
x : array of arrays
Array of arrays of values to calculate the weighted mean and standard
deviation for.
weights : array of arrays
Array of arrays of weights to use for the calculation.
min_weight : float, optional
Minimum weight required for a value to be included in the calculation.
verbose : bool, optional
Verbosity flag.
Returns
-------
stat : 2-dimensional array of shape `(len(x), 2)`
The first column is the weighted mean and the second column is the
weighted standard deviation.
"""
out = numpy.full((x.size, 2), numpy.nan, dtype=numpy.float32)
for i in trange(len(x)) if verbose else range(len(x)):
x_, w_ = numpy.asarray(x[i]), numpy.asarray(weights[i])
mask = w_ > min_weight
x_ = x_[mask]
w_ = w_[mask]
if len(w_) == 0:
continue
out[i, 0] = numpy.average(x_, weights=w_)
out[i, 1] = numpy.average((x_ - out[i, 0])**2, weights=w_)**0.5
return out
###############################################################################
# Overlap of many pairs of simulations. #
###############################################################################

View file

@ -14,6 +14,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from argparse import ArgumentParser
from gc import collect
from os.path import join
import matplotlib as mpl
@ -21,6 +22,7 @@ import matplotlib.pyplot as plt
import numpy
import scienceplots # noqa
from cache_to_disk import cache_to_disk, delete_disk_caches_for_function
from scipy.stats import kendalltau
from tqdm import tqdm
import plt_utils
@ -88,6 +90,7 @@ def get_overlap(nsim0):
reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths)
mass = reader.cat0("totpartmass")
hindxs = reader.cat0("index")
summed_overlap = reader.summed_overlap(True)
prob_nomatch = reader.prob_nomatch(True)
@ -96,7 +99,7 @@ def get_overlap(nsim0):
def plot_summed_overlap_vs_mass(nsim0):
"""
Plot the summer overlap of probaiblity of no matching for a single
Plot the summed overlap of probability of no matching for a single
reference simulations as a function of the reference halo mass, along with
their comparison.
@ -110,6 +113,8 @@ def plot_summed_overlap_vs_mass(nsim0):
None
"""
x, __, summed_overlap, prob_nomatch = get_overlap(nsim0)
del __
collect()
mean_overlap = numpy.mean(summed_overlap, axis=1)
std_overlap = numpy.std(summed_overlap, axis=1)
@ -178,6 +183,63 @@ def plot_summed_overlap_vs_mass(nsim0):
plt.close()
def plot_mass_vs_separation(nsim0, nsimx, min_overlap=0.0):
"""
Plot the mass of a reference halo against the weighted separation of
its counterparts.
Parameters
----------
nsim0 : int
Reference simulation index.
nsimx : int
Cross simulation index.
min_overlap : float, optional
Minimum overlap to consider.
Returns
-------
None
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cat0 = open_cat(nsim0)
catx = open_cat(nsimx)
reader = csiborgtools.read.PairOverlap(cat0, catx, paths)
mass = numpy.log10(reader.cat0("totpartmass"))
dist = reader.dist(in_initial=False, norm_kind="r200c")
overlap = reader.overlap(True)
dist = csiborgtools.read.weighted_stats(dist, overlap,
min_weight=min_overlap)
mask = numpy.isfinite(dist[:, 0])
mass = mass[mask]
dist = dist[mask, :]
dist = numpy.log10(dist)
p = numpy.polyfit(mass, dist[:, 0], 1)
xrange = numpy.linspace(numpy.min(mass), numpy.max(mass), 1000)
txt = r"$m = {}$, $c = {}$".format(*plt_utils.latex_float(*p, n=3))
with plt.style.context(plt_utils.mplstyle):
fig, ax = plt.subplots()
ax.set_title(txt, fontsize="small")
cx = ax.hexbin(mass, dist[:, 0], mincnt=1, bins="log", gridsize=50)
ax.plot(xrange, numpy.polyval(p, xrange), color="red",
linestyle="--")
fig.colorbar(cx, label="Bin counts")
ax.set_xlabel(r"$\log M_{\rm tot} / M_\odot$")
ax.set_ylabel(r"$\log \langle \Delta R / R_{\rm 200c}\rangle$")
fig.tight_layout()
for ext in ["png"]:
fout = join(plt_utils.fout, f"mass_vs_sep_{nsim0}_{nsimx}.{ext}")
print(f"Saving to `{fout}`.")
fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
###############################################################################
# Nearest neighbour plotting #
###############################################################################
@ -596,6 +658,7 @@ def plot_significance(simname, runs, nsim, nobs, kind, kwargs, runs_to_mass):
Nearest neighbour reader keyword arguments.
runs_to_mass : dict
Dictionary mapping run names to total halo mass range.
upper_threshold : bool, optional
Returns
-------
@ -642,7 +705,10 @@ def plot_significance(simname, runs, nsim, nobs, kind, kwargs, runs_to_mass):
if simname == "quijote":
paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
nsim = paths.quijote_fiducial_nsim(nsim, nobs)
fout = join(plt_utils.fout, f"significance_{kind}_{simname}_{str(nsim).zfill(5)}.{ext}") # noqa
nsim = str(nsim).zfill(5)
fout = join(
plt_utils.fout,
f"significance_{kind}_{simname}_{nsim}_{runs}.{ext}")
print(f"Saving to `{fout}`.")
fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
@ -673,7 +739,7 @@ def make_binlims(run, runs_to_mass):
def plot_significance_vs_mass(simname, runs, nsim, nobs, kind, kwargs,
runs_to_mass):
runs_to_mass, upper_threshold=False):
"""
Plot significance of the 1NN distance as a function of the total mass.
@ -694,6 +760,8 @@ def plot_significance_vs_mass(simname, runs, nsim, nobs, kind, kwargs,
Nearest neighbour reader keyword arguments.
runs_to_mass : dict
Dictionary mapping run names to total halo mass range.
upper_threshold : bool, optional
Whether to enforce an upper threshold on halo mass.
Returns
-------
@ -715,17 +783,20 @@ def plot_significance_vs_mass(simname, runs, nsim, nobs, kind, kwargs,
y = numpy.log10(make_ks(simname, run, nsim, nobs, kwargs))
xmin, xmax = make_binlims(run, runs_to_mass)
mask = (x >= xmin) & (x < xmax)
xs.append(x[mask])
mask = (x >= xmin) & (x < xmax if upper_threshold else True)
xs.append(numpy.log10(x[mask]))
ys.append(y[mask])
xs = numpy.concatenate(xs)
ys = numpy.concatenate(ys)
plt.hexbin(xs, ys, gridsize=75, mincnt=1, xscale="log", bins="log")
plt.hexbin(xs, ys, gridsize=75, mincnt=1, bins="log")
mask = numpy.isfinite(xs) & numpy.isfinite(ys)
corr = plt_utils.latex_float(*kendalltau(xs[mask], ys[mask]))
plt.title(r"$\tau = {}, p = {}$".format(*corr), fontsize="small")
plt.xlabel(r"$M_{\rm tot} / M_\odot$")
plt.xlim(numpy.min(xs))
plt.xlabel(r"$\log M_{\rm tot} / M_\odot$")
if kind == "ks":
plt.ylabel(r"$\log p$-value of $r_{1\mathrm{NN}}$ distribution")
plt.ylim(top=0)
@ -738,15 +809,18 @@ def plot_significance_vs_mass(simname, runs, nsim, nobs, kind, kwargs,
for ext in ["png"]:
if simname == "quijote":
nsim = paths.quijote_fiducial_nsim(nsim, nobs)
fout = (f"significance_vs_mass_{kind}_{simname}"
+ f"_{str(nsim).zfill(5)}.{ext}")
nsim = str(nsim).zfill(5)
fout = f"sgnf_vs_mass_{kind}_{simname}_{nsim}_{runs}.{ext}"
if upper_threshold:
fout = fout.replace(f".{ext}", f"_upper_threshold.{ext}")
fout = join(plt_utils.fout, fout)
print(f"Saving to `{fout}`.")
plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
def plot_kl_vs_ks(simname, runs, nsim, nobs, kwargs, runs_to_mass):
def plot_kl_vs_ks(simname, runs, nsim, nobs, kwargs, runs_to_mass,
upper_threshold=False):
"""
Plot Kullback-Leibler divergence vs Kolmogorov-Smirnov statistic p-value.
@ -764,6 +838,8 @@ def plot_kl_vs_ks(simname, runs, nsim, nobs, kwargs, runs_to_mass):
Nearest neighbour reader keyword arguments.
runs_to_mass : dict
Dictionary mapping run names to total halo mass range.
upper_threshold : bool, optional
Whether to enforce an upper threshold on halo mass.
Returns
-------
@ -779,7 +855,7 @@ def plot_kl_vs_ks(simname, runs, nsim, nobs, kwargs, runs_to_mass):
y = make_ks(simname, run, nsim, nobs, kwargs)
cmin, cmax = make_binlims(run, runs_to_mass)
mask = (c >= cmin) & (c < cmax)
mask = (c >= cmin) & (c < cmax if upper_threshold else True)
xs.append(x[mask])
ys.append(y[mask])
cs.append(c[mask])
@ -792,6 +868,9 @@ def plot_kl_vs_ks(simname, runs, nsim, nobs, kwargs, runs_to_mass):
plt.figure()
plt.hexbin(xs, ys, C=cs, gridsize=50, mincnt=0,
reduce_C_function=numpy.median)
mask = numpy.isfinite(xs) & numpy.isfinite(ys)
corr = plt_utils.latex_float(*kendalltau(xs[mask], ys[mask]))
plt.title(r"$\tau = {}, p = {}$".format(*corr), fontsize="small")
plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")
plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
@ -801,14 +880,19 @@ def plot_kl_vs_ks(simname, runs, nsim, nobs, kwargs, runs_to_mass):
for ext in ["png"]:
if simname == "quijote":
nsim = paths.quijote_fiducial_nsim(nsim, nobs)
fout = join(plt_utils.fout,
f"kl_vs_ks_{simname}_{run}_{str(nsim).zfill(5)}.{ext}")
nsim = str(nsim).zfill(5)
fout = join(
plt_utils.fout,
f"kl_vs_ks_{simname}_{run}_{nsim}_{runs}.{ext}")
if upper_threshold:
fout = fout.replace(f".{ext}", f"_upper_threshold.{ext}")
print(f"Saving to `{fout}`.")
plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass):
def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass, plot_std=True,
upper_threshold=False):
"""
Plot KL divergence vs overlap for CSiBORG.
@ -822,6 +906,10 @@ def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass):
Nearest neighbour reader keyword arguments.
runs_to_mass : dict
Dictionary mapping run names to total halo mass range.
plot_std : bool, optional
Whether to plot the standard deviation of the overlap distribution.
upper_threshold : bool, optional
Whether to enforce an upper threshold on halo mass.
Returns
-------
@ -848,7 +936,7 @@ def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass):
y1 = 1 - numpy.mean(prob_nomatch, axis=1)
y2 = numpy.std(prob_nomatch, axis=1)
cmin, cmax = make_binlims(run, runs_to_mass)
mask = (mass >= cmin) & (mass < cmax)
mask = (mass >= cmin) & (mass < cmax if upper_threshold else True)
xs.append(x[mask])
ys1.append(y1[mask])
ys2.append(y2[mask])
@ -863,18 +951,28 @@ def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass):
plt.figure()
plt.hexbin(xs, ys1, C=cs, gridsize=50, mincnt=0,
reduce_C_function=numpy.median)
mask = numpy.isfinite(xs) & numpy.isfinite(ys1)
corr = plt_utils.latex_float(*kendalltau(xs[mask], ys1[mask]))
plt.title(r"$\tau = {}, p = {}$".format(*corr), fontsize="small")
plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")
plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
plt.ylabel(r"$1 - \langle \eta^{\mathcal{B}}_a \rangle_{\mathcal{B}}$")
plt.tight_layout()
for ext in ["png"]:
nsim = str(nsim).zfill(5)
fout = join(plt_utils.fout,
f"kl_vs_overlap_mean_{str(nsim).zfill(5)}.{ext}")
f"kl_vs_overlap_mean_{nsim}_{runs}.{ext}")
if upper_threshold:
fout = fout.replace(f".{ext}", f"_upper_threshold.{ext}")
print(f"Saving to `{fout}`.")
plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
if not plot_std:
return
with plt.style.context(plt_utils.mplstyle):
plt.figure()
plt.hexbin(xs, ys2, C=cs, gridsize=50, mincnt=0,
@ -882,11 +980,17 @@ def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass):
plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$")
plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution")
plt.ylabel(r"Ensemble std of summed overlap")
mask = numpy.isfinite(xs) & numpy.isfinite(ys2)
corr = plt_utils.latex_float(*kendalltau(xs[mask], ys2[mask]))
plt.title(r"$\tau = {}, p = {}$".format(*corr), fontsize="small")
plt.tight_layout()
for ext in ["png"]:
nsim = str(nsim).zfill(5)
fout = join(plt_utils.fout,
f"kl_vs_overlap_std_{str(nsim).zfill(5)}.{ext}")
f"kl_vs_overlap_std_{nsim}_{runs}.{ext}")
if upper_threshold:
fout = fout.replace(f".{ext}", f"_upper_threshold.{ext}")
print(f"Saving to `{fout}`.")
plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
@ -915,14 +1019,19 @@ if __name__ == "__main__":
"mass009": (14.0, 14.4), # There is no upper limit.
}
cached_funcs = ["get_overlap", "read_dist", "make_kl", "make_ks"]
# cached_funcs = ["get_overlap", "read_dist", "make_kl", "make_ks"]
cached_funcs = ["get_overlap"]
if args.clean:
for func in cached_funcs:
print(f"Cleaning cache for function {func}.")
delete_disk_caches_for_function(func)
# Plot 1NN distance distributions.
if True:
plot_mass_vs_separation(7444 + 24, 8956 + 24 * 3)
# Plot 1NN distance distributions.
if False:
for i in range(1, 10):
run = f"mass00{i}"
for pulled_cdf in [True, False]:
@ -931,30 +1040,35 @@ if __name__ == "__main__":
plot_dist(run, "pdf", neighbour_kwargs, runs_to_mass)
# Plot 1NN CDF differences.
if True:
if False:
runs = [f"mass00{i}" for i in range(1, 10)]
for pulled_cdf in [True, False]:
plot_cdf_diff(runs, neighbour_kwargs, pulled_cdf=pulled_cdf,
runs_to_mass=runs_to_mass)
if True:
if False:
runs = [f"mass00{i}" for i in range(1, 9)]
for kind in ["kl", "ks"]:
plot_significance("csiborg", runs, 7444, nobs=None, kind=kind,
kwargs=neighbour_kwargs,
runs_to_mass=runs_to_mass)
if True:
runs = [f"mass00{i}" for i in range(1, 10)]
if False:
# runs = [f"mass00{i}" for i in range(1, 10)]
runs = ["mass007"]
for kind in ["kl", "ks"]:
plot_significance_vs_mass("csiborg", runs, 7444, nobs=None,
kind=kind, kwargs=neighbour_kwargs,
runs_to_mass=runs_to_mass)
runs_to_mass=runs_to_mass,
upper_threshold=True)
if True:
runs = [f"mass00{i}" for i in range(1, 10)]
if False:
# runs = [f"mass00{i}" for i in range(1, 10)]
runs = ["mass006"]
plot_kl_vs_ks("csiborg", runs, 7444, None, kwargs=neighbour_kwargs,
runs_to_mass=runs_to_mass)
runs_to_mass=runs_to_mass, upper_threshold=True)
if True:
runs = [f"mass00{i}" for i in range(1, 10)]
plot_kl_vs_overlap(runs, 7444, neighbour_kwargs, runs_to_mass)
if False:
# runs = [f"mass00{i}" for i in range(1, 10)]
runs = ["mass006"]
plot_kl_vs_overlap(runs, 7444, neighbour_kwargs, runs_to_mass,
upper_threshold=True, plot_std=False)

View file

@ -16,3 +16,38 @@
dpi = 600
fout = "../plots/"
mplstyle = ["science"]
def latex_float(*floats, n=2):
"""
Convert a float or a list of floats to a LaTeX string(s). Taken from [1].
Parameters
----------
floats : float or list of floats
The float(s) to be converted.
n : int, optional
The number of significant figures to be used in the LaTeX string.
Returns
-------
latex_floats : str or list of str
The LaTeX string(s) representing the float(s).
References
----------
[1] https://stackoverflow.com/questions/13490292/format-number-using-latex-notation-in-python # noqa
"""
latex_floats = [None] * len(floats)
for i, f in enumerate(floats):
float_str = "{0:.{1}g}".format(f, n)
if "e" in float_str:
base, exponent = float_str.split("e")
latex_floats[i] = r"{0} \times 10^{{{1}}}".format(base,
int(exponent))
else:
latex_floats[i] = float_str
if len(floats) == 1:
return latex_floats[0]
return latex_floats