From 8e3127f4d9ff3da1dec8778a0fb11c345a4338d1 Mon Sep 17 00:00:00 2001 From: Richard Stiskalek Date: Fri, 18 Aug 2023 19:20:47 +0100 Subject: [PATCH] New plots (#85) * Update verbosity messages * Update verbosity messags * Update more verbosity flags * Update the iterator settings * Add basic plots * Update verbosity flags * Update arg parsre * Update plots * Remove some older code * Fix some definitions * Update plots * Update plotting * Update plots * Add support functions * Update nb * Improve plots, move back to scripts * Update plots * pep8 * Add max overlap plot * Add blank line * Upload changes * Update changes * Add weighted stats * Remove * Add import * Add Max's matching * Edit submission * Add paths to Max's matching * Fix matching * Edit submission * Edit plot * Add max overlap separation plot * Add periodic distance * Update overlap summaries * Add nsim0 for Max matvhing * Add Max's agreement plot * Add Quijote for Max method * Update ploitting * Update name --- csiborgtools/__init__.py | 2 +- csiborgtools/match/__init__.py | 3 +- csiborgtools/match/match.py | 142 +- csiborgtools/read/overlap_summary.py | 137 +- csiborgtools/read/paths.py | 44 + scripts/match_all.py | 49 +- scripts/match_singlematch.py | 98 +- scripts_plots/plot_1nn.py | 677 +------- scripts_plots/plot_match.py | 2190 +++++++++++--------------- scripts_plots/plt_utils.py | 101 +- 10 files changed, 1343 insertions(+), 2100 deletions(-) diff --git a/csiborgtools/__init__.py b/csiborgtools/__init__.py index b21249a..c78a819 100644 --- a/csiborgtools/__init__.py +++ b/csiborgtools/__init__.py @@ -14,7 +14,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. from csiborgtools import clustering, field, match, read # noqa -from .utils import (center_of_mass, delta2ncells, number_counts, +from .utils import (center_of_mass, delta2ncells, number_counts, # noqa periodic_distance, periodic_distance_two_points) # noqa # Arguments to csiborgtools.read.Paths. diff --git a/csiborgtools/match/__init__.py b/csiborgtools/match/__init__.py index 17e7f2e..f53f0bb 100644 --- a/csiborgtools/match/__init__.py +++ b/csiborgtools/match/__init__.py @@ -14,4 +14,5 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. from .match import (ParticleOverlap, RealisationsMatcher, # noqa calculate_overlap, calculate_overlap_indxs, pos2cell, # noqa - cosine_similarity, find_neighbour, get_halo_cell_limits) # noqa + cosine_similarity, find_neighbour, get_halo_cell_limits, # noqa + matching_max) # noqa diff --git a/csiborgtools/match/match.py b/csiborgtools/match/match.py index aaac705..cb63ab5 100644 --- a/csiborgtools/match/match.py +++ b/csiborgtools/match/match.py @@ -236,8 +236,6 @@ class RealisationsMatcher(BaseMatcher): # We begin by querying the kNN for the nearest neighbours of each halo # in the reference simulation from the cross simulation in the initial # snapshot. - if verbose: - print(f"{datetime.now()}: querying the KNN.", flush=True) match_indxs = radius_neighbours( catx.knn(in_initial=True, subtract_observer=False, periodic=True), cat0.position(in_initial=True), @@ -261,11 +259,13 @@ class RealisationsMatcher(BaseMatcher): return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx, nshift=0, ncells=self.box_size) - if verbose: - print(f"{datetime.now()}: calculating overlaps.", flush=True) + iterator = tqdm( + cat0["index"], + desc=f"{datetime.now()}: calculating NGP overlaps", + disable=not verbose + ) cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size - indxs = cat0["index"] - for i, k0 in enumerate(tqdm(indxs) if verbose else indxs): + for i, k0 in enumerate(iterator): # If we have no matches continue to the next halo. matches = match_indxs[i] if matches.size == 0: @@ -347,12 +347,13 @@ class RealisationsMatcher(BaseMatcher): return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx, nshift=nshift, ncells=self.box_size) - if verbose: - print(f"{datetime.now()}: calculating smoothed overlaps.", - flush=True) + iterator = tqdm( + cat0["index"], + desc=f"{datetime.now()}: calculating smoothed overlaps", + disable=not verbose + ) cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size - indxs = cat0["index"] - for i, k0 in enumerate(tqdm(indxs) if verbose else indxs): + for i, k0 in enumerate(iterator): pos0, mass0, __, mins0, maxs0 = load_processed_halo( k0, particles0, halo_map0, hid2map0, nshift=nshift, ncells=self.box_size) @@ -434,7 +435,12 @@ class ParticleOverlap(BaseMatcher): assert ((delta.shape == (ncells,) * 3) & (delta.dtype == numpy.float32)) - for hid in tqdm(halo_cat["index"]) if verbose else halo_cat["index"]: + iterator = tqdm( + halo_cat["index"], + desc=f"{datetime.now()} Calculating the background field", + disable=not verbose + ) + for hid in iterator: pos = load_halo_particles(hid, particles, halo_map, hid2map) if pos is None: continue @@ -993,11 +999,13 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0, if radiusKNN.size != knn.n_samples_fit_: raise ValueError("Mismatch in shape of `radiusKNN` or `knn`") - nsamples = len(X) - indxs = [None] * nsamples patchknn_max = numpy.max(radiusKNN) - for i in trange(nsamples) if verbose else range(nsamples): + iterator = trange(len(X), + desc=f"{datetime.now()}: querying the kNN", + disable=not verbose) + indxs = [None] * len(X) + for i in iterator: dist, indx = knn.radius_neighbors( X[i].reshape(1, -1), radiusX[i] + patchknn_max, sort_results=True) @@ -1082,3 +1090,107 @@ def cosine_similarity(x, y): out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1) return out[0] if out.size == 1 else out + + +def matching_max(cat0, catx, mass_kind, mult, periodic, overlap=None, + match_indxs=None, verbose=True): + """ + Halo matching algorithm based on [1]. + + Parameters + ---------- + cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue` + Halo catalogue of the reference simulation. + catx : instance of :py:class:`csiborgtools.read.BaseCatalogue` + Halo catalogue of the cross simulation. + mass_kind : str + Name of the mass column. + mult : float + Multiple of R200c below which to consider a match. + periodic : bool + Whether to account for periodic boundary conditions. + overlap : array of 1-dimensional arrays, optional + Overlap of halos from `cat0` with halos from `catx`. If `overlap` or + `match_indxs` is not provided, then the overlap of the identified halos + is not calculated. + match_indxs : array of 1-dimensional arrays, optional + Indicies of halos from `catx` having a non-zero overlap with halos + from `cat0`. + verbose : bool, optional + Verbosity flag. + + Returns + ------- + out : structured array + Array of matches. Columns are `hid0`, `hidx`, `dist`, `success`. + + References + ---------- + [1] Maxwell L Hutt, Harry Desmond, Julien Devriendt, Adrianne Slyz; The + effect of local Universe constraints on halo abundance and clustering; + Monthly Notices of the Royal Astronomical Society, Volume 516, Issue 3, + November 2022, Pages 3592–3601, https://doi.org/10.1093/mnras/stac2407 + """ + pos0 = cat0.position(in_initial=False) + knnx = catx.knn(in_initial=False, subtract_observer=False, + periodic=periodic) + rad0 = cat0["r200c"] + + mass0 = numpy.log10(cat0[mass_kind]) + massx = numpy.log10(catx[mass_kind]) + + assert numpy.all(numpy.isfinite(mass0)) & numpy.all(numpy.isfinite(massx)) + + maskx = numpy.ones(len(catx), dtype=numpy.bool_) + + dtypes = [("hid0", numpy.int32), + ("hidx", numpy.int32), + ("mass0", numpy.float32), + ("massx", numpy.float32), + ("dist", numpy.float32), + ("success", numpy.bool_), + ("match_overlap", numpy.float32), + ("max_overlap", numpy.float32), + ] + out = numpy.full(len(cat0), numpy.nan, dtype=dtypes) + out["success"] = False + + for i in tqdm(numpy.argsort(mass0)[::-1], desc="Matching haloes", + disable=not verbose): + hid0 = cat0["index"][i] + out[i]["hid0"] = hid0 + out[i]["mass0"] = 10**mass0[i] + + neigh_dists, neigh_inds = knnx.radius_neighbors(pos0[i].reshape(1, -1), + mult * rad0[i]) + neigh_dists, neigh_inds = neigh_dists[0], neigh_inds[0] + + if neigh_dists.size == 0: + continue + + # Sort the neighbours by mass difference + sort_order = numpy.argsort(numpy.abs(mass0[i] - massx[neigh_inds])) + neigh_dists = neigh_dists[sort_order] + neigh_inds = neigh_inds[sort_order] + + for j, neigh_ind in enumerate(neigh_inds): + + if maskx[neigh_ind]: + out[i]["hidx"] = catx["index"][neigh_ind] + out[i]["dist"] = neigh_dists[j] + out[i]["massx"] = 10**massx[neigh_ind] + + out[i]["success"] = True + + maskx[neigh_ind] = False + + if overlap is not None and match_indxs is not None: + if neigh_ind in match_indxs[i]: + k = numpy.where(neigh_ind == match_indxs[i])[0][0] + out[i]["match_overlap"] = overlap[i][k] + if len(overlap[i]) > 0: + out[i]["max_overlap"] = numpy.max(overlap[i]) + + break + + return out diff --git a/csiborgtools/read/overlap_summary.py b/csiborgtools/read/overlap_summary.py index f76b010..d41c5f8 100644 --- a/csiborgtools/read/overlap_summary.py +++ b/csiborgtools/read/overlap_summary.py @@ -21,6 +21,8 @@ from os.path import isfile import numpy from tqdm import tqdm, trange +from ..utils import periodic_distance + ############################################################################### # Overlap of two simulations # ############################################################################### @@ -47,6 +49,7 @@ class PairOverlap: _cat0 = None _catx = None _data = None + _paths = None def __init__(self, cat0, catx, paths, min_logmass, maxdist=None): if cat0.simname != catx.simname: @@ -55,6 +58,7 @@ class PairOverlap: self._cat0 = cat0 self._catx = catx + self._paths = paths self.load(cat0, catx, paths, min_logmass, maxdist) def load(self, cat0, catx, paths, min_logmass, maxdist=None): @@ -257,6 +261,8 @@ class PairOverlap: for i in range(len(overlap)): if len(overlap[i]) > 0: out[i] = numpy.sum(overlap[i]) + else: + out[i] = 0 return out def prob_nomatch(self, from_smoothed): @@ -279,9 +285,11 @@ class PairOverlap: for i in range(len(overlap)): if len(overlap[i]) > 0: out[i] = numpy.product(numpy.subtract(1, overlap[i])) + else: + out[i] = 1 return out - def dist(self, in_initial, norm_kind=None): + def dist(self, in_initial, boxsize, norm_kind=None): """ Pair distances of matched halos between the reference and cross simulations. @@ -290,6 +298,8 @@ class PairOverlap: ---------- in_initial : bool Whether to calculate separation in the initial or final snapshot. + boxsize : float + The size of the simulation box. norm_kind : str, optional The kind of normalisation to apply to the distances. Can be `r200c`, `ref_patch` or `sum_patch`. @@ -320,8 +330,7 @@ class PairOverlap: # Now calculate distances dist = [None] * len(self) for i, ind in enumerate(self["match_indxs"]): - # n refers to the reference halo catalogue position - dist[i] = numpy.linalg.norm(pos0[i, :] - posx[ind, :], axis=1) + dist[i] = periodic_distance(posx[ind, :], pos0[i, :], boxsize) if norm_kind is not None: dist[i] /= norm[i] @@ -358,7 +367,7 @@ class PairOverlap: ratio[i] = numpy.abs(ratio[i]) return numpy.array(ratio, dtype=object) - def max_overlap_key(self, key, from_smoothed): + def max_overlap_key(self, key, min_overlap, from_smoothed): """ Calculate the maximum overlap mass of each halo in the reference simulation from the cross simulation. @@ -367,10 +376,10 @@ class PairOverlap: ---------- key : str Key to the maximum overlap statistic to calculate. + min_overlap : float + Minimum pair overlap to consider. from_smoothed : bool Whether to use the smoothed overlap or not. - mass_kind : str, optional - The mass kind whose ratio is to be calculated. Returns ------- @@ -384,11 +393,15 @@ class PairOverlap: # Skip if no match if len(match_ind) == 0: continue - out[i] = y[match_ind][numpy.argmax(overlap[i])] + + k = numpy.argmax(overlap[i]) + if overlap[i][k] > min_overlap: + out[i] = y[match_ind][k] + return out def counterpart_mass(self, from_smoothed, overlap_threshold=0., - in_log=False, mass_kind="totpartmass"): + mass_kind="totpartmass"): """ Calculate the expected counterpart mass of each halo in the reference simulation from the crossed simulation. @@ -400,9 +413,6 @@ class PairOverlap: overlap_threshold : float, optional Minimum overlap required for a halo to be considered a match. By default 0.0, i.e. no threshold. - in_log : bool, optional - Whether to calculate the expectation value in log space. By default - `False`. mass_kind : str, optional The mass kind whose ratio is to be calculated. Must be a valid catalogue key. By default `totpartmass`, i.e. the total particle @@ -434,15 +444,11 @@ class PairOverlap: massx_ = massx_[mask] overlap_ = overlap_[mask] - massx_ = numpy.log10(massx_) if in_log else massx_ + massx_ = numpy.log10(massx_) # Weighted average and *biased* standard deviation mean_ = numpy.average(massx_, weights=overlap_) std_ = numpy.average((massx_ - mean_)**2, weights=overlap_)**0.5 - # If in log, convert back to linear - mean_ = 10**mean_ if in_log else mean_ - std_ = mean_ * std_ * numpy.log(10) if in_log else std_ - mean[i] = mean_ std[i] = std_ @@ -544,7 +550,7 @@ def weighted_stats(x, weights, min_weight=0, verbose=False): """ out = numpy.full((x.size, 2), numpy.nan, dtype=numpy.float32) - for i in trange(len(x)) if verbose else range(len(x)): + for i in trange(len(x), disable=not verbose): x_, w_ = numpy.asarray(x[i]), numpy.asarray(weights[i]) mask = w_ > min_weight x_ = x_[mask] @@ -574,27 +580,30 @@ class NPairsOverlap: List of cross simulation halo catalogues. paths : py:class`csiborgtools.read.Paths` CSiBORG paths object. + min_logmass : float + Minimum log mass of halos to consider. verbose : bool, optional Verbosity flag for loading the overlap objects. """ _pairs = None - def __init__(self, cat0, catxs, paths, verbose=True): + def __init__(self, cat0, catxs, paths, min_logmass, verbose=True): pairs = [None] * len(catxs) - if verbose: - print("Loading individual overlap objects...", flush=True) - for i, catx in enumerate(tqdm(catxs) if verbose else catxs): - pairs[i] = PairOverlap(cat0, catx, paths) + for i, catx in enumerate(tqdm(catxs, desc="Loading overlap objects", + disable=not verbose)): + pairs[i] = PairOverlap(cat0, catx, paths, min_logmass) self._pairs = pairs - def max_overlap(self, from_smoothed, verbose=True): + def max_overlap(self, min_overlap, from_smoothed, verbose=True): """ Calculate maximum overlap of each halo in the reference simulation with the cross simulations. Parameters ---------- + min_overlap : float + Minimum pair overlap to consider. from_smoothed : bool Whether to use the smoothed overlap or not. verbose : bool, optional @@ -604,21 +613,24 @@ class NPairsOverlap: ------- max_overlap : 2-dimensional array of shape `(nhalos, ncatxs)` """ - out = [None] * len(self) - if verbose: - print("Calculating maximum overlap...", flush=True) - def get_max(y_): if len(y_) == 0: - return numpy.nan - return numpy.max(y_) + return 0 + out = numpy.max(y_) - for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs): + return out if out >= min_overlap else 0 + + iterator = tqdm(self.pairs, + desc="Calculating maximum overlap", + disable=not verbose + ) + out = [None] * len(self) + for i, pair in enumerate(iterator): out[i] = numpy.asanyarray([get_max(y_) for y_ in pair.overlap(from_smoothed)]) return numpy.vstack(out).T - def max_overlap_key(self, key, from_smoothed, verbose=True): + def max_overlap_key(self, key, min_overlap, from_smoothed, verbose=True): """ Calculate maximum overlap mass of each halo in the reference simulation with the cross simulations. @@ -627,6 +639,8 @@ class NPairsOverlap: ---------- key : str Key to the maximum overlap statistic to calculate. + min_overlap : float + Minimum pair overlap to consider. from_smoothed : bool Whether to use the smoothed overlap or not. verbose : bool, optional @@ -636,12 +650,13 @@ class NPairsOverlap: ------- out : 2-dimensional array of shape `(nhalos, ncatxs)` """ + iterator = tqdm(self.pairs, + desc=f"Calculating maximum overlap {key}", + disable=not verbose + ) out = [None] * len(self) - if verbose: - print(f"Calculating maximum overlap {key}...", flush=True) - - for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs): - out[i] = pair.max_overlap_key(key, from_smoothed) + for i, pair in enumerate(iterator): + out[i] = pair.max_overlap_key(key, min_overlap, from_smoothed) return numpy.vstack(out).T @@ -661,10 +676,11 @@ class NPairsOverlap: ------- summed_overlap : 2-dimensional array of shape `(nhalos, ncatxs)` """ + iterator = tqdm(self.pairs, + desc="Calculating summed overlap", + disable=not verbose) out = [None] * len(self) - if verbose: - print("Calculating summed overlap...", flush=True) - for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs): + for i, pair in enumerate(iterator): out[i] = pair.summed_overlap(from_smoothed) return numpy.vstack(out).T @@ -684,16 +700,18 @@ class NPairsOverlap: ------- prob_nomatch : 2-dimensional array of shape `(nhalos, ncatxs)` """ + iterator = tqdm(self.pairs, + desc="Calculating probability of no match", + disable=not verbose + ) out = [None] * len(self) - if verbose: - print("Calculating probability of no match...", flush=True) - for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs): + for i, pair in enumerate(iterator): out[i] = pair.prob_nomatch(from_smoothed) return numpy.vstack(out).T def counterpart_mass(self, from_smoothed, overlap_threshold=0., - in_log=False, mass_kind="totpartmass", - return_full=False, verbose=True): + mass_kind="totpartmass", return_full=False, + verbose=True): """ Calculate the expected counterpart mass of each halo in the reference simulation from the crossed simulation. @@ -705,9 +723,6 @@ class NPairsOverlap: overlap_threshold : float, optional Minimum overlap required for a halo to be considered a match. By default 0.0, i.e. no threshold. - in_log : bool, optional - Whether to calculate the expectation value in log space. By default - `False`. mass_kind : str, optional The mass kind whose ratio is to be calculated. Must be a valid catalogue key. By default `totpartmass`, i.e. the total particle @@ -727,26 +742,31 @@ class NPairsOverlap: Expected mass and standard deviation from each cross simulation. Returned only if `return_full` is `True`. """ + iterator = tqdm(self.pairs, + desc="Calculating counterpart masses", + disable=not verbose) mus, stds = [None] * len(self), [None] * len(self) - if verbose: - print("Calculating counterpart masses...", flush=True) - for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs): + for i, pair in enumerate(iterator): mus[i], stds[i] = pair.counterpart_mass( from_smoothed=from_smoothed, - overlap_threshold=overlap_threshold, in_log=in_log, - mass_kind=mass_kind) + overlap_threshold=overlap_threshold, mass_kind=mass_kind) mus, stds = numpy.vstack(mus).T, numpy.vstack(stds).T - probmatch = 1 - self.prob_nomatch(from_smoothed) # Prob of > 0 matches + # Prob of > 0 matches + probmatch = 1 - self.prob_nomatch(from_smoothed) # Normalise it for weighted sums etc. norm_probmatch = numpy.apply_along_axis( lambda x: x / numpy.sum(x), axis=1, arr=probmatch) # Mean and standard deviation of weighted stacked Gaussians - mu = numpy.sum(norm_probmatch * mus, axis=1) - std = numpy.sum(norm_probmatch * (mus**2 + stds**2), axis=1) - mu**2 + mu = numpy.sum((norm_probmatch * mus), axis=1) + std = numpy.sum((norm_probmatch * (mus**2 + stds**2)), axis=1) - mu**2 std **= 0.5 + mask = mu <= 0 + mu[mask] = numpy.nan + std[mask] = numpy.nan + if return_full: return mu, std, mus, stds return mu, std @@ -766,6 +786,11 @@ class NPairsOverlap: def cat0(self): return self.pairs[0].cat0 # All pairs have the same ref catalogue + def __getitem__(self, key): + if not isinstance(key, int): + raise TypeError("Key must be an integer.") + return self.pairs[key] + def __len__(self): return len(self.pairs) @@ -794,7 +819,7 @@ def get_cross_sims(simname, nsim0, paths, min_logmass, smoothed): Whether to use the smoothed overlap or not. """ nsimxs = [] - for nsimx in paths.get_ics("csiborg"): + for nsimx in paths.get_ics(simname): if nsimx == nsim0: continue f1 = paths.overlap(simname, nsim0, nsimx, min_logmass, smoothed) diff --git a/csiborgtools/read/paths.py b/csiborgtools/read/paths.py index 87765e7..a796001 100644 --- a/csiborgtools/read/paths.py +++ b/csiborgtools/read/paths.py @@ -501,6 +501,50 @@ class Paths: fname = fname.replace("overlap", "overlap_smoothed") return join(fdir, fname) + def match_max(self, simname, nsim0, nsimx, min_logmass, mult): + """ + Path to the files containing matching based on [1]. + + Parameters + ---------- + simname : str + Simulation name. + nsim0 : int + IC realisation index of the first simulation. + nsimx : int + IC realisation index of the second simulation. + min_logmass : float + Minimum log mass of halos to consider. + mult : float + Multiplicative search radius factor. + + Returns + ------- + path : str + + References + ---------- + [1] Maxwell L Hutt, Harry Desmond, Julien Devriendt, Adrianne Slyz; The + effect of local Universe constraints on halo abundance and clustering; + Monthly Notices of the Royal Astronomical Society, Volume 516, Issue 3, + November 2022, Pages 3592–3601, https://doi.org/10.1093/mnras/stac2407 + """ + if simname == "csiborg": + fdir = join(self.postdir, "match_max") + elif simname == "quijote": + fdir = join(self.quijote_dir, "match_max") + else: + ValueError(f"Unknown simulation name `{simname}`.") + + try_create_directory(fdir) + + nsim0 = str(nsim0).zfill(5) + nsimx = str(nsimx).zfill(5) + min_logmass = float('%.4g' % min_logmass) + fname = f"match_max_{nsim0}_{nsimx}_{min_logmass}_{str(mult)}.npz" + + return join(fdir, fname) + def field(self, kind, MAS, grid, nsim, in_rsp, smooth_scale=None): r""" Path to the files containing the calculated density fields in CSiBORG. diff --git a/scripts/match_all.py b/scripts/match_all.py index 1370f87..3ace5c1 100644 --- a/scripts/match_all.py +++ b/scripts/match_all.py @@ -12,6 +12,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. """A script to match all IC pairs of a simulation.""" +import warnings from argparse import ArgumentParser from distutils.util import strtobool from itertools import combinations @@ -20,15 +21,8 @@ from random import Random from mpi4py import MPI from taskmaster import work_delegation -from match_singlematch import pair_match - -try: - import csiborgtools -except ModuleNotFoundError: - import sys - - sys.path.append("../") - import csiborgtools +import csiborgtools +from match_singlematch import pair_match, pair_match_max def get_combs(simname): @@ -53,7 +47,7 @@ def get_combs(simname): return combs -def main(comb, simname, min_logmass, sigma, verbose): +def main(comb, kind, simname, min_logmass, sigma, mult, verbose): """ Match a pair of simulations. @@ -61,12 +55,16 @@ def main(comb, simname, min_logmass, sigma, verbose): ---------- comb : tuple Pair of simulation IC indices. + kind : str + Kind of matching. simname : str Simulation name. min_logmass : float Minimum log halo mass. sigma : float Smoothing scale in number of grid cells. + mult : float + Multiplicative factor for search radius. verbose : bool Verbosity flag. @@ -75,25 +73,46 @@ def main(comb, simname, min_logmass, sigma, verbose): None """ nsim0, nsimx = comb - pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose) + if kind == "overlap": + pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose) + elif args.kind == "max": + pair_match_max(nsim0, nsimx, simname, min_logmass, mult, verbose) + else: + raise ValueError(f"Unknown matching kind: `{kind}`.") if __name__ == "__main__": parser = ArgumentParser() + parser.add_argument("--kind", type=str, required=True, + choices=["overlap", "max"], help="Kind of matching.") parser.add_argument("--simname", type=str, required=True, - help="Simulation name.", choices=["csiborg", "quijote"]) + help="Simulation name.", + choices=["csiborg", "quijote"]) + parser.add_argument("--nsim0", type=int, default=None, + help="Reference IC for Max's matching method.") parser.add_argument("--min_logmass", type=float, required=True, help="Minimum log halo mass.") parser.add_argument("--sigma", type=float, default=0, help="Smoothing scale in number of grid cells.") + parser.add_argument("--mult", type=float, default=5, + help="Search radius multiplier for Max's method.") parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)), default=False, help="Verbosity flag.") args = parser.parse_args() - combs = get_combs() + if args.kind == "overlap": + combs = get_combs(args.simname) + else: + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + combs = [(args.nsim0, nsimx) for nsimx in paths.get_ics(args.simname) + if nsimx != args.nsim0] def _main(comb): - main(comb, args.simname, args.min_logmass, args.sigma, args.verbose) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", + "invalid value encountered in cast", + RuntimeWarning) + main(comb, args.kind, args.simname, args.min_logmass, args.sigma, + args.mult, args.verbose) work_delegation(_main, combs, MPI.COMM_WORLD) - diff --git a/scripts/match_singlematch.py b/scripts/match_singlematch.py index ef82229..1247f80 100644 --- a/scripts/match_singlematch.py +++ b/scripts/match_singlematch.py @@ -23,13 +23,75 @@ from distutils.util import strtobool import numpy from scipy.ndimage import gaussian_filter -try: - import csiborgtools -except ModuleNotFoundError: - import sys +import csiborgtools - sys.path.append("../") - import csiborgtools + +def pair_match_max(nsim0, nsimx, simname, min_logmass, mult, verbose): + """ + Match a pair of simulations using the method of [1]. + + Parameters + ---------- + nsim0 : int + The reference simulation IC index. + nsimx : int + The cross simulation IC index. + simname : str + Simulation name. + min_logmass : float + Minimum log halo mass. + mult : float + Multiplicative factor for search radius. + verbose : bool + Verbosity flag. + + Returns + ------- + None + + References + ---------- + [1] Maxwell L Hutt, Harry Desmond, Julien Devriendt, Adrianne Slyz; The + effect of local Universe constraints on halo abundance and clustering; + Monthly Notices of the Royal Astronomical Society, Volume 516, Issue 3, + November 2022, Pages 3592–3601, https://doi.org/10.1093/mnras/stac2407 + """ + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + + if simname == "csiborg": + mass_kind = "fof_totpartmass" + maxdist = 155 + periodic = False + bounds = {"dist": (0, maxdist), mass_kind: (10**min_logmass, None)} + cat0 = csiborgtools.read.CSiBORGHaloCatalogue( + nsim0, paths, bounds=bounds, load_fitted=True, load_initial=False) + catx = csiborgtools.read.CSiBORGHaloCatalogue( + nsimx, paths, bounds=bounds, load_fitted=True, load_initial=False) + elif simname == "quijote": + mass_kind = "group_mass" + maxdist = None + periodic = True + bounds = {mass_kind: (10**min_logmass, None)} + cat0 = csiborgtools.read.QuijoteHaloCatalogue( + nsim0, paths, 4, bounds=bounds, load_fitted=True, + load_initial=False) + catx = csiborgtools.read.QuijoteHaloCatalogue( + nsimx, paths, 4, bounds=bounds, load_fitted=True, + load_initial=False) + else: + raise ValueError(f"Unknown simulation `{simname}`.") + + reader = csiborgtools.read.PairOverlap(cat0, catx, paths, min_logmass, + maxdist=maxdist) + out = csiborgtools.match.matching_max( + cat0, catx, mass_kind, mult=mult, periodic=periodic, + overlap=reader.overlap(from_smoothed=True), + match_indxs=reader["match_indxs"], verbose=verbose) + + fout = paths.match_max(simname, nsim0, nsimx, min_logmass, mult) + if verbose: + print(f"{datetime.now()}: saving to ... `{fout}`.", flush=True) + numpy.savez(fout, **{p: out[p] for p in out.dtype.names}) def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose): @@ -95,27 +157,17 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose): paths.initmatch(nsimx, simname, "particles"))["particles"] hid2mapx = {hid: i for i, hid in enumerate(halomapx[:, 0])} - if verbose: - print(f"{datetime.now()}: calculating the background density fields.", - flush=True) overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs) delta_bckg = overlapper.make_bckg_delta(parts0, halomap0, hid2map0, cat0, verbose=verbose) delta_bckg = overlapper.make_bckg_delta(partsx, halomapx, hid2mapx, catx, delta=delta_bckg, verbose=verbose) - if verbose: - print() - - if verbose: - print(f"{datetime.now()}: NGP crossing the simulations.", flush=True) matcher = csiborgtools.match.RealisationsMatcher( mass_kind=mass_kind, **overlapper_kwargs) match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx, halomap0, halomapx, delta_bckg, verbose=verbose) - if verbose: - print() # We want to store the halo IDs of the matches, not their array positions # in the catalogues. @@ -151,6 +203,8 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose): if __name__ == "__main__": parser = ArgumentParser() + parser.add_argument("--kind", type=str, required=True, + choices=["overlap", "max"], help="Kind of matching.") parser.add_argument("--nsim0", type=int, required=True, help="Reference simulation IC index.") parser.add_argument("--nsimx", type=int, required=True, @@ -159,11 +213,19 @@ if __name__ == "__main__": help="Simulation name.") parser.add_argument("--min_logmass", type=float, required=True, help="Minimum log halo mass.") + parser.add_argument("--mult", type=float, default=5, + help="Search radius multiplier for Max's method.") parser.add_argument("--sigma", type=float, default=0, help="Smoothing scale in number of grid cells.") parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)), default=False, help="Verbosity flag.") args = parser.parse_args() - pair_match(args.nsim0, args.nsimx, args.simname, args.min_logmass, - args.sigma, args.verbose) + if args.kind == "overlap": + pair_match(args.nsim0, args.nsimx, args.simname, args.min_logmass, + args.sigma, args.verbose) + elif args.kind == "max": + pair_match_max(args.nsim0, args.nsimx, args.simname, args.min_logmass, + args.mult, args.verbose) + else: + raise ValueError(f"Unknown matching kind: `{args.kind}`.") diff --git a/scripts_plots/plot_1nn.py b/scripts_plots/plot_1nn.py index cc8f8c8..f54e461 100644 --- a/scripts_plots/plot_1nn.py +++ b/scripts_plots/plot_1nn.py @@ -14,17 +14,15 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. from argparse import ArgumentParser -from gc import collect from os.path import join import matplotlib as mpl import matplotlib.pyplot as plt -from mpl_toolkits.axes_grid1.inset_locator import inset_axes import numpy import scienceplots # noqa from cache_to_disk import cache_to_disk, delete_disk_caches_for_function from scipy.stats import kendalltau -from tqdm import trange, tqdm +from tqdm import tqdm import plt_utils @@ -36,11 +34,7 @@ except ModuleNotFoundError: import csiborgtools -############################################################################### -# IC overlap plotting # -############################################################################### - -def open_cat(nsim: int, simname: str): +def open_cat(nsim, simname): paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) if simname == "csiborg": @@ -56,648 +50,17 @@ def open_cat(nsim: int, simname: str): return cat - +def open_cats(nsims, simname): + catxs = [None] * len(nsims) -@cache_to_disk(7) -def get_overlap(simname, nsim0): - """ - Calculate the summed overlap and probability of no match for a single - reference simulation. + for i, nsim in enumerate(tqdm(nsims, desc="Opening catalogues")): + catxs[i] = open_cat(nsim, simname) - Parameters - ---------- - simname : str - Simulation name. - nsim0 : int - Simulation index. - - Returns - ------- - mass : 1-dimensional array - Mass of halos in the reference simulation. - hindxs : 1-dimensional array - Halo indices in the reference simulation. - max_overlap : 2-dimensional array - Maximum overlap for each halo in the reference simulation. - summed_overlap : 2-dimensional array - Summed overlap for each halo in the reference simulation. - prob_nomatch : 2-dimensional array - Probability of no match for each halo in the reference simulation. - """ - paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths, - smoothed=True) - cat0 = open_cat(nsim0) - - catxs = [] - print("Opening catalogues...", flush=True) - for nsimx in tqdm(nsimxs): - catxs.append(open_cat(nsimx)) - - reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths) - mass = reader.cat0("totpartmass") - - hindxs = reader.cat0("index") - summed_overlap = reader.summed_overlap(True) - max_overlap = reader.max_overlap(True) - prob_nomatch = reader.prob_nomatch(True) - return mass, hindxs, max_overlap, summed_overlap, prob_nomatch - - -@cache_to_disk(7) -def get_max_key(simname, nsim0, key): - """ - Get the value of a maximum overlap halo's property. - - Parameters - ---------- - simname : str - Simulation name. - nsim0 : int - Reference simulation index. - key : str - Property to get. - - Returns - ------- - mass0 : 1-dimensional array - Mass of the reference haloes. - key_val : 1-dimensional array - Value of the property of the reference haloes. - max_overlap : 2-dimensional array - Maximum overlap of the reference haloes. - stat : 2-dimensional array - Value of the property of the maximum overlap halo. - """ - paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths, - smoothed=True) - nsimxs = nsimxs - cat0 = open_cat(nsim0) - - catxs = [] - print("Opening catalogues...", flush=True) - for nsimx in tqdm(nsimxs): - catxs.append(open_cat(nsimx)) - - reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths) - - mass0 = reader.cat0("totpartmass") - key_val = reader.cat0(key) - max_overlap = reader.max_overlap(True) - stat = reader.max_overlap_key(key, True) - return mass0, key_val, max_overlap, stat - - -def plot_mass_vs_pairoverlap(nsim0, nsimx): - """ - Plot the pair overlap of a reference simulation with a single cross - simulation as a function of the reference halo mass. - - Parameters - ---------- - nsim0 : int - Reference simulation index. - nsimx : int - Cross simulation index. - """ - paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - cat0 = open_cat(nsim0) - catx = open_cat(nsimx) - reader = csiborgtools.read.PairOverlap(cat0, catx, paths) - - x = reader.copy_per_match("totpartmass") - y = reader.overlap(True) - - x = numpy.log10(numpy.concatenate(x)) - y = numpy.concatenate(y) - - with plt.style.context(plt_utils.mplstyle): - plt.figure() - plt.hexbin(x, y, mincnt=1, bins="log", - gridsize=50) - plt.colorbar(label="Counts in bins") - plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - plt.ylabel("Pair overlap") - plt.ylim(0., 1.) - - plt.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, f"mass_vs_pair_overlap{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -def plot_mass_vs_maxpairoverlap(nsim0, nsimx): - """ - Plot the maximum pair overlap of a reference simulation haloes with a - single cross simulation. - - Parameters - ---------- - nsim0 : int - Reference simulation index. - nsimx : int - Cross simulation index. - """ - paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - cat0 = open_cat(nsim0) - catx = open_cat(nsimx) - reader = csiborgtools.read.PairOverlap(cat0, catx, paths) - - x = numpy.log10(cat0["totpartmass"]) - y = reader.overlap(True) - - def get_max(y_): - if len(y_) == 0: - return numpy.nan - return numpy.max(y_) - - y = numpy.array([get_max(y_) for y_ in y]) - - with plt.style.context(plt_utils.mplstyle): - plt.figure() - plt.hexbin(x, y, mincnt=1, bins="log", - gridsize=50) - plt.colorbar(label="Counts in bins") - plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - plt.ylabel("Maximum pair overlap") - plt.ylim(0., 1.) - - plt.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, f"mass_vs_maxpairoverlap{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - - - -def plot_mass_vsmedmaxoverlap(nsim0): - """ - Plot the mean maximum overlap of a reference simulation haloes with all the - cross simulations. - - Parameters - ---------- - nsim0 : int - Reference simulation index. - """ - x, __, max_overlap, __, __ = get_overlap("csiborg", nsim0) - - for i in trange(max_overlap.shape[0]): - if numpy.sum(numpy.isnan(max_overlap[i, :])) > 0: - max_overlap[i, :] = numpy.nan - - x = numpy.log10(x) - - with plt.style.context(plt_utils.mplstyle): - fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625)) - im1 = axs[0].hexbin(x, numpy.nanmean(max_overlap, axis=1), gridsize=30, - mincnt=1, bins="log") - - im2 = axs[1].hexbin(x, numpy.nanstd(max_overlap, axis=1), gridsize=30, - mincnt=1, bins="log") - im3 = axs[2].hexbin(numpy.nanmean(max_overlap, axis=1), - numpy.nanstd(max_overlap, axis=1), gridsize=30, - C=x, reduce_C_function=numpy.nanmean) - - axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[0].set_ylabel(r"Mean max. pair overlap") - axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[1].set_ylabel(r"Uncertainty of max. pair overlap") - axs[2].set_xlabel(r"Mean max. pair overlap") - axs[2].set_ylabel(r"Uncertainty of max. pair overlap") - - label = ["Bin counts", "Bin counts", r"$\log M_{\rm tot} / M_\odot$"] - ims = [im1, im2, im3] - for i in range(3): - axins = inset_axes(axs[i], width="100%", height="5%", - loc='upper center', borderpad=-0.75) - fig.colorbar(ims[i], cax=axins, orientation="horizontal", - label=label[i]) - axins.xaxis.tick_top() - axins.xaxis.set_tick_params(labeltop=True) - axins.xaxis.set_label_position("top") - - fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, f"maxpairoverlap_{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -def plot_summed_overlap_vs_mass(nsim0): - """ - Plot the summed overlap of probability of no matching for a single - reference simulations as a function of the reference halo mass, along with - their comparison. - - Parameters - ---------- - nsim0 : int - Simulation index. - - Returns - ------- - None - """ - x, __, __, summed_overlap, prob_nomatch = get_overlap("csiborg", nsim0) - del __ - collect() - - for i in trange(summed_overlap.shape[0]): - if numpy.sum(numpy.isnan(summed_overlap[i, :])) > 0: - summed_overlap[i, :] = numpy.nan - - x = numpy.log10(x) - - mean_overlap = numpy.nanmean(summed_overlap, axis=1) - std_overlap = numpy.nanstd(summed_overlap, axis=1) - mean_prob_nomatch = numpy.nanmean(prob_nomatch, axis=1) - - mask = mean_overlap > 0 - x = x[mask] - mean_overlap = mean_overlap[mask] - std_overlap = std_overlap[mask] - mean_prob_nomatch = mean_prob_nomatch[mask] - - with plt.style.context(plt_utils.mplstyle): - fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625)) - im1 = axs[0].hexbin(x, mean_overlap, mincnt=1, bins="log", - gridsize=30) - im2 = axs[1].hexbin(x, std_overlap, mincnt=1, bins="log", - gridsize=30) - im3 = axs[2].scatter(1 - mean_overlap, mean_prob_nomatch, c=x, s=2, - rasterized=True) - t = numpy.linspace(0.3, 1, 100) - axs[2].plot(t, t, color="red", linestyle="--") - axs[0].set_ylim(0.) - axs[1].set_ylim(0.) - axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[0].set_ylabel("Mean summed overlap") - axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[1].set_ylabel("Uncertainty of summed overlap") - axs[2].set_xlabel(r"$1 - $ mean summed overlap") - axs[2].set_ylabel("Mean prob. of no match") - - label = ["Bin counts", "Bin counts", - r"$\log M_{\rm tot} ~ [M_\odot / h]$"] - ims = [im1, im2, im3] - for i in range(3): - axins = inset_axes(axs[i], width="100%", height="5%", - loc='upper center', borderpad=-0.75) - fig.colorbar(ims[i], cax=axins, orientation="horizontal", - label=label[i]) - axins.xaxis.tick_top() - axins.xaxis.set_tick_params(labeltop=True) - axins.xaxis.set_label_position("top") - - fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, f"overlap_stat_{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -def plot_mass_vs_separation(nsim0, nsimx, plot_std=False, min_overlap=0.0): - """ - Plot the mass of a reference halo against the weighted separation of - its counterparts. - - Parameters - ---------- - nsim0 : int - Reference simulation index. - nsimx : int - Cross simulation index. - plot_std : bool, optional - Whether to plot thestd instead of mean. - min_overlap : float, optional - Minimum overlap to consider. - - Returns - ------- - None - """ - paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - cat0 = open_cat(nsim0) - catx = open_cat(nsimx) - - reader = csiborgtools.read.PairOverlap(cat0, catx, paths, - maxdist=155) - mass = numpy.log10(reader.cat0("totpartmass")) - dist = reader.dist(in_initial=False, norm_kind="r200c") - overlap = reader.overlap(True) - dist = csiborgtools.read.weighted_stats(dist, overlap, - min_weight=min_overlap) - - mask = numpy.isfinite(dist[:, 0]) - mass = mass[mask] - dist = dist[mask, :] - dist = numpy.log10(dist) - - if not plot_std: - p = numpy.polyfit(mass, dist[:, 0], 1) - else: - p = numpy.polyfit(mass, dist[:, 1], 1) - - xrange = numpy.linspace(numpy.min(mass), numpy.max(mass), 1000) - txt = r"$m = {}$, $c = {}$".format(*plt_utils.latex_float(*p, n=3)) - - with plt.style.context(plt_utils.mplstyle): - fig, ax = plt.subplots() - ax.set_title(txt, fontsize="small") - - if not plot_std: - cx = ax.hexbin(mass, dist[:, 0], mincnt=1, bins="log", gridsize=50) - ax.set_ylabel(r"$\log \langle \Delta R / R_{\rm 200c}\rangle$") - else: - cx = ax.hexbin(mass, dist[:, 1], mincnt=1, bins="log", gridsize=50) - ax.set_ylabel( - r"$\delta \log \langle \Delta R / R_{\rm 200c}\rangle$") - - ax.plot(xrange, numpy.polyval(p, xrange), color="red", - linestyle="--") - fig.colorbar(cx, label="Bin counts") - ax.set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - ax.set_ylabel(r"$\log \langle \Delta R / R_{\rm 200c}\rangle$") - - fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, - f"mass_vs_sep_{nsim0}_{nsimx}_{min_overlap}.{ext}") - if plot_std: - fout = fout.replace(f".{ext}", f"_std.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - - - -def plot_maxoverlap_mass(nsim0): - """ - Plot the mass of the reference haloes against the mass of the maximum - overlap haloes. - - Parameters - ---------- - nsim0 : int - Reference simulation index. - """ - mass0, __, __, stat = get_max_key("csiborg", nsim0, "totpartmass") - - mu = numpy.mean(stat, axis=1) - std = numpy.std(numpy.log10(stat), axis=1) - mu = numpy.log10(mu) - - mass0 = numpy.log10(mass0) - with plt.style.context(plt_utils.mplstyle): - fig, axs = plt.subplots(ncols=2, figsize=(3.5 * 1.75, 2.625)) - - im0 = axs[0].hexbin(mass0, mu, mincnt=1, bins="log", gridsize=50) - im1 = axs[1].hexbin(mass0, std, mincnt=1, bins="log", gridsize=50) - - m = numpy.isfinite(mass0) & numpy.isfinite(mu) - print("True to expectation corr: ", kendalltau(mass0[m], mu[m])) - - t = numpy.linspace(*numpy.percentile(mass0, [0, 100]), 1000) - axs[0].plot(t, t, color="red", linestyle="--") - axs[0].plot(t, t + 0.2, color="red", linestyle="--", alpha=0.5) - axs[0].plot(t, t - 0.2, color="red", linestyle="--", alpha=0.5) - - axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[0].set_ylabel( - r"Max. overlap mean of $\log M_{\rm tot} ~ [M_\odot / h]$") - axs[1].set_ylabel( - r"Max. overlap std. of $\log M_{\rm tot} ~ [M_\odot / h]$") - - ims = [im0, im1] - for i in range(2): - axins = inset_axes(axs[i], width="100%", height="5%", - loc='upper center', borderpad=-0.75) - fig.colorbar(ims[i], cax=axins, orientation="horizontal", - label="Bin counts") - axins.xaxis.tick_top() - axins.xaxis.set_tick_params(labeltop=True) - axins.xaxis.set_label_position("top") - - fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, - f"max_totpartmass_{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -def plot_maxoverlapstat(nsim0, key): - """ - Plot the mass of the reference haloes against the value of the maximum - overlap statistic. - - Parameters - ---------- - nsim0 : int - Reference simulation index. - key : str - Property to get. - """ - assert key != "totpartmass" - mass0, key_val, __, stat = get_max_key("csiborg", nsim0, key) - - xlabels = {"lambda200c": r"\log \lambda_{\rm 200c}"} - key_label = xlabels.get(key, key) - - mass0 = numpy.log10(mass0) - key_val = numpy.log10(key_val) - - mu = numpy.mean(stat, axis=1) - std = numpy.std(numpy.log10(stat), axis=1) - mu = numpy.log10(mu) - - with plt.style.context(plt_utils.mplstyle): - fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625)) - - im0 = axs[0].hexbin(mass0, mu, mincnt=1, bins="log", gridsize=30) - im1 = axs[1].hexbin(mass0, std, mincnt=1, bins="log", gridsize=30) - im2 = axs[2].hexbin(key_val, mu, mincnt=1, bins="log", gridsize=30) - m = numpy.isfinite(key_val) & numpy.isfinite(mu) - print("True to expectation corr: ", kendalltau(key_val[m], mu[m])) - - axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[0].set_ylabel(r"Max. overlap mean of ${}$".format(key_label)) - axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[1].set_ylabel(r"Max. overlap std. of ${}$".format(key_label)) - axs[2].set_xlabel(r"${}$".format(key_label)) - axs[2].set_ylabel(r"Max. overlap mean of ${}$".format(key_label)) - - ims = [im0, im1, im2] - for i in range(3): - axins = inset_axes(axs[i], width="100%", height="5%", - loc='upper center', borderpad=-0.75) - fig.colorbar(ims[i], cax=axins, orientation="horizontal", - label="Bin counts") - axins.xaxis.tick_top() - axins.xaxis.set_tick_params(labeltop=True) - axins.xaxis.set_label_position("top") - - fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, - f"max_{key}_{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -@cache_to_disk(7) -def get_expected_mass(simname, nsim0, min_overlap): - """ - Get the expected mass of a reference halo given its overlap with halos - from other simulations. - - Parameters - ---------- - simname : str - Simulation name. - nsim0 : int - Reference simulation index. - min_overlap : float - Minimum overlap to consider between a pair of haloes. - - Returns - ------- - mass : 1-dimensional array - Mass of the reference haloes. - mu : 1-dimensional array - Expected mass of the matched haloes. - std : 1-dimensional array - Standard deviation of the expected mass of the matched haloes. - prob_nomatch : 2-dimensional array - Probability of not matching the reference halo. - """ - paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths, - smoothed=True) - nsimxs = nsimxs - cat0 = open_cat(nsim0) - - catxs = [] - print("Opening catalogues...", flush=True) - for nsimx in tqdm(nsimxs): - catxs.append(open_cat(nsimx)) - - reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths) - mass = reader.cat0("totpartmass") - - mu, std = reader.counterpart_mass(True, overlap_threshold=min_overlap, - in_log=False, return_full=False) - prob_nomatch = reader.prob_nomatch(True) - return mass, mu, std, prob_nomatch - - -def plot_mass_vs_expected_mass(nsim0, min_overlap=0, max_prob_nomatch=1): - """ - Plot the mass of a reference halo against the expected mass of its - counterparts. - - Parameters - ---------- - nsim0 : int - Reference simulation index. - min_overlap : float, optional - Minimum overlap between a pair of haloes to consider. - max_prob_nomatch : float, optional - Maximum probability of no match to consider. - """ - mass, mu, std, prob_nomatch = get_expected_mass("csiborg", nsim0, - min_overlap) - - std = std / mu / numpy.log(10) - mass = numpy.log10(mass) - mu = numpy.log10(mu) - prob_nomatch = numpy.nanmedian(prob_nomatch, axis=1) - - mask = numpy.isfinite(mass) & numpy.isfinite(mu) - mask &= (prob_nomatch < max_prob_nomatch) - - with plt.style.context(plt_utils.mplstyle): - fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625)) - - im0 = axs[0].hexbin(mass[mask], mu[mask], mincnt=1, bins="log", - gridsize=50,) - im1 = axs[1].hexbin(mass[mask], std[mask], mincnt=1, bins="log", - gridsize=50) - im2 = axs[2].hexbin(1 - prob_nomatch[mask], mass[mask] - mu[mask], - gridsize=50, C=mass[mask], - reduce_C_function=numpy.nanmedian) - axs[2].axhline(0, color="red", linestyle="--", alpha=0.5) - axs[0].set_xlabel(r"True $\log M_{\rm tot} ~ [M_\odot / h]$") - axs[0].set_ylabel(r"Expected $\log M_{\rm tot} ~ [M_\odot / h]$") - axs[1].set_xlabel(r"True $\log M_{\rm tot} ~ [M_\odot / h]$") - axs[1].set_ylabel(r"Std. of $\sigma_{\log M_{\rm tot}}$") - axs[2].set_xlabel(r"1 - median prob. of no match") - axs[2].set_ylabel(r"$\log M_{\rm tot} - \log M_{\rm tot, exp}$") - - t = numpy.linspace(*numpy.percentile(mass[mask], [0, 100]), 1000) - axs[0].plot(t, t, color="red", linestyle="--") - axs[0].plot(t, t + 0.2, color="red", linestyle="--", alpha=0.5) - axs[0].plot(t, t - 0.2, color="red", linestyle="--", alpha=0.5) - - ims = [im0, im1, im2] - labels = ["Bin counts", "Bin counts", - r"$\log M_{\rm tot} ~ [M_\odot / h]$"] - for i in range(3): - axins = inset_axes(axs[i], width="100%", height="5%", - loc='upper center', borderpad=-0.75) - fig.colorbar(ims[i], cax=axins, orientation="horizontal", - label=labels[i]) - axins.xaxis.tick_top() - axins.xaxis.set_tick_params(labeltop=True) - axins.xaxis.set_label_position("top") - - fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, - f"mass_vs_expmass_{nsim0}_{max_prob_nomatch}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -############################################################################### -# Nearest neighbour plotting # -############################################################################### + return catxs def read_dist(simname, run, kind, kwargs): - """ - Read PDF/CDF of a nearest neighbour distribution. - - Parameters - ---------- - simname : str - Simulation name. Must be either `csiborg` or `quijote`. - run : str - Run name. - kind : str - Kind of distribution. Must be either `pdf` or `cdf`. - kwargs : dict - Nearest neighbour reader keyword arguments. - - Returns - ------- - dist : 2-dimensional array - Distribution of distances in radial and neighbour bins. - """ paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) @@ -707,26 +70,6 @@ def read_dist(simname, run, kind, kwargs): def pull_cdf(x, fid_cdf, test_cdf): - """ - Pull a CDF so that it matches the fiducial CDF at 0.5. Rescales the x-axis, - while keeping the corresponding CDF values fixed. - - Parameters - ---------- - x : 1-dimensional array - The x-axis of the CDF. - fid_cdf : 1-dimensional array - The fiducial CDF. - test_cdf : 1-dimensional array - The test CDF to be pulled. - - Returns - ------- - xnew : 1-dimensional array - The new x-axis of the test CDF. - test_cdf : 1-dimensional array - The new test CDF. - """ xnew = x * numpy.interp(0.5, fid_cdf, x) / numpy.interp(0.5, test_cdf, x) return xnew, test_cdf @@ -1360,7 +703,7 @@ def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass, plot_std=True, for run in runs: nn_data = nn_reader.read_single("csiborg", run, nsim, nobs=None) nn_hindxs = nn_data["ref_hindxs"] - mass, overlap_hindxs, __, summed_overlap, prob_nomatch = get_overlap("csiborg", nsim) # noqa + mass, overlap_hindxs, __, summed_overlap, prob_nomatch = get_overlap_summary("csiborg", nsim) # noqa # We need to match the hindxs between the two. hind2overlap_array = {hind: i for i, hind in enumerate(overlap_hindxs)} @@ -1457,8 +800,8 @@ if __name__ == "__main__": "mass009": (14.0, 14.4), # There is no upper limit. } - # cached_funcs = ["get_overlap", "read_dist", "make_kl", "make_ks"] - cached_funcs = ["get_max_key"] + # cached_funcs = ["get_overlap_summary", "read_dist", "make_kl", "make_ks"] + cached_funcs = ["get_property_maxoverlap"] if args.clean: for func in cached_funcs: print(f"Cleaning cache for function {func}.") diff --git a/scripts_plots/plot_match.py b/scripts_plots/plot_match.py index 0c602c1..2ba1d31 100644 --- a/scripts_plots/plot_match.py +++ b/scripts_plots/plot_match.py @@ -13,283 +13,313 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from argparse import ArgumentParser -from gc import collect from os.path import join -import matplotlib as mpl import matplotlib.pyplot as plt -from mpl_toolkits.axes_grid1.inset_locator import inset_axes import numpy import scienceplots # noqa from cache_to_disk import cache_to_disk, delete_disk_caches_for_function from scipy.stats import kendalltau -from tqdm import trange, tqdm +from tqdm import tqdm, trange +import csiborgtools import plt_utils -try: - import csiborgtools -except ModuleNotFoundError: - import sys - sys.path.append("../") - import csiborgtools +MASS_KINDS = {"csiborg": "fof_totpartmass", + "quijote": "group_mass", + } -############################################################################### -# IC overlap plotting # -############################################################################### - -def open_cat(nsim): - """ - Open a CSiBORG halo catalogue. Applies only mass selection. - - Parameters - ---------- - nsim : int - Simulation index. - - Returns - ------- - cat : csiborgtools.read.CSiBORGHaloCatalogue - """ +def open_cat(nsim, simname): paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - bounds = {"dist": (0, 155), "totpartmass": (1e12, None)} - return csiborgtools.read.CSiBORGHaloCatalogue(nsim, paths, bounds=bounds) + + if simname == "csiborg": + bounds = {"dist": (0, 155)} + cat = csiborgtools.read.CSiBORGHaloCatalogue( + nsim, paths, bounds=bounds) + elif simname == "quijote": + cat = csiborgtools.read.QuijoteHaloCatalogue( + nsim, paths, nsnap=4, load_fitted=True, load_initial=True, + with_lagpatch=False) + else: + raise ValueError(f"Unknown simulation name: {simname}.") + + return cat -def plot_mass_vs_pairoverlap(nsim0, nsimx): - """ - Plot the pair overlap of a reference simulation with a single cross - simulation as a function of the reference halo mass. +def open_cats(nsims, simname): + catxs = [None] * len(nsims) - Parameters - ---------- - nsim0 : int - Reference simulation index. - nsimx : int - Cross simulation index. - """ + for i, nsim in enumerate(tqdm(nsims, desc="Opening catalogues")): + catxs[i] = open_cat(nsim, simname) + + return catxs + + +@cache_to_disk(120) +def get_overlap_summary(nsim0, simname, min_logmass, smoothed): paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - cat0 = open_cat(nsim0) - catx = open_cat(nsimx) - reader = csiborgtools.read.PairOverlap(cat0, catx, paths) + nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths, + min_logmass, smoothed=smoothed) + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) - x = reader.copy_per_match("totpartmass") - y = reader.overlap(True) + reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths, min_logmass) + mass0 = reader.cat0(MASS_KINDS[simname]) + mask = mass0 > 10**min_logmass - x = numpy.log10(numpy.concatenate(x)) + return {"mass0": mass0[mask], + "hid0": reader.cat0("index")[mask], + "summed_overlap": reader.summed_overlap(smoothed)[mask], + "max_overlap": reader.max_overlap(0, smoothed)[mask], + "prob_nomatch": reader.prob_nomatch(smoothed)[mask], + } + + +@cache_to_disk(120) +def get_expected_mass(nsim0, simname, min_overlap, min_logmass, smoothed): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths, + min_logmass, smoothed=True)[:3] + + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths, min_logmass) + mass0 = reader.cat0(MASS_KINDS[simname]) + mask = mass0 > 10**min_logmass + mu, std = reader.counterpart_mass( + from_smoothed=True, overlap_threshold=min_overlap, return_full=False) + + return {"mass0": mass0[mask], + "mu": mu[mask], + "std": std[mask], + "prob_nomatch": reader.prob_nomatch(smoothed)[mask], + } + +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs pair overlaps # +############################################################################### +# --------------------------------------------------------------------------- # + + +@cache_to_disk(120) +def get_mtot_vs_all_pairoverlap(nsim0, simname, mass_kind, min_logmass, + smoothed, nbins): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths, + min_logmass, smoothed=smoothed) + nsimxs = nsimxs + + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths, min_logmass) + + x = [None] * len(catxs) + y = [None] * len(catxs) + for i in trange(len(catxs), desc="Stacking catalogues"): + x[i] = numpy.log10( + numpy.concatenate(reader[i].copy_per_match(mass_kind))) + y[i] = numpy.concatenate(reader[i].overlap(smoothed)) + + x = numpy.concatenate(x) y = numpy.concatenate(y) + xbins = numpy.linspace(min(x), max(x), nbins) + + return x, y, xbins + + +def mtot_vs_all_pairoverlap(nsim0, simname, min_logmass, smoothed, nbins, + ext="png"): + mass_kind = MASS_KINDS[simname] + x, y, xbins = get_mtot_vs_all_pairoverlap(nsim0, simname, mass_kind, + min_logmass, smoothed, nbins) + with plt.style.context(plt_utils.mplstyle): plt.figure() - plt.hexbin(x, y, mincnt=1, bins="log", - gridsize=50) - plt.colorbar(label="Counts in bins") + hb = plt.hexbin(x, y, mincnt=1, gridsize=50, bins="log") + + y_median, yerr = plt_utils.compute_error_bars(x, y, xbins, sigma=2) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3, + label="CSiBORG" if simname == "csiborg" else None) + + if simname == "csiborg": + x_quijote, y_quijote, xbins_quijote = get_mtot_vs_all_pairoverlap( + 0, "quijote", "group_mass", min_logmass, smoothed, nbins) + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + x_quijote, y_quijote, xbins_quijote, sigma=2) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]) + 0.01, + y_median_quijote, yerr=yerr_quijote, + color='blue', ls='dashed', capsize=3, + label="Quijote") + plt.legend(ncol=2, fontsize="small") + + plt.colorbar(hb, label="Counts in bins") plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") plt.ylabel("Pair overlap") + plt.xlim(numpy.min(x)) plt.ylim(0., 1.) plt.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, f"mass_vs_pair_overlap{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + fout = join(plt_utils.fout, + f"mass_vs_pair_overlap_{simname}_{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") plt.close() -def plot_mass_vs_maxpairoverlap(nsim0, nsimx): - """ - Plot the maximum pair overlap of a reference simulation haloes with a - single cross simulation. +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs maximum pair overlaps # +############################################################################### +# --------------------------------------------------------------------------- # - Parameters - ---------- - nsim0 : int - Reference simulation index. - nsimx : int - Cross simulation index. - """ + +@cache_to_disk(120) +def get_mtot_vs_maxpairoverlap(nsim0, simname, mass_kind, min_logmass, + smoothed, nbins): paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - cat0 = open_cat(nsim0) - catx = open_cat(nsimx) - reader = csiborgtools.read.PairOverlap(cat0, catx, paths) - - x = numpy.log10(cat0["totpartmass"]) - y = reader.overlap(True) + nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths, + min_logmass, smoothed=smoothed) + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) def get_max(y_): if len(y_) == 0: - return numpy.nan + return 0 return numpy.max(y_) - y = numpy.array([get_max(y_) for y_ in y]) + reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths, min_logmass) + x = [None] * len(catxs) + y = [None] * len(catxs) + for i in trange(len(catxs), desc="Stacking catalogues"): + x[i] = numpy.log10(cat0[mass_kind]) + y[i] = numpy.array([get_max(y_) for y_ in reader[i].overlap(smoothed)]) + + mask = x[i] > min_logmass + x[i] = x[i][mask] + y[i] = y[i][mask] + + x = numpy.concatenate(x) + y = numpy.concatenate(y) + + xbins = numpy.linspace(min(x), max(x), nbins) + + return x, y, xbins + + +def mtot_vs_maxpairoverlap(nsim0, simname, mass_kind, min_logmass, smoothed, + nbins, ext="png"): + x, y, xbins = get_mtot_vs_maxpairoverlap(nsim0, simname, mass_kind, + min_logmass, smoothed, nbins) + + plt.close("all") with plt.style.context(plt_utils.mplstyle): plt.figure() - plt.hexbin(x, y, mincnt=1, bins="log", - gridsize=50) + plt.hexbin(x, y, mincnt=1, gridsize=50, bins="log") + + y_median, yerr = plt_utils.compute_error_bars(x, y, xbins, sigma=2) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3, + label="CSiBORG" if simname == "csiborg" else None) + + if simname == "csiborg": + x_quijote, y_quijote, xbins_quijote = get_mtot_vs_all_pairoverlap( + 0, "quijote", "group_mass", min_logmass, smoothed, nbins) + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + x_quijote, y_quijote, xbins_quijote, sigma=2) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]) + 0.01, + y_median_quijote, yerr=yerr_quijote, + color='blue', ls='dashed', capsize=3, + label="Quijote") + plt.legend(ncol=2, fontsize="small") + plt.colorbar(label="Counts in bins") plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") plt.ylabel("Maximum pair overlap") - plt.ylim(0., 1.) + plt.ylim(-0.02, 1.) + plt.xlim(numpy.min(x) - 0.05) plt.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, f"mass_vs_maxpairoverlap{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + fout = join(plt_utils.fout, f"mass_vs_max_pair_overlap{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") plt.close() -@cache_to_disk(7) -def get_overlap(simname, nsim0): - """ - Calculate the summed overlap and probability of no match for a single - reference simulation. - - Parameters - ---------- - simname : str - Simulation name. - nsim0 : int - Simulation index. - - Returns - ------- - mass : 1-dimensional array - Mass of halos in the reference simulation. - hindxs : 1-dimensional array - Halo indices in the reference simulation. - max_overlap : 2-dimensional array - Maximum overlap for each halo in the reference simulation. - summed_overlap : 2-dimensional array - Summed overlap for each halo in the reference simulation. - prob_nomatch : 2-dimensional array - Probability of no match for each halo in the reference simulation. - """ - paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths, - smoothed=True) - cat0 = open_cat(nsim0) - - catxs = [] - print("Opening catalogues...", flush=True) - for nsimx in tqdm(nsimxs): - catxs.append(open_cat(nsimx)) - - reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths) - mass = reader.cat0("totpartmass") - - hindxs = reader.cat0("index") - summed_overlap = reader.summed_overlap(True) - max_overlap = reader.max_overlap(True) - prob_nomatch = reader.prob_nomatch(True) - return mass, hindxs, max_overlap, summed_overlap, prob_nomatch +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs summed pair overlaps # +############################################################################### +# --------------------------------------------------------------------------- # -def plot_mass_vsmedmaxoverlap(nsim0): - """ - Plot the mean maximum overlap of a reference simulation haloes with all the - cross simulations. +def mtot_vs_summedpairoverlap(nsim0, simname, min_logmass, smoothed, nbins, + ext="png"): + x = get_overlap_summary(nsim0, simname, min_logmass, smoothed) - Parameters - ---------- - nsim0 : int - Reference simulation index. - """ - x, __, max_overlap, __, __ = get_overlap("csiborg", nsim0) + mass0 = numpy.log10(x["mass0"]) + mean_overlap = numpy.nanmean(x["summed_overlap"], axis=1) + std_overlap = numpy.nanstd(x["summed_overlap"], axis=1) + mean_prob_nomatch = numpy.nanmean(x["prob_nomatch"], axis=1) - for i in trange(max_overlap.shape[0]): - if numpy.sum(numpy.isnan(max_overlap[i, :])) > 0: - max_overlap[i, :] = numpy.nan - - x = numpy.log10(x) + xbins = numpy.linspace(numpy.nanmin(mass0), numpy.nanmax(mass0), nbins) with plt.style.context(plt_utils.mplstyle): fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625)) - im1 = axs[0].hexbin(x, numpy.nanmean(max_overlap, axis=1), gridsize=30, - mincnt=1, bins="log") - - im2 = axs[1].hexbin(x, numpy.nanstd(max_overlap, axis=1), gridsize=30, - mincnt=1, bins="log") - im3 = axs[2].hexbin(numpy.nanmean(max_overlap, axis=1), - numpy.nanstd(max_overlap, axis=1), gridsize=30, - C=x, reduce_C_function=numpy.nanmean) - - axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[0].set_ylabel(r"Mean max. pair overlap") - axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[1].set_ylabel(r"Uncertainty of max. pair overlap") - axs[2].set_xlabel(r"Mean max. pair overlap") - axs[2].set_ylabel(r"Uncertainty of max. pair overlap") - - label = ["Bin counts", "Bin counts", r"$\log M_{\rm tot} / M_\odot$"] - ims = [im1, im2, im3] - for i in range(3): - axins = inset_axes(axs[i], width="100%", height="5%", - loc='upper center', borderpad=-0.75) - fig.colorbar(ims[i], cax=axins, orientation="horizontal", - label=label[i]) - axins.xaxis.tick_top() - axins.xaxis.set_tick_params(labeltop=True) - axins.xaxis.set_label_position("top") - - fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, f"maxpairoverlap_{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -def plot_summed_overlap_vs_mass(nsim0): - """ - Plot the summed overlap of probability of no matching for a single - reference simulations as a function of the reference halo mass, along with - their comparison. - - Parameters - ---------- - nsim0 : int - Simulation index. - - Returns - ------- - None - """ - x, __, __, summed_overlap, prob_nomatch = get_overlap("csiborg", nsim0) - del __ - collect() - - for i in trange(summed_overlap.shape[0]): - if numpy.sum(numpy.isnan(summed_overlap[i, :])) > 0: - summed_overlap[i, :] = numpy.nan - - x = numpy.log10(x) - - mean_overlap = numpy.nanmean(summed_overlap, axis=1) - std_overlap = numpy.nanstd(summed_overlap, axis=1) - mean_prob_nomatch = numpy.nanmean(prob_nomatch, axis=1) - - mask = mean_overlap > 0 - x = x[mask] - mean_overlap = mean_overlap[mask] - std_overlap = std_overlap[mask] - mean_prob_nomatch = mean_prob_nomatch[mask] - - with plt.style.context(plt_utils.mplstyle): - fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625)) - im1 = axs[0].hexbin(x, mean_overlap, mincnt=1, bins="log", + im1 = axs[0].hexbin(mass0, mean_overlap, mincnt=1, bins="log", gridsize=30) - im2 = axs[1].hexbin(x, std_overlap, mincnt=1, bins="log", + + y_median, yerr = plt_utils.compute_error_bars( + mass0, mean_overlap, xbins, sigma=2) + axs[0].errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3) + + im2 = axs[1].hexbin(mass0, std_overlap, mincnt=1, bins="log", gridsize=30) - im3 = axs[2].scatter(1 - mean_overlap, mean_prob_nomatch, c=x, s=2, - rasterized=True) - t = numpy.linspace(0.3, 1, 100) + + y_median, yerr = plt_utils.compute_error_bars( + mass0, std_overlap, xbins, sigma=2) + axs[1].errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3) + + if simname == "csiborg": + x_quijote = get_overlap_summary(0, "quijote", min_logmass, + smoothed) + mass0_quijote = numpy.log10(x_quijote["mass0"]) + mean_overlap_quijote = numpy.nanmean(x_quijote["summed_overlap"], + axis=1) + std_overlap_quijote = numpy.nanstd(x_quijote["summed_overlap"], + axis=1) + xbins_quijote = numpy.linspace(numpy.nanmin(mass0), + numpy.nanmax(mass0), nbins) + + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + mass0_quijote, mean_overlap_quijote, xbins_quijote, sigma=2) + axs[0].errorbar(0.5 * (xbins[1:] + xbins[:-1]) + 0.01, + y_median_quijote, yerr=yerr_quijote, + color='blue', ls='dashed', capsize=3) + + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + mass0_quijote, std_overlap_quijote, xbins_quijote, sigma=2) + axs[1].errorbar(0.5 * (xbins[1:] + xbins[:-1]) + 0.01, + y_median_quijote, yerr=yerr_quijote, + color='blue', ls='dashed', capsize=3) + + im3 = axs[2].scatter(1 - mean_overlap, mean_prob_nomatch, c=mass0, + s=2, rasterized=True) + + t = numpy.linspace(numpy.nanmin(1 - mean_overlap), 1, 100) axs[2].plot(t, t, color="red", linestyle="--") - axs[0].set_ylim(0.) - axs[1].set_ylim(0.) + + axs[0].set_ylim(0., 0.75) + axs[0].set_xlim(numpy.min(mass0)) + axs[0].set_xlim(numpy.min(mass0)) axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") axs[0].set_ylabel("Mean summed overlap") axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") @@ -301,8 +331,7 @@ def plot_summed_overlap_vs_mass(nsim0): r"$\log M_{\rm tot} ~ [M_\odot / h]$"] ims = [im1, im2, im3] for i in range(3): - axins = inset_axes(axs[i], width="100%", height="5%", - loc='upper center', borderpad=-0.75) + axins = axs[i].inset_axes([0.0, 1.0, 1.0, 0.05]) fig.colorbar(ims[i], cax=axins, orientation="horizontal", label=label[i]) axins.xaxis.tick_top() @@ -310,162 +339,336 @@ def plot_summed_overlap_vs_mass(nsim0): axins.xaxis.set_label_position("top") fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, f"overlap_stat_{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + fout = join(plt_utils.fout, f"overlap_stat_{simname}_{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") plt.close() -def plot_mass_vs_separation(nsim0, nsimx, plot_std=False, min_overlap=0.0): - """ - Plot the mass of a reference halo against the weighted separation of - its counterparts. +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs mean maximum overlap # +############################################################################### +# --------------------------------------------------------------------------- # - Parameters - ---------- - nsim0 : int - Reference simulation index. - nsimx : int - Cross simulation index. - plot_std : bool, optional - Whether to plot thestd instead of mean. - min_overlap : float, optional - Minimum overlap to consider. - Returns - ------- - None - """ +def mtot_vs_mean_max_overlap(nsim0, simname, min_logmass, smoothed, nbins): + x = get_overlap_summary(nsim0, simname, min_logmass, smoothed) + + mass0 = numpy.log10(x["mass0"]) + max_overlap = x["max_overlap"] + + xbins = numpy.linspace(numpy.nanmin(mass0), numpy.nanmax(mass0), nbins) + + mean_max_overlap = numpy.nanmean(max_overlap, axis=1) + std_max_overlap = numpy.nanstd(max_overlap, axis=1) + + with plt.style.context(plt_utils.mplstyle): + fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625)) + + im1 = axs[0].hexbin(mass0, mean_max_overlap, gridsize=30, mincnt=1, + bins="log") + y_median, yerr = plt_utils.compute_error_bars( + mass0, mean_max_overlap, xbins, sigma=2) + axs[0].errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3, + label="CSiBORG" if simname == "csiborg" else None) + + im2 = axs[1].hexbin(mass0, std_max_overlap, gridsize=30, mincnt=1, + bins="log") + y_median, yerr = plt_utils.compute_error_bars( + mass0, std_max_overlap, xbins, sigma=2) + axs[1].errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3) + + if simname == "csiborg": + x_quijote = get_overlap_summary(0, "quijote", min_logmass, + smoothed) + mass0_quijote = numpy.log10(x_quijote["mass0"]) + max_overlap_quijote = x_quijote["max_overlap"] + + xbins_quijote = numpy.linspace(numpy.nanmin(mass0_quijote), + numpy.nanmax(mass0_quijote), nbins) + mean_max_overlap_quijote = numpy.nanmean(max_overlap_quijote, + axis=1) + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + mass0_quijote, mean_max_overlap_quijote, xbins_quijote, + sigma=2) + axs[0].errorbar(0.5 * (xbins[1:] + xbins[:-1]) + 0.01, + y_median_quijote, yerr=yerr_quijote, + color='blue', ls='dashed', capsize=3, + label="Quijote") + + std_max_overlap_quijote = numpy.nanstd(max_overlap_quijote, + axis=1) + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + mass0_quijote, std_max_overlap_quijote, xbins_quijote, + sigma=2) + axs[1].errorbar(0.5 * (xbins[1:] + xbins[:-1]) + 0.01, + y_median_quijote, yerr=yerr_quijote, + color='blue', ls='dashed', capsize=3) + + axs[0].legend(fontsize="small") + + im3 = axs[2].hexbin(numpy.nanmean(max_overlap, axis=1), + numpy.nanstd(max_overlap, axis=1), gridsize=30, + C=mass0, reduce_C_function=numpy.nanmean) + + axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + axs[0].set_ylim(0.0, 0.75) + axs[0].set_ylabel(r"Mean max. pair overlap") + axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + axs[1].set_ylabel(r"Uncertainty of max. pair overlap") + axs[2].set_xlabel(r"Mean max. pair overlap") + axs[2].set_ylabel(r"Uncertainty of max. pair overlap") + + label = ["Bin counts", "Bin counts", + r"$\log M_{\rm tot} ~ [M_\odot / h]$"] + ims = [im1, im2, im3] + for i in range(3): + axins = axs[i].inset_axes([0.0, 1.0, 1.0, 0.05]) + fig.colorbar(ims[i], cax=axins, orientation="horizontal", + label=label[i]) + axins.xaxis.tick_top() + axins.xaxis.set_tick_params(labeltop=True) + axins.xaxis.set_label_position("top") + + fig.tight_layout() + fout = join(plt_utils.fout, f"max_pairoverlap_{simname}_{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs mean separation # +############################################################################### +# --------------------------------------------------------------------------- # + +@cache_to_disk(120) +def get_mass_vs_separation(nsim0, nsimx, simname, min_logmass, boxsize, + smoothed): paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - cat0 = open_cat(nsim0) - catx = open_cat(nsimx) - reader = csiborgtools.read.PairOverlap(cat0, catx, paths, - maxdist=155) - mass = numpy.log10(reader.cat0("totpartmass")) - dist = reader.dist(in_initial=False, norm_kind="r200c") - overlap = reader.overlap(True) - dist = csiborgtools.read.weighted_stats(dist, overlap, - min_weight=min_overlap) + cat0 = open_cat(nsim0, simname) + catx = open_cat(nsimx, simname) + + reader = csiborgtools.read.PairOverlap(cat0, catx, paths, min_logmass) + + mass = numpy.log10(reader.cat0(MASS_KINDS[simname])) + dist = reader.dist(in_initial=False, boxsize=boxsize, norm_kind="r200c") + overlap = reader.overlap(smoothed) + dist = csiborgtools.read.weighted_stats(dist, overlap, min_weight=0) mask = numpy.isfinite(dist[:, 0]) mass = mass[mask] dist = dist[mask, :] dist = numpy.log10(dist) - if not plot_std: - p = numpy.polyfit(mass, dist[:, 0], 1) - else: - p = numpy.polyfit(mass, dist[:, 1], 1) + return mass, dist - xrange = numpy.linspace(numpy.min(mass), numpy.max(mass), 1000) - txt = r"$m = {}$, $c = {}$".format(*plt_utils.latex_float(*p, n=3)) + +def mass_vs_separation(nsim0, nsimx, simname, min_logmass, nbins, smoothed, + boxsize, plot_std): + mass, dist = get_mass_vs_separation(nsim0, nsimx, simname, min_logmass, + boxsize, smoothed) + xbins = numpy.linspace(numpy.nanmin(mass), numpy.nanmax(mass), nbins) + y = dist[:, 0] if not plot_std else dist[:, 1] with plt.style.context(plt_utils.mplstyle): fig, ax = plt.subplots() - ax.set_title(txt, fontsize="small") + + cx = ax.hexbin(mass, y, mincnt=1, bins="log", gridsize=50) + y_median, yerr = plt_utils.compute_error_bars(mass, y, xbins, sigma=2) + ax.errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3, + label="CSiBORG" if simname == "csiborg" else None) + + if simname == "csiborg": + mass_quijote, dist_quijote = get_mass_vs_separation( + 0, 1, "quijote", min_logmass, boxsize, smoothed) + xbins_quijote = numpy.linspace(numpy.nanmin(mass_quijote), + numpy.nanmax(mass_quijote), nbins) + if not plot_std: + y_quijote = dist_quijote[:, 0] + else: + y_quijote = dist_quijote[:, 1] + print(mass_quijote) + print(y_quijote) + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + mass_quijote, y_quijote, xbins_quijote, sigma=2) + ax.errorbar(0.5 * (xbins_quijote[1:] + xbins_quijote[:-1]), + y_median_quijote, yerr=yerr_quijote, color='blue', + ls='dashed', + capsize=3, label="Quijote") + ax.legend(fontsize="small", loc="upper left") if not plot_std: - cx = ax.hexbin(mass, dist[:, 0], mincnt=1, bins="log", gridsize=50) ax.set_ylabel(r"$\log \langle \Delta R / R_{\rm 200c}\rangle$") else: - cx = ax.hexbin(mass, dist[:, 1], mincnt=1, bins="log", gridsize=50) ax.set_ylabel( r"$\delta \log \langle \Delta R / R_{\rm 200c}\rangle$") - ax.plot(xrange, numpy.polyval(p, xrange), color="red", - linestyle="--") fig.colorbar(cx, label="Bin counts") ax.set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") ax.set_ylabel(r"$\log \langle \Delta R / R_{\rm 200c}\rangle$") fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, - f"mass_vs_sep_{nsim0}_{nsimx}_{min_overlap}.{ext}") - if plot_std: - fout = fout.replace(f".{ext}", f"_std.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + fout = join(plt_utils.fout, + f"mass_vs_sep_{simname}_{nsim0}_{nsimx}.{ext}") + if plot_std: + fout = fout.replace(f".{ext}", f"_std.{ext}") + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") plt.close() -@cache_to_disk(7) -def get_max_key(simname, nsim0, key): - """ - Get the value of a maximum overlap halo's property. +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs max overlap separation # +############################################################################### - Parameters - ---------- - simname : str - Simulation name. - nsim0 : int - Reference simulation index. - key : str - Property to get. - Returns - ------- - mass0 : 1-dimensional array - Mass of the reference haloes. - key_val : 1-dimensional array - Value of the property of the reference haloes. - max_overlap : 2-dimensional array - Maximum overlap of the reference haloes. - stat : 2-dimensional array - Value of the property of the maximum overlap halo. - """ +@cache_to_disk(120) +def get_mass_vs_max_overlap_separation(nsim0, nsimx, simname, min_logmass, + boxsize, smoothed): paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths, - smoothed=True) - nsimxs = nsimxs - cat0 = open_cat(nsim0) - catxs = [] - print("Opening catalogues...", flush=True) - for nsimx in tqdm(nsimxs): - catxs.append(open_cat(nsimx)) + cat0 = open_cat(nsim0, simname) + catx = open_cat(nsimx, simname) - reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths) + reader = csiborgtools.read.PairOverlap(cat0, catx, paths, min_logmass) - mass0 = reader.cat0("totpartmass") - key_val = reader.cat0(key) - max_overlap = reader.max_overlap(True) - stat = reader.max_overlap_key(key, True) - return mass0, key_val, max_overlap, stat + mass = numpy.log10(reader.cat0(MASS_KINDS[simname])) + dist = reader.dist(in_initial=False, boxsize=boxsize, norm_kind="r200c") + overlap = reader.overlap(smoothed) + + maxoverlap_dist = numpy.full(len(cat0), numpy.nan, dtype=numpy.float32) + + for i in range(len(cat0)): + if len(overlap[i]) == 0: + continue + imax = numpy.argmax(overlap[i]) + maxoverlap_dist[i] = dist[i][imax] + + mask = numpy.isfinite(maxoverlap_dist) + mass = mass[mask] + maxoverlap_dist = numpy.log10(maxoverlap_dist[mask]) + return mass, maxoverlap_dist -def plot_maxoverlap_mass(nsim0): - """ - Plot the mass of the reference haloes against the mass of the maximum - overlap haloes. +def mass_vs_maxoverlap_separation(nsim0, nsimx, simname, min_logmass, nbins, + smoothed, boxsize, plot_std): + mass, y = get_mass_vs_max_overlap_separation( + nsim0, nsimx, simname, min_logmass, boxsize, smoothed) + xbins = numpy.linspace(numpy.nanmin(mass), numpy.nanmax(mass), nbins) - Parameters - ---------- - nsim0 : int - Reference simulation index. - """ - mass0, __, __, stat = get_max_key("csiborg", nsim0, "totpartmass") + with plt.style.context(plt_utils.mplstyle): + fig, ax = plt.subplots() - mu = numpy.mean(stat, axis=1) - std = numpy.std(numpy.log10(stat), axis=1) - mu = numpy.log10(mu) + cx = ax.hexbin(mass, y, mincnt=1, bins="log", gridsize=50) + y_median, yerr = plt_utils.compute_error_bars(mass, y, xbins, sigma=2) + ax.errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3, + label="CSiBORG" if simname == "csiborg" else None) + + if simname == "csiborg": + mass_quijote, y_quijote = get_mass_vs_max_overlap_separation( + 0, 1, "quijote", min_logmass, boxsize, smoothed) + xbins_quijote = numpy.linspace(numpy.nanmin(mass_quijote), + numpy.nanmax(mass_quijote), nbins) + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + mass_quijote, y_quijote, xbins_quijote, sigma=2) + ax.errorbar(0.5 * (xbins_quijote[1:] + xbins_quijote[:-1]), + y_median_quijote, yerr=yerr_quijote, color='blue', + ls='dashed', + capsize=3, label="Quijote") + ax.legend(fontsize="small", loc="upper left") + + fig.colorbar(cx, label="Bin counts") + ax.set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + ax.set_ylabel(r"$\log (\Delta R_{\mathcal{O}_{\max}} / R_{\rm 200c})$") + + fig.tight_layout() + fout = join(plt_utils.fout, + f"mass_vs_maxoverlap_sep_{simname}_{nsim0}_{nsimx}.{ext}") + if plot_std: + fout = fout.replace(f".{ext}", f"_std.{ext}") + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs maximum overlap matched mass # +############################################################################### +# --------------------------------------------------------------------------- # + + +@cache_to_disk(120) +def get_property_maxoverlap(nsim0, simname, min_logmass, key, min_overlap, + smoothed): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths, + min_logmass, smoothed=smoothed) + + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths, min_logmass) + mass0 = reader.cat0(MASS_KINDS[simname]) + mask = mass0 > 10**min_logmass + + max_overlap = reader.max_overlap(min_overlap, smoothed)[mask] + prop_maxoverlap = reader.max_overlap_key(key, min_overlap, smoothed)[mask] + + return {"mass0": mass0[mask], + "prop0": reader.cat0(key)[mask], + "max_overlap": max_overlap, + "prop_maxoverlap": prop_maxoverlap, + } + + +def mtot_vs_maxoverlap_mass(nsim0, simname, min_logmass, smoothed, nbins, + min_overlap=0, ext="png"): + mass_kind = MASS_KINDS[simname] + x = get_property_maxoverlap(nsim0, simname, min_logmass, mass_kind, + min_overlap, smoothed) + + mass0 = numpy.log10(x["mass0"]) + stat = numpy.log10(x["prop_maxoverlap"]) + weight = x["max_overlap"] + + mu = plt_utils.nan_weighted_average(stat, weight, axis=1) + std = plt_utils.nan_weighted_std(stat, weight, axis=1) + + xbins = numpy.linspace(*numpy.percentile(mass0, [0, 100]), nbins) - mass0 = numpy.log10(mass0) with plt.style.context(plt_utils.mplstyle): fig, axs = plt.subplots(ncols=2, figsize=(3.5 * 1.75, 2.625)) - im0 = axs[0].hexbin(mass0, mu, mincnt=1, bins="log", gridsize=50) - im1 = axs[1].hexbin(mass0, std, mincnt=1, bins="log", gridsize=50) - m = numpy.isfinite(mass0) & numpy.isfinite(mu) + + im0 = axs[0].hexbin(mass0, mu, mincnt=1, bins="log", gridsize=50) + y_median, yerr = plt_utils.compute_error_bars(mass0[m], mu[m], xbins, + sigma=2) + axs[0].errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3) + + im1 = axs[1].hexbin(mass0, std, mincnt=1, bins="log", gridsize=50) + y_median, yerr = plt_utils.compute_error_bars(mass0[m], std[m], xbins, + sigma=2) + axs[1].errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3) + print("True to expectation corr: ", kendalltau(mass0[m], mu[m])) t = numpy.linspace(*numpy.percentile(mass0, [0, 100]), 1000) - axs[0].plot(t, t, color="red", linestyle="--") - axs[0].plot(t, t + 0.2, color="red", linestyle="--", alpha=0.5) - axs[0].plot(t, t - 0.2, color="red", linestyle="--", alpha=0.5) + axs[0].plot(t, t, color="blue", linestyle="--") + axs[0].plot(t, t + 0.2, color="blue", linestyle="--", alpha=0.5) + axs[0].plot(t, t - 0.2, color="blue", linestyle="--", alpha=0.5) axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") @@ -476,8 +679,7 @@ def plot_maxoverlap_mass(nsim0): ims = [im0, im1] for i in range(2): - axins = inset_axes(axs[i], width="100%", height="5%", - loc='upper center', borderpad=-0.75) + axins = axs[i].inset_axes([0.0, 1.0, 1.0, 0.05]) fig.colorbar(ims[i], cax=axins, orientation="horizontal", label="Bin counts") axins.xaxis.tick_top() @@ -485,174 +687,76 @@ def plot_maxoverlap_mass(nsim0): axins.xaxis.set_label_position("top") fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, - f"max_totpartmass_{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() + fout = join(plt_utils.fout, + f"max_totpartmass_{simname}_{nsim0}_{min_logmass}_{min_overlap}.{ext}") # noqa + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.show() -def plot_maxoverlapstat(nsim0, key): - """ - Plot the mass of the reference haloes against the value of the maximum - overlap statistic. - - Parameters - ---------- - nsim0 : int - Reference simulation index. - key : str - Property to get. - """ - assert key != "totpartmass" - mass0, key_val, __, stat = get_max_key("csiborg", nsim0, key) - - xlabels = {"lambda200c": r"\log \lambda_{\rm 200c}"} - key_label = xlabels.get(key, key) - - mass0 = numpy.log10(mass0) - key_val = numpy.log10(key_val) - - mu = numpy.mean(stat, axis=1) - std = numpy.std(numpy.log10(stat), axis=1) - mu = numpy.log10(mu) - - with plt.style.context(plt_utils.mplstyle): - fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625)) - - im0 = axs[0].hexbin(mass0, mu, mincnt=1, bins="log", gridsize=30) - im1 = axs[1].hexbin(mass0, std, mincnt=1, bins="log", gridsize=30) - im2 = axs[2].hexbin(key_val, mu, mincnt=1, bins="log", gridsize=30) - m = numpy.isfinite(key_val) & numpy.isfinite(mu) - print("True to expectation corr: ", kendalltau(key_val[m], mu[m])) - - axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[0].set_ylabel(r"Max. overlap mean of ${}$".format(key_label)) - axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - axs[1].set_ylabel(r"Max. overlap std. of ${}$".format(key_label)) - axs[2].set_xlabel(r"${}$".format(key_label)) - axs[2].set_ylabel(r"Max. overlap mean of ${}$".format(key_label)) - - ims = [im0, im1, im2] - for i in range(3): - axins = inset_axes(axs[i], width="100%", height="5%", - loc='upper center', borderpad=-0.75) - fig.colorbar(ims[i], cax=axins, orientation="horizontal", - label="Bin counts") - axins.xaxis.tick_top() - axins.xaxis.set_tick_params(labeltop=True) - axins.xaxis.set_label_position("top") - - fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, - f"max_{key}_{nsim0}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs expected matched mass # +############################################################################### +# --------------------------------------------------------------------------- # -@cache_to_disk(7) -def get_expected_mass(simname, nsim0, min_overlap): - """ - Get the expected mass of a reference halo given its overlap with halos - from other simulations. +def mtot_vs_expected_mass(nsim0, simname, min_logmass, smoothed, nbins, + min_overlap=0, max_prob_nomatch=1, ext="png"): + x = get_expected_mass(nsim0, simname, min_overlap, min_logmass, smoothed) - Parameters - ---------- - simname : str - Simulation name. - nsim0 : int - Reference simulation index. - min_overlap : float - Minimum overlap to consider between a pair of haloes. + mass = x["mass0"] + mu = x["mu"] + std = x["std"] + prob_nomatch = x["prob_nomatch"] - Returns - ------- - mass : 1-dimensional array - Mass of the reference haloes. - mu : 1-dimensional array - Expected mass of the matched haloes. - std : 1-dimensional array - Standard deviation of the expected mass of the matched haloes. - prob_nomatch : 2-dimensional array - Probability of not matching the reference halo. - """ - paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths, - smoothed=True) - nsimxs = nsimxs - cat0 = open_cat(nsim0) - - catxs = [] - print("Opening catalogues...", flush=True) - for nsimx in tqdm(nsimxs): - catxs.append(open_cat(nsimx)) - - reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths) - mass = reader.cat0("totpartmass") - - mu, std = reader.counterpart_mass(True, overlap_threshold=min_overlap, - in_log=False, return_full=False) - prob_nomatch = reader.prob_nomatch(True) - return mass, mu, std, prob_nomatch - - -def plot_mass_vs_expected_mass(nsim0, min_overlap=0, max_prob_nomatch=1): - """ - Plot the mass of a reference halo against the expected mass of its - counterparts. - - Parameters - ---------- - nsim0 : int - Reference simulation index. - min_overlap : float, optional - Minimum overlap between a pair of haloes to consider. - max_prob_nomatch : float, optional - Maximum probability of no match to consider. - """ - mass, mu, std, prob_nomatch = get_expected_mass("csiborg", nsim0, - min_overlap) - - std = std / mu / numpy.log(10) mass = numpy.log10(mass) - mu = numpy.log10(mu) prob_nomatch = numpy.nanmedian(prob_nomatch, axis=1) mask = numpy.isfinite(mass) & numpy.isfinite(mu) - mask &= (prob_nomatch < max_prob_nomatch) + mask &= (prob_nomatch <= max_prob_nomatch) + + xbins = numpy.linspace(*numpy.percentile(mass[mask], [0, 100]), nbins) with plt.style.context(plt_utils.mplstyle): fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625)) im0 = axs[0].hexbin(mass[mask], mu[mask], mincnt=1, bins="log", gridsize=50,) + y_median, yerr = plt_utils.compute_error_bars(mass[mask], mu[mask], + xbins, sigma=2) + axs[0].errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3) + im1 = axs[1].hexbin(mass[mask], std[mask], mincnt=1, bins="log", gridsize=50) + y_median, yerr = plt_utils.compute_error_bars(mass[mask], std[mask], + xbins, sigma=2) + axs[1].errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3) + im2 = axs[2].hexbin(1 - prob_nomatch[mask], mass[mask] - mu[mask], gridsize=50, C=mass[mask], reduce_C_function=numpy.nanmedian) + axs[2].axhline(0, color="red", linestyle="--", alpha=0.5) - axs[0].set_xlabel(r"True $\log M_{\rm tot} ~ [M_\odot / h]$") + axs[0].set_xlabel(r"Reference $\log M_{\rm tot} ~ [M_\odot / h]$") axs[0].set_ylabel(r"Expected $\log M_{\rm tot} ~ [M_\odot / h]$") - axs[1].set_xlabel(r"True $\log M_{\rm tot} ~ [M_\odot / h]$") + axs[1].set_xlabel(r"Reference $\log M_{\rm tot} ~ [M_\odot / h]$") axs[1].set_ylabel(r"Std. of $\sigma_{\log M_{\rm tot}}$") axs[2].set_xlabel(r"1 - median prob. of no match") axs[2].set_ylabel(r"$\log M_{\rm tot} - \log M_{\rm tot, exp}$") t = numpy.linspace(*numpy.percentile(mass[mask], [0, 100]), 1000) - axs[0].plot(t, t, color="red", linestyle="--") - axs[0].plot(t, t + 0.2, color="red", linestyle="--", alpha=0.5) - axs[0].plot(t, t - 0.2, color="red", linestyle="--", alpha=0.5) + axs[0].plot(t, t, color="blue", linestyle="--") + axs[0].plot(t, t + 0.2, color="blue", linestyle="--", alpha=0.5) + axs[0].plot(t, t - 0.2, color="blue", linestyle="--", alpha=0.5) ims = [im0, im1, im2] labels = ["Bin counts", "Bin counts", r"$\log M_{\rm tot} ~ [M_\odot / h]$"] for i in range(3): - axins = inset_axes(axs[i], width="100%", height="5%", - loc='upper center', borderpad=-0.75) + axins = axs[i].inset_axes([0.0, 1.0, 1.0, 0.05]) fig.colorbar(ims[i], cax=axins, orientation="horizontal", label=labels[i]) axins.xaxis.tick_top() @@ -660,873 +764,369 @@ def plot_mass_vs_expected_mass(nsim0, min_overlap=0, max_prob_nomatch=1): axins.xaxis.set_label_position("top") fig.tight_layout() - for ext in ["png"]: - fout = join(plt_utils.fout, - f"mass_vs_expmass_{nsim0}_{max_prob_nomatch}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + fout = join( + plt_utils.fout, + f"mass_vs_expmass_{nsim0}_{simname}_{max_prob_nomatch}.{ext}" + ) + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") plt.close() +# --------------------------------------------------------------------------- # ############################################################################### -# Nearest neighbour plotting # +# Total DM halo mass vs maximum overlap halo property # ############################################################################### +# --------------------------------------------------------------------------- # -def read_dist(simname, run, kind, kwargs): - """ - Read PDF/CDF of a nearest neighbour distribution. +def mtot_vs_maxoverlap_property(nsim0, simname, min_logmass, key, min_overlap, + smoothed): + mass_kind = MASS_KINDS[simname] + assert key != mass_kind - Parameters - ---------- - simname : str - Simulation name. Must be either `csiborg` or `quijote`. - run : str - Run name. - kind : str - Kind of distribution. Must be either `pdf` or `cdf`. - kwargs : dict - Nearest neighbour reader keyword arguments. + x = get_property_maxoverlap(nsim0, simname, min_logmass, key, min_overlap, + smoothed) + mass0 = x["mass0"] + prop0 = x["prop0"] + stat = x["prop_maxoverlap"] - Returns - ------- - dist : 2-dimensional array - Distribution of distances in radial and neighbour bins. - """ - paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) - reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) + xlabels = {"lambda200c": r"\log \lambda_{\rm 200c}"} + key_label = xlabels.get(key, key) - fpath = paths.cross_nearest(simname, run, "tot_counts", nsim=0, nobs=0) - counts = numpy.load(fpath)["tot_counts"] - return reader.build_dist(counts, kind) + mass0 = numpy.log10(mass0) + prop0 = numpy.log10(prop0) - -def pull_cdf(x, fid_cdf, test_cdf): - """ - Pull a CDF so that it matches the fiducial CDF at 0.5. Rescales the x-axis, - while keeping the corresponding CDF values fixed. - - Parameters - ---------- - x : 1-dimensional array - The x-axis of the CDF. - fid_cdf : 1-dimensional array - The fiducial CDF. - test_cdf : 1-dimensional array - The test CDF to be pulled. - - Returns - ------- - xnew : 1-dimensional array - The new x-axis of the test CDF. - test_cdf : 1-dimensional array - The new test CDF. - """ - xnew = x * numpy.interp(0.5, fid_cdf, x) / numpy.interp(0.5, test_cdf, x) - return xnew, test_cdf - - -def plot_dist(run, kind, kwargs, runs_to_mass, pulled_cdf=False, r200=None): - r""" - Plot the PDF or CDF of the nearest neighbour distance for CSiBORG and - Quijote. - - Parameters - ---------- - run : str - Run name. - kind : str - Kind of distribution. Must be either `pdf` or `cdf`. - kwargs : dict - Nearest neighbour reader keyword arguments. - runs_to_mass : dict - Dictionary mapping run names to halo mass range. - pulled_cdf : bool, optional - Whether to pull the CDFs of CSiBORG and Quijote so that they match - (individually) at 0.5. Default is `False`. - r200 : float, optional - Halo radial size :math:`R_{200}`. If set, the x-axis will be scaled by - it. - - Returns - ------- - None - """ - assert kind in ["pdf", "cdf"] - print(f"Plotting the {kind} for {run}...", flush=True) - reader = csiborgtools.read.NearestNeighbourReader( - **kwargs, paths=csiborgtools.read.Paths(**kwargs["paths_kind"])) - raddist = reader.bin_centres("radial") - r = reader.bin_centres("neighbour") - r = r / r200 if r200 is not None else r - - y_csiborg = read_dist("csiborg", run, kind, kwargs) - y_quijote = read_dist("quijote", run, kind, kwargs) + mu = numpy.nanmean(stat, axis=1) + std = numpy.nanstd(numpy.log10(stat), axis=1) + mu = numpy.log10(mu) with plt.style.context(plt_utils.mplstyle): - norm = mpl.colors.Normalize(vmin=numpy.min(raddist), - vmax=numpy.max(raddist)) - cmap = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.viridis) - cmap.set_array([]) + fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625)) - fig, ax = plt.subplots() - if run != "mass009": - ax.set_title(r"${} \leq \log M_{{\rm tot}} / (M_\odot h) < {}$" - .format(*runs_to_mass[run]), fontsize="small") - else: - ax.set_title(r"$\log M_{{\rm tot}} / (M_\odot h) \geq {}$" - .format(runs_to_mass[run][0]), fontsize="small") - # Plot data - nrad = y_csiborg.shape[0] - for i in range(nrad): - if pulled_cdf: - x1, y1 = pull_cdf(r, y_csiborg[0], y_csiborg[i]) - x2, y2 = pull_cdf(r, y_quijote[0], y_quijote[i]) - else: - x1, y1 = r, y_csiborg[i] - x2, y2 = r, y_quijote[i] + im0 = axs[0].hexbin(mass0, mu - prop0, mincnt=1, bins="log", + gridsize=30) + im1 = axs[1].hexbin(mass0, std, mincnt=1, bins="log", gridsize=30) + im2 = axs[2].hexbin(prop0, mu, mincnt=1, bins="log", gridsize=30) + m = numpy.isfinite(prop0) & numpy.isfinite(mu) + print("True to expectation corr: ", kendalltau(prop0[m], mu[m])) - ax.plot(x1, y1, c=cmap.to_rgba(raddist[i]), - label="CSiBORG" if i == 0 else None) - ax.plot(x2, y2, c="gray", ls="--", - label="Quijote" if i == 0 else None) + axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + axs[0].set_ylabel(r"True - max. overlap mean of ${}$" + .format(key_label)) + axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + axs[1].set_ylabel(r"Max. overlap std. of ${}$".format(key_label)) + axs[2].set_xlabel(r"${}$".format(key_label)) + axs[2].set_ylabel(r"Max. overlap mean of ${}$".format(key_label)) - fig.colorbar(cmap, ax=ax, label=r"$R_{\rm dist}~[\mathrm{Mpc} / h]$") - ax.grid(alpha=0.5, lw=0.4) - # Plot labels - if pulled_cdf: - if r200 is None: - ax.set_xlabel(r"$\tilde{r}_{1\mathrm{NN}}~[\mathrm{Mpc} / h]$") - if kind == "pdf": - ax.set_ylabel(r"$p(\tilde{r}_{1\mathrm{NN}})$") - else: - ax.set_ylabel(r"$\mathrm{CDF}(\tilde{r}_{1\mathrm{NN}})$") - else: - ax.set_xlabel(r"$\tilde{r}_{1\mathrm{NN}} / R_{200c}$") - if kind == "pdf": - ax.set_ylabel(r"$p(\tilde{r}_{1\mathrm{NN}} / R_{200c})$") - else: - ax.set_ylabel(r"$\mathrm{CDF}(\tilde{r}_{1\mathrm{NN}} / R_{200c})$") # noqa - else: - if r200 is None: - ax.set_xlabel(r"$r_{1\mathrm{NN}}~[\mathrm{Mpc} / h]$") - if kind == "pdf": - ax.set_ylabel(r"$p(r_{1\mathrm{NN}})$") - else: - ax.set_ylabel(r"$\mathrm{CDF}(r_{1\mathrm{NN}})$") - else: - ax.set_xlabel(r"$r_{1\mathrm{NN}} / R_{200c}$") - if kind == "pdf": - ax.set_ylabel(r"$p(r_{1\mathrm{NN}} / R_{200c})$") - else: - ax.set_ylabel(r"$\mathrm{CDF}(r_{1\mathrm{NN}} / R_{200c})$") # noqa + t = numpy.linspace(*numpy.percentile(prop0[m], [0, 100]), 1000) + axs[2].plot(t, t, color="blue", linestyle="--") + axs[2].plot(t, t + 0.2, color="blue", linestyle="--", alpha=0.5) + axs[2].plot(t, t - 0.2, color="blue", linestyle="--", alpha=0.5) - if kind == "cdf": - xmax = numpy.min(r[numpy.isclose(y_quijote[-1, :], 1.)]) - if xmax > 0: - ax.set_xlim(0, xmax) - ax.set_ylim(0, 1) - - ax.legend(fontsize="small") - fig.tight_layout() - for ext in ["png"]: - if pulled_cdf: - fout = join(plt_utils.fout, f"1nn_{kind}_{run}_pulled.{ext}") - else: - fout = join(plt_utils.fout, f"1nn_{kind}_{run}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -def get_cdf_diff(x, y_csiborg, y_quijote, pulled_cdf): - """ - Get difference between the two CDFs as a function of radial distance. - - Parameters - ---------- - x : 1-dimensional array - The x-axis of the CDFs. - y_csiborg : 2-dimensional array - The CDFs of CSiBORG. - y_quijote : 2-dimensional array - The CDFs of Quijote. - pulled_cdf : bool - Whether to pull the CDFs of CSiBORG and Quijote. - - Returns - ------- - dy : 2-dimensional array - The difference between the two CDFs. - """ - dy = numpy.full_like(y_csiborg, numpy.nan) - for i in range(y_csiborg.shape[0]): - if pulled_cdf: - x1, y1 = pull_cdf(x, y_csiborg[0], y_csiborg[i]) - y1 = numpy.interp(x, x1, y1, left=0., right=1.) - x2, y2 = pull_cdf(x, y_quijote[0], y_quijote[i]) - y2 = numpy.interp(x, x2, y2, left=0., right=1.) - dy[i] = y1 - y2 - else: - dy[i] = y_csiborg[i] - y_quijote[i] - return dy - - -def plot_cdf_diff(runs, kwargs, pulled_cdf, runs_to_mass): - """ - Plot the CDF difference between Quijote and CSiBORG. - - Parameters - ---------- - runs : list of str - Run names. - kwargs : dict - Nearest neighbour reader keyword arguments. - pulled_cdf : bool - Whether to pull the CDFs of CSiBORG and Quijote. - runs_to_mass : dict - Dictionary mapping run names to halo mass range. - - Returns - ------- - None - """ - print("Plotting the CDF difference...", flush=True) - paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) - reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) - r = reader.bin_centres("neighbour") - runs_to_mass = [numpy.mean(runs_to_mass[run]) for run in runs] - - with plt.style.context(plt_utils.mplstyle): - norm = mpl.colors.Normalize(vmin=min(runs_to_mass), - vmax=max(runs_to_mass)) - cmap = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.viridis) - cmap.set_array([]) - - fig, ax = plt.subplots() - for i, run in enumerate(runs): - y_quijote = read_dist("quijote", run, "cdf", kwargs) - y_csiborg = read_dist("csiborg", run, "cdf", kwargs) - - dy = get_cdf_diff(r, y_csiborg, y_quijote, pulled_cdf) - ax.plot(r, numpy.median(dy, axis=0), - c=cmap.to_rgba(runs_to_mass[i])) - ax.fill_between(r, *numpy.percentile(dy, [16, 84], axis=0), - alpha=0.5, color=cmap.to_rgba(runs_to_mass[i])) - fig.colorbar(cmap, ax=ax, ticks=runs_to_mass, - label=r"$\log M_{\rm tot} ~ [M_\odot / h]$") - ax.set_xlim(0.0, 55) - ax.set_ylim(0) - - ax.grid(alpha=1/3, lw=0.4) - - # Plot labels - if pulled_cdf: - ax.set_xlabel(r"$\tilde{r}_{1\mathrm{NN}}~[\mathrm{Mpc} / h]$") - else: - ax.set_xlabel(r"$r_{1\mathrm{NN}}~[\mathrm{Mpc} / h]$") - ax.set_ylabel(r"$\Delta \mathrm{CDF}(r_{1\mathrm{NN}})$") - - # Plot labels - if pulled_cdf: - ax.set_xlabel(r"$\tilde{r}_{1\mathrm{NN}}~[\mathrm{Mpc} / h]$") - ax.set_ylabel(r"$\Delta \mathrm{CDF}(\tilde{r}_{1\mathrm{NN}})$") - else: - ax.set_xlabel(r"$r_{1\mathrm{NN}}~[\mathrm{Mpc} / h]$") - ax.set_ylabel(r"$\Delta \mathrm{CDF}(r_{1\mathrm{NN}})$") + ims = [im0, im1, im2] + for i in range(3): + axins = axs[i].inset_axes([0.0, 1.0, 1.0, 0.05]) + fig.colorbar(ims[i], cax=axins, orientation="horizontal", + label="Bin counts") + axins.xaxis.tick_top() + axins.xaxis.set_tick_params(labeltop=True) + axins.xaxis.set_label_position("top") fig.tight_layout() - for ext in ["png"]: - if pulled_cdf: - fout = join(plt_utils.fout, f"1nn_diff_pulled.{ext}") - else: - fout = join(plt_utils.fout, f"1nn_diff.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -@cache_to_disk(7) -def make_kl(simname, run, nsim, nobs, kwargs): - """ - Calculate the KL divergence between the distribution of nearest neighbour - distances of haloes in a reference simulation with respect to Quijote. - - Parameters - ---------- - simname : str - Simulation name. Must be either `csiborg` or `quijote`. - run : str - Run name. - nsim : int - Simulation index. - nobs : int - Fiducial Quijote observer index. For CSiBORG must be set to `None`. - kwargs : dict - Nearest neighbour reader keyword arguments. - - Returns - ------- - kl : 1-dimensional array - KL divergence of the distribution of nearest neighbour distances - of each halo in the reference simulation. - """ - paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) - reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) - # This is the reference PDF. Must be Quijote! - pdf = read_dist("quijote", run, "pdf", kwargs) - return reader.kl_divergence(simname, run, nsim, pdf, nobs=nobs) - - -@cache_to_disk(7) -def make_ks(simname, run, nsim, nobs, kwargs): - """ - Calculate the KS significance between the distribution of nearest neighbour - distances of haloes in a reference simulation with respect to Quijote. - - Parameters - ---------- - simname : str - Simulation name. Must be either `csiborg` or `quijote`. - run : str - Run name. - nsim : int - Simulation index. - nobs : int - Fiducial Quijote observer index. For CSiBORG must be set to `None`. - kwargs : dict - Nearest neighbour reader keyword arguments. - - Returns - ------- - ks : 1-dimensional array - KS significance of the distribution of nearest neighbour distances of - each halo in the reference simulation. - """ - paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) - reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) - # This is the reference CDF. Must be Quijote! - cdf = read_dist("quijote", run, "cdf", kwargs) - return reader.ks_significance(simname, run, nsim, cdf, nobs=nobs) - - -def get_cumulative_significance(simname, runs, nsim, nobs, kind, kwargs): - """ - Calculate the cumulative significance of the distribution of nearest - neighbours and evaluate it at the same points for all runs. - - Parameters - ---------- - simname : str - Simulation name. Must be either `csiborg` or `quijote`. - runs : list of str - Run names. - nsim : int - Simulation index. - nobs : int - Fiducial Quijote observer index. For CSiBORG must be set to `None`. - kind : str - Must be either `kl` (Kullback-Leibler diverge) or `ks` - (Kolmogorov-Smirnov p-value). - kwargs : dict - Nearest neighbour reader keyword arguments. - - Returns - ------- - z : 1-dimensional array - Points where the cumulative significance is evaluated. - cumsum : 2-dimensional array of shape `(len(runs), len(z)))` - Cumulative significance of the distribution of nearest neighbours. - """ - significances = [] - for run in runs: - if kind == "kl": - x = make_kl(simname, run, nsim, nobs, kwargs) - else: - x = make_ks(simname, run, nsim, nobs, kwargs) - x = numpy.log10(x) - x = x[numpy.isfinite(x)] - x = numpy.sort(x) - significances.append(x) - z = numpy.hstack(significances).reshape(-1, ) - - if kind == "ks": - zmin, zmax = numpy.percentile(z, [1, 100]) - else: - zmin, zmax = numpy.percentile(z, [0.0, 99.9]) - z = numpy.linspace(zmin, zmax, 1000, dtype=numpy.float32) - - cumsum = numpy.full((len(runs), z.size), numpy.nan, dtype=numpy.float32) - for i, run in enumerate(runs): - x = significances[i] - y = numpy.linspace(0, 1, x.size) - cumsum[i, :] = numpy.interp(z, x, y, left=0, right=1) - - return z, cumsum - - -def plot_significance(simname, runs, nsim, nobs, kind, kwargs, runs_to_mass): - """ - Plot cumulative significance of the 1NN distribution. - - Parameters - ---------- - simname : str - Simulation name. Must be either `csiborg` or `quijote`. - runs : list of str - Run names. - nsim : int - Simulation index. - nobs : int - Fiducial Quijote observer index. For CSiBORG must be set to `None`. - kind : str - Must be either `kl` (Kullback-Leibler diverge) or `ks` - (Kolmogorov-Smirnov p-value). - kwargs : dict - Nearest neighbour reader keyword arguments. - runs_to_mass : dict - Dictionary mapping run names to total halo mass range. - upper_threshold : bool, optional - - Returns - ------- - None - """ - assert kind in ["kl", "ks"] - runs_to_mass = [numpy.mean(runs_to_mass[run]) for run in runs] - - with plt.style.context(plt_utils.mplstyle): - norm = mpl.colors.Normalize(vmin=min(runs_to_mass), - vmax=max(runs_to_mass)) - cmap = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.viridis) - cmap.set_array([]) - - fig, ax = plt.subplots(figsize=(3.5, 2.625 * 1.2), nrows=2, - sharex=True, height_ratios=[1, 0.5]) - fig.subplots_adjust(hspace=0, wspace=0) - z, cumsum = get_cumulative_significance(simname, runs, nsim, nobs, - kind, kwargs) - - for i in range(len(runs)): - ax[0].plot(z, cumsum[i, :], color=cmap.to_rgba(runs_to_mass[i])) - - dy = cumsum[-1, :] - cumsum[i, :] - if kind == "kl": - dy *= -1 - ax[1].plot(z, dy, color=cmap.to_rgba(runs_to_mass[i])) - - cbar_ax = fig.add_axes([1.0, 0.125, 0.035, 0.85]) - fig.colorbar(cmap, cax=cbar_ax, ticks=runs_to_mass, - label=r"$\log M_{\rm tot} ~ [M_\odot / h]$") - - ax[0].set_xlim(z[0], z[-1]) - ax[0].set_ylim(1e-5, 1.) - if kind == "ks": - ax[1].set_xlabel(r"$\log p$-value of $r_{1\mathrm{NN}}$ distribution") # noqa - else: - ax[1].set_xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution") # noqa - ax[0].set_ylabel(r"Cumulative norm. counts") - ax[1].set_ylabel(r"$\Delta f$") - - fig.tight_layout(h_pad=0, w_pad=0) - for ext in ["png"]: - if simname == "quijote": - paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) - nsim = paths.quijote_fiducial_nsim(nsim, nobs) - nsim = str(nsim).zfill(5) - fout = join( - plt_utils.fout, - f"significance_{kind}_{simname}_{nsim}_{runs}.{ext}") - print(f"Saving to `{fout}`.") - fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -def make_binlims(run, runs_to_mass, upper_threshold=None): - """ - Make bin limits for the 1NN distance runs, corresponding to the first half - of the log-mass bin. - - Parameters - ---------- - run : str - Run name. - runs_to_mass : dict - Dictionary mapping run names to total halo mass range. - upper_threshold : float, optional - Bin width in dex. If set to `None`, the bin width is taken from the - `runs_to_mass` dictionary. - - Returns - ------- - xmin, xmax : floats - """ - xmin, xmax = runs_to_mass[run] - if upper_threshold is not None: - xmax = xmin + upper_threshold - - xmin, xmax = 10**xmin, 10**xmax - if run == "mass009": - xmax = numpy.infty - return xmin, xmax - - -def plot_significance_vs_mass(simname, runs, nsim, nobs, kind, kwargs, - runs_to_mass, upper_threshold=False): - """ - Plot significance of the 1NN distance as a function of the total mass. - - Parameters - ---------- - simname : str - Simulation name. Must be either `csiborg` or `quijote`. - runs : list of str - Run names. - nsim : int - Simulation index. - nobs : int - Fiducial Quijote observer index. For CSiBORG must be set to `None`. - kind : str - Must be either `kl` (Kullback-Leibler diverge) or `ks` - (Kolmogorov-Smirnov p-value). - kwargs : dict - Nearest neighbour reader keyword arguments. - runs_to_mass : dict - Dictionary mapping run names to total halo mass range. - upper_threshold : bool, optional - Whether to enforce an upper threshold on halo mass. - - Returns - ------- - None - """ - print(f"Plotting {kind} significance vs mass.") - assert kind in ["kl", "ks"] - paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) - reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) - - with plt.style.context(plt_utils.mplstyle): - plt.figure() - xs, ys = [], [] - for run in runs: - x = reader.read_single(simname, run, nsim, nobs)["mass"] - if kind == "kl": - y = make_kl(simname, run, nsim, nobs, kwargs) - else: - y = numpy.log10(make_ks(simname, run, nsim, nobs, kwargs)) - - xmin, xmax = make_binlims(run, runs_to_mass, upper_threshold) - - mask = (x >= xmin) & (x < xmax) - xs.append(numpy.log10(x[mask])) - ys.append(y[mask]) - - xs = numpy.concatenate(xs) - ys = numpy.concatenate(ys) - - plt.hexbin(xs, ys, gridsize=75, mincnt=1, bins="log") - mask = numpy.isfinite(xs) & numpy.isfinite(ys) - corr = plt_utils.latex_float(*kendalltau(xs[mask], ys[mask])) - plt.title(r"$\tau = {}, p = {}$".format(*corr), fontsize="small") - - plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") - if kind == "ks": - plt.ylabel(r"$\log p$-value of $r_{1\mathrm{NN}}$ distribution") - plt.ylim(top=0) - else: - plt.ylabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution") - plt.ylim(bottom=0) - plt.colorbar(label="Bin counts") - - plt.tight_layout() - for ext in ["png"]: - if simname == "quijote": - nsim = paths.quijote_fiducial_nsim(nsim, nobs) - nsim = str(nsim).zfill(5) - fout = f"sgnf_vs_mass_{kind}_{simname}_{nsim}_{runs}.{ext}" - if upper_threshold: - fout = fout.replace(f".{ext}", f"_upper_threshold.{ext}") - fout = join(plt_utils.fout, fout) - print(f"Saving to `{fout}`.") - plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -def plot_kl_vs_ks(simname, runs, nsim, nobs, kwargs, runs_to_mass, - upper_threshold=False): - """ - Plot Kullback-Leibler divergence vs Kolmogorov-Smirnov statistic p-value. - - Parameters - ---------- - simname : str - Simulation name. Must be either `csiborg` or `quijote`. - runs : str - Run names. - nsim : int - Simulation index. - nobs : int - Fiducial Quijote observer index. For CSiBORG must be set to `None`. - kwargs : dict - Nearest neighbour reader keyword arguments. - runs_to_mass : dict - Dictionary mapping run names to total halo mass range. - upper_threshold : bool, optional - Whether to enforce an upper threshold on halo mass. - - Returns - ------- - None - """ - paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) - reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) - - xs, ys, cs = [], [], [] - for run in runs: - c = reader.read_single(simname, run, nsim, nobs)["mass"] - x = make_kl(simname, run, nsim, nobs, kwargs) - y = make_ks(simname, run, nsim, nobs, kwargs) - - cmin, cmax = make_binlims(run, runs_to_mass) - mask = (c >= cmin) & (c < cmax if upper_threshold else True) - xs.append(x[mask]) - ys.append(y[mask]) - cs.append(c[mask]) - - xs = numpy.concatenate(xs) - ys = numpy.log10(numpy.concatenate(ys)) - cs = numpy.log10(numpy.concatenate(cs)) - - with plt.style.context(plt_utils.mplstyle): - plt.figure() - plt.hexbin(xs, ys, C=cs, gridsize=50, mincnt=0, - reduce_C_function=numpy.median) - mask = numpy.isfinite(xs) & numpy.isfinite(ys) - corr = plt_utils.latex_float(*kendalltau(xs[mask], ys[mask])) - plt.title(r"$\tau = {}, p = {}$".format(*corr), fontsize="small") - plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$") - - plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution") - plt.ylabel(r"$\log p$-value of $r_{1\mathrm{NN}}$ distribution") - - plt.tight_layout() - for ext in ["png"]: - if simname == "quijote": - nsim = paths.quijote_fiducial_nsim(nsim, nobs) - nsim = str(nsim).zfill(5) - fout = join( - plt_utils.fout, - f"kl_vs_ks_{simname}_{run}_{nsim}_{runs}.{ext}") - if upper_threshold: - fout = fout.replace(f".{ext}", f"_upper_threshold.{ext}") - print(f"Saving to `{fout}`.") - plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - -def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass, plot_std=True, - upper_threshold=False): - """ - Plot KL divergence vs overlap for CSiBORG. - - Parameters - ---------- - runs : str - Run names. - nsim : int - Simulation index. - kwargs : dict - Nearest neighbour reader keyword arguments. - runs_to_mass : dict - Dictionary mapping run names to total halo mass range. - plot_std : bool, optional - Whether to plot the standard deviation of the overlap distribution. - upper_threshold : bool, optional - Whether to enforce an upper threshold on halo mass. - - Returns - ------- - None - """ - paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) - nn_reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) - - xs, ys1, ys2, cs = [], [], [], [] - for run in runs: - nn_data = nn_reader.read_single("csiborg", run, nsim, nobs=None) - nn_hindxs = nn_data["ref_hindxs"] - mass, overlap_hindxs, __, summed_overlap, prob_nomatch = get_overlap("csiborg", nsim) # noqa - - # We need to match the hindxs between the two. - hind2overlap_array = {hind: i for i, hind in enumerate(overlap_hindxs)} - mask = numpy.asanyarray([hind2overlap_array[hind] - for hind in nn_hindxs]) - summed_overlap = summed_overlap[mask] - prob_nomatch = prob_nomatch[mask] - mass = mass[mask] - - x = make_kl("csiborg", run, nsim, nobs=None, kwargs=kwargs) - y1 = 1 - numpy.mean(prob_nomatch, axis=1) - y2 = numpy.std(prob_nomatch, axis=1) - cmin, cmax = make_binlims(run, runs_to_mass, upper_threshold) - mask = (mass >= cmin) & (mass < cmax if upper_threshold else True) - xs.append(x[mask]) - ys1.append(y1[mask]) - ys2.append(y2[mask]) - cs.append(numpy.log10(mass[mask])) - - xs = numpy.concatenate(xs) - ys1 = numpy.concatenate(ys1) - ys2 = numpy.concatenate(ys2) - cs = numpy.concatenate(cs) - - with plt.style.context(plt_utils.mplstyle): - plt.figure() - plt.hexbin(xs, ys1, C=cs, gridsize=50, mincnt=0, - reduce_C_function=numpy.median) - mask = numpy.isfinite(xs) & numpy.isfinite(ys1) - corr = plt_utils.latex_float(*kendalltau(xs[mask], ys1[mask])) - plt.title(r"$\tau = {}, p = {}$".format(*corr), fontsize="small") - - plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$") - plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution") - plt.ylabel("1 - mean prob. of no match") - - plt.tight_layout() - for ext in ["png"]: - nsim = str(nsim).zfill(5) - fout = join(plt_utils.fout, - f"kl_vs_overlap_mean_{nsim}_{runs}.{ext}") - if upper_threshold: - fout = fout.replace(f".{ext}", f"_upper_threshold.{ext}") - print(f"Saving to `{fout}`.") - plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") - plt.close() - - if not plot_std: - return - - with plt.style.context(plt_utils.mplstyle): - plt.figure() - plt.hexbin(xs, ys2, C=cs, gridsize=50, mincnt=0, - reduce_C_function=numpy.median) - plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$") - plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution") - plt.ylabel(r"Ensemble std of summed overlap") - mask = numpy.isfinite(xs) & numpy.isfinite(ys2) - corr = plt_utils.latex_float(*kendalltau(xs[mask], ys2[mask])) - plt.title(r"$\tau = {}, p = {}$".format(*corr), fontsize="small") - - plt.tight_layout() - for ext in ["png"]: - nsim = str(nsim).zfill(5) - fout = join(plt_utils.fout, - f"kl_vs_overlap_std_{nsim}_{runs}.{ext}") - if upper_threshold: - fout = fout.replace(f".{ext}", f"_upper_threshold.{ext}") - print(f"Saving to `{fout}`.") - plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + fout = join(plt_utils.fout, + f"max_{key}_{simname}_{nsim0}_{min_overlap}.{ext}") + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") plt.close() +# --------------------------------------------------------------------------- # ############################################################################### -# Command line interface # +# Max's matching vs overlap success # ############################################################################### +# --------------------------------------------------------------------------- # + + +@cache_to_disk(120) +def get_matching_max_vs_overlap(simname, nsim0, min_logmass, mult): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + + nsimsx = [nsim for nsim in paths.get_ics(simname) if nsim != nsim0] + for i in trange(len(nsimsx), desc="Loading data"): + nsimx = nsimsx[i] + fpath = paths.match_max(simname, nsim0, nsimx, min_logmass, + mult=mult) + + data = numpy.load(fpath, allow_pickle=True) + + if i == 0: + mass0 = data["mass0"] + max_overlap = numpy.full((mass0.size, len(nsimsx)), numpy.nan) + match_overlap = numpy.full((mass0.size, len(nsimsx)), numpy.nan) + success = numpy.zeros((mass0.size, len(nsimsx)), numpy.bool_) + + max_overlap[:, i] = data["max_overlap"] + match_overlap[:, i] = data["match_overlap"] + success[:, i] = data["success"] + + return {"mass0": mass0, "max_overlap": max_overlap, + "match_overlap": match_overlap, "success": success} + + +def matching_max_vs_overlap(simname, nsim0, min_logmass): + left_edges = numpy.arange(min_logmass, 15, 0.1) + + delete_disk_caches_for_function("get_matching_max_vs_overlap") + + with plt.style.context("science"): + fig, axs = plt.subplots(ncols=2, figsize=(3.5 * 2, 2.625)) + cols = plt.rcParams["axes.prop_cycle"].by_key()["color"] + for n, mult in enumerate([2.5, 5., 7.5, 10.0]): + x = get_matching_max_vs_overlap(simname, + nsim0, min_logmass, mult=mult) + mass0 = numpy.log10(x["mass0"]) + max_overlap = x["max_overlap"] + match_overlap = x["match_overlap"] + success = x["success"] + + nbins = len(left_edges) + y = numpy.full((nbins, 100), numpy.nan) + y2 = numpy.full((nbins, 100), numpy.nan) + for i in range(nbins): + m = mass0 > left_edges[i] + for j in range(100): + y[i, j] = numpy.sum( + max_overlap[m, j] == match_overlap[m, j]) + y[i, j] /= numpy.sum(success[m, j]) + y2[i, j] = numpy.sum(success[m, j]) / numpy.sum(m) + + offset = numpy.random.normal(0, 0.01) + + ysummary = numpy.percentile(y, [16, 50, 84], axis=1) + axs[0].errorbar( + left_edges + offset, ysummary[1], + yerr=[ysummary[1] - ysummary[0], ysummary[2] - ysummary[1]], + capsize=3, c=cols[n], + label=r"$\leq {}~R_{{\rm 200c}}$".format(mult), errorevery=2) + ysummary = numpy.percentile(y2, [16, 50, 84], axis=1) + axs[1].errorbar( + left_edges + offset, ysummary[1], ls="--", + yerr=[ysummary[1] - ysummary[0], ysummary[2] - ysummary[1]], + capsize=3, c=cols[n], errorevery=2) + + axs[0].legend(ncols=2, fontsize="small") + for i in range(2): + axs[i].set_xlabel(r"$\log M_{\rm tot, min} ~ [M_\odot / h]$") + + axs[0].set_ylabel(r"$f_{\rm agreement}$") + axs[1].set_ylabel(r"$f_{\rm match}$") + + fig.tight_layout() + fout = join( + plt_utils.fout, + f"matching_max_agreement_{simname}_{nsim0}_{min_logmass}.png") + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# KL final snapshot vs overlaps # +############################################################################### +# --------------------------------------------------------------------------- # + + +# def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass, plot_std=True, +# upper_threshold=False): +# """ +# Plot KL divergence vs overlap for CSiBORG. +# +# Parameters +# ---------- +# runs : str +# Run names. +# nsim : int +# Simulation index. +# kwargs : dict +# Nearest neighbour reader keyword arguments. +# runs_to_mass : dict +# Dictionary mapping run names to total halo mass range. +# plot_std : bool, optional +# Whether to plot the standard deviation of the overlap distribution. +# upper_threshold : bool, optional +# Whether to enforce an upper threshold on halo mass. +# +# Returns +# ------- +# None +# """ +# paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) +# nn_reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) +# +# xs, ys1, ys2, cs = [], [], [], [] +# for run in runs: +# nn_data = nn_reader.read_single("csiborg", run, nsim, nobs=None) +# nn_hindxs = nn_data["ref_hindxs"] +# mass, overlap_hindxs, __, summed_overlap, prob_nomatch = get_overlap_summary("csiborg", nsim) # noqa +# +# # We need to match the hindxs between the two. +# hind2overlap_array = {hind: i for i, hind in enumerate(overlap_hindxs)} +# mask = numpy.asanyarray([hind2overlap_array[hind] +# for hind in nn_hindxs]) +# summed_overlap = summed_overlap[mask] +# prob_nomatch = prob_nomatch[mask] +# mass = mass[mask] +# +# x = make_kl("csiborg", run, nsim, nobs=None, kwargs=kwargs) +# y1 = 1 - numpy.mean(prob_nomatch, axis=1) +# y2 = numpy.std(prob_nomatch, axis=1) +# cmin, cmax = make_binlims(run, runs_to_mass, upper_threshold) +# mask = (mass >= cmin) & (mass < cmax if upper_threshold else True) +# xs.append(x[mask]) +# ys1.append(y1[mask]) +# ys2.append(y2[mask]) +# cs.append(numpy.log10(mass[mask])) +# +# xs = numpy.concatenate(xs) +# ys1 = numpy.concatenate(ys1) +# ys2 = numpy.concatenate(ys2) +# cs = numpy.concatenate(cs) +# +# with plt.style.context(plt_utils.mplstyle): +# plt.figure() +# plt.hexbin(xs, ys1, C=cs, gridsize=50, mincnt=0, +# reduce_C_function=numpy.median) +# mask = numpy.isfinite(xs) & numpy.isfinite(ys1) +# corr = plt_utils.latex_float(*kendalltau(xs[mask], ys1[mask])) +# plt.title(r"$\tau = {}, p = {}$".format(*corr), fontsize="small") +# +# plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$") +# plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution") +# plt.ylabel("1 - mean prob. of no match") +# +# plt.tight_layout() +# for ext in ["png"]: +# nsim = str(nsim).zfill(5) +# fout = join(plt_utils.fout, +# f"kl_vs_overlap_mean_{nsim}_{runs}.{ext}") +# if upper_threshold: +# fout = fout.replace(f".{ext}", f"_upper_threshold.{ext}") +# print(f"Saving to `{fout}`.") +# plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") +# plt.close() +# +# if not plot_std: +# return +# +# with plt.style.context(plt_utils.mplstyle): +# plt.figure() +# plt.hexbin(xs, ys2, C=cs, gridsize=50, mincnt=0, +# reduce_C_function=numpy.median) +# plt.colorbar(label=r"$\log M_{\rm tot} / M_\odot$") +# plt.xlabel(r"$D_{\mathrm{KL}}$ of $r_{1\mathrm{NN}}$ distribution") +# plt.ylabel(r"Ensemble std of summed overlap") +# mask = numpy.isfinite(xs) & numpy.isfinite(ys2) +# corr = plt_utils.latex_float(*kendalltau(xs[mask], ys2[mask])) +# plt.title(r"$\tau = {}, p = {}$".format(*corr), fontsize="small") +# +# plt.tight_layout() +# for ext in ["png"]: +# nsim = str(nsim).zfill(5) +# fout = join(plt_utils.fout, +# f"kl_vs_overlap_std_{nsim}_{runs}.{ext}") +# if upper_threshold: +# fout = fout.replace(f".{ext}", f"_upper_threshold.{ext}") +# print(f"Saving to `{fout}`.") +# plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") +# plt.close() if __name__ == "__main__": - parser = ArgumentParser() - parser.add_argument('-c', '--clean', action='store_true') - args = parser.parse_args() - neighbour_kwargs = csiborgtools.neighbour_kwargs + min_logmass = 13.25 + smoothed = True + nbins = 10 + ext = "png" + plot_quijote = False + min_maxoverlap = 0. - runs_to_mass = { - "mass001": (12.4, 12.8), - "mass002": (12.6, 13.0), - "mass003": (12.8, 13.2), - "mass004": (13.0, 13.4), - "mass005": (13.2, 13.6), - "mass006": (13.4, 13.8), - "mass007": (13.6, 14.0), - "mass008": (13.8, 14.2), - "mass009": (14.0, 14.4), # There is no upper limit. - } - - # cached_funcs = ["get_overlap", "read_dist", "make_kl", "make_ks"] - cached_funcs = ["get_max_key"] - if args.clean: - for func in cached_funcs: - print(f"Cleaning cache for function {func}.") - delete_disk_caches_for_function(func) + funcs = [ + # "get_overlap_summary", + # "get_expected_mass", + # "get_mtot_vs_all_pairoverlap", + # "get_mtot_vs_maxpairoverlap", + # "get_mass_vs_separation", + # "mass_vs_maxoverlap_separation", + # "get_property_maxoverlap", + ] + for func in funcs: + print(f"Cleaning up cache for `{func}`.") + delete_disk_caches_for_function(func) if False: - plot_mass_vs_pairoverlap(7444 + 24, 8956 + 24 * 3) + mtot_vs_all_pairoverlap(7444, "csiborg", min_logmass, smoothed, + nbins, ext=ext) + if plot_quijote: + mtot_vs_all_pairoverlap(0, "quijote", min_logmass, smoothed, + nbins, ext=ext) if False: - plot_mass_vs_maxpairoverlap(7444 + 24, 8956 + 24 * 3) + mtot_vs_maxpairoverlap(7444, "csiborg", "fof_totpartmass", min_logmass, + smoothed, nbins, ext=ext) + if plot_quijote: + mtot_vs_maxpairoverlap(0, "quijote", "group_mass", min_logmass, + smoothed, nbins, ext=ext) if False: - plot_mass_vsmedmaxoverlap(7444) + mtot_vs_summedpairoverlap(7444, "csiborg", min_logmass, smoothed, + nbins, ext) + if plot_quijote: + mtot_vs_summedpairoverlap(0, "quijote", min_logmass, smoothed, + nbins, ext) + if False: + mtot_vs_maxoverlap_mass(7444, "csiborg", min_logmass, smoothed, + nbins, min_maxoverlap, ext) + if plot_quijote: + mtot_vs_maxoverlap_mass(0, "quijote", min_logmass, smoothed, + nbins, min_maxoverlap, ext) if False: - plot_summed_overlap_vs_mass(7444) + mtot_vs_expected_mass(7444, "csiborg", min_logmass, smoothed, nbins, + min_overlap=0, max_prob_nomatch=1, ext=ext) + if plot_quijote: + mtot_vs_expected_mass(0, "quijote", min_logmass, smoothed, nbins, + min_overlap=0, max_prob_nomatch=1, ext=ext) + + if False: + mass_vs_separation(7444, 7444 + 24, "csiborg", min_logmass, nbins, + smoothed, boxsize=677.7, plot_std=False) + if plot_quijote: + mass_vs_separation(0, 1, "quijote", min_logmass, nbins, + smoothed, boxsize=1000, plot_std=False) + if False: + mass_vs_maxoverlap_separation(7444, 7444 + 24, "csiborg", min_logmass, + nbins, smoothed, boxsize=677.7, + plot_std=False) + if plot_quijote: + mass_vs_maxoverlap_separation( + 0, 1, "quijote", min_logmass, nbins, smoothed, boxsize=1000, + plot_std=False) + + if False: + mtot_vs_mean_max_overlap(7444, "csiborg", min_logmass, smoothed, nbins) + + if plot_quijote: + mtot_vs_mean_max_overlap(0, "quijote", min_logmass, smoothed, + nbins) + + if False: + key = "lambda200c" + mtot_vs_maxoverlap_property(7444, "csiborg", min_logmass, key, + min_maxoverlap, smoothed) + if plot_quijote: + mtot_vs_maxoverlap_property(0, "quijote", min_logmass, key, + min_maxoverlap, smoothed) if True: - plot_mass_vs_separation(7444 + 24, 8956 + 24 * 3, min_overlap=0.0) - - if False: - plot_maxoverlap_mass(7444) - - if False: - plot_maxoverlapstat(7444, "lambda200c") - - if False: - plot_maxoverlapstat(7444, "totpartmass") - - if False: - plot_mass_vs_expected_mass(7444, max_prob_nomatch=1.0) - - # Plot 1NN distance distributions. - if False: - for i in range(1, 10): - run = f"mass00{i}" - for pulled_cdf in [True, False]: - plot_dist(run, "cdf", neighbour_kwargs, runs_to_mass, - pulled_cdf=pulled_cdf,) - plot_dist(run, "pdf", neighbour_kwargs, runs_to_mass) - - # Plot 1NN CDF differences. - if False: - runs = [f"mass00{i}" for i in range(1, 10)] - for pulled_cdf in [True, False]: - plot_cdf_diff(runs, neighbour_kwargs, pulled_cdf=pulled_cdf, - runs_to_mass=runs_to_mass) - if False: - runs = [f"mass00{i}" for i in range(1, 9)] - for kind in ["kl", "ks"]: - plot_significance("csiborg", runs, 7444, nobs=None, kind=kind, - kwargs=neighbour_kwargs, - runs_to_mass=runs_to_mass) - - if False: - # runs = [[f"mass00{i}"] for i in range(1, 10)] - runs = [[f"mass00{i}"] for i in [4]] - for runs_ in runs: - # runs = ["mass007"] - for kind in ["kl"]: - plot_significance_vs_mass("csiborg", runs_, 7444, nobs=None, - kind=kind, kwargs=neighbour_kwargs, - runs_to_mass=runs_to_mass, - upper_threshold=100) - - if False: - # runs = [f"mass00{i}" for i in range(1, 10)] - runs = ["mass004"] - plot_kl_vs_ks("csiborg", runs, 7444, None, kwargs=neighbour_kwargs, - runs_to_mass=runs_to_mass, upper_threshold=100) - - if False: - # runs = [f"mass00{i}" for i in range(1, 10)] - runs = ["mass007"] - plot_kl_vs_overlap(runs, 7444, neighbour_kwargs, runs_to_mass, - upper_threshold=100, plot_std=False) + matching_max_vs_overlap(7444, min_logmass) diff --git a/scripts_plots/plt_utils.py b/scripts_plots/plt_utils.py index 302ee1b..c38112c 100644 --- a/scripts_plots/plt_utils.py +++ b/scripts_plots/plt_utils.py @@ -15,6 +15,7 @@ import numpy from scipy.stats import binned_statistic +from scipy.special import erf dpi = 600 fout = "../plots/" @@ -56,38 +57,74 @@ def latex_float(*floats, n=2): return latex_floats -def binned_trend(x, y, weights, bins): - """ - Calculate the weighted mean and standard deviation of `y` in bins of `x`. +def nan_weighted_average(arr, weights=None, axis=None): + if weights is None: + weights = numpy.ones_like(arr) - Parameters - ---------- - x : 1-dimensional array - The x-coordinates of the data points. - y : 1-dimensional array - The y-coordinates of the data points. - weights : 1-dimensional array - The weights of the data points. - bins : 1-dimensional array - The bin edges. + valid_entries = ~numpy.isnan(arr) - Returns - ------- - stat_x : 1-dimensional array - The x-coordinates of the binned data points. - stat_mu : 1-dimensional array - The weighted mean of `y` in bins of `x`. - stat_std : 1-dimensional array - The weighted standard deviation of `y` in bins of `x`. - """ - stat_mu, __, __ = binned_statistic(x, y * weights, bins=bins, - statistic="sum") - stat_std, __, __ = binned_statistic(x, y * weights, bins=bins, - statistic=numpy.var) - stat_w, __, __ = binned_statistic(x, weights, bins=bins, statistic="sum") + # Set NaN entries in arr to 0 for computation + arr = numpy.where(valid_entries, arr, 0) - stat_x = (bins[1:] + bins[:-1]) / 2 - stat_mu /= stat_w - stat_std /= stat_w - stat_std = numpy.sqrt(stat_std) - return stat_x, stat_mu, stat_std + # Set weights of NaN entries to 0 + weights = numpy.where(valid_entries, weights, 0) + + # Compute the weighted sum and the sum of weights along the axis + weighted_sum = numpy.sum(arr * weights, axis=axis) + sum_weights = numpy.sum(weights, axis=axis) + + return weighted_sum / sum_weights + + +def nan_weighted_std(arr, weights=None, axis=None, ddof=0): + if weights is None: + weights = numpy.ones_like(arr) + + valid_entries = ~numpy.isnan(arr) + + # Set NaN entries in arr to 0 for computation + arr = numpy.where(valid_entries, arr, 0) + + # Set weights of NaN entries to 0 + weights = numpy.where(valid_entries, weights, 0) + + # Calculate weighted mean + weighted_mean = numpy.sum( + arr * weights, axis=axis) / numpy.sum(weights, axis=axis) + + # Calculate the weighted variance + variance = numpy.sum( + weights * (arr - numpy.expand_dims(weighted_mean, axis))**2, axis=axis) + variance /= numpy.sum(weights, axis=axis) - ddof + + return numpy.sqrt(variance) + + +def compute_error_bars(x, y, xbins, sigma): + bin_indices = numpy.digitize(x, xbins) + y_medians = numpy.array([numpy.median(y[bin_indices == i]) + for i in range(1, len(xbins))]) + + lower_pct = 100 * 0.5 * (1 - erf(sigma / numpy.sqrt(2))) + upper_pct = 100 - lower_pct + + y_lower = numpy.full(len(y_medians), numpy.nan) + y_upper = numpy.full(len(y_medians), numpy.nan) + + for i in range(len(y_medians)): + if numpy.sum(bin_indices == i + 1) == 0: + continue + + y_lower[i] = numpy.percentile(y[bin_indices == i + 1], lower_pct) + y_upper[i] = numpy.percentile(y[bin_indices == i + 1], upper_pct) + + yerr = (y_medians - numpy.array(y_lower), numpy.array(y_upper) - y_medians) + + return y_medians, yerr + + +def normalize_hexbin(hb): + hexagon_counts = hb.get_array() + normalized_counts = hexagon_counts / hexagon_counts.sum() + hb.set_array(normalized_counts) + hb.set_clim(normalized_counts.min(), normalized_counts.max())