From 5500fbd2b9778875fc1238de9a7e9f0ff50dd043 Mon Sep 17 00:00:00 2001 From: Richard Stiskalek Date: Tue, 17 Oct 2023 12:11:15 +0100 Subject: [PATCH] Matching paper plots (#91) * Fix calculations of expected mass * Add paper plots * Edits to pltos * Add overlap summary * Add imports * Add import * Add binned stat * Add fit * Add more plots * Add basic env * Add histogram mode * Edit expected mass * Improve expected plots * Clean up plot * Improve separation plot * Update plots * Edit expected calculation * Update plotting * Update plots * Update plots * Update plots * Add conc fraction * Add halo maker sorting * Renaming * Add import * Add NaN treatment * add import * Move cosine smi * Update plots * Move similarity * Fix little bugs * Shorten documentation * Update plots --- csiborgtools/__init__.py | 8 +- csiborgtools/match/__init__.py | 2 +- csiborgtools/match/match.py | 111 +- csiborgtools/summary/__init__.py | 8 +- csiborgtools/summary/field_interp.py | 145 ++ csiborgtools/summary/overlap_summary.py | 317 ++-- csiborgtools/utils.py | 43 + scripts/sort_halomaker.py | 100 + scripts/{pre_sortinit.py => sort_initsnap.py} | 13 +- scripts_plots/paper_match.py | 1632 +++++++++++++++++ scripts_plots/plot_data.py | 108 +- 11 files changed, 2193 insertions(+), 294 deletions(-) create mode 100644 scripts/sort_halomaker.py rename scripts/{pre_sortinit.py => sort_initsnap.py} (94%) create mode 100644 scripts_plots/paper_match.py diff --git a/csiborgtools/__init__.py b/csiborgtools/__init__.py index c05bc30..e382c56 100644 --- a/csiborgtools/__init__.py +++ b/csiborgtools/__init__.py @@ -12,10 +12,12 @@ # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from csiborgtools import clustering, field, match, read, summary # noqa +from csiborgtools import clustering, field, match, read, summary # noqa + +from .utils import (center_of_mass, delta2ncells, number_counts, # noqa + periodic_distance, periodic_distance_two_points, # noqa + binned_statistic, cosine_similarity) # noqa -from .utils import (center_of_mass, delta2ncells, number_counts, # noqa - periodic_distance, periodic_distance_two_points) # noqa # Arguments to csiborgtools.read.Paths. paths_glamdring = {"srcdir": "/mnt/extraspace/hdesmond/", diff --git a/csiborgtools/match/__init__.py b/csiborgtools/match/__init__.py index f53f0bb..d5b45f9 100644 --- a/csiborgtools/match/__init__.py +++ b/csiborgtools/match/__init__.py @@ -14,5 +14,5 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. from .match import (ParticleOverlap, RealisationsMatcher, # noqa calculate_overlap, calculate_overlap_indxs, pos2cell, # noqa - cosine_similarity, find_neighbour, get_halo_cell_limits, # noqa + find_neighbour, get_halo_cell_limits, # noqa matching_max) # noqa diff --git a/csiborgtools/match/match.py b/csiborgtools/match/match.py index 1d34018..5d821b9 100644 --- a/csiborgtools/match/match.py +++ b/csiborgtools/match/match.py @@ -660,19 +660,12 @@ class ParticleOverlap(BaseMatcher): def pos2cell(pos, ncells): - """ - Convert position to cell number if there are `ncells` cells along the axis. - """ if pos.dtype.char in numpy.typecodes["AllInteger"]: return pos return numpy.floor(pos * ncells).astype(numpy.int32) def read_nshift(smooth_kwargs): - """ - Determine the number of cells to pad the density field if smoothing is - applied. Defaults to the ceiling of three times the smoothing scale. - """ return 0 if smooth_kwargs is None else ceil(3 * smooth_kwargs["sigma"]) @@ -774,33 +767,26 @@ def get_halo_cell_limits(pos, ncells, nshift=0): return mins, maxs -@jit(nopython=True) +@jit(nopython=True, boundscheck=False) def calculate_overlap(delta1, delta2, cellmins, delta_bckg, box_size, bckg_halfsize): - r""" - Overlap between two halos whose density fields are evaluated on the - same grid. This is a JIT implementation, hence it is outside of the main - class. + """ + Calculate overlap between two halos' density fields on the same grid. Parameters ---------- - delta1: 3-dimensional array - Density field of the first halo. - delta2 : 3-dimensional array - Density field of the second halo. - cellmins : len-3 tuple - Tuple of lower cell ID in the full box. - delta_bckg : 3-dimensional array - Summed background density field of the reference and cross simulations - calculated with particles assigned to halos at the final snapshot. - Calculated on a grid determined by `bckg_halfsize`. + delta1, delta2 : 3D array + Density fields of the first and second halos, respectively. + cellmins : tuple (len=3) + Lower cell ID in the full box. + delta_bckg : 3D array + Combined background density field of reference and cross simulations + on `bckg_halfsize` grid. box_size : int - Number of cells in the box. + Cell count in the box. bckg_halfsize : int - Background half-size for density field calculation. This is the - grid distance from the center of the box to each side over which to - evaluate the background density field. Must be less than or equal to - half the box size. + Grid distance from box center for background density. + ≤ 0.5 * box_size. Returns ------- @@ -834,39 +820,29 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg, box_size, return intersect / (totmass - intersect) -@jit(nopython=True) +@jit(nopython=True, boundscheck=False) def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero, mass1, mass2, box_size, bckg_halfsize): - r""" - Overlap between two haloes whose density fields are evaluated on the - same grid and `nonzero1` enumerates the non-zero cells of `delta1. This is - a JIT implementation, hence it is outside of the main class. + """ + Calculate overlap of two halos' density fields on the same grid. Parameters ---------- - delta1: 3-dimensional array - Density field of the first halo. - delta2 : 3-dimensional array - Density field of the second halo. - cellmins : len-3 tuple - Tuple of lower cell ID in the full box. - delta_bckg : 3-dimensional array - Summed background density field of the reference and cross simulations - calculated with particles assigned to halos at the final snapshot. - Calculated on a grid determined by `bckg_halfsize`. - nonzero : 2-dimensional array of shape `(n_cells, 3)` - Indices of cells that are non-zero of the lower mass halo. Expected to - be precomputed from `fill_delta_indxs`. - mass1, mass2 : floats, optional - Total masses of the two haloes, respectively. Optional. If not provided - calculcated directly from the density field. + delta1, delta2 : 3D array + Density fields of the first and second halos, respectively. + cellmins : tuple (len=3) + Lower cell ID in the full box. + delta_bckg : 3D array + Combined background density from reference and cross simulations + on `bckg_halfsize` grid. + nonzero : 2D array (shape: (n_cells, 3)) + Non-zero cells for the lower mass halo (from `fill_delta_indxs`). + mass1, mass2 : float, optional + Halos' total masses. Calculated from density if not provided. box_size : int - Number of cells in the box. + Cell count in the box. bckg_halfsize : int - Background half-size for density field calculation. This is the - grid distance from the center of the box to each side over which to - evaluate the background density field. Must be less than or equal to - half the box size. + Grid distance from box center for background density; ≤ 0.5 * box_size. Returns ------- @@ -1039,35 +1015,6 @@ def find_neighbour(nsim0, cats): return dists, cross_hindxs -def cosine_similarity(x, y): - r""" - Calculate the cosine similarity between two Cartesian vectors. Defined - as :math:`\Sum_{i} x_i y_{i} / (|x| * |y|)`. - - Parameters - ---------- - x : 1-dimensional array - The first vector. - y : 1- or 2-dimensional array - The second vector. Can be 2-dimensional of shape `(n_samples, 3)`, - in which case the calculation is broadcasted. - - Returns - ------- - out : float or 1-dimensional array - """ - if x.ndim != 1: - raise ValueError("`x` must be a 1-dimensional array.") - - if y.ndim == 1: - y = y.reshape(1, -1) - - out = numpy.sum(x * y, axis=1) - out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1) - - return out[0] if out.size == 1 else out - - def matching_max(cat0, catx, mass_kind, mult, periodic, overlap=None, match_indxs=None, verbose=True): """ diff --git a/csiborgtools/summary/__init__.py b/csiborgtools/summary/__init__.py index cf81f79..9b476af 100644 --- a/csiborgtools/summary/__init__.py +++ b/csiborgtools/summary/__init__.py @@ -16,7 +16,11 @@ from .knn_summary import kNNCDFReader # noqa from .nearest_neighbour_summary import NearestNeighbourReader # noqa from .overlap_summary import weighted_stats # noqa -from .overlap_summary import NPairsOverlap, PairOverlap, get_cross_sims # noqa +from .overlap_summary import (NPairsOverlap, PairOverlap, get_cross_sims, # noqa + max_overlap_agreement, max_overlap_agreements, # noqa + find_peak) # noqa from .pk_summary import PKReader # noqa from .tpcf_summary import TPCFReader # noqa -from .field_interp import read_interpolated_field # noqa +from .field_interp import (read_interpolated_field, # noqa + bayesian_bootstrap_correlation, # noqa + correlate_at_fixed_smoothing) # noqa diff --git a/csiborgtools/summary/field_interp.py b/csiborgtools/summary/field_interp.py index 1336aa8..e2e89e3 100644 --- a/csiborgtools/summary/field_interp.py +++ b/csiborgtools/summary/field_interp.py @@ -15,6 +15,12 @@ import numpy from tqdm import tqdm +from numba import jit + + +############################################################################### +# Read in the field values at the galaxy positions # +############################################################################### def read_interpolated_field(survey_name, kind, galaxy_index, paths, MAS, grid, @@ -75,3 +81,142 @@ def read_interpolated_field(survey_name, kind, galaxy_index, paths, MAS, grid, ks[i] = j return out[:, ks, :] + + +############################################################################### +# Calculate the Bayesian bootstrapped correlation # +############################################################################### + + +@jit(nopython=True, fastmath=True, boundscheck=False) +def dot_product(x, y): + tot = 0.0 + for i in range(len(x)): + tot += x[i] * y[i] + return tot + + +@jit(nopython=True, fastmath=True, boundscheck=False) +def cov(x, y, mean_x, mean_y, weights): + tot = 0.0 + for i in range(len(x)): + tot += (x[i] - mean_x) * (y[i] - mean_y) * weights[i] + return tot + + +@jit(nopython=True, fastmath=True, boundscheck=False) +def var(x, mean_x, weights): + tot = 0.0 + for i in range(len(x)): + tot += (x[i] - mean_x)**2 * weights[i] + return tot + + +@jit(nopython=True, fastmath=True, boundscheck=False) +def weighted_correlation(x, y, weights): + mean_x = dot_product(x, weights) + mean_y = dot_product(y, weights) + + cov_xy = cov(x, y, mean_x, mean_y, weights) + + var_x = var(x, mean_x, weights) + var_y = var(y, mean_y, weights) + + return cov_xy / numpy.sqrt(var_x * var_y) + + +@jit(nopython=True, fastmath=True, boundscheck=False) +def _bayesian_bootstrap_correlation(x, y, weights): + nweights = len(weights) + bootstrapped_correlations = numpy.full(nweights, numpy.nan, dtype=x.dtype) + for i in range(nweights): + bootstrapped_correlations[i] = weighted_correlation(x, y, weights[i]) + return bootstrapped_correlations + + +@jit(nopython=True, fastmath=True, boundscheck=False) +def rank(x): + order = numpy.argsort(x) + ranks = order.argsort() + return ranks + + +@jit(nopython=True, fastmath=True, boundscheck=False) +def bayesian_bootstrap_correlation(x, y, kind="spearman", n_bootstrap=10000): + """ + Calculate the Bayesian bootstrapped correlation between two arrays. + + Parameters + ---------- + x, y : 1-dimensional arrays + The two arrays to calculate the correlation between. + kind : str, optional + The type of correlation to calculate. Either `spearman` or `pearson`. + n_bootstrap : int, optional + The number of bootstrap samples to use. + + Returns + ------- + corr : 1-dimensional array of shape `(n_bootstrap,)` + """ + if len(x) != len(y): + raise ValueError("Input arrays must have the same length") + + if kind not in ["spearman", "pearson"]: + raise ValueError("kind must be either `spearman` or `pearson`") + + if kind == "spearman": + dtype = x.dtype + x = rank(x).astype(dtype) + y = rank(y).astype(dtype) + + alphas = numpy.ones(len(x), dtype=x.dtype) + weights = numpy.random.dirichlet(alphas, size=n_bootstrap) + return _bayesian_bootstrap_correlation(x, y, weights) + + +############################################################################### +# Distribution disagreement # +############################################################################### + + +def distribution_disagreement(x, y): + """ + Think about this more when stacking non-Gaussian distributions. + """ + delta = x - y + return numpy.abs(delta.mean()) / delta.std() + + + + +""" + +field will be of value (nsims, ngal, nsmooth) + +Calculate the correlation for each sim and smoothing scale (nsims, nsmooth) + +For each of the above stack the distributions? +""" +def correlate_at_fixed_smoothing(field_values, galaxy_property, + kind="spearman", n_bootstrap=1000): + galaxy_property = galaxy_property.astype(field_values.dtype) + nsims = len(field_values) + + distributions = numpy.empty((nsims, n_bootstrap), dtype=field_values.dtype) + + from tqdm import trange + + for i in trange(nsims): + distributions[i] = bayesian_bootstrap_correlation( + field_values[i], galaxy_property, kind=kind, n_bootstrap=n_bootstrap) + + return distributions + + + +def do_something(field_values, galaxy_property): + + pass + + diff --git a/csiborgtools/summary/overlap_summary.py b/csiborgtools/summary/overlap_summary.py index 66a262e..fa12e82 100644 --- a/csiborgtools/summary/overlap_summary.py +++ b/csiborgtools/summary/overlap_summary.py @@ -23,6 +23,30 @@ from tqdm import tqdm, trange from ..utils import periodic_distance + +############################################################################### +# Utility functions # +############################################################################### + +def find_peak(x, weights, shrink=0.95, min_obs=5): + """ + Find the peak of a 1D distribution using a shrinking window. + """ + assert shrink <= 1. + + xmin, xmax = numpy.min(x), numpy.max(x) + xpos = (xmax + xmin) / 2 + rad = (xmax - xmin) / 2 + + while True: + mask = numpy.abs(x - xpos) < rad + if mask.sum() < min_obs: + return xpos + + xpos = numpy.average(x[mask], weights=weights[mask]) + rad *= shrink + + ############################################################################### # Overlap of two simulations # ############################################################################### @@ -251,42 +275,16 @@ class PairOverlap: ---------- from_smoothed : bool Whether to use the smoothed overlap or not. - Returns ------- summed_overlap : 1-dimensional array of shape `(nhalos, )` """ overlap = self.overlap(from_smoothed) - out = numpy.full(len(overlap), numpy.nan, dtype=numpy.float32) + out = numpy.zeros(len(overlap), dtype=numpy.float32) + for i in range(len(overlap)): if len(overlap[i]) > 0: out[i] = numpy.sum(overlap[i]) - else: - out[i] = 0 - return out - - def prob_nomatch(self, from_smoothed): - """ - Probability of no match for each halo in the reference simulation with - the cross simulation. Defined as a product of 1 - overlap with other - halos. - - Parameters - ---------- - from_smoothed : bool - Whether to use the smoothed overlap or not. - - Returns - ------- - prob_nomatch : 1-dimensional array of shape `(nhalos, )` - """ - overlap = self.overlap(from_smoothed) - out = numpy.full(len(overlap), numpy.nan, dtype=numpy.float32) - for i in range(len(overlap)): - if len(overlap[i]) > 0: - out[i] = numpy.product(numpy.subtract(1, overlap[i])) - else: - out[i] = 1 return out def dist(self, in_initial, boxsize, norm_kind=None): @@ -308,8 +306,7 @@ class PairOverlap: ------- dist : array of 1-dimensional arrays of shape `(nhalos, )` """ - assert (norm_kind is None - or norm_kind in ("r200c", "ref_patch", "sum_patch")) + assert (norm_kind is None or norm_kind in ("r200c", "ref_patch", "sum_patch")) # noqa # Get positions either in the initial or final snapshot pos0 = self.cat0().position(in_initial=in_initial) posx = self.catx().position(in_initial=in_initial) @@ -400,60 +397,6 @@ class PairOverlap: return out - def counterpart_mass(self, from_smoothed, overlap_threshold=0., - mass_kind="totpartmass"): - """ - Calculate the expected counterpart mass of each halo in the reference - simulation from the crossed simulation. - - Parameters - ---------- - from_smoothed : bool - Whether to use the smoothed overlap or not. - overlap_threshold : float, optional - Minimum overlap required for a halo to be considered a match. By - default 0.0, i.e. no threshold. - mass_kind : str, optional - The mass kind whose ratio is to be calculated. Must be a valid - catalogue key. By default `totpartmass`, i.e. the total particle - mass associated with a halo. - - Returns - ------- - mean, std : 1-dimensional arrays of shape `(nhalos, )` - """ - mean = numpy.full(len(self), numpy.nan, dtype=numpy.float32) - std = numpy.full(len(self), numpy.nan, dtype=numpy.float32) - - massx = self.catx(mass_kind) # Create references to speed - overlap = self.overlap(from_smoothed) # up the loop below - - for i, match_ind in enumerate(self["match_indxs"]): - # Skip if no match - if len(match_ind) == 0: - continue - - massx_ = massx[match_ind] # Again just create references - overlap_ = overlap[i] # to the appropriate elements - - # Optionally apply overlap threshold - if overlap_threshold > 0.: - mask = overlap_ > overlap_threshold - if numpy.sum(mask) == 0: - continue - massx_ = massx_[mask] - overlap_ = overlap_[mask] - - massx_ = numpy.log10(massx_) - # Weighted average and *biased* standard deviation - mean_ = numpy.average(massx_, weights=overlap_) - std_ = numpy.average((massx_ - mean_)**2, weights=overlap_)**0.5 - - mean[i] = mean_ - std[i] = std_ - - return mean, std - def copy_per_match(self, par): """ Make an array like `self.match_indxs` where each of its element is an @@ -525,6 +468,82 @@ class PairOverlap: return self["match_indxs"].size +############################################################################### +# Support functions for pair overlaps # +############################################################################### + + +def max_overlap_agreement(cat0, catx, min_logmass, maxdist, paths): + r""" + Calculate whether for a halo `A` from catalogue `cat0` that has a maximum + overlap with halo `B` from catalogue `catx` it is also `B` that has a + maximum overlap with `A`. + + Parameters + ---------- + cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue` + Halo catalogue corresponding to the reference simulation. + catx : instance of :py:class:`csiborgtools.read.BaseCatalogue` + Halo catalogue corresponding to the cross simulation. + min_logmass : float + Minimum halo mass in :math:`\log_{10} M_\odot / h` to consider. + maxdist : float, optional + Maximum halo distance in :math:`\mathrm{Mpc} / h` from the centre + of the high-resolution region. + paths : py:class`csiborgtools.read.Paths` + CSiBORG paths object. + + Returns + ------- + agreement : 1-dimensional array of shape `(nhalos, )` + """ + kwargs = {"paths": paths, "min_logmass": min_logmass, "maxdist": maxdist} + pair_forward = PairOverlap(cat0, catx, **kwargs) + pair_backward = PairOverlap(catx, cat0, **kwargs) + + nhalos = len(pair_forward.cat0()) + agreement = numpy.full(nhalos, numpy.nan, dtype=numpy.float32) + + for i in range(nhalos): + match_indxs_forward = pair_forward["match_indxs"][i] + + if len(match_indxs_forward) == 0: + continue + + overlap_forward = pair_forward["smoothed_overlap"][i] + + kmax = match_indxs_forward[numpy.argmax(overlap_forward)] + match_indxs_backward = pair_backward["match_indxs"][kmax] + overlap_backward = pair_backward["smoothed_overlap"][kmax] + + imatch = match_indxs_backward[numpy.argmax(overlap_backward)] + agreement[i] = imatch == i + + return agreement + + +def max_overlap_agreements(cat0, catxs, min_logmass, maxdist, paths, + verbose=True): + """ + Repeat `max_overlap_agreement` for many cross simulations. + + Parameters + ---------- + ... + + Returns + ------- + agreements : 2-dimensional array of shape `(ncatxs, nhalos)` + """ + agreements = [None] * len(catxs) + desc = "Calculating maximum overlap agreement" + for i, catx in enumerate(tqdm(catxs, desc=desc, disable=not verbose)): + agreements[i] = max_overlap_agreement(cat0, catx, min_logmass, + maxdist, paths) + + return numpy.asanyarray(agreements) + + def weighted_stats(x, weights, min_weight=0, verbose=False): """ Calculate the weighted mean and standard deviation of `x` using `weights` @@ -544,11 +563,10 @@ def weighted_stats(x, weights, min_weight=0, verbose=False): Returns ------- - stat : 2-dimensional array of shape `(len(x), 2)` - The first column is the weighted mean and the second column is the - weighted standard deviation. + mu, std : 1-dimensional arrays of shape `(len(x), )` """ - out = numpy.full((x.size, 2), numpy.nan, dtype=numpy.float32) + mu = numpy.full(x.size, numpy.nan, dtype=numpy.float32) + std = numpy.full(x.size, numpy.nan, dtype=numpy.float32) for i in trange(len(x), disable=not verbose): x_, w_ = numpy.asarray(x[i]), numpy.asarray(weights[i]) @@ -557,9 +575,9 @@ def weighted_stats(x, weights, min_weight=0, verbose=False): w_ = w_[mask] if len(w_) == 0: continue - out[i, 0] = numpy.average(x_, weights=w_) - out[i, 1] = numpy.average((x_ - out[i, 0])**2, weights=w_)**0.5 - return out + mu[i] = numpy.average(x_, weights=w_) + std[i] = numpy.average((x_ - mu[i])**2, weights=w_)**0.5 + return mu, std ############################################################################### @@ -684,92 +702,87 @@ class NPairsOverlap: out[i] = pair.summed_overlap(from_smoothed) return numpy.vstack(out).T - def prob_nomatch(self, from_smoothed, verbose=True): - """ - Probability of no match for each halo in the reference simulation with - the cross simulation. + def expected_property_single(self, k, key, from_smoothed, in_log=True): + ys = [None] * len(self) + overlaps = [None] * len(self) + for i, pair in enumerate(self): + overlap = pair.overlap(from_smoothed) + if len(overlap[k]) == 0: + ys[i] = numpy.nan + overlaps[i] = numpy.nan + continue + match_indxs = pair["match_indxs"] + j = numpy.argmax(overlap[k]) - Parameters - ---------- - from_smoothed : bool - Whether to use the smoothed overlap or not. - verbose : bool, optional - Verbosity flag. + ys[i] = pair.catx(key)[match_indxs[k][j]] + if in_log: + ys[i] = numpy.log10(ys[i]) + overlaps[i] = overlap[k][j] - Returns - ------- - prob_nomatch : 2-dimensional array of shape `(nhalos, ncatxs)` - """ - iterator = tqdm(self.pairs, - desc="Calculating probability of no match", - disable=not verbose - ) - out = [None] * len(self) - for i, pair in enumerate(iterator): - out[i] = pair.prob_nomatch(from_smoothed) - return numpy.vstack(out).T + return ys, overlaps - def counterpart_mass(self, from_smoothed, overlap_threshold=0., - mass_kind="totpartmass", return_full=False, - verbose=True): + def expected_property(self, key, from_smoothed, min_logmass, + in_log=True, mass_kind="totpartmass", verbose=True): """ Calculate the expected counterpart mass of each halo in the reference simulation from the crossed simulation. Parameters ---------- + key : str + Property key. from_smoothed : bool Whether to use the smoothed overlap or not. - overlap_threshold : float, optional - Minimum overlap required for a halo to be considered a match. By - default 0.0, i.e. no threshold. + min_logmass : float + Minimum log mass of reference halos to consider. + in_log : bool, optional + Whether to calculated the expected property in log10. mass_kind : str, optional The mass kind whose ratio is to be calculated. Must be a valid catalogue key. By default `totpartmass`, i.e. the total particle mass associated with a halo. - return_full : bool, optional - Whether to return the full results of matching each pair or - calculate summary statistics by Gaussian averaging. verbose : bool, optional Verbosity flag. Returns ------- - mu, std : 1-dimensional arrays of shape `(nhalos,)` - Summary expected mass and standard deviation from all cross - simulations. - mus, stds : 2-dimensional arrays of shape `(nhalos, ncatx)`, optional - Expected mass and standard deviation from each cross simulation. - Returned only if `return_full` is `True`. + mean_expected : 1-dimensional array of shape `(nhalos, )` + Expected property from all cross simulations. + std_expected : 1-dimensional array of shape `(nhalos, )` + Standard deviation of the expected property. """ - iterator = tqdm(self.pairs, - desc="Calculating counterpart masses", - disable=not verbose) - mus, stds = [None] * len(self), [None] * len(self) - for i, pair in enumerate(iterator): - mus[i], stds[i] = pair.counterpart_mass( - from_smoothed=from_smoothed, - overlap_threshold=overlap_threshold, mass_kind=mass_kind) - mus, stds = numpy.vstack(mus).T, numpy.vstack(stds).T + log_mass0 = numpy.log10(self.cat0(mass_kind)) + ntot = len(log_mass0) + mean_expected = numpy.full(ntot, numpy.nan, dtype=numpy.float32) + std_expected = numpy.full(ntot, numpy.nan, dtype=numpy.float32) - # Prob of > 0 matches - probmatch = 1 - self.prob_nomatch(from_smoothed) - # Normalise it for weighted sums etc. - norm_probmatch = numpy.apply_along_axis( - lambda x: x / numpy.sum(x), axis=1, arr=probmatch) + indxs = numpy.where(log_mass0 > min_logmass)[0] + for i in tqdm(indxs, disable=not verbose, + desc="Calculating expectation"): + ys = numpy.full(len(self), numpy.nan, dtype=numpy.float32) + weights = numpy.full(len(self), numpy.nan, dtype=numpy.float32) + for j, pair in enumerate(self): + overlap = pair.overlap(from_smoothed) + if len(overlap[i]) == 0: + continue - # Mean and standard deviation of weighted stacked Gaussians - mu = numpy.sum((norm_probmatch * mus), axis=1) - std = numpy.sum((norm_probmatch * (mus**2 + stds**2)), axis=1) - mu**2 - std **= 0.5 + k = numpy.argmax(overlap[i]) + ys[j] = pair.catx(key)[pair["match_indxs"][i][k]] + weights[j] = overlap[i][k] - mask = mu <= 0 - mu[mask] = numpy.nan - std[mask] = numpy.nan + if in_log: + ys[j] = numpy.log10(ys[j]) - if return_full: - return mu, std, mus, stds - return mu, std + mask = numpy.isfinite(ys) & numpy.isfinite(weights) + if numpy.sum(mask) <= 2: + continue + + mean_expected[i] = find_peak(ys[mask], weights=weights[mask]) + std_expected[i] = numpy.average((ys[mask] - mean_expected[i])**2, + weights=weights[mask])**0.5 + print(log_mass0[i], mean_expected[i], std_expected[i]) + + return mean_expected, std_expected @property def pairs(self): diff --git a/csiborgtools/utils.py b/csiborgtools/utils.py index 3edc0d7..388c4f6 100644 --- a/csiborgtools/utils.py +++ b/csiborgtools/utils.py @@ -150,6 +150,35 @@ def radec_to_cartesian(X): ]).T +def cosine_similarity(x, y): + r""" + Calculate the cosine similarity between two Cartesian vectors. Defined + as :math:`\Sum_{i} x_i y_{i} / (|x| * |y|)`. + + Parameters + ---------- + x : 1-dimensional array + The first vector. + y : 1- or 2-dimensional array + The second vector. Can be 2-dimensional of shape `(n_samples, 3)`, + in which case the calculation is broadcasted. + + Returns + ------- + out : float or 1-dimensional array + """ + if x.ndim != 1: + raise ValueError("`x` must be a 1-dimensional array.") + + if y.ndim == 1: + y = y.reshape(1, -1) + + out = numpy.sum(x * y, axis=1) + out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1) + + return out[0] if out.size == 1 else out + + def real2redshift(pos, vel, observer_location, observer_velocity, box, periodic_wrap=True, make_copy=True): r""" @@ -219,3 +248,17 @@ def number_counts(x, bin_edges): for i in range(bin_edges.size - 1): out[i] = numpy.sum((x >= bin_edges[i]) & (x < bin_edges[i + 1])) return out + + +def binned_statistic(x, y, left_edges, bin_width, statistic): + """ + Calculate a binned statistic. + """ + out = numpy.full(left_edges.size, numpy.nan, dtype=x.dtype) + + for i in range(left_edges.size): + mask = (x >= left_edges[i]) & (x < left_edges[i] + bin_width) + + if numpy.any(mask): + out[i] = statistic(y[mask]) + return out diff --git a/scripts/sort_halomaker.py b/scripts/sort_halomaker.py new file mode 100644 index 0000000..20e95f1 --- /dev/null +++ b/scripts/sort_halomaker.py @@ -0,0 +1,100 @@ +# Copyright (C) 2022 Richard Stiskalek +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +""" +Script to sort the HaloMaker's `particle_membership` file to match the ordering +of particles in the simulation snapshot. +""" +from argparse import ArgumentParser +from datetime import datetime +from glob import iglob + +import h5py +import numpy +import pynbody +from mpi4py import MPI +from taskmaster import work_delegation +from tqdm import trange + +import csiborgtools + + +def sort_particle_membership(nsim, nsnap, method): + """ + Read the FoF particle halo membership and sort the halo IDs to the ordering + of particles in the PHEW clump IDs. + + Parameters + ---------- + nsim : int + IC realisation index. + verbose : bool, optional + Verbosity flag. + """ + print(f"{datetime.now()}: starting simulation {nsim}, snapshot {nsnap} and method {method}.") # noqa + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + + fpath = next(iglob(f"/mnt/extraspace/rstiskalek/CSiBORG/halo_maker/ramses_{nsim}/output_{str(nsnap).zfill(5)}/**/*particle_membership*", recursive=True), None) # noqa + print(f"{datetime.now()}: loading particle membership `{fpath}`.") + # Columns are halo ID, particle ID + membership = numpy.genfromtxt(fpath, dtype=int) + + print(f"{datetime.now()}: loading particle IDs from the snapshot.") + sim = pynbody.load(paths.snapshot(nsnap, nsim, "csiborg")) + pids = numpy.asanyarray(sim["iord"]) + + print(f"{datetime.now()}: mapping particle IDs to their indices.") + pids_idx = {pid: i for i, pid in enumerate(pids)} + + print(f"{datetime.now()}: mapping HIDs to their array indices.") + # Unassigned particle IDs are assigned a halo ID of 0. + hids = numpy.zeros(pids.size, dtype=numpy.int32) + for i in trange(membership.shape[0]): + hid, pid = membership[i] + hids[pids_idx[pid]] = hid + + fout = fpath + "_sorted.hdf5" + print(f"{datetime.now()}: saving the sorted data to ... `{fout}`") + + header = """ + This dataset represents halo indices for each particle. + - The particles are ordered as they appear in the simulation snapshot. + - Unassigned particles are given a halo index of 0. + """ + with h5py.File(fout, 'w') as hdf: + dset = hdf.create_dataset('hids_dataset', data=hids) + dset.attrs['header'] = header + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("--method", type=str, required=True, + help="HaloMaker method") + parser.add_argument("--nsim", type=int, required=False, default=None, + help="IC index. If not set process all.") + args = parser.parse_args() + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + + if args.nsim is None: + ics = paths.get_ics("csiborg") + else: + ics = [args.nsim] + + snaps = numpy.array([max(paths.get_snapshots(nsim, "csiborg")) + for nsim in ics]) + + def main(n): + sort_particle_membership(ics[n], snaps[n], args.method) + + work_delegation(main, list(range(len(ics))), MPI.COMM_WORLD) diff --git a/scripts/pre_sortinit.py b/scripts/sort_initsnap.py similarity index 94% rename from scripts/pre_sortinit.py rename to scripts/sort_initsnap.py index b130ae6..302f8bf 100644 --- a/scripts/pre_sortinit.py +++ b/scripts/sort_initsnap.py @@ -29,16 +29,9 @@ import numpy from mpi4py import MPI from taskmaster import work_delegation +import csiborgtools from utils import get_nsims -try: - import csiborgtools -except ModuleNotFoundError: - import sys - - sys.path.append("../") - import csiborgtools - def _main(nsim, simname, verbose): """ @@ -51,9 +44,7 @@ def _main(nsim, simname, verbose): else: partreader = csiborgtools.read.QuijoteReader(paths) - if verbose: - print(f"{datetime.now()}: reading and processing simulation `{nsim}`.", - flush=True) + print(f"{datetime.now()}: processing simulation `{nsim}`.", flush=True) # We first load the particle IDs in the final snapshot. pidf = csiborgtools.read.read_h5(paths.particles(nsim, simname)) pidf = pidf["particle_ids"] diff --git a/scripts_plots/paper_match.py b/scripts_plots/paper_match.py new file mode 100644 index 0000000..014c041 --- /dev/null +++ b/scripts_plots/paper_match.py @@ -0,0 +1,1632 @@ +# Copyright (C) 2023 Richard Stiskalek +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +from os.path import join + +import matplotlib.pyplot as plt +import numpy +import scienceplots # noqa +from cache_to_disk import cache_to_disk, delete_disk_caches_for_function +from scipy.stats import norm +from sklearn.metrics import r2_score +from tqdm import tqdm, trange +from astropy import units as u +from astropy.coordinates import SkyCoord + +from colossus.cosmology import cosmology +from colossus.halo import concentration + +import csiborgtools +import plt_utils + +MASS_KINDS = {"csiborg": "fof_totpartmass", + "quijote": "group_mass", + } + + +def open_cat(nsim, simname): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + + if simname == "csiborg": + bounds = {"dist": (0, 155)} + cat = csiborgtools.read.CSiBORGHaloCatalogue( + nsim, paths, bounds=bounds) + elif simname == "quijote": + cat = csiborgtools.read.QuijoteHaloCatalogue( + nsim, paths, nsnap=4, load_fitted=True, load_initial=True, + with_lagpatch=False) + else: + raise ValueError(f"Unknown simulation name: {simname}.") + + return cat + + +def open_cats(nsims, simname): + catxs = [None] * len(nsims) + + for i, nsim in enumerate(tqdm(nsims, desc="Opening catalogues")): + catxs[i] = open_cat(nsim, simname) + + return catxs + + +@cache_to_disk(120) +def get_overlap_summary(nsim0, simname, min_logmass, smoothed): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.summary.get_cross_sims( + simname, nsim0, paths, min_logmass, smoothed=smoothed) + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + reader = csiborgtools.summary.NPairsOverlap(cat0, catxs, paths, + min_logmass) + mass0 = reader.cat0(MASS_KINDS[simname]) + mask = mass0 > 10**min_logmass + + return {"mass0": mass0[mask], + "hid0": reader.cat0("index")[mask], + "summed_overlap": reader.summed_overlap(smoothed)[mask], + "max_overlap": reader.max_overlap(0, smoothed)[mask], + "prob_nomatch": reader.prob_nomatch(smoothed)[mask], + } + + +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs pair overlaps # +############################################################################### +# --------------------------------------------------------------------------- # + + +@cache_to_disk(120) +def get_mtot_vs_all_pairoverlap(nsim0, simname, mass_kind, min_logmass, + smoothed, nbins): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.summary.get_cross_sims(simname, nsim0, paths, + min_logmass, + smoothed=smoothed) + nsimxs = nsimxs + + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + reader = csiborgtools.summary.NPairsOverlap(cat0, catxs, paths, + min_logmass) + + x = [None] * len(catxs) + y = [None] * len(catxs) + for i in trange(len(catxs), desc="Stacking catalogues"): + x[i] = numpy.log10( + numpy.concatenate(reader[i].copy_per_match(mass_kind))) + y[i] = numpy.concatenate(reader[i].overlap(smoothed)) + + x = numpy.concatenate(x) + y = numpy.concatenate(y) + + xbins = numpy.linspace(min(x), max(x), nbins) + + return x, y, xbins + + +def mtot_vs_all_pairoverlap(nsim0, simname, min_logmass, smoothed, nbins, + ext="png"): + mass_kind = MASS_KINDS[simname] + x, y, xbins = get_mtot_vs_all_pairoverlap(nsim0, simname, mass_kind, + min_logmass, smoothed, nbins) + sigma = 1 + + with plt.style.context(plt_utils.mplstyle): + plt.figure() + hb = plt.hexbin(x, y, mincnt=1, gridsize=50, bins="log") + + y_median, yerr = plt_utils.compute_error_bars(x, y, xbins, sigma=sigma) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3, + label="CSiBORG" if simname == "csiborg" else None, + fmt="o", ms=3) + + if simname == "csiborg": + x_quijote, y_quijote, xbins_quijote = get_mtot_vs_all_pairoverlap( + 0, "quijote", "group_mass", min_logmass, smoothed, nbins) + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + x_quijote, y_quijote, xbins_quijote, sigma=sigma) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]) + 0.01, + y_median_quijote, yerr=yerr_quijote, + color='sandybrown', ls='dashed', capsize=3, + label="Quijote", fmt="o", ms=3) + plt.legend(loc="upper left", ncols=2, columnspacing=1.0) + + plt.colorbar(hb, label="Counts in bins", pad=0) + plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + plt.ylabel(r"$\mathcal{O}_{a b}$") + plt.xlim(numpy.min(x)) + plt.ylim(0., 1.) + + plt.tight_layout() + fout = join(plt_utils.fout, + f"mass_vs_pair_overlap_{simname}_{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs maximum pair overlaps # +############################################################################### +# --------------------------------------------------------------------------- # + + +@cache_to_disk(120) +def get_mtot_vs_maxpairoverlap(nsim0, simname, mass_kind, min_logmass, + smoothed, nbins, concatenate=True): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.summary.get_cross_sims( + simname, nsim0, paths, min_logmass, smoothed=smoothed) + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + def get_max(y_): + if len(y_) == 0: + return 0 + return numpy.nanmax(y_) + + reader = csiborgtools.summary.NPairsOverlap(cat0, catxs, paths, + min_logmass) + + x = [None] * len(catxs) + y = [None] * len(catxs) + for i in trange(len(catxs), desc="Stacking catalogues"): + x[i] = numpy.log10(cat0[mass_kind]) + y[i] = numpy.array([get_max(y_) for y_ in reader[i].overlap(smoothed)]) + + mask = x[i] > min_logmass + x[i] = x[i][mask] + y[i] = y[i][mask] + + xbins = numpy.linspace(numpy.min(x), numpy.max(x), nbins) + if concatenate: + x = numpy.concatenate(x) + y = numpy.concatenate(y) + + return x, y, xbins + + +def mtot_vs_maxpairoverlap(nsim0, simname, mass_kind, min_logmass, smoothed, + nbins, ext="png"): + x, y, xbins = get_mtot_vs_maxpairoverlap(nsim0, simname, mass_kind, + min_logmass, smoothed, nbins) + + with plt.style.context(plt_utils.mplstyle): + plt.figure() + plt.hexbin(x, y, mincnt=1, gridsize=50, bins="log") + + y_median, yerr = plt_utils.compute_error_bars(x, y, xbins, sigma=1) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3, + label="CSiBORG" if simname == "csiborg" else None, + fmt="o", ms=3) + + if simname == "csiborg": + x_quijote, y_quijote, xbins_quijote = get_mtot_vs_all_pairoverlap( + 0, "quijote", "group_mass", min_logmass, smoothed, nbins) + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + x_quijote, y_quijote, xbins_quijote, sigma=1) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]) + 0.01, + y_median_quijote, yerr=yerr_quijote, + color='sandybrown', ls='dashed', capsize=3, + label="Quijote", fmt="o", ms=3) + + plt.colorbar(label="Counts in bins", pad=0) + plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + plt.ylabel(r"$\max_{b \in \mathcal{B}} \mathcal{O}_{a b}$") + plt.ylim(-0.02, 1.) + plt.xlim(numpy.min(x) - 0.05) + + plt.tight_layout() + fout = join(plt_utils.fout, f"mass_vs_max_pair_overlap{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +def mtot_vs_maxpairoverlap_statistic(nsim0, simname, mass_kind, min_logmass, + smoothed, nbins, ext="png"): + x, y, xbins = get_mtot_vs_maxpairoverlap(nsim0, simname, mass_kind, + min_logmass, smoothed, nbins) + with plt.style.context(plt_utils.mplstyle): + plt.figure() + plt.hexbin(x, y, mincnt=1, gridsize=50, bins="log") + + y_median, yerr = plt_utils.compute_error_bars(x, y, xbins, sigma=2) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3, + label="CSiBORG" if simname == "csiborg" else None, + fmt="o", ms=3) + + if simname == "csiborg": + x_quijote, y_quijote, xbins_quijote = get_mtot_vs_all_pairoverlap( + 0, "quijote", "group_mass", min_logmass, smoothed, nbins) + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + x_quijote, y_quijote, xbins_quijote, sigma=2) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]) + 0.01, + y_median_quijote, yerr=yerr_quijote, + color='blue', ls='dashed', capsize=3, + label="Quijote", fmt="o", ms=3) + plt.legend(ncol=2, fontsize="small") + + # plt.colorbar(label="Counts in bins", pad=0) + plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + plt.ylabel(r"$\max_{b \in \mathcal{B}} \mathcal{O}_{a b}$") + plt.ylim(-0.02, 1.) + plt.xlim(numpy.min(x) - 0.05) + + plt.tight_layout() + fout = join(plt_utils.fout, f"mass_vs_max_pair_overlap{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +def mtot_vs_maxpairoverlap_fraction(min_logmass, smoothed, nbins, ext="png"): + + csiborg_nsims = [7444 + 24 * n for n in range(10)] + quijote_nsims = [n for n in range(10)] + + @cache_to_disk(120) + def get_xy_maxoverlap_fraction(n): + x_csiborg, y_csiborg, __ = get_mtot_vs_maxpairoverlap( + csiborg_nsims[n], "csiborg", MASS_KINDS["csiborg"], min_logmass, + smoothed, nbins, concatenate=False) + x_quijote, y_quijote, __ = get_mtot_vs_maxpairoverlap( + quijote_nsims[n], "quijote", MASS_KINDS["quijote"], min_logmass, + smoothed, nbins, concatenate=False) + + x_csiborg = x_csiborg[0] + x_quijote = x_quijote[0] + + y_csiborg = numpy.asanyarray(y_csiborg) + y_quijote = numpy.asanyarray(y_quijote) + y_csiborg = numpy.median(y_csiborg, axis=0) + y_quijote = numpy.median(y_quijote, axis=0) + + xbins = numpy.arange(min_logmass, 15.61, 0.2) + x = 0.5 * (xbins[1:] + xbins[:-1]) + y = numpy.full((len(x), 3), numpy.nan) + percentiles = norm.cdf(x=[1, 2, 3]) * 100 + + for i in range(len(xbins)-1): + mask_csiborg = (x_csiborg >= xbins[i]) & (x_csiborg < xbins[i+1]) + mask_quijote = (x_quijote >= xbins[i]) & (x_quijote < xbins[i+1]) + + current_y_csiborg = y_csiborg[mask_csiborg] + current_y_quijote = y_quijote[mask_quijote] + current_tot_csiborg = len(current_y_csiborg) + + for j, q in enumerate(percentiles): + threshold = numpy.percentile(current_y_quijote, q) + y[i, j] = (current_y_csiborg > threshold).sum() + y[i, j] /= current_tot_csiborg + return x, y + + ys = [None] * 10 + for n in range(10): + x, ys[n] = get_xy_maxoverlap_fraction(n) + ys = numpy.asanyarray(ys) + + ymean = numpy.nanmean(ys, axis=0) + ystd = numpy.nanstd(ys, axis=0) + + with plt.style.context(plt_utils.mplstyle): + plt.figure() + for i in range(3): + plt.plot(x, ymean[:, i], label=r"${}\sigma$".format(i+1)) + plt.fill_between(x, ymean[:, i] - ystd[:, i], + ymean[:, i] + ystd[:, i], alpha=0.2) + + plt.legend() + plt.ylim(0.0, 1.025) + + plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + plt.ylabel(r"$f_{\rm significant}$") + + plt.xlim(numpy.nanmin(x), numpy.nanmax(x)) + + plt.tight_layout() + fout = join(plt_utils.fout, f"mass_vs_max_pair_overlap_fraction.{ext}") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +def summed_to_max_overlap(min_logmass, smoothed, nbins, ext="png"): + x_csiborg = get_overlap_summary(7444, "csiborg", min_logmass, smoothed) + + with plt.style.context(plt_utils.mplstyle): + plt.figure() + x = numpy.mean(x_csiborg["summed_overlap"], axis=1) + y = numpy.mean(x_csiborg["max_overlap"], axis=1) + + plt.hexbin(x, y, mincnt=0, gridsize=40, + C=numpy.log10(x_csiborg["mass0"]), + reduce_C_function=numpy.median) + plt.colorbar(label=r"$\log M_{\rm tot} ~ [M_\odot / h]$", pad=0) + + plt.axline((0, 0), slope=1, color='red', linestyle='--', + label=r"$1-1$") + + plt.legend() + + plt.xlabel(r"$\langle \sum_{b \in \mathcal{B}} \mathcal{O}_{a b}\rangle_{\mathcal{B}}$") # noqa + plt.ylabel(r"$\langle \max_{b \in \mathcal{B}} \mathcal{O}_{a b}\rangle_{\mathcal{B}}$") # noqa + + print(x.min(), x.max()) + print(y.min(), y.max()) + + plt.tight_layout() + ext = "pdf" + fout = join(plt_utils.fout, f"summed_to_max_overlap.{ext}") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs pair overlaps # +############################################################################### +# --------------------------------------------------------------------------- # + +@cache_to_disk(120) +def get_max_overlap_agreement(nsim0, simname, min_logmass, smoothed): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.summary.get_cross_sims( + simname, nsim0, paths, min_logmass, smoothed=smoothed) + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + return csiborgtools.summary.max_overlap_agreements(cat0, catxs, 13.25, + 155.5, paths) + + +def maximum_overlap_agreement(nsim0, simname, min_logmass, smoothed): + + agreements = get_max_overlap_agreement(nsim0, simname, min_logmass, + smoothed) + + x, y, mass_bins = get_mtot_vs_maxpairoverlap( + nsim0, simname, MASS_KINDS[simname], min_logmass, smoothed, 10, + concatenate=False) + x = x[0] + y = numpy.asanyarray(y) + mean_max_overlap = numpy.mean(y, axis=0) + + cat0 = open_cat(nsim0, simname) + totpartmass = numpy.log10(cat0[MASS_KINDS[simname]]) + + mask = totpartmass > min_logmass + agreements = agreements[:, mask] + totpartmass = totpartmass[mask] + + mask = numpy.any(numpy.isfinite(agreements), axis=0) + y = numpy.sum(agreements == 1., axis=0) + + with plt.style.context(plt_utils.mplstyle): + plt.figure(figsize=(3.5, 2.625)) + + plt.scatter(totpartmass[mask], y[mask], s=5, c=mean_max_overlap, + rasterized=True) + plt.colorbar(label=r"$\langle \max_{b \in \mathcal{B}} \mathcal{O}_{a b}\rangle_{\mathcal{B}}$", pad=0) # noqa + + ymed, yerr = plt_utils.compute_error_bars(totpartmass[mask], y[mask], + mass_bins, sigma=1) + plt.errorbar(0.5 * (mass_bins[1:] + mass_bins[:-1]), ymed, yerr=yerr, + capsize=3, c="red", ls="--", lw=0.5, fmt="o", ms=3) + + plt.xlim(numpy.nanmin(totpartmass[mask])) + plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + plt.ylabel(r"$f_{\rm sym}$") + plt.tight_layout() + fout = join(plt_utils.fout, + f"maximum_overlap_agreement{simname}_{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + with plt.style.context(plt_utils.mplstyle): + plt.figure(figsize=(3.5, 2.625)) + + plt.scatter(mean_max_overlap[mask], y[mask], s=5, c=totpartmass, + rasterized=True) + plt.colorbar(label=r"$\log M_{\rm tot} ~ [M_\odot / h]$", pad=0) + bins = numpy.arange(0, 0.7, 0.05) + ymed, yerr = plt_utils.compute_error_bars( + mean_max_overlap[mask], y[mask], bins, sigma=1) + + plt.errorbar(0.5 * (bins[1:] + bins[:-1]), ymed, yerr=yerr, + capsize=3, c="red", ls="--", lw=0.5, fmt="o", ms=3) + plt.xlabel(r"$\langle \max_{b \in \mathcal{B}} \mathcal{O}_{a b}\rangle_{\mathcal{B}}$") # noqa + plt.ylabel(r"$f_{\rm sym}$") + plt.xlim(numpy.nanmin(mean_max_overlap[mask])) + + # plt.xscale("log") + plt.tight_layout() + fout = join(plt_utils.fout, f"maximum_overlap_agreement_mean_overlap{simname}_{nsim0}.{ext}") # noqa + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs summed pair overlaps # +############################################################################### +# --------------------------------------------------------------------------- # + +def mtot_vs_summedpairoverlap(nsim0, simname, min_logmass, smoothed, nbins, + ext="png"): + x = get_overlap_summary(nsim0, simname, min_logmass, smoothed) + + mass0 = numpy.log10(x["mass0"]) + mean_overlap = numpy.nanmean(x["summed_overlap"], axis=1) + std_overlap = numpy.nanstd(x["summed_overlap"], axis=1) + + xbins = numpy.linspace(numpy.nanmin(mass0), numpy.nanmax(mass0), nbins) + + with plt.style.context(plt_utils.mplstyle): + plt.figure(figsize=(3.5, 2.625)) + plt.hexbin(mass0, mean_overlap, mincnt=1, bins="log", gridsize=30) + + y_median, yerr = plt_utils.compute_error_bars( + mass0, mean_overlap, xbins, sigma=1) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3, label="CSiBORG", + fmt="o", ms=3) + + if simname == "csiborg": + x_quijote = get_overlap_summary(0, "quijote", min_logmass, + smoothed) + mass0_quijote = numpy.log10(x_quijote["mass0"]) + mean_overlap_quijote = numpy.nanmean(x_quijote["summed_overlap"], + axis=1) + xbins_quijote = numpy.linspace(numpy.nanmin(mass0), + numpy.nanmax(mass0), nbins) + + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + mass0_quijote, mean_overlap_quijote, xbins_quijote, sigma=1) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]) + 0.01, + y_median_quijote, yerr=yerr_quijote, + color='sandybrown', ls='dashed', capsize=3, + label="Quijote", fmt="o", ms=3) + plt.legend() + + plt.xlim(numpy.min(mass0)) + plt.xlim(numpy.min(mass0)) + plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + plt.ylabel(r"$\langle \sum_{b \in \mathcal{B}} \mathcal{O}_{a b}\rangle_{\mathcal{B}}$") # noqa + plt.colorbar(label="Counts in bins", pad=0) + + plt.tight_layout() + fout = join(plt_utils.fout, f"prob_match_mean_{simname}_{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + with plt.style.context(plt_utils.mplstyle): + plt.figure(figsize=(3.5, 2.625)) + plt.hexbin(mass0, std_overlap, mincnt=1, bins="log", gridsize=30) + + y_median, yerr = plt_utils.compute_error_bars( + mass0, std_overlap, xbins, sigma=2) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3, fmt="o", ms=3) + + if simname == "csiborg": + x_quijote = get_overlap_summary(0, "quijote", min_logmass, + smoothed) + mass0_quijote = numpy.log10(x_quijote["mass0"]) + mean_overlap_quijote = numpy.nanmean(x_quijote["summed_overlap"], + axis=1) + std_overlap_quijote = numpy.nanstd(x_quijote["summed_overlap"], + axis=1) + xbins_quijote = numpy.linspace(numpy.nanmin(mass0), + numpy.nanmax(mass0), nbins) + + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + mass0_quijote, std_overlap_quijote, xbins_quijote, sigma=2) + plt.errorbar(0.5 * (xbins[1:] + xbins[:-1]) + 0.01, + y_median_quijote, yerr=yerr_quijote, + color='sandybrown', ls='dashed', capsize=3, + fmt="o", ms=3) + + plt.colorbar(label=r"Counts in bins", pad=0) + plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + plt.ylabel(r"$\sigma\left(\sum_{b \in \mathcal{B}} \mathcal{O}_{a b}\right)_{\mathcal{B}}$") # noqa + + plt.tight_layout() + fout = join(plt_utils.fout, f"prob_match_std_{simname}_{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs mean separation # +############################################################################### +# --------------------------------------------------------------------------- # + +@cache_to_disk(120) +def get_mass_vs_separation(nsim0, simname, min_logmass, boxsize, + smoothed): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.summary.get_cross_sims( + simname, nsim0, paths, min_logmass, smoothed=smoothed) + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + reader = csiborgtools.summary.NPairsOverlap( + cat0, catxs, paths, min_logmass) + + mass = numpy.log10(reader.cat0(MASS_KINDS[simname])) + mus = numpy.zeros((len(catxs), len(mass))) + for i in trange(len(catxs), desc="Stacking catalogues"): + dist = reader[i].dist(in_initial=False, boxsize=boxsize, + norm_kind=None) + overlap = reader[i].overlap(smoothed) + mus[i], __ = csiborgtools.summary.weighted_stats(dist, overlap) + + return mass, mus + + +def mass_vs_separation(nsim0, simname, min_logmass, nbins, smoothed, + boxsize): + mass, dist = get_mass_vs_separation( + nsim0, simname, min_logmass, boxsize, smoothed) + + dist = numpy.nanmean(dist, axis=0) + mask = numpy.isfinite(mass) & numpy.isfinite(dist) + + mass = mass[mask] + dist = dist[mask] + + xbins = numpy.linspace(numpy.nanmin(mass), numpy.nanmax(mass), nbins) + + with plt.style.context(plt_utils.mplstyle): + fig, ax = plt.subplots() + plt.rcParams["axes.grid"] = False + + cx = ax.hexbin(mass, dist, mincnt=0, bins="log", gridsize=50) + y_median, yerr = plt_utils.compute_error_bars(mass, dist, xbins, + sigma=1) + ax.errorbar(0.5 * (xbins[1:] + xbins[:-1]), y_median, yerr=yerr, + color='red', ls='dashed', capsize=3, + label="CSiBORG" if simname == "csiborg" else None, + fmt="o", ms=3) + ax.set_xlim(numpy.nanmin(mass)) + + if simname == "csiborg": + mass_quijote, dist_quijote = get_mass_vs_separation( + 0, "quijote", min_logmass, boxsize, smoothed) + dist_quijote = numpy.nanmean(dist_quijote, axis=0) + mask = numpy.isfinite(mass_quijote) & numpy.isfinite(dist_quijote) + mass_quijote = mass_quijote[mask] + dist_quijote = dist_quijote[mask] + + # dist_quijote = numpy.log10(dist_quijote) + xbins_quijote = numpy.linspace(numpy.nanmin(mass_quijote), + numpy.nanmax(mass_quijote), nbins) + xbins_quijote = xbins_quijote[:-1] + y_median_quijote, yerr_quijote = plt_utils.compute_error_bars( + mass_quijote, dist_quijote, xbins_quijote, sigma=1) + ax.errorbar(0.5 * (xbins_quijote[1:] + xbins_quijote[:-1]), + y_median_quijote, yerr=yerr_quijote, + color='sandybrown', ls='dashed', capsize=3, + label="Quijote", fmt="o", ms=3) + ax.legend(ncols=2) + + fig.colorbar(cx, label="Bin counts", pad=0) + ax.set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + ax.set_ylabel(r"$\langle \Delta R / R_{\rm 200c}\rangle_{\mathcal{B}}$") # noqa + ax.set_ylabel(r"$\langle \Delta R \rangle_{\mathcal{B}} ~ [\mathrm{Mpc} / h]$") # noqa + + fig.tight_layout() + fout = join(plt_utils.fout, f"mass_vs_sep_{simname}_{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs expected matched mass # +############################################################################### +# --------------------------------------------------------------------------- # + + +@cache_to_disk(120) +def get_expected_mass(nsim0, simname, min_logmass, smoothed): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.summary.get_cross_sims(simname, nsim0, paths, + min_logmass, smoothed=True) + + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + reader = csiborgtools.summary.NPairsOverlap(cat0, catxs, paths, + min_logmass) + mass0 = reader.cat0(MASS_KINDS[simname]) + mask = mass0 > 10**min_logmass + + mean_expected, std_expected = reader.expected_property( + MASS_KINDS[simname], smoothed, min_logmass) + + return {"mass0": mass0[mask], + "mu": mean_expected[mask], + "std": std_expected[mask], + "prob_match": reader.summed_overlap(smoothed)[mask], + } + + +def mtot_vs_expected_mass(nsim0, simname, min_logmass, smoothed, ext="png"): + x = get_expected_mass(nsim0, simname, min_logmass, smoothed) + + mass = x["mass0"] + mu = x["mu"] + std = x["std"] + prob_match = x["prob_match"] + + mass = numpy.log10(mass) + prob_match = numpy.nanmean(prob_match, axis=1) + mask = numpy.isfinite(mass) & numpy.isfinite(mu) & numpy.isfinite(std) + + with plt.style.context(plt_utils.mplstyle): + fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625)) + + im0 = axs[0].hexbin(mass[mask], mu[mask], mincnt=1, bins="log", + gridsize=30,) + im1 = axs[1].hexbin(mass[mask], std[mask], mincnt=1, bins="log", + gridsize=30) + im2 = axs[2].hexbin(prob_match[mask], mu[mask] - mass[mask], + gridsize=30, C=mass[mask], + reduce_C_function=numpy.nanmedian) + + axs[2].axhline(0, color="red", linestyle="--", alpha=0.5) + axs[0].set_xlabel(r"$\log M_{\rm tot, ref} ~ [M_\odot / h]$") + axs[0].set_ylabel(r"$\log M_{\rm tot, exp} ~ [M_\odot / h]$") + axs[1].set_xlabel(r"$\log M_{\rm tot, ref} ~ [M_\odot / h]$") + axs[1].set_ylabel(r"$\sigma_{\log M_{\rm tot, exp}}$") + axs[2].set_xlabel(r"$\langle \sum_{b \in \mathcal{B}} \mathcal{O}_{a b}\rangle_{\mathcal{B}}$") # noqa + axs[2].set_ylabel(r"$\log (M_{\rm tot, exp} / M_{\rm tot, ref})$") + + z = numpy.nanmean(mass[mask]) + axs[0].axline((z, z), slope=1, color='red', linestyle='--', + label=r"$1-1$") + axs[0].legend() + + ims = [im0, im1, im2] + labels = ["Bin counts", "Bin counts", + r"$\log M_{\rm tot} ~ [M_\odot / h]$"] + for i in range(3): + axins = axs[i].inset_axes([0.0, 1.0, 1.0, 0.05]) + fig.colorbar(ims[i], cax=axins, orientation="horizontal", + label=labels[i]) + axins.xaxis.tick_top() + axins.xaxis.set_tick_params(labeltop=True) + axins.xaxis.set_label_position("top") + + fig.tight_layout() + fout = join(plt_utils.fout, f"mass_vs_expmass_{nsim0}_{simname}.{ext}") + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Total DM halo mass vs maximum overlap halo property # +############################################################################### +# --------------------------------------------------------------------------- # + +@cache_to_disk(120) +def get_expected_key(nsim0, simname, min_logmass, key, smoothed): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.summary.get_cross_sims(simname, nsim0, paths, + min_logmass, smoothed=True) + + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + reader = csiborgtools.summary.NPairsOverlap(cat0, catxs, paths, + min_logmass) + mass0 = reader.cat0(MASS_KINDS[simname]) + mask = mass0 > 10**min_logmass + + in_log = False if key == "conc" else True + mean_expected, std_expected = reader.expected_property( + key, smoothed, min_logmass, in_log=in_log) + + log_mass0 = numpy.log10(mass0) + control = numpy.full(len(log_mass0), numpy.nan) + # for i in trange(len(log_mass0), desc="Control"): + # if not mask[i]: + # continue + + # control_ = [None] * len(catxs) + # for j in range(len(catxs)): + # log_massx = numpy.log10(reader[j].catx(MASS_KINDS[simname])) + # ks = numpy.argsort(numpy.abs(log_massx - log_mass0[i]))[:15] + # control_[j] = reader[j].catx(key)[ks] + + # control[i] = numpy.nanmean(numpy.concatenate(control_)) + + return {"mass0": mass0[mask], + "prop0": reader.cat0(key)[mask], + "mu": mean_expected[mask], + "std": std_expected[mask], + "control": control[mask], + "prob_match": reader.summed_overlap(smoothed)[mask], + } + + +def mtot_vs_expected_key(nsim0, simname, min_logmass, key, smoothed, + min_logmass_run=None): + mass_kind = MASS_KINDS[simname] + assert key != mass_kind + + x = get_expected_key(nsim0, simname, min_logmass, key, smoothed) + mass0 = numpy.log10(x["mass0"]) + prop0 = x["prop0"] + mu = x["mu"] + std = x["std"] + prob_match = numpy.nanmean(x["prob_match"], axis=1) + control = x["control"] + + print("prop0 ", prop0) + print("mu ", mu) + print("std ", std) + print("control", control) + + mask = numpy.isfinite(prop0) & numpy.isfinite(mu) & numpy.isfinite(std) + if min_logmass_run is not None: + mask &= mass0 > 15 + mask &= prop0 > 0.4 + + mass0 = mass0[mask] + prop0 = prop0[mask] + mu = mu[mask] + std = std[mask] + control = control[mask] + prob_match = prob_match[mask] + + def rmse(x, y, sample_weight=None): + return numpy.sqrt(numpy.average((x - y)**2, weights=sample_weight)) + + print("Unweigted R2 = ", r2_score(prop0, mu)) + print("Err Weighted R2 = ", r2_score(prop0, mu, sample_weight=1 / std**2)) # noqa + print("Pmatch R2 = ", r2_score(prop0, mu, sample_weight=prob_match)) # noqa + + print() + + print("Unweigted RMSE = ", rmse(prop0, mu)) + print("Err Weighted RMSE = ", rmse(prop0, mu, 1 / std**2)) + print("Pmatch RMSE = ", rmse(prop0, mu, prob_match)) + + with plt.style.context(plt_utils.mplstyle): + fig, ax = plt.subplots(figsize=(3.5, 2.625)) + + ax.errorbar(prop0, mu, yerr=std, capsize=3, ls="none", marker="o") + + z = numpy.nanmean(prop0) + ax.axline((z, z), slope=1, color='red', linestyle='--', label=r"$1-1$") + + ax.legend() + + # if key == "lambda200c": + # ax.axhline(numpy.median(control), color="red", ls="--", zorder=0) + + if key == "lambda200c": + ax.set_xlabel(r"$\log \lambda_{\rm 200c, ref}$") + ax.set_ylabel(r"$\log \lambda_{\rm 200c, exp}$") + elif key == "conc": + ax.set_xlabel(r"$c_{\rm 200c, ref}$") + ax.set_ylabel(r"$c_{\rm 200c, exp}$") + + fig.tight_layout() + fout = join(plt_utils.fout, f"max_{key}_{simname}_{nsim0}.{ext}") + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Total mass of a single halo expectation # +############################################################################### +# --------------------------------------------------------------------------- # + + +@cache_to_disk(120) +def get_expected_single(k, nsim0, simname, min_logmass, key, smoothed, + in_log): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.summary.get_cross_sims( + simname, nsim0, paths, min_logmass, smoothed=smoothed) + + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + reader = csiborgtools.summary.NPairsOverlap(cat0, catxs, paths, + min_logmass) + + x0 = reader.cat0(key) + + if "maxmass" in k: + k0 = int(k.split("__")[1]) + k = numpy.argsort(reader.cat0(MASS_KINDS[simname]))[::-1][k0] + print(f"Doing the {k0}th maximum hass halo with index {k}.") + + if k == "max": + k = numpy.nanargmax(x0) + + xcross, overlaps = reader.expected_property_single(k, key, smoothed, + in_log) + + control = [None] * len(catxs) + log_mass0 = numpy.log10(reader.cat0(MASS_KINDS[simname])[k]) + for j in range(len(catxs)): + log_massx = numpy.log10(reader[j].catx(MASS_KINDS[simname])) + ks = numpy.argsort(numpy.abs(log_massx - log_mass0))[:15] + control[j] = reader[j].catx(key)[ks] + + xcross = numpy.asanyarray(xcross) + overlaps = numpy.asanyarray(overlaps) + + return x0[k], xcross, overlaps, control + + +def mtot_vs_expected_single(k, nsim0, simname, min_logmass, key, smoothed): + x0, xcross, overlaps, control = get_expected_single( + k, nsim0, simname, min_logmass, key, smoothed, False) + + control = numpy.concatenate(control) + + if key == "lambda200c" or key == "totpartmass": + xcross = numpy.log10(xcross) + control = numpy.log10(control) + x0 = numpy.log10(x0) + + m = numpy.isfinite(xcross) & numpy.isfinite(overlaps) + with plt.style.context("science"): + cols = plt.rcParams["axes.prop_cycle"].by_key()["color"] + plt.figure() + plt.hist(xcross[m], weights=overlaps[m], bins=30, histtype="step", + density=1, label="Matched", color=cols[0]) + + peak = csiborgtools.summary.find_peak(xcross[m], overlaps[m]) + plt.axvline(peak, color="mediumblue", ls="--") + plt.axvline(x0, color="red", ls="--") + + if key != "totpartmass": + m = numpy.isfinite(control) + plt.hist(control[m], bins=30, histtype="step", density=1, + label="Control", color=cols[1]) + + m = numpy.isfinite(control) + xmin, xmax = numpy.percentile(control[m], [16, 84]) + plt.axvspan(xmin, xmax, color=cols[1], alpha=0.1) + + if key == "totpartmass" or key == "fof_totpartmass": + plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + elif key == "lambda200c": + plt.xlabel(r"$\log \lambda_{\rm 200c}$") + elif key == "conc": + plt.xlabel(r"$c$") + + plt.ylabel("Normalized counts") + plt.legend() + + plt.tight_layout() + fout = join( + plt_utils.fout, + f"expected_single_{k}_{key}_{nsim0}_{simname}_{min_logmass}.pdf") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +def mtot_vs_expected_single_panel(k, nsim0, simname, min_logmass, smoothed): + print("Plotting..") + + def local_data(key): + x0, xcross, overlaps, control = get_expected_single( + k, nsim0, simname, min_logmass, key, smoothed, False) + + control = numpy.concatenate(control) + if key == "lambda200c" or key == "totpartmass": + xcross = numpy.log10(xcross) + control = numpy.log10(control) + x0 = numpy.log10(x0) + + m = numpy.isfinite(xcross) & numpy.isfinite(overlaps) + return x0, xcross[m], overlaps[m], control + + with plt.style.context("science"): + cols = plt.rcParams["axes.prop_cycle"].by_key()["color"] + fig, axs = plt.subplots(ncols=3, figsize=(2 * 3.5, 2.625)) + fig.subplots_adjust(wspace=0.0) + nbins = 25 + + # TOTAL MASS + x0, x, weights, __ = local_data("totpartmass") + axs[0].hist(x, weights=weights, bins=nbins, histtype="step", density=1, + label="Matched") + axs[0].legend() + peak = csiborgtools.summary.find_peak(x, weights) + axs[0].axvline(peak, color="mediumblue", ls="--") + axs[0].axvline(x0, color="red", ls="--") + std = numpy.average((x - peak)**2, weights=weights)**0.5 + xmin, xmax = peak - std, peak + std + axs[0].axvspan(xmin, xmax, color=cols[0], alpha=0.2) + + # SPIN + x0, x, weights, control = local_data("lambda200c") + axs[1].hist(x, weights=weights, bins=nbins, histtype="step", density=1, + color=cols[0]) + axs[1].hist(control, bins=nbins, histtype="step", density=1, + color=cols[1], label="Control") + axs[1].legend() + xmin, xmax = numpy.percentile(control, [16, 84]) + axs[1].axvspan(xmin, xmax, color=cols[1], alpha=0.1) + peak = csiborgtools.summary.find_peak(x, weights) + axs[1].axvline(peak, color="mediumblue", ls="--") + axs[1].axvline(x0, color="red", ls="--") + axs[1].set_yticklabels([]) + m = numpy.isfinite(control) + xmin, mu, xmax = numpy.percentile(control[m], [16, 50, 84]) + axs[1].axvspan(xmin, xmax, color=cols[1], alpha=0.2) + axs[1].axvline(mu, color=cols[1], ls="--") + std = numpy.average((x - peak)**2, weights=weights)**0.5 + xmin, xmax = peak - std, peak + std + axs[1].axvspan(xmin, xmax, color=cols[0], alpha=0.2) + + # CONCENTRATION + x0, x, weights, control = local_data("conc") + axs[2].hist(x, weights=weights, bins=nbins, histtype="step", density=1, + color=cols[0]) + axs[2].hist(control, bins=nbins, histtype="step", density=1, + color=cols[1]) + xmin, xmax = numpy.percentile(control, [16, 84]) + axs[2].axvspan(xmin, xmax, color=cols[1], alpha=0.1) + peak = csiborgtools.summary.find_peak(x, weights) + axs[2].axvline(peak, color="mediumblue", ls="--") + axs[2].axvline(x0, color="red", ls="--") + axs[2].set_yticklabels([]) + m = numpy.isfinite(control) + xmin, mu, xmax = numpy.percentile(control[m], [16, 50, 84]) + axs[2].axvspan(xmin, xmax, color=cols[1], alpha=0.2) + axs[2].axvline(mu, color=cols[1], ls="--") + std = numpy.average((x - peak)**2, weights=weights)**0.5 + xmin, xmax = peak - std, peak + std + axs[2].axvspan(xmin, xmax, color=cols[0], alpha=0.2) + + axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + axs[1].set_xlabel(r"$\log \lambda_{\rm 200c}$") + axs[2].set_xlabel(r"$c$") + + axs[0].set_ylabel("Normalized counts") + + fig.tight_layout(h_pad=0, w_pad=0) + fout = join( + plt_utils.fout, + f"expected_single_{k}_panel_{nsim0}_{simname}_{min_logmass}.pdf") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Concentration reconstruction # +############################################################################### +# --------------------------------------------------------------------------- # + + +@cache_to_disk(120) +def get_expected_many_topmass(kmax, nsim0, simname, min_logmass, key, smoothed, + in_log): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.summary.get_cross_sims( + simname, nsim0, paths, min_logmass, smoothed=smoothed) + + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + reader = csiborgtools.summary.NPairsOverlap(cat0, catxs, paths, + min_logmass) + + log_mass0 = numpy.log10(reader.cat0(MASS_KINDS[simname])) + x0 = reader.cat0(key) + + out_mass = numpy.full((kmax), numpy.nan) + out_true = numpy.full((kmax), numpy.nan) + out_matched = numpy.full((kmax, len(nsimxs), 2), numpy.nan) + out_control = numpy.full((kmax, len(nsimxs)), numpy.nan) + + for k0 in trange(kmax, desc="Looping over top masses"): + k = numpy.argsort(log_mass0)[::-1][k0] + xcross, overlaps = reader.expected_property_single(k, key, smoothed, + in_log) + out_matched[k0, :, 0] = xcross + out_matched[k0, :, 1] = overlaps + + out_mass[k0] = log_mass0[k] + out_true[k0] = x0[k] + + for j in range(len(catxs)): + log_massx = numpy.log10(reader[j].catx(MASS_KINDS[simname])) + ks = numpy.argsort(numpy.abs(log_massx - log_mass0[k]))[:15] + out_control[k0, j] = numpy.nanmean(reader[j].catx(key)[ks]) + + return out_mass, out_true, out_matched, out_control + + +def expected_in_tolerance(kmax, nsim0, simname, min_logmass, key, smoothed): + out_mass, out_true, out_matched, out_control = get_expected_many_topmass( + kmax, nsim0, simname, min_logmass, key, smoothed, False) + + if key == "lambda200c" or key == "totpartmass": + out_true = numpy.log10(out_true) + out_matched[..., 0] = numpy.log10(out_matched[..., 0]) + out_control = numpy.log10(out_control) + + tolerances = [0.1, 0.3, 0.5] + + in_tolerance = numpy.full((kmax, len(tolerances)), numpy.nan) + for k in range(kmax): + frac_diff = (out_matched[k, :, 0] - out_true[k]) / out_true[k] + frac_diff = numpy.abs(frac_diff) + for j, tol in enumerate(tolerances): + in_tolerance[k, j] = numpy.sum(frac_diff < tol) + + left_edges = numpy.arange(numpy.nanmin(out_mass), numpy.nanmax(out_mass), + 0.01) + bw = 0.2 + x = left_edges + 0.5 * bw + nsimx = out_matched.shape[1] + + with plt.style.context("science"): + plt.figure() + + for j, tol in enumerate(tolerances): + + med = csiborgtools.binned_statistic(out_mass, in_tolerance[:, j], + left_edges, bw, numpy.median) + lower = csiborgtools.binned_statistic( + out_mass, in_tolerance[:, j], left_edges, bw, + lambda x: numpy.percentile(x, 16)) + upper = csiborgtools.binned_statistic( + out_mass, in_tolerance[:, j], left_edges, bw, + lambda x: numpy.percentile(x, 84)) + lower /= nsimx + upper /= nsimx + med /= nsimx + + plt.plot(x, med, label=r"${} \%$".format(int(tol * 100)),) + plt.fill_between(x, lower, upper, alpha=0.2) + + plt.xlim(x.min(), x.max()) + plt.ylim(0, 1) + plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + plt.ylabel(r"$f_{\rm in~tolerance}$") + plt.legend(ncols=3) + + plt.tight_layout() + fout = join( + plt_utils.fout, + f"expected_tolerance_{key}_{nsim0}_{simname}_{min_logmass}.png") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Concentration reconstruction # +############################################################################### +# --------------------------------------------------------------------------- # + + +@cache_to_disk(120) +def get_max_overlap_velocity(kmax, nsim0, simname, min_logmass, smoothed): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + nsimxs = csiborgtools.summary.get_cross_sims( + simname, nsim0, paths, min_logmass, smoothed=smoothed) + + cat0 = open_cat(nsim0, simname) + catxs = open_cats(nsimxs, simname) + + reader = csiborgtools.summary.NPairsOverlap(cat0, catxs, paths, + min_logmass) + + log_mass0 = numpy.log10(reader.cat0(MASS_KINDS[simname])) + vx, vy, vz = reader.cat0("vx"), reader.cat0("vy"), reader.cat0("vz") + + out_mass = numpy.full((kmax), numpy.nan) + out_true = numpy.full((kmax, 3), numpy.nan) + out_matched = numpy.full((kmax, len(nsimxs), 4), numpy.nan) + + for k0 in trange(kmax, desc="Looping over top masses"): + k = numpy.argsort(log_mass0)[::-1][k0] + for i, key in enumerate(["vx", "vy", "vz"]): + vel, overlaps = reader.expected_property_single(k, key, smoothed, + in_log=False) + out_matched[k0, :, i] = vel + out_matched[k0, :, -1] = overlaps + + out_mass[k0] = log_mass0[k] + out_true[k0, 0] = vx[k] + out_true[k0, 1] = vy[k] + out_true[k0, 2] = vz[k] + + return out_mass, out_true, out_matched + + +def expected_in_velocity(kmax, nsim0, simname, min_logmass, smoothed): + cat0 = open_cat(nsim0, simname) + pos = cat0.position(cartesian=False) + + if kmax == -1: + kmax = numpy.sum(numpy.log10(cat0[MASS_KINDS[simname]]) > min_logmass) - 1 # noqa + print(f"Selected kmax = -1, so doing {kmax}.") + + out_mass, out_true, out_matched = get_max_overlap_velocity( + kmax, nsim0, simname, min_logmass, smoothed) + __, out_true_conc, out_matched_conc, out_control_conc = get_expected_many_topmass(kmax, nsim0, simname, min_logmass, "conc", smoothed, False) # noqa + __, out_true_mass, out_matched_mass, out_control_mass = get_expected_many_topmass(kmax, nsim0, simname, min_logmass, "totpartmass", smoothed, False) # noqa + + sep_mass, sep = get_mass_vs_separation(nsim0, simname, min_logmass, 677.7, + smoothed) + m = numpy.argsort(sep_mass)[::-1] + sep_mass = sep_mass[m][:kmax] + sep = sep[:, m][:, :kmax] + + clusters = { + 'Virgo': {'l': 283.8, 'b': 74.5, 'distance_mpc': 17}, + 'Fornax': {'l': 236.8, 'b': -53.6, 'distance_mpc': 19}, + 'Coma': {'l': 58.1, 'b': 87.9, 'distance_mpc': 98}, + 'Perseus': {'l': 150.6, 'b': -13.3, 'distance_mpc': 73}, + 'Hydra': {'l': 269.1, 'b': 26.5, 'distance_mpc': 49}, + 'Centaurus': {'l': 302.4, 'b': 21.6, 'distance_mpc': 52} + } + + nsimx = out_matched.shape[1] + # Calculate the cosine similarity between the true and expected velocity + # and the difference in velocity. + yalign = numpy.full((kmax, nsimx), numpy.nan) + ydiff = numpy.full((kmax, nsimx), numpy.nan) + ymag = numpy.full((kmax, nsimx), numpy.nan) + w = numpy.full((kmax, nsimx), numpy.nan) + for k in range(kmax): + yalign[k] = csiborgtools.utils.cosine_similarity(out_true[k], + out_matched[k, :, :3]) + ydiff[k] = numpy.sum( + (out_true[k] - out_matched[k, :, :3])**2, axis=1)**0.5 + ymag[k] = numpy.linalg.norm(out_matched[k, :, :3], axis=1) + w[k] = out_matched[k, :, -1] + + for k in range(0): + k0 = numpy.argsort(cat0["totpartmass"])[::-1][k] + with plt.style.context("science"): + fig, axs = plt.subplots(nrows=4, ncols=3, + figsize=(2 * 3.5, 2.5 * 2.625)) + fig.subplots_adjust(wspace=0., hspace=0.0) + + # Maximum overlap histogram + axs[0, 0].hist(w[k], bins="auto", histtype="step", density=1) + axs[0, 0].set_xlabel(r"$\max_{b \in \mathcal{B}} \mathcal{O}_{a b}$") # noqa + axs[0, 0].set_ylabel(r"Normalized counts") + axs[0, 0].set_xlim(0.01, 1) + # Cosine similarity alignment + axs[0, 1].hist(yalign[k], weights=w[k], bins=20, histtype="step", + density=1) + axs[0, 1].set_xlabel(r"$\cos \theta$") + axs[0, 1].set_xlim(-1, 1) + axs[0, 1].set_ylabel(r"Normalized counts") + # Normalized Absolute difference in velocity + axs[0, 2].hist(ydiff[k] / numpy.linalg.norm(out_true[k]), + weights=w[k], bins=20, histtype="step", + density=1) + axs[0, 2].set_xlabel(r"$|\Delta \textbf{v}| / |\textbf{v}_{\rm ref}|$") # noqa + axs[0, 2].set_xlim(0) + axs[0, 2].set_ylabel(r"Normalized counts") + # Max overlap vs mass + axs[1, 0].scatter(w[k], numpy.log10(out_matched_mass[k, :, 0]), + s=3) + axs[1, 0].axhline(out_mass[k], color="red", ls="--") + axs[1, 0].axhline( + numpy.average(numpy.log10(out_matched_mass[k, :, 0]), + weights=w[k]), + color="blue", ls="--") + axs[1, 0].set_xlabel(r"$\max_{b \in \mathcal{B}} \mathcal{O}_{a b}$") # noqa + axs[1, 0].set_ylabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$") + # Max overlap vs concentration + axs[1, 1].scatter(w[k], out_matched_conc[k, :, 0], s=3) + axs[1, 1].axhline(out_true_conc[k], color="red", ls="--") + axs[1, 1].set_xlabel(r"$\max_{b \in \mathcal{B}} \mathcal{O}_{a b}$") # noqa + axs[1, 1].set_ylabel(r"$c$") + # Alignment vs difference in velocity + axs[1, 2].scatter(yalign[k], ydiff[k], s=3) + axs[1, 2].set_xlabel(r"$\cos \theta$") + axs[1, 2].set_ylabel(r"$|\Delta \textbf{v}|~[\mathrm{km} / \mathrm{s}]$") # noqa + axs[1, 2].set_ylim(0.01) + # Sky positions + c = SkyCoord(ra=pos[k0, 1] * u.degree, dec=pos[k0, 2] * u.degree, + frame='icrs') + axs[2, 0].scatter(c.galactic.l, c.galactic.b, s=3, zorder=1, + c="red") + dist = str(round(pos[k0, 0], 0)) + axs[2, 0].text(c.galactic.l.to_value(), c.galactic.b.to_value(), + dist, fontsize=6, ha='left', + va='bottom', zorder=0) + for cluster in clusters.keys(): + l, b = clusters[cluster]['l'], clusters[cluster]['b'] + axs[2, 0].scatter(l, b, c="blue", s=2, zorder=0) + dist = round(clusters[cluster]["distance_mpc"], 0) + key = f"{cluster} ({dist})" + axs[2, 0].text(l, b, key, fontsize=6, ha='right', + va='bottom', zorder=0) + axs[2, 0].axhspan(-10, 10, alpha=0.2) + axs[2, 0].set_xlabel(r"$l$") + axs[2, 0].set_ylabel(r"$b$") + # Max overlap vs alignment + axs[2, 1].scatter(w[k], yalign[k], s=3) + axs[2, 1].set_xlabel(r"$\max_{b \in \mathcal{B}} \mathcal{O}_{a b}$") # noqa + axs[2, 1].set_ylabel(r"$\cos \theta$") + # Max overlap vs velocity difference + axs[2, 2].scatter(w[k], ydiff[k], s=3) + axs[2, 2].set_xlabel(r"$\max_{b \in \mathcal{B}} \mathcal{O}_{a b}$") # noqa + axs[2, 2].set_ylabel(r"$|\Delta \textbf{v}|~[\mathrm{km} / \mathrm{s}]$") # noqa + # Alignment vs separation + axs[3, 0].scatter(yalign[k], sep[:, k], s=3) + axs[3, 0].set_xlabel(r"$\cos \theta$") + axs[3, 0].set_ylabel(r"$\Delta R ~ [Mpc / h]$") + # Separation vs max overlap + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + axs[3, 1].scatter(sep[:, k], w[k], s=3) + axs[3, 1].set_xlabel(r"$\Delta R ~ [Mpc / h]$") + axs[3, 1].set_ylabel(r"$\max_{b \in \mathcal{B}} \mathcal{O}_{a b}$") # noqa + # max overlap vs chain index + y = numpy.abs(nsim0 - numpy.array(csiborgtools.summary.get_cross_sims(simname, nsim0, paths, min_logmass, smoothed=smoothed))) # noqa + axs[3, 2].scatter(w[k], y, s=3) + axs[3, 2].set_xlabel(r"$\max_{b \in \mathcal{B}} \mathcal{O}_{a b}$") # noqa + axs[3, 2].set_ylabel(r"$\Delta N_{\rm chain}$") + + fig.tight_layout() + fout = join( + plt_utils.fout, + f"vel_{k}_{nsim0}_{simname}_{min_logmass}.png") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + with plt.style.context("science"): + fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(3.5, 1.5 * 2.625), + sharex=True) + fig.subplots_adjust(wspace=0., hspace=0.0) + + x = numpy.asanyarray([numpy.nanmean(w[k]) for k in range(kmax)]) + # y = [csiborgtools.summary.find_peak(yalign[k], w[k]) + # for k in range(kmax)] + y = numpy.asanyarray([numpy.nansum(numpy.arccos(yalign[k]) * w[k]) / numpy.nansum(w[k]) for k in range(kmax)]) # noqa + y *= 180 / numpy.pi + c = numpy.log10(out_true_mass) + + # plt.errorbar(x, y, xerr=[xlower, xupper], fmt="o", ms=3, lw=0.5,) + # plt.hexbin(x, y, gridsize=30, mincnt=1, bins="log") + cax = axs[0].scatter(x, y, s=7.5, c=c, rasterized=True) + axins = axs[0].inset_axes([0.0, 1.0, 1.0, 0.05]) + fig.colorbar(cax, cax=axins, orientation="horizontal", + label=r"$\log M_{\rm tot} ~ [M_\odot / h]$") + axins.xaxis.tick_top() + axins.xaxis.set_tick_params(labeltop=True) + axins.xaxis.set_label_position("top") + # fig.colorbar(cax, ax=axs[0], pad=0, + # ) + # axs[0].set_colorbar(label=r"$\log M_{\rm tot} ~ [M_\odot / h]$", pad=0) + + # plt.ylim(top=90) + + m = numpy.isfinite(x) & numpy.isfinite(y) + xbins = numpy.arange(x[m].min(), x[m].max(), 0.1) + xcent = 0.5 * (xbins[1:] + xbins[:-1]) + ymed, yerr = plt_utils.compute_error_bars(x[m], y[m], xbins, sigma=1) + axs[0].errorbar(xcent, ymed, yerr=yerr, fmt="o", ms=3, lw=0.5, c="red", + capsize=3, ls="--") + + y = numpy.asanyarray([numpy.nansum(ymag[k] * w[k]) / numpy.nansum(w[k]) / numpy.linalg.norm(out_true[k]) for k in range(kmax)]) # noqa + + m = y < 3 + + axs[1].scatter(x[m], y[m], s=7.5, c=c[m], rasterized=True) + m = numpy.isfinite(x) & numpy.isfinite(y) + xbins = numpy.arange(x[m].min(), x[m].max(), 0.1) + xcent = 0.5 * (xbins[1:] + xbins[:-1]) + ymed, yerr = plt_utils.compute_error_bars(x[m], y[m], xbins, sigma=1) + axs[1].errorbar(xcent, ymed, yerr=yerr, fmt="o", ms=3, lw=0.5, c="red", + capsize=3, ls="--") + + label = r"$\langle |\textbf{v}_{\mathcal{B}}| \rangle_{\mathcal{B}} / |\textbf{v}_{\rm ref}|$" + axs[1].set_ylabel(label) + + axs[0].set_ylabel(r"$\langle \theta \rangle_{\mathcal{B}} ~ [\deg]$") + axs[1].set_xlabel(r"$\langle \max_{b \in \mathcal{B}} \mathcal{O}_{a b} \rangle_{\mathcal{B}}$") # noqa + + fig.tight_layout(w_pad=0, h_pad=0) + fout = join( + plt_utils.fout, + f"stat_vel_agreement_{kmax}_{nsim0}_{simname}_{min_logmass}.pdf") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Max's matching vs overlap success # +############################################################################### +# --------------------------------------------------------------------------- # + + +@cache_to_disk(120) +def get_matching_max_vs_overlap(simname, nsim0, min_logmass, mult): + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + + nsimsx = [nsim for nsim in paths.get_ics(simname) if nsim != nsim0] + for i in trange(len(nsimsx), desc="Loading data"): + nsimx = nsimsx[i] + fpath = paths.match_max(simname, nsim0, nsimx, min_logmass, + mult=mult) + + data = numpy.load(fpath, allow_pickle=True) + + if i == 0: + mass0 = data["mass0"] + max_overlap = numpy.full((mass0.size, len(nsimsx)), numpy.nan) + match_overlap = numpy.full((mass0.size, len(nsimsx)), numpy.nan) + success = numpy.zeros((mass0.size, len(nsimsx)), numpy.bool_) + + max_overlap[:, i] = data["max_overlap"] + match_overlap[:, i] = data["match_overlap"] + success[:, i] = data["success"] + + return {"mass0": mass0, "max_overlap": max_overlap, + "match_overlap": match_overlap, "success": success} + + +def matching_max_vs_overlap(min_logmass): + left_edges = numpy.arange(min_logmass, 15, 0.1) + + with plt.style.context("science"): + # fig, axs = plt.subplots(ncols=2, figsize=(2 * 3.5, 2.625)) + fig, axs = plt.subplots(ncols=1, figsize=(3.5, 2.625)) + axs = [axs] + ax2 = axs[0].twinx() + cols = plt.rcParams["axes.prop_cycle"].by_key()["color"] + for n, mult in enumerate([2.5, 5., 7.5]): + + def make_y1_y2(simname): + nsims = 100 if simname == "csiborg" else 9 + nsim0 = 7444 if simname == "csiborg" else 0 + x = get_matching_max_vs_overlap(simname, + nsim0, min_logmass, mult=mult) + + mask = numpy.all(numpy.isfinite(x["max_overlap"]), axis=1) + x["success"][~mask, :] = False + + mass0 = numpy.log10(x["mass0"]) + max_overlap = x["max_overlap"] + match_overlap = x["match_overlap"] + success = x["success"] + + nbins = len(left_edges) + y = numpy.full((nbins, nsims), numpy.nan) + y2 = numpy.full(nbins, numpy.nan) + for i in range(nbins): + m = mass0 > left_edges[i] + for j in range(nsims): + y[i, j] = numpy.sum((max_overlap[m, j] == match_overlap[m, j]) & success[m, j]) # noqa + y[i, j] /= numpy.sum(success[m, j]) + + y2[i] = success[m, 0].mean() + return y, y2 + + offset = numpy.random.normal(0, 0.015) + + y1_csiborg, y2_csiborg = make_y1_y2("csiborg") + + ysummary = numpy.percentile(y1_csiborg, [16, 50, 84], axis=1) + axs[0].plot(left_edges + offset, ysummary[1], c=cols[n], + label=r"${}~R_{{\rm 200c}}$".format(mult)) + ax2.plot(left_edges + offset, y2_csiborg, c=cols[n], ls="dotted", + zorder=0) + + axs[0].set_xlim(left_edges.min(), left_edges.max()) + axs[0].legend(ncols=1, loc="upper left") + for i in range(1): + axs[i].set_xlabel(r"$\log M_{\rm tot, min} ~ [M_\odot / h]$") + + axs[0].set_ylabel(r"$f_{\rm agreement}$") + ax2.set_ylabel(r"$f_{\rm match}$") + + fig.tight_layout() + fout = join(plt_utils.fout, + f"matching_max_agreement_{min_logmass}.pdf") + print(f"Saving to `{fout}`.") + fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +# --------------------------------------------------------------------------- # +############################################################################### +# Mass-concentration relation # +############################################################################### +# --------------------------------------------------------------------------- # + + +@cache_to_disk(120) +def get_mass_concentration(nsim, simname): + cat = open_cat(nsim, simname) + mass = cat["m200c"] + conc = cat["conc"] + return mass, conc + + +def mass_concentration(ext="pdf"): + + cosmology.setCosmology('WMAP5') + xrange = numpy.linspace(12, 15, 100) + cvir = concentration.concentration(10**xrange, '200c', 0.0, + model='diemer19') + + with plt.style.context("science"): + plt.figure() + + for nsim in tqdm([7444 + n * 24 for n in (0, 10, 20, 30, 40, 50, 60, 70, 80)]): # noqa + x, y = get_mass_concentration(nsim, "csiborg") + m = numpy.isfinite(x) & numpy.isfinite(y) + x = x[m] + y = y[m] + x = numpy.log10(x) + + xbins = numpy.arange(12, 15, 0.2) + xcent = 0.5 * (xbins[1:] + xbins[:-1]) + ymed, yerr = plt_utils.compute_error_bars(x, y, xbins, 1) + plt.plot(xcent, ymed, c="black", lw=0.5) + # plt.errorbar(xcent, ymed, yerr=yerr, fmt="o", ms=3, lw=0.5, + # c="red", capsize=3) + + plt.plot(xrange, cvir, c="blue", lw=1, label="Diemer+19") + plt.yscale("log") + plt.legend() + + plt.tight_layout() + fout = join(plt_utils.fout, "mass_concentration.png") + print(f"Saving to `{fout}`.") + plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight") + plt.close() + + +if __name__ == "__main__": + min_logmass = 13.25 + smoothed = True + nbins = 10 + ext = "pdf" + plot_quijote = False + min_maxoverlap = 0. + + funcs = [ + # "get_overlap_summary", + # "get_max_overlap_agreement", + # "get_mtot_vs_all_pairoverlap", + # "get_mtot_vs_maxpairoverlap", + # "get_mass_vs_separation", + # "get_expected_mass", + # "get_expected_key", + # "get_expected_single", + # "get_expected_many_topmass", + # "get_max_overlap_velocity", + # "get_mass_concentration", + ] + for func in funcs: + print(f"Cleaning up cache for `{func}`.") + delete_disk_caches_for_function(func) + + if False: + mtot_vs_all_pairoverlap(7444, "csiborg", min_logmass, smoothed, + nbins, ext=ext) + mtot_vs_maxpairoverlap(7444, "csiborg", "fof_totpartmass", min_logmass, + smoothed, nbins, ext=ext) + if plot_quijote: + mtot_vs_all_pairoverlap(0, "quijote", min_logmass, smoothed, + nbins, ext=ext) + mtot_vs_maxpairoverlap(0, "quijote", "group_mass", min_logmass, + smoothed, nbins, ext=ext) + + if False: + mtot_vs_maxpairoverlap_fraction(min_logmass, smoothed, nbins, ext=ext) + + if False: + maximum_overlap_agreement(7444, "csiborg", min_logmass, smoothed) + + if False: + summed_to_max_overlap(min_logmass, smoothed, nbins, ext=ext) + + if False: + mtot_vs_summedpairoverlap(7444, "csiborg", min_logmass, smoothed, + nbins, ext) + if plot_quijote: + mtot_vs_summedpairoverlap(0, "quijote", min_logmass, smoothed, + nbins, ext) + + if False: + mtot_vs_expected_mass(7444, "csiborg", min_logmass, smoothed, ext=ext) + # if plot_quijote: + # mtot_vs_expected_mass(0, "quijote", min_logmass, smoothed, nbins, + # max_prob_nomatch=1, ext=ext) + + if False: + key = "conc" + mtot_vs_expected_key(7444, "csiborg", min_logmass, key, smoothed, 15) + # if plot_quijote: + # mtot_vs_expected_key(0, "quijote", min_logmass, key, smoothed, + # nbins) + + if False: + mass_vs_separation(7444, "csiborg", min_logmass, nbins, smoothed, + boxsize=677.7) + # if plot_quijote: + # mass_vs_separation(0, 1, "quijote", min_logmass, nbins, + # smoothed, boxsize=1000, plot_std=False) + + if True: + matching_max_vs_overlap(min_logmass) + + if False: + # mtot_vs_expected_single("max", 7444, "csiborg", min_logmass, + # "totpartmass", True) + # mtot_vs_expected_single("maxmass__0", 7444, "csiborg", + # min_logmass, "lambda200c", True) + for i in range(25): + mtot_vs_expected_single(f"maxmass__{i}", 7444, "csiborg", + min_logmass, "conc", True) + # if plot_quijote: + # mtot_vs_expected_single("max", 0, "quijote", min_logmass, + # "totpartmass", True, True) + + if False: + mtot_vs_expected_single_panel("maxmass__0", 7444, "csiborg", + min_logmass, True) + + if False: + kmax = 100 + expected_in_tolerance(kmax, 7444, "csiborg", min_logmass, "conc", True) + + if False: + kmax = 1000 + expected_in_velocity(kmax, 7444 + 24 * 30, "csiborg", min_logmass, smoothed) + + if False: + mass_concentration() diff --git a/scripts_plots/plot_data.py b/scripts_plots/plot_data.py index a748dfa..fecb83e 100644 --- a/scripts_plots/plot_data.py +++ b/scripts_plots/plot_data.py @@ -66,6 +66,12 @@ def plot_mass_vs_ncells(nsim, pdf=False): cat = open_csiborg(nsim) mpart = 4.38304044e+09 + x = numpy.log10(cat["totpartmass"]) + y = numpy.log10(cat["lagpatch_ncells"]) + + p = numpy.polyfit(x, y, 1) + print("Fitted parameters are: ", p) + with plt.style.context(plt_utils.mplstyle): plt.figure() plt.scatter(cat["totpartmass"], cat["lagpatch_ncells"], s=0.25, @@ -105,9 +111,9 @@ def plot_hmf(pdf=False): csiborg_counts = numpy.full((len(csiborg_nsims), len(bins) - 1), numpy.nan, dtype=numpy.float32) csiborg_counts[i, :] = data["counts"] - # csiborg_counts /= numpy.diff(bins).reshape(1, -1) + csiborg_counts /= numpy.diff(bins).reshape(1, -1) - csiborg5511 = numpy.load(paths.halo_counts("csiborg", 5511))["counts"] + # csiborg5511 = numpy.load(paths.halo_counts("csiborg", 5511))["counts"] # csiborg5511 /= numpy.diff(data["bins"]) print("Loading Quijote halo counts.", flush=True) @@ -121,73 +127,89 @@ def plot_hmf(pdf=False): (len(quijote_nsims) * nmax, len(bins) - 1), numpy.nan, dtype=numpy.float32) quijote_counts[i * nmax:(i + 1) * nmax, :] = data["counts"] - # quijote_counts /= numpy.diff(bins).reshape(1, -1) + quijote_counts /= numpy.diff(bins).reshape(1, -1) - # vol = 155.5**3 - # csiborg_counts /= vol - # quijote_counts /= vol + vol = 4 * numpy.pi / 3 * 155.5**3 + csiborg_counts /= vol + quijote_counts /= vol # csiborg5511 /= vol x = 10**(0.5 * (bins[1:] + bins[:-1])) # Edit lower limits - csiborg_counts[:, x < 1e12] = numpy.nan + csiborg_counts[:, x < 10**13.1] = numpy.nan quijote_counts[:, x < 10**(13.1)] = numpy.nan # Edit upper limits csiborg_counts[:, x > 3e15] = numpy.nan quijote_counts[:, x > 3e15] = numpy.nan - csiborg5511[x > 3e15] = numpy.nan + # csiborg5511[x > 3e15] = numpy.nan with plt.style.context(plt_utils.mplstyle): cols = plt.rcParams["axes.prop_cycle"].by_key()["color"] - fig, ax = plt.subplots(nrows=2, sharex=True, - figsize=(3.5, 2.625 * 1.25), - gridspec_kw={"height_ratios": [1, 0.45]}) - fig.subplots_adjust(hspace=0, wspace=0) + fig, ax = plt.subplots(nrows=1, sharex=True, + figsize=(3.5, 2.625)) + ax = [ax] + # fig, ax = plt.subplots(nrows=2, sharex=True, + # figsize=(3.5, 2.625 * 1.25), + # gridspec_kw={"height_ratios": [1, 0.25]}) + # fig.subplots_adjust(hspace=0, wspace=0) # Upper panel data mean_csiborg = numpy.mean(csiborg_counts, axis=0) std_csiborg = numpy.std(csiborg_counts, axis=0) - ax[0].plot(x, mean_csiborg, label="CSiBORG", c=cols[0]) - ax[0].fill_between(x, mean_csiborg - std_csiborg, - mean_csiborg + std_csiborg, - alpha=0.5, color=cols[0]) + + for i in range(len(csiborg_counts)): + ax[0].plot(x, csiborg_counts[i, :], c="cornflowerblue", lw=0.5, zorder=0) + + ax[0].plot(x, mean_csiborg, label="CSiBORG", c="mediumblue", zorder=1) + # ax[0].fill_between(x, mean_csiborg - std_csiborg, + # mean_csiborg + std_csiborg, + # alpha=0.5, color=cols[0]) mean_quijote = numpy.mean(quijote_counts, axis=0) std_quijote = numpy.std(quijote_counts, axis=0) - ax[0].plot(x, mean_quijote, label="Quijote", c=cols[1]) - ax[0].fill_between(x, mean_quijote - std_quijote, - mean_quijote + std_quijote, alpha=0.5, - color=cols[1]) - ax[0].plot(x, csiborg5511, label="CSiBORG 5511", c="k", ls="--") - std5511 = numpy.sqrt(csiborg5511) - ax[0].fill_between(x, csiborg5511 - std_csiborg, csiborg5511 + std5511, - alpha=0.2, color="k") - # Lower panel data - log_y = numpy.log10(mean_csiborg / mean_quijote) - err = numpy.sqrt((std_csiborg / mean_csiborg / numpy.log(10))**2 - + (std_quijote / mean_quijote / numpy.log(10))**2) - ax[1].plot(x, 10**log_y, c=cols[0]) - ax[1].fill_between(x, 10**(log_y - err), 10**(log_y + err), alpha=0.5, - color=cols[0]) + for i in range(len(quijote_counts)): + ax[0].plot(x, quijote_counts[i, :], c="palegreen", lw=0.5, zorder=-1) - ax[1].plot(x, csiborg5511 / mean_quijote, c="k", ls="--") + + + ax[0].plot(x, mean_quijote, label="Quijote", c="darkgreen", zorder=1) + # ax[0].fill_between(x, mean_quijote - std_quijote, + # mean_quijote + std_quijote, alpha=0.5, + # color=cols[1]) + + # ax[0].plot(x, csiborg5511, label="CSiBORG 5511", c="k", ls="--") + # std5511 = numpy.sqrt(csiborg5511) + # ax[0].fill_between(x, csiborg5511 - std_csiborg, csiborg5511 + std5511, + # alpha=0.2, color="k") + + # # Lower panel data + # log_y = numpy.log10(mean_csiborg / mean_quijote) + # err = numpy.sqrt((std_csiborg / mean_csiborg / numpy.log(10))**2 + # + (std_quijote / mean_quijote / numpy.log(10))**2) + # ax[1].plot(x, 10**log_y, c=cols[0]) + # ax[1].fill_between(x, 10**(log_y - err), 10**(log_y + err), alpha=0.5, + # color="k") + + # ax[1].plot(x, csiborg5511 / mean_quijote, c="k", ls="--") # Labels and accesories - ax[1].axhline(1, color="k", ls="--", - lw=0.5 * plt.rcParams["lines.linewidth"], zorder=0) + # ax[1].axhline(1, color="k", ls="--", + # lw=0.5 * plt.rcParams["lines.linewidth"], zorder=0) # ax[0].set_ylabel(r"$\frac{\mathrm{d}^2 N}{\mathrm{d} V \mathrm{d}\log M_{\rm tot}}~[\mathrm{dex}^{-1} (\mathrm{Mpc} / h)^{-3}]$", # noqa # fontsize="small") - ax[0].set_ylabel("Counts in bins") - ax[1].set_xlabel(r"$M_{\rm tot}~[M_\odot / h]$", fontsize="small") - ax[1].set_ylabel(r"$\mathrm{CSiBORG} / \mathrm{Quijote}$", - fontsize="small") + m = numpy.isfinite(mean_quijote) + ax[0].set_xlim(x[m].min(), x[m].max()) + ax[0].set_ylabel(r"$\mathrm{HMF}~[\mathrm{dex}^{-1} (\mathrm{Mpc} / h)^{-3}]$") + ax[0].set_xlabel(r"$M_{\rm tot}~[M_\odot / h]$", fontsize="small") + # ax[1].set_ylabel(r"$\mathrm{CSiBORG} / \mathrm{Quijote}$", + # fontsize="small") ax[0].set_xscale("log") ax[0].set_yscale("log") - ax[1].set_ylim(0.5, 2.0) + # ax[1].set_ylim(0.5, 1.5) # ax[1].set_yscale("log") - ax[0].legend(fontsize="small") + ax[0].legend() fig.tight_layout(h_pad=0, w_pad=0) for ext in ["png"] if pdf is False else ["png", "pdf"]: @@ -556,8 +578,8 @@ if __name__ == "__main__": if False: plot_mass_vs_ncells(7444, pdf=False) - if False: - plot_hmf(pdf=False) + if True: + plot_hmf(pdf=True) if False: plot_hmf_quijote_full(pdf=False) @@ -569,7 +591,7 @@ if __name__ == "__main__": plot_groups=False, dmin=45, dmax=60, plot_halos=5e13, volume_weight=True) - if True: + if False: kind = "environment" grid = 512 smooth_scale = 8.0