New plots (#85)

* Update verbosity messages

* Update verbosity messags

* Update more verbosity flags

* Update the iterator settings

* Add basic plots

* Update verbosity flags

* Update arg parsre

* Update plots

* Remove some older code

* Fix some definitions

* Update plots

* Update plotting

* Update plots

* Add support functions

* Update nb

* Improve plots, move back to scripts

* Update plots

* pep8

* Add max overlap plot

* Add blank line

* Upload changes

* Update changes

* Add weighted stats

* Remove

* Add import

* Add Max's matching

* Edit submission

* Add paths to Max's matching

* Fix matching

* Edit submission

* Edit plot

* Add max overlap separation plot

* Add periodic distance

* Update overlap summaries

* Add nsim0 for Max matvhing

* Add Max's agreement plot

* Add Quijote for Max method

* Update ploitting

* Update name
This commit is contained in:
Richard Stiskalek 2023-08-18 19:20:47 +01:00 committed by GitHub
parent ca3772ac6f
commit 8e3127f4d9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 1343 additions and 2100 deletions

View file

@ -14,7 +14,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from csiborgtools import clustering, field, match, read # noqa from csiborgtools import clustering, field, match, read # noqa
from .utils import (center_of_mass, delta2ncells, number_counts, from .utils import (center_of_mass, delta2ncells, number_counts, # noqa
periodic_distance, periodic_distance_two_points) # noqa periodic_distance, periodic_distance_two_points) # noqa
# Arguments to csiborgtools.read.Paths. # Arguments to csiborgtools.read.Paths.

View file

@ -14,4 +14,5 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import (ParticleOverlap, RealisationsMatcher, # noqa from .match import (ParticleOverlap, RealisationsMatcher, # noqa
calculate_overlap, calculate_overlap_indxs, pos2cell, # noqa calculate_overlap, calculate_overlap_indxs, pos2cell, # noqa
cosine_similarity, find_neighbour, get_halo_cell_limits) # noqa cosine_similarity, find_neighbour, get_halo_cell_limits, # noqa
matching_max) # noqa

View file

@ -236,8 +236,6 @@ class RealisationsMatcher(BaseMatcher):
# We begin by querying the kNN for the nearest neighbours of each halo # We begin by querying the kNN for the nearest neighbours of each halo
# in the reference simulation from the cross simulation in the initial # in the reference simulation from the cross simulation in the initial
# snapshot. # snapshot.
if verbose:
print(f"{datetime.now()}: querying the KNN.", flush=True)
match_indxs = radius_neighbours( match_indxs = radius_neighbours(
catx.knn(in_initial=True, subtract_observer=False, periodic=True), catx.knn(in_initial=True, subtract_observer=False, periodic=True),
cat0.position(in_initial=True), cat0.position(in_initial=True),
@ -261,11 +259,13 @@ class RealisationsMatcher(BaseMatcher):
return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx, return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx,
nshift=0, ncells=self.box_size) nshift=0, ncells=self.box_size)
if verbose: iterator = tqdm(
print(f"{datetime.now()}: calculating overlaps.", flush=True) cat0["index"],
desc=f"{datetime.now()}: calculating NGP overlaps",
disable=not verbose
)
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
indxs = cat0["index"] for i, k0 in enumerate(iterator):
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
# If we have no matches continue to the next halo. # If we have no matches continue to the next halo.
matches = match_indxs[i] matches = match_indxs[i]
if matches.size == 0: if matches.size == 0:
@ -347,12 +347,13 @@ class RealisationsMatcher(BaseMatcher):
return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx, return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx,
nshift=nshift, ncells=self.box_size) nshift=nshift, ncells=self.box_size)
if verbose: iterator = tqdm(
print(f"{datetime.now()}: calculating smoothed overlaps.", cat0["index"],
flush=True) desc=f"{datetime.now()}: calculating smoothed overlaps",
disable=not verbose
)
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
indxs = cat0["index"] for i, k0 in enumerate(iterator):
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
pos0, mass0, __, mins0, maxs0 = load_processed_halo( pos0, mass0, __, mins0, maxs0 = load_processed_halo(
k0, particles0, halo_map0, hid2map0, nshift=nshift, k0, particles0, halo_map0, hid2map0, nshift=nshift,
ncells=self.box_size) ncells=self.box_size)
@ -434,7 +435,12 @@ class ParticleOverlap(BaseMatcher):
assert ((delta.shape == (ncells,) * 3) assert ((delta.shape == (ncells,) * 3)
& (delta.dtype == numpy.float32)) & (delta.dtype == numpy.float32))
for hid in tqdm(halo_cat["index"]) if verbose else halo_cat["index"]: iterator = tqdm(
halo_cat["index"],
desc=f"{datetime.now()} Calculating the background field",
disable=not verbose
)
for hid in iterator:
pos = load_halo_particles(hid, particles, halo_map, hid2map) pos = load_halo_particles(hid, particles, halo_map, hid2map)
if pos is None: if pos is None:
continue continue
@ -993,11 +999,13 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,
if radiusKNN.size != knn.n_samples_fit_: if radiusKNN.size != knn.n_samples_fit_:
raise ValueError("Mismatch in shape of `radiusKNN` or `knn`") raise ValueError("Mismatch in shape of `radiusKNN` or `knn`")
nsamples = len(X)
indxs = [None] * nsamples
patchknn_max = numpy.max(radiusKNN) patchknn_max = numpy.max(radiusKNN)
for i in trange(nsamples) if verbose else range(nsamples): iterator = trange(len(X),
desc=f"{datetime.now()}: querying the kNN",
disable=not verbose)
indxs = [None] * len(X)
for i in iterator:
dist, indx = knn.radius_neighbors( dist, indx = knn.radius_neighbors(
X[i].reshape(1, -1), radiusX[i] + patchknn_max, X[i].reshape(1, -1), radiusX[i] + patchknn_max,
sort_results=True) sort_results=True)
@ -1082,3 +1090,107 @@ def cosine_similarity(x, y):
out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1) out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
return out[0] if out.size == 1 else out return out[0] if out.size == 1 else out
def matching_max(cat0, catx, mass_kind, mult, periodic, overlap=None,
match_indxs=None, verbose=True):
"""
Halo matching algorithm based on [1].
Parameters
----------
cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the reference simulation.
catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the cross simulation.
mass_kind : str
Name of the mass column.
mult : float
Multiple of R200c below which to consider a match.
periodic : bool
Whether to account for periodic boundary conditions.
overlap : array of 1-dimensional arrays, optional
Overlap of halos from `cat0` with halos from `catx`. If `overlap` or
`match_indxs` is not provided, then the overlap of the identified halos
is not calculated.
match_indxs : array of 1-dimensional arrays, optional
Indicies of halos from `catx` having a non-zero overlap with halos
from `cat0`.
verbose : bool, optional
Verbosity flag.
Returns
-------
out : structured array
Array of matches. Columns are `hid0`, `hidx`, `dist`, `success`.
References
----------
[1] Maxwell L Hutt, Harry Desmond, Julien Devriendt, Adrianne Slyz; The
effect of local Universe constraints on halo abundance and clustering;
Monthly Notices of the Royal Astronomical Society, Volume 516, Issue 3,
November 2022, Pages 35923601, https://doi.org/10.1093/mnras/stac2407
"""
pos0 = cat0.position(in_initial=False)
knnx = catx.knn(in_initial=False, subtract_observer=False,
periodic=periodic)
rad0 = cat0["r200c"]
mass0 = numpy.log10(cat0[mass_kind])
massx = numpy.log10(catx[mass_kind])
assert numpy.all(numpy.isfinite(mass0)) & numpy.all(numpy.isfinite(massx))
maskx = numpy.ones(len(catx), dtype=numpy.bool_)
dtypes = [("hid0", numpy.int32),
("hidx", numpy.int32),
("mass0", numpy.float32),
("massx", numpy.float32),
("dist", numpy.float32),
("success", numpy.bool_),
("match_overlap", numpy.float32),
("max_overlap", numpy.float32),
]
out = numpy.full(len(cat0), numpy.nan, dtype=dtypes)
out["success"] = False
for i in tqdm(numpy.argsort(mass0)[::-1], desc="Matching haloes",
disable=not verbose):
hid0 = cat0["index"][i]
out[i]["hid0"] = hid0
out[i]["mass0"] = 10**mass0[i]
neigh_dists, neigh_inds = knnx.radius_neighbors(pos0[i].reshape(1, -1),
mult * rad0[i])
neigh_dists, neigh_inds = neigh_dists[0], neigh_inds[0]
if neigh_dists.size == 0:
continue
# Sort the neighbours by mass difference
sort_order = numpy.argsort(numpy.abs(mass0[i] - massx[neigh_inds]))
neigh_dists = neigh_dists[sort_order]
neigh_inds = neigh_inds[sort_order]
for j, neigh_ind in enumerate(neigh_inds):
if maskx[neigh_ind]:
out[i]["hidx"] = catx["index"][neigh_ind]
out[i]["dist"] = neigh_dists[j]
out[i]["massx"] = 10**massx[neigh_ind]
out[i]["success"] = True
maskx[neigh_ind] = False
if overlap is not None and match_indxs is not None:
if neigh_ind in match_indxs[i]:
k = numpy.where(neigh_ind == match_indxs[i])[0][0]
out[i]["match_overlap"] = overlap[i][k]
if len(overlap[i]) > 0:
out[i]["max_overlap"] = numpy.max(overlap[i])
break
return out

View file

@ -21,6 +21,8 @@ from os.path import isfile
import numpy import numpy
from tqdm import tqdm, trange from tqdm import tqdm, trange
from ..utils import periodic_distance
############################################################################### ###############################################################################
# Overlap of two simulations # # Overlap of two simulations #
############################################################################### ###############################################################################
@ -47,6 +49,7 @@ class PairOverlap:
_cat0 = None _cat0 = None
_catx = None _catx = None
_data = None _data = None
_paths = None
def __init__(self, cat0, catx, paths, min_logmass, maxdist=None): def __init__(self, cat0, catx, paths, min_logmass, maxdist=None):
if cat0.simname != catx.simname: if cat0.simname != catx.simname:
@ -55,6 +58,7 @@ class PairOverlap:
self._cat0 = cat0 self._cat0 = cat0
self._catx = catx self._catx = catx
self._paths = paths
self.load(cat0, catx, paths, min_logmass, maxdist) self.load(cat0, catx, paths, min_logmass, maxdist)
def load(self, cat0, catx, paths, min_logmass, maxdist=None): def load(self, cat0, catx, paths, min_logmass, maxdist=None):
@ -257,6 +261,8 @@ class PairOverlap:
for i in range(len(overlap)): for i in range(len(overlap)):
if len(overlap[i]) > 0: if len(overlap[i]) > 0:
out[i] = numpy.sum(overlap[i]) out[i] = numpy.sum(overlap[i])
else:
out[i] = 0
return out return out
def prob_nomatch(self, from_smoothed): def prob_nomatch(self, from_smoothed):
@ -279,9 +285,11 @@ class PairOverlap:
for i in range(len(overlap)): for i in range(len(overlap)):
if len(overlap[i]) > 0: if len(overlap[i]) > 0:
out[i] = numpy.product(numpy.subtract(1, overlap[i])) out[i] = numpy.product(numpy.subtract(1, overlap[i]))
else:
out[i] = 1
return out return out
def dist(self, in_initial, norm_kind=None): def dist(self, in_initial, boxsize, norm_kind=None):
""" """
Pair distances of matched halos between the reference and cross Pair distances of matched halos between the reference and cross
simulations. simulations.
@ -290,6 +298,8 @@ class PairOverlap:
---------- ----------
in_initial : bool in_initial : bool
Whether to calculate separation in the initial or final snapshot. Whether to calculate separation in the initial or final snapshot.
boxsize : float
The size of the simulation box.
norm_kind : str, optional norm_kind : str, optional
The kind of normalisation to apply to the distances. The kind of normalisation to apply to the distances.
Can be `r200c`, `ref_patch` or `sum_patch`. Can be `r200c`, `ref_patch` or `sum_patch`.
@ -320,8 +330,7 @@ class PairOverlap:
# Now calculate distances # Now calculate distances
dist = [None] * len(self) dist = [None] * len(self)
for i, ind in enumerate(self["match_indxs"]): for i, ind in enumerate(self["match_indxs"]):
# n refers to the reference halo catalogue position dist[i] = periodic_distance(posx[ind, :], pos0[i, :], boxsize)
dist[i] = numpy.linalg.norm(pos0[i, :] - posx[ind, :], axis=1)
if norm_kind is not None: if norm_kind is not None:
dist[i] /= norm[i] dist[i] /= norm[i]
@ -358,7 +367,7 @@ class PairOverlap:
ratio[i] = numpy.abs(ratio[i]) ratio[i] = numpy.abs(ratio[i])
return numpy.array(ratio, dtype=object) return numpy.array(ratio, dtype=object)
def max_overlap_key(self, key, from_smoothed): def max_overlap_key(self, key, min_overlap, from_smoothed):
""" """
Calculate the maximum overlap mass of each halo in the reference Calculate the maximum overlap mass of each halo in the reference
simulation from the cross simulation. simulation from the cross simulation.
@ -367,10 +376,10 @@ class PairOverlap:
---------- ----------
key : str key : str
Key to the maximum overlap statistic to calculate. Key to the maximum overlap statistic to calculate.
min_overlap : float
Minimum pair overlap to consider.
from_smoothed : bool from_smoothed : bool
Whether to use the smoothed overlap or not. Whether to use the smoothed overlap or not.
mass_kind : str, optional
The mass kind whose ratio is to be calculated.
Returns Returns
------- -------
@ -384,11 +393,15 @@ class PairOverlap:
# Skip if no match # Skip if no match
if len(match_ind) == 0: if len(match_ind) == 0:
continue continue
out[i] = y[match_ind][numpy.argmax(overlap[i])]
k = numpy.argmax(overlap[i])
if overlap[i][k] > min_overlap:
out[i] = y[match_ind][k]
return out return out
def counterpart_mass(self, from_smoothed, overlap_threshold=0., def counterpart_mass(self, from_smoothed, overlap_threshold=0.,
in_log=False, mass_kind="totpartmass"): mass_kind="totpartmass"):
""" """
Calculate the expected counterpart mass of each halo in the reference Calculate the expected counterpart mass of each halo in the reference
simulation from the crossed simulation. simulation from the crossed simulation.
@ -400,9 +413,6 @@ class PairOverlap:
overlap_threshold : float, optional overlap_threshold : float, optional
Minimum overlap required for a halo to be considered a match. By Minimum overlap required for a halo to be considered a match. By
default 0.0, i.e. no threshold. default 0.0, i.e. no threshold.
in_log : bool, optional
Whether to calculate the expectation value in log space. By default
`False`.
mass_kind : str, optional mass_kind : str, optional
The mass kind whose ratio is to be calculated. Must be a valid The mass kind whose ratio is to be calculated. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle catalogue key. By default `totpartmass`, i.e. the total particle
@ -434,15 +444,11 @@ class PairOverlap:
massx_ = massx_[mask] massx_ = massx_[mask]
overlap_ = overlap_[mask] overlap_ = overlap_[mask]
massx_ = numpy.log10(massx_) if in_log else massx_ massx_ = numpy.log10(massx_)
# Weighted average and *biased* standard deviation # Weighted average and *biased* standard deviation
mean_ = numpy.average(massx_, weights=overlap_) mean_ = numpy.average(massx_, weights=overlap_)
std_ = numpy.average((massx_ - mean_)**2, weights=overlap_)**0.5 std_ = numpy.average((massx_ - mean_)**2, weights=overlap_)**0.5
# If in log, convert back to linear
mean_ = 10**mean_ if in_log else mean_
std_ = mean_ * std_ * numpy.log(10) if in_log else std_
mean[i] = mean_ mean[i] = mean_
std[i] = std_ std[i] = std_
@ -544,7 +550,7 @@ def weighted_stats(x, weights, min_weight=0, verbose=False):
""" """
out = numpy.full((x.size, 2), numpy.nan, dtype=numpy.float32) out = numpy.full((x.size, 2), numpy.nan, dtype=numpy.float32)
for i in trange(len(x)) if verbose else range(len(x)): for i in trange(len(x), disable=not verbose):
x_, w_ = numpy.asarray(x[i]), numpy.asarray(weights[i]) x_, w_ = numpy.asarray(x[i]), numpy.asarray(weights[i])
mask = w_ > min_weight mask = w_ > min_weight
x_ = x_[mask] x_ = x_[mask]
@ -574,27 +580,30 @@ class NPairsOverlap:
List of cross simulation halo catalogues. List of cross simulation halo catalogues.
paths : py:class`csiborgtools.read.Paths` paths : py:class`csiborgtools.read.Paths`
CSiBORG paths object. CSiBORG paths object.
min_logmass : float
Minimum log mass of halos to consider.
verbose : bool, optional verbose : bool, optional
Verbosity flag for loading the overlap objects. Verbosity flag for loading the overlap objects.
""" """
_pairs = None _pairs = None
def __init__(self, cat0, catxs, paths, verbose=True): def __init__(self, cat0, catxs, paths, min_logmass, verbose=True):
pairs = [None] * len(catxs) pairs = [None] * len(catxs)
if verbose: for i, catx in enumerate(tqdm(catxs, desc="Loading overlap objects",
print("Loading individual overlap objects...", flush=True) disable=not verbose)):
for i, catx in enumerate(tqdm(catxs) if verbose else catxs): pairs[i] = PairOverlap(cat0, catx, paths, min_logmass)
pairs[i] = PairOverlap(cat0, catx, paths)
self._pairs = pairs self._pairs = pairs
def max_overlap(self, from_smoothed, verbose=True): def max_overlap(self, min_overlap, from_smoothed, verbose=True):
""" """
Calculate maximum overlap of each halo in the reference simulation with Calculate maximum overlap of each halo in the reference simulation with
the cross simulations. the cross simulations.
Parameters Parameters
---------- ----------
min_overlap : float
Minimum pair overlap to consider.
from_smoothed : bool from_smoothed : bool
Whether to use the smoothed overlap or not. Whether to use the smoothed overlap or not.
verbose : bool, optional verbose : bool, optional
@ -604,21 +613,24 @@ class NPairsOverlap:
------- -------
max_overlap : 2-dimensional array of shape `(nhalos, ncatxs)` max_overlap : 2-dimensional array of shape `(nhalos, ncatxs)`
""" """
out = [None] * len(self)
if verbose:
print("Calculating maximum overlap...", flush=True)
def get_max(y_): def get_max(y_):
if len(y_) == 0: if len(y_) == 0:
return numpy.nan return 0
return numpy.max(y_) out = numpy.max(y_)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs): return out if out >= min_overlap else 0
iterator = tqdm(self.pairs,
desc="Calculating maximum overlap",
disable=not verbose
)
out = [None] * len(self)
for i, pair in enumerate(iterator):
out[i] = numpy.asanyarray([get_max(y_) out[i] = numpy.asanyarray([get_max(y_)
for y_ in pair.overlap(from_smoothed)]) for y_ in pair.overlap(from_smoothed)])
return numpy.vstack(out).T return numpy.vstack(out).T
def max_overlap_key(self, key, from_smoothed, verbose=True): def max_overlap_key(self, key, min_overlap, from_smoothed, verbose=True):
""" """
Calculate maximum overlap mass of each halo in the reference Calculate maximum overlap mass of each halo in the reference
simulation with the cross simulations. simulation with the cross simulations.
@ -627,6 +639,8 @@ class NPairsOverlap:
---------- ----------
key : str key : str
Key to the maximum overlap statistic to calculate. Key to the maximum overlap statistic to calculate.
min_overlap : float
Minimum pair overlap to consider.
from_smoothed : bool from_smoothed : bool
Whether to use the smoothed overlap or not. Whether to use the smoothed overlap or not.
verbose : bool, optional verbose : bool, optional
@ -636,12 +650,13 @@ class NPairsOverlap:
------- -------
out : 2-dimensional array of shape `(nhalos, ncatxs)` out : 2-dimensional array of shape `(nhalos, ncatxs)`
""" """
iterator = tqdm(self.pairs,
desc=f"Calculating maximum overlap {key}",
disable=not verbose
)
out = [None] * len(self) out = [None] * len(self)
if verbose: for i, pair in enumerate(iterator):
print(f"Calculating maximum overlap {key}...", flush=True) out[i] = pair.max_overlap_key(key, min_overlap, from_smoothed)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
out[i] = pair.max_overlap_key(key, from_smoothed)
return numpy.vstack(out).T return numpy.vstack(out).T
@ -661,10 +676,11 @@ class NPairsOverlap:
------- -------
summed_overlap : 2-dimensional array of shape `(nhalos, ncatxs)` summed_overlap : 2-dimensional array of shape `(nhalos, ncatxs)`
""" """
iterator = tqdm(self.pairs,
desc="Calculating summed overlap",
disable=not verbose)
out = [None] * len(self) out = [None] * len(self)
if verbose: for i, pair in enumerate(iterator):
print("Calculating summed overlap...", flush=True)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
out[i] = pair.summed_overlap(from_smoothed) out[i] = pair.summed_overlap(from_smoothed)
return numpy.vstack(out).T return numpy.vstack(out).T
@ -684,16 +700,18 @@ class NPairsOverlap:
------- -------
prob_nomatch : 2-dimensional array of shape `(nhalos, ncatxs)` prob_nomatch : 2-dimensional array of shape `(nhalos, ncatxs)`
""" """
iterator = tqdm(self.pairs,
desc="Calculating probability of no match",
disable=not verbose
)
out = [None] * len(self) out = [None] * len(self)
if verbose: for i, pair in enumerate(iterator):
print("Calculating probability of no match...", flush=True)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
out[i] = pair.prob_nomatch(from_smoothed) out[i] = pair.prob_nomatch(from_smoothed)
return numpy.vstack(out).T return numpy.vstack(out).T
def counterpart_mass(self, from_smoothed, overlap_threshold=0., def counterpart_mass(self, from_smoothed, overlap_threshold=0.,
in_log=False, mass_kind="totpartmass", mass_kind="totpartmass", return_full=False,
return_full=False, verbose=True): verbose=True):
""" """
Calculate the expected counterpart mass of each halo in the reference Calculate the expected counterpart mass of each halo in the reference
simulation from the crossed simulation. simulation from the crossed simulation.
@ -705,9 +723,6 @@ class NPairsOverlap:
overlap_threshold : float, optional overlap_threshold : float, optional
Minimum overlap required for a halo to be considered a match. By Minimum overlap required for a halo to be considered a match. By
default 0.0, i.e. no threshold. default 0.0, i.e. no threshold.
in_log : bool, optional
Whether to calculate the expectation value in log space. By default
`False`.
mass_kind : str, optional mass_kind : str, optional
The mass kind whose ratio is to be calculated. Must be a valid The mass kind whose ratio is to be calculated. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle catalogue key. By default `totpartmass`, i.e. the total particle
@ -727,26 +742,31 @@ class NPairsOverlap:
Expected mass and standard deviation from each cross simulation. Expected mass and standard deviation from each cross simulation.
Returned only if `return_full` is `True`. Returned only if `return_full` is `True`.
""" """
iterator = tqdm(self.pairs,
desc="Calculating counterpart masses",
disable=not verbose)
mus, stds = [None] * len(self), [None] * len(self) mus, stds = [None] * len(self), [None] * len(self)
if verbose: for i, pair in enumerate(iterator):
print("Calculating counterpart masses...", flush=True)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
mus[i], stds[i] = pair.counterpart_mass( mus[i], stds[i] = pair.counterpart_mass(
from_smoothed=from_smoothed, from_smoothed=from_smoothed,
overlap_threshold=overlap_threshold, in_log=in_log, overlap_threshold=overlap_threshold, mass_kind=mass_kind)
mass_kind=mass_kind)
mus, stds = numpy.vstack(mus).T, numpy.vstack(stds).T mus, stds = numpy.vstack(mus).T, numpy.vstack(stds).T
probmatch = 1 - self.prob_nomatch(from_smoothed) # Prob of > 0 matches # Prob of > 0 matches
probmatch = 1 - self.prob_nomatch(from_smoothed)
# Normalise it for weighted sums etc. # Normalise it for weighted sums etc.
norm_probmatch = numpy.apply_along_axis( norm_probmatch = numpy.apply_along_axis(
lambda x: x / numpy.sum(x), axis=1, arr=probmatch) lambda x: x / numpy.sum(x), axis=1, arr=probmatch)
# Mean and standard deviation of weighted stacked Gaussians # Mean and standard deviation of weighted stacked Gaussians
mu = numpy.sum(norm_probmatch * mus, axis=1) mu = numpy.sum((norm_probmatch * mus), axis=1)
std = numpy.sum(norm_probmatch * (mus**2 + stds**2), axis=1) - mu**2 std = numpy.sum((norm_probmatch * (mus**2 + stds**2)), axis=1) - mu**2
std **= 0.5 std **= 0.5
mask = mu <= 0
mu[mask] = numpy.nan
std[mask] = numpy.nan
if return_full: if return_full:
return mu, std, mus, stds return mu, std, mus, stds
return mu, std return mu, std
@ -766,6 +786,11 @@ class NPairsOverlap:
def cat0(self): def cat0(self):
return self.pairs[0].cat0 # All pairs have the same ref catalogue return self.pairs[0].cat0 # All pairs have the same ref catalogue
def __getitem__(self, key):
if not isinstance(key, int):
raise TypeError("Key must be an integer.")
return self.pairs[key]
def __len__(self): def __len__(self):
return len(self.pairs) return len(self.pairs)
@ -794,7 +819,7 @@ def get_cross_sims(simname, nsim0, paths, min_logmass, smoothed):
Whether to use the smoothed overlap or not. Whether to use the smoothed overlap or not.
""" """
nsimxs = [] nsimxs = []
for nsimx in paths.get_ics("csiborg"): for nsimx in paths.get_ics(simname):
if nsimx == nsim0: if nsimx == nsim0:
continue continue
f1 = paths.overlap(simname, nsim0, nsimx, min_logmass, smoothed) f1 = paths.overlap(simname, nsim0, nsimx, min_logmass, smoothed)

View file

@ -501,6 +501,50 @@ class Paths:
fname = fname.replace("overlap", "overlap_smoothed") fname = fname.replace("overlap", "overlap_smoothed")
return join(fdir, fname) return join(fdir, fname)
def match_max(self, simname, nsim0, nsimx, min_logmass, mult):
"""
Path to the files containing matching based on [1].
Parameters
----------
simname : str
Simulation name.
nsim0 : int
IC realisation index of the first simulation.
nsimx : int
IC realisation index of the second simulation.
min_logmass : float
Minimum log mass of halos to consider.
mult : float
Multiplicative search radius factor.
Returns
-------
path : str
References
----------
[1] Maxwell L Hutt, Harry Desmond, Julien Devriendt, Adrianne Slyz; The
effect of local Universe constraints on halo abundance and clustering;
Monthly Notices of the Royal Astronomical Society, Volume 516, Issue 3,
November 2022, Pages 35923601, https://doi.org/10.1093/mnras/stac2407
"""
if simname == "csiborg":
fdir = join(self.postdir, "match_max")
elif simname == "quijote":
fdir = join(self.quijote_dir, "match_max")
else:
ValueError(f"Unknown simulation name `{simname}`.")
try_create_directory(fdir)
nsim0 = str(nsim0).zfill(5)
nsimx = str(nsimx).zfill(5)
min_logmass = float('%.4g' % min_logmass)
fname = f"match_max_{nsim0}_{nsimx}_{min_logmass}_{str(mult)}.npz"
return join(fdir, fname)
def field(self, kind, MAS, grid, nsim, in_rsp, smooth_scale=None): def field(self, kind, MAS, grid, nsim, in_rsp, smooth_scale=None):
r""" r"""
Path to the files containing the calculated density fields in CSiBORG. Path to the files containing the calculated density fields in CSiBORG.

View file

@ -12,6 +12,7 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to match all IC pairs of a simulation.""" """A script to match all IC pairs of a simulation."""
import warnings
from argparse import ArgumentParser from argparse import ArgumentParser
from distutils.util import strtobool from distutils.util import strtobool
from itertools import combinations from itertools import combinations
@ -20,15 +21,8 @@ from random import Random
from mpi4py import MPI from mpi4py import MPI
from taskmaster import work_delegation from taskmaster import work_delegation
from match_singlematch import pair_match import csiborgtools
from match_singlematch import pair_match, pair_match_max
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def get_combs(simname): def get_combs(simname):
@ -53,7 +47,7 @@ def get_combs(simname):
return combs return combs
def main(comb, simname, min_logmass, sigma, verbose): def main(comb, kind, simname, min_logmass, sigma, mult, verbose):
""" """
Match a pair of simulations. Match a pair of simulations.
@ -61,12 +55,16 @@ def main(comb, simname, min_logmass, sigma, verbose):
---------- ----------
comb : tuple comb : tuple
Pair of simulation IC indices. Pair of simulation IC indices.
kind : str
Kind of matching.
simname : str simname : str
Simulation name. Simulation name.
min_logmass : float min_logmass : float
Minimum log halo mass. Minimum log halo mass.
sigma : float sigma : float
Smoothing scale in number of grid cells. Smoothing scale in number of grid cells.
mult : float
Multiplicative factor for search radius.
verbose : bool verbose : bool
Verbosity flag. Verbosity flag.
@ -75,25 +73,46 @@ def main(comb, simname, min_logmass, sigma, verbose):
None None
""" """
nsim0, nsimx = comb nsim0, nsimx = comb
pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose) if kind == "overlap":
pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose)
elif args.kind == "max":
pair_match_max(nsim0, nsimx, simname, min_logmass, mult, verbose)
else:
raise ValueError(f"Unknown matching kind: `{kind}`.")
if __name__ == "__main__": if __name__ == "__main__":
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--kind", type=str, required=True,
choices=["overlap", "max"], help="Kind of matching.")
parser.add_argument("--simname", type=str, required=True, parser.add_argument("--simname", type=str, required=True,
help="Simulation name.", choices=["csiborg", "quijote"]) help="Simulation name.",
choices=["csiborg", "quijote"])
parser.add_argument("--nsim0", type=int, default=None,
help="Reference IC for Max's matching method.")
parser.add_argument("--min_logmass", type=float, required=True, parser.add_argument("--min_logmass", type=float, required=True,
help="Minimum log halo mass.") help="Minimum log halo mass.")
parser.add_argument("--sigma", type=float, default=0, parser.add_argument("--sigma", type=float, default=0,
help="Smoothing scale in number of grid cells.") help="Smoothing scale in number of grid cells.")
parser.add_argument("--mult", type=float, default=5,
help="Search radius multiplier for Max's method.")
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)), parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False, help="Verbosity flag.") default=False, help="Verbosity flag.")
args = parser.parse_args() args = parser.parse_args()
combs = get_combs() if args.kind == "overlap":
combs = get_combs(args.simname)
else:
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
combs = [(args.nsim0, nsimx) for nsimx in paths.get_ics(args.simname)
if nsimx != args.nsim0]
def _main(comb): def _main(comb):
main(comb, args.simname, args.min_logmass, args.sigma, args.verbose) with warnings.catch_warnings():
warnings.filterwarnings("ignore",
"invalid value encountered in cast",
RuntimeWarning)
main(comb, args.kind, args.simname, args.min_logmass, args.sigma,
args.mult, args.verbose)
work_delegation(_main, combs, MPI.COMM_WORLD) work_delegation(_main, combs, MPI.COMM_WORLD)

View file

@ -23,13 +23,75 @@ from distutils.util import strtobool
import numpy import numpy
from scipy.ndimage import gaussian_filter from scipy.ndimage import gaussian_filter
try: import csiborgtools
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools def pair_match_max(nsim0, nsimx, simname, min_logmass, mult, verbose):
"""
Match a pair of simulations using the method of [1].
Parameters
----------
nsim0 : int
The reference simulation IC index.
nsimx : int
The cross simulation IC index.
simname : str
Simulation name.
min_logmass : float
Minimum log halo mass.
mult : float
Multiplicative factor for search radius.
verbose : bool
Verbosity flag.
Returns
-------
None
References
----------
[1] Maxwell L Hutt, Harry Desmond, Julien Devriendt, Adrianne Slyz; The
effect of local Universe constraints on halo abundance and clustering;
Monthly Notices of the Royal Astronomical Society, Volume 516, Issue 3,
November 2022, Pages 35923601, https://doi.org/10.1093/mnras/stac2407
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if simname == "csiborg":
mass_kind = "fof_totpartmass"
maxdist = 155
periodic = False
bounds = {"dist": (0, maxdist), mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.CSiBORGHaloCatalogue(
nsim0, paths, bounds=bounds, load_fitted=True, load_initial=False)
catx = csiborgtools.read.CSiBORGHaloCatalogue(
nsimx, paths, bounds=bounds, load_fitted=True, load_initial=False)
elif simname == "quijote":
mass_kind = "group_mass"
maxdist = None
periodic = True
bounds = {mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.QuijoteHaloCatalogue(
nsim0, paths, 4, bounds=bounds, load_fitted=True,
load_initial=False)
catx = csiborgtools.read.QuijoteHaloCatalogue(
nsimx, paths, 4, bounds=bounds, load_fitted=True,
load_initial=False)
else:
raise ValueError(f"Unknown simulation `{simname}`.")
reader = csiborgtools.read.PairOverlap(cat0, catx, paths, min_logmass,
maxdist=maxdist)
out = csiborgtools.match.matching_max(
cat0, catx, mass_kind, mult=mult, periodic=periodic,
overlap=reader.overlap(from_smoothed=True),
match_indxs=reader["match_indxs"], verbose=verbose)
fout = paths.match_max(simname, nsim0, nsimx, min_logmass, mult)
if verbose:
print(f"{datetime.now()}: saving to ... `{fout}`.", flush=True)
numpy.savez(fout, **{p: out[p] for p in out.dtype.names})
def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose): def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
@ -95,27 +157,17 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
paths.initmatch(nsimx, simname, "particles"))["particles"] paths.initmatch(nsimx, simname, "particles"))["particles"]
hid2mapx = {hid: i for i, hid in enumerate(halomapx[:, 0])} hid2mapx = {hid: i for i, hid in enumerate(halomapx[:, 0])}
if verbose:
print(f"{datetime.now()}: calculating the background density fields.",
flush=True)
overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs) overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs)
delta_bckg = overlapper.make_bckg_delta(parts0, halomap0, hid2map0, cat0, delta_bckg = overlapper.make_bckg_delta(parts0, halomap0, hid2map0, cat0,
verbose=verbose) verbose=verbose)
delta_bckg = overlapper.make_bckg_delta(partsx, halomapx, hid2mapx, catx, delta_bckg = overlapper.make_bckg_delta(partsx, halomapx, hid2mapx, catx,
delta=delta_bckg, verbose=verbose) delta=delta_bckg, verbose=verbose)
if verbose:
print()
if verbose:
print(f"{datetime.now()}: NGP crossing the simulations.", flush=True)
matcher = csiborgtools.match.RealisationsMatcher( matcher = csiborgtools.match.RealisationsMatcher(
mass_kind=mass_kind, **overlapper_kwargs) mass_kind=mass_kind, **overlapper_kwargs)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx, match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx,
halomap0, halomapx, delta_bckg, halomap0, halomapx, delta_bckg,
verbose=verbose) verbose=verbose)
if verbose:
print()
# We want to store the halo IDs of the matches, not their array positions # We want to store the halo IDs of the matches, not their array positions
# in the catalogues. # in the catalogues.
@ -151,6 +203,8 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
if __name__ == "__main__": if __name__ == "__main__":
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--kind", type=str, required=True,
choices=["overlap", "max"], help="Kind of matching.")
parser.add_argument("--nsim0", type=int, required=True, parser.add_argument("--nsim0", type=int, required=True,
help="Reference simulation IC index.") help="Reference simulation IC index.")
parser.add_argument("--nsimx", type=int, required=True, parser.add_argument("--nsimx", type=int, required=True,
@ -159,11 +213,19 @@ if __name__ == "__main__":
help="Simulation name.") help="Simulation name.")
parser.add_argument("--min_logmass", type=float, required=True, parser.add_argument("--min_logmass", type=float, required=True,
help="Minimum log halo mass.") help="Minimum log halo mass.")
parser.add_argument("--mult", type=float, default=5,
help="Search radius multiplier for Max's method.")
parser.add_argument("--sigma", type=float, default=0, parser.add_argument("--sigma", type=float, default=0,
help="Smoothing scale in number of grid cells.") help="Smoothing scale in number of grid cells.")
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)), parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False, help="Verbosity flag.") default=False, help="Verbosity flag.")
args = parser.parse_args() args = parser.parse_args()
pair_match(args.nsim0, args.nsimx, args.simname, args.min_logmass, if args.kind == "overlap":
args.sigma, args.verbose) pair_match(args.nsim0, args.nsimx, args.simname, args.min_logmass,
args.sigma, args.verbose)
elif args.kind == "max":
pair_match_max(args.nsim0, args.nsimx, args.simname, args.min_logmass,
args.mult, args.verbose)
else:
raise ValueError(f"Unknown matching kind: `{args.kind}`.")

View file

@ -14,17 +14,15 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from argparse import ArgumentParser from argparse import ArgumentParser
from gc import collect
from os.path import join from os.path import join
import matplotlib as mpl import matplotlib as mpl
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import numpy import numpy
import scienceplots # noqa import scienceplots # noqa
from cache_to_disk import cache_to_disk, delete_disk_caches_for_function from cache_to_disk import cache_to_disk, delete_disk_caches_for_function
from scipy.stats import kendalltau from scipy.stats import kendalltau
from tqdm import trange, tqdm from tqdm import tqdm
import plt_utils import plt_utils
@ -36,11 +34,7 @@ except ModuleNotFoundError:
import csiborgtools import csiborgtools
############################################################################### def open_cat(nsim, simname):
# IC overlap plotting #
###############################################################################
def open_cat(nsim: int, simname: str):
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if simname == "csiborg": if simname == "csiborg":
@ -56,648 +50,17 @@ def open_cat(nsim: int, simname: str):
return cat return cat
def open_cats(nsims, simname):
catxs = [None] * len(nsims)
@cache_to_disk(7) for i, nsim in enumerate(tqdm(nsims, desc="Opening catalogues")):
def get_overlap(simname, nsim0): catxs[i] = open_cat(nsim, simname)
"""
Calculate the summed overlap and probability of no match for a single
reference simulation.
Parameters return catxs
----------
simname : str
Simulation name.
nsim0 : int
Simulation index.
Returns
-------
mass : 1-dimensional array
Mass of halos in the reference simulation.
hindxs : 1-dimensional array
Halo indices in the reference simulation.
max_overlap : 2-dimensional array
Maximum overlap for each halo in the reference simulation.
summed_overlap : 2-dimensional array
Summed overlap for each halo in the reference simulation.
prob_nomatch : 2-dimensional array
Probability of no match for each halo in the reference simulation.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths,
smoothed=True)
cat0 = open_cat(nsim0)
catxs = []
print("Opening catalogues...", flush=True)
for nsimx in tqdm(nsimxs):
catxs.append(open_cat(nsimx))
reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths)
mass = reader.cat0("totpartmass")
hindxs = reader.cat0("index")
summed_overlap = reader.summed_overlap(True)
max_overlap = reader.max_overlap(True)
prob_nomatch = reader.prob_nomatch(True)
return mass, hindxs, max_overlap, summed_overlap, prob_nomatch
@cache_to_disk(7)
def get_max_key(simname, nsim0, key):
"""
Get the value of a maximum overlap halo's property.
Parameters
----------
simname : str
Simulation name.
nsim0 : int
Reference simulation index.
key : str
Property to get.
Returns
-------
mass0 : 1-dimensional array
Mass of the reference haloes.
key_val : 1-dimensional array
Value of the property of the reference haloes.
max_overlap : 2-dimensional array
Maximum overlap of the reference haloes.
stat : 2-dimensional array
Value of the property of the maximum overlap halo.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths,
smoothed=True)
nsimxs = nsimxs
cat0 = open_cat(nsim0)
catxs = []
print("Opening catalogues...", flush=True)
for nsimx in tqdm(nsimxs):
catxs.append(open_cat(nsimx))
reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths)
mass0 = reader.cat0("totpartmass")
key_val = reader.cat0(key)
max_overlap = reader.max_overlap(True)
stat = reader.max_overlap_key(key, True)
return mass0, key_val, max_overlap, stat
def plot_mass_vs_pairoverlap(nsim0, nsimx):
"""
Plot the pair overlap of a reference simulation with a single cross
simulation as a function of the reference halo mass.
Parameters
----------
nsim0 : int
Reference simulation index.
nsimx : int
Cross simulation index.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cat0 = open_cat(nsim0)
catx = open_cat(nsimx)
reader = csiborgtools.read.PairOverlap(cat0, catx, paths)
x = reader.copy_per_match("totpartmass")
y = reader.overlap(True)
x = numpy.log10(numpy.concatenate(x))
y = numpy.concatenate(y)
with plt.style.context(plt_utils.mplstyle):
plt.figure()
plt.hexbin(x, y, mincnt=1, bins="log",
gridsize=50)
plt.colorbar(label="Counts in bins")
plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$")
plt.ylabel("Pair overlap")
plt.ylim(0., 1.)
plt.tight_layout()
for ext in ["png"]:
fout = join(plt_utils.fout, f"mass_vs_pair_overlap{nsim0}.{ext}")
print(f"Saving to `{fout}`.")
plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
def plot_mass_vs_maxpairoverlap(nsim0, nsimx):
"""
Plot the maximum pair overlap of a reference simulation haloes with a
single cross simulation.
Parameters
----------
nsim0 : int
Reference simulation index.
nsimx : int
Cross simulation index.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cat0 = open_cat(nsim0)
catx = open_cat(nsimx)
reader = csiborgtools.read.PairOverlap(cat0, catx, paths)
x = numpy.log10(cat0["totpartmass"])
y = reader.overlap(True)
def get_max(y_):
if len(y_) == 0:
return numpy.nan
return numpy.max(y_)
y = numpy.array([get_max(y_) for y_ in y])
with plt.style.context(plt_utils.mplstyle):
plt.figure()
plt.hexbin(x, y, mincnt=1, bins="log",
gridsize=50)
plt.colorbar(label="Counts in bins")
plt.xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$")
plt.ylabel("Maximum pair overlap")
plt.ylim(0., 1.)
plt.tight_layout()
for ext in ["png"]:
fout = join(plt_utils.fout, f"mass_vs_maxpairoverlap{nsim0}.{ext}")
print(f"Saving to `{fout}`.")
plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
def plot_mass_vsmedmaxoverlap(nsim0):
"""
Plot the mean maximum overlap of a reference simulation haloes with all the
cross simulations.
Parameters
----------
nsim0 : int
Reference simulation index.
"""
x, __, max_overlap, __, __ = get_overlap("csiborg", nsim0)
for i in trange(max_overlap.shape[0]):
if numpy.sum(numpy.isnan(max_overlap[i, :])) > 0:
max_overlap[i, :] = numpy.nan
x = numpy.log10(x)
with plt.style.context(plt_utils.mplstyle):
fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625))
im1 = axs[0].hexbin(x, numpy.nanmean(max_overlap, axis=1), gridsize=30,
mincnt=1, bins="log")
im2 = axs[1].hexbin(x, numpy.nanstd(max_overlap, axis=1), gridsize=30,
mincnt=1, bins="log")
im3 = axs[2].hexbin(numpy.nanmean(max_overlap, axis=1),
numpy.nanstd(max_overlap, axis=1), gridsize=30,
C=x, reduce_C_function=numpy.nanmean)
axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$")
axs[0].set_ylabel(r"Mean max. pair overlap")
axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$")
axs[1].set_ylabel(r"Uncertainty of max. pair overlap")
axs[2].set_xlabel(r"Mean max. pair overlap")
axs[2].set_ylabel(r"Uncertainty of max. pair overlap")
label = ["Bin counts", "Bin counts", r"$\log M_{\rm tot} / M_\odot$"]
ims = [im1, im2, im3]
for i in range(3):
axins = inset_axes(axs[i], width="100%", height="5%",
loc='upper center', borderpad=-0.75)
fig.colorbar(ims[i], cax=axins, orientation="horizontal",
label=label[i])
axins.xaxis.tick_top()
axins.xaxis.set_tick_params(labeltop=True)
axins.xaxis.set_label_position("top")
fig.tight_layout()
for ext in ["png"]:
fout = join(plt_utils.fout, f"maxpairoverlap_{nsim0}.{ext}")
print(f"Saving to `{fout}`.")
fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
def plot_summed_overlap_vs_mass(nsim0):
"""
Plot the summed overlap of probability of no matching for a single
reference simulations as a function of the reference halo mass, along with
their comparison.
Parameters
----------
nsim0 : int
Simulation index.
Returns
-------
None
"""
x, __, __, summed_overlap, prob_nomatch = get_overlap("csiborg", nsim0)
del __
collect()
for i in trange(summed_overlap.shape[0]):
if numpy.sum(numpy.isnan(summed_overlap[i, :])) > 0:
summed_overlap[i, :] = numpy.nan
x = numpy.log10(x)
mean_overlap = numpy.nanmean(summed_overlap, axis=1)
std_overlap = numpy.nanstd(summed_overlap, axis=1)
mean_prob_nomatch = numpy.nanmean(prob_nomatch, axis=1)
mask = mean_overlap > 0
x = x[mask]
mean_overlap = mean_overlap[mask]
std_overlap = std_overlap[mask]
mean_prob_nomatch = mean_prob_nomatch[mask]
with plt.style.context(plt_utils.mplstyle):
fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625))
im1 = axs[0].hexbin(x, mean_overlap, mincnt=1, bins="log",
gridsize=30)
im2 = axs[1].hexbin(x, std_overlap, mincnt=1, bins="log",
gridsize=30)
im3 = axs[2].scatter(1 - mean_overlap, mean_prob_nomatch, c=x, s=2,
rasterized=True)
t = numpy.linspace(0.3, 1, 100)
axs[2].plot(t, t, color="red", linestyle="--")
axs[0].set_ylim(0.)
axs[1].set_ylim(0.)
axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$")
axs[0].set_ylabel("Mean summed overlap")
axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$")
axs[1].set_ylabel("Uncertainty of summed overlap")
axs[2].set_xlabel(r"$1 - $ mean summed overlap")
axs[2].set_ylabel("Mean prob. of no match")
label = ["Bin counts", "Bin counts",
r"$\log M_{\rm tot} ~ [M_\odot / h]$"]
ims = [im1, im2, im3]
for i in range(3):
axins = inset_axes(axs[i], width="100%", height="5%",
loc='upper center', borderpad=-0.75)
fig.colorbar(ims[i], cax=axins, orientation="horizontal",
label=label[i])
axins.xaxis.tick_top()
axins.xaxis.set_tick_params(labeltop=True)
axins.xaxis.set_label_position("top")
fig.tight_layout()
for ext in ["png"]:
fout = join(plt_utils.fout, f"overlap_stat_{nsim0}.{ext}")
print(f"Saving to `{fout}`.")
fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
def plot_mass_vs_separation(nsim0, nsimx, plot_std=False, min_overlap=0.0):
"""
Plot the mass of a reference halo against the weighted separation of
its counterparts.
Parameters
----------
nsim0 : int
Reference simulation index.
nsimx : int
Cross simulation index.
plot_std : bool, optional
Whether to plot thestd instead of mean.
min_overlap : float, optional
Minimum overlap to consider.
Returns
-------
None
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cat0 = open_cat(nsim0)
catx = open_cat(nsimx)
reader = csiborgtools.read.PairOverlap(cat0, catx, paths,
maxdist=155)
mass = numpy.log10(reader.cat0("totpartmass"))
dist = reader.dist(in_initial=False, norm_kind="r200c")
overlap = reader.overlap(True)
dist = csiborgtools.read.weighted_stats(dist, overlap,
min_weight=min_overlap)
mask = numpy.isfinite(dist[:, 0])
mass = mass[mask]
dist = dist[mask, :]
dist = numpy.log10(dist)
if not plot_std:
p = numpy.polyfit(mass, dist[:, 0], 1)
else:
p = numpy.polyfit(mass, dist[:, 1], 1)
xrange = numpy.linspace(numpy.min(mass), numpy.max(mass), 1000)
txt = r"$m = {}$, $c = {}$".format(*plt_utils.latex_float(*p, n=3))
with plt.style.context(plt_utils.mplstyle):
fig, ax = plt.subplots()
ax.set_title(txt, fontsize="small")
if not plot_std:
cx = ax.hexbin(mass, dist[:, 0], mincnt=1, bins="log", gridsize=50)
ax.set_ylabel(r"$\log \langle \Delta R / R_{\rm 200c}\rangle$")
else:
cx = ax.hexbin(mass, dist[:, 1], mincnt=1, bins="log", gridsize=50)
ax.set_ylabel(
r"$\delta \log \langle \Delta R / R_{\rm 200c}\rangle$")
ax.plot(xrange, numpy.polyval(p, xrange), color="red",
linestyle="--")
fig.colorbar(cx, label="Bin counts")
ax.set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$")
ax.set_ylabel(r"$\log \langle \Delta R / R_{\rm 200c}\rangle$")
fig.tight_layout()
for ext in ["png"]:
fout = join(plt_utils.fout,
f"mass_vs_sep_{nsim0}_{nsimx}_{min_overlap}.{ext}")
if plot_std:
fout = fout.replace(f".{ext}", f"_std.{ext}")
print(f"Saving to `{fout}`.")
fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
def plot_maxoverlap_mass(nsim0):
"""
Plot the mass of the reference haloes against the mass of the maximum
overlap haloes.
Parameters
----------
nsim0 : int
Reference simulation index.
"""
mass0, __, __, stat = get_max_key("csiborg", nsim0, "totpartmass")
mu = numpy.mean(stat, axis=1)
std = numpy.std(numpy.log10(stat), axis=1)
mu = numpy.log10(mu)
mass0 = numpy.log10(mass0)
with plt.style.context(plt_utils.mplstyle):
fig, axs = plt.subplots(ncols=2, figsize=(3.5 * 1.75, 2.625))
im0 = axs[0].hexbin(mass0, mu, mincnt=1, bins="log", gridsize=50)
im1 = axs[1].hexbin(mass0, std, mincnt=1, bins="log", gridsize=50)
m = numpy.isfinite(mass0) & numpy.isfinite(mu)
print("True to expectation corr: ", kendalltau(mass0[m], mu[m]))
t = numpy.linspace(*numpy.percentile(mass0, [0, 100]), 1000)
axs[0].plot(t, t, color="red", linestyle="--")
axs[0].plot(t, t + 0.2, color="red", linestyle="--", alpha=0.5)
axs[0].plot(t, t - 0.2, color="red", linestyle="--", alpha=0.5)
axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$")
axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$")
axs[0].set_ylabel(
r"Max. overlap mean of $\log M_{\rm tot} ~ [M_\odot / h]$")
axs[1].set_ylabel(
r"Max. overlap std. of $\log M_{\rm tot} ~ [M_\odot / h]$")
ims = [im0, im1]
for i in range(2):
axins = inset_axes(axs[i], width="100%", height="5%",
loc='upper center', borderpad=-0.75)
fig.colorbar(ims[i], cax=axins, orientation="horizontal",
label="Bin counts")
axins.xaxis.tick_top()
axins.xaxis.set_tick_params(labeltop=True)
axins.xaxis.set_label_position("top")
fig.tight_layout()
for ext in ["png"]:
fout = join(plt_utils.fout,
f"max_totpartmass_{nsim0}.{ext}")
print(f"Saving to `{fout}`.")
fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
def plot_maxoverlapstat(nsim0, key):
"""
Plot the mass of the reference haloes against the value of the maximum
overlap statistic.
Parameters
----------
nsim0 : int
Reference simulation index.
key : str
Property to get.
"""
assert key != "totpartmass"
mass0, key_val, __, stat = get_max_key("csiborg", nsim0, key)
xlabels = {"lambda200c": r"\log \lambda_{\rm 200c}"}
key_label = xlabels.get(key, key)
mass0 = numpy.log10(mass0)
key_val = numpy.log10(key_val)
mu = numpy.mean(stat, axis=1)
std = numpy.std(numpy.log10(stat), axis=1)
mu = numpy.log10(mu)
with plt.style.context(plt_utils.mplstyle):
fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625))
im0 = axs[0].hexbin(mass0, mu, mincnt=1, bins="log", gridsize=30)
im1 = axs[1].hexbin(mass0, std, mincnt=1, bins="log", gridsize=30)
im2 = axs[2].hexbin(key_val, mu, mincnt=1, bins="log", gridsize=30)
m = numpy.isfinite(key_val) & numpy.isfinite(mu)
print("True to expectation corr: ", kendalltau(key_val[m], mu[m]))
axs[0].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$")
axs[0].set_ylabel(r"Max. overlap mean of ${}$".format(key_label))
axs[1].set_xlabel(r"$\log M_{\rm tot} ~ [M_\odot / h]$")
axs[1].set_ylabel(r"Max. overlap std. of ${}$".format(key_label))
axs[2].set_xlabel(r"${}$".format(key_label))
axs[2].set_ylabel(r"Max. overlap mean of ${}$".format(key_label))
ims = [im0, im1, im2]
for i in range(3):
axins = inset_axes(axs[i], width="100%", height="5%",
loc='upper center', borderpad=-0.75)
fig.colorbar(ims[i], cax=axins, orientation="horizontal",
label="Bin counts")
axins.xaxis.tick_top()
axins.xaxis.set_tick_params(labeltop=True)
axins.xaxis.set_label_position("top")
fig.tight_layout()
for ext in ["png"]:
fout = join(plt_utils.fout,
f"max_{key}_{nsim0}.{ext}")
print(f"Saving to `{fout}`.")
fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
@cache_to_disk(7)
def get_expected_mass(simname, nsim0, min_overlap):
"""
Get the expected mass of a reference halo given its overlap with halos
from other simulations.
Parameters
----------
simname : str
Simulation name.
nsim0 : int
Reference simulation index.
min_overlap : float
Minimum overlap to consider between a pair of haloes.
Returns
-------
mass : 1-dimensional array
Mass of the reference haloes.
mu : 1-dimensional array
Expected mass of the matched haloes.
std : 1-dimensional array
Standard deviation of the expected mass of the matched haloes.
prob_nomatch : 2-dimensional array
Probability of not matching the reference halo.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsimxs = csiborgtools.read.get_cross_sims(simname, nsim0, paths,
smoothed=True)
nsimxs = nsimxs
cat0 = open_cat(nsim0)
catxs = []
print("Opening catalogues...", flush=True)
for nsimx in tqdm(nsimxs):
catxs.append(open_cat(nsimx))
reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths)
mass = reader.cat0("totpartmass")
mu, std = reader.counterpart_mass(True, overlap_threshold=min_overlap,
in_log=False, return_full=False)
prob_nomatch = reader.prob_nomatch(True)
return mass, mu, std, prob_nomatch
def plot_mass_vs_expected_mass(nsim0, min_overlap=0, max_prob_nomatch=1):
"""
Plot the mass of a reference halo against the expected mass of its
counterparts.
Parameters
----------
nsim0 : int
Reference simulation index.
min_overlap : float, optional
Minimum overlap between a pair of haloes to consider.
max_prob_nomatch : float, optional
Maximum probability of no match to consider.
"""
mass, mu, std, prob_nomatch = get_expected_mass("csiborg", nsim0,
min_overlap)
std = std / mu / numpy.log(10)
mass = numpy.log10(mass)
mu = numpy.log10(mu)
prob_nomatch = numpy.nanmedian(prob_nomatch, axis=1)
mask = numpy.isfinite(mass) & numpy.isfinite(mu)
mask &= (prob_nomatch < max_prob_nomatch)
with plt.style.context(plt_utils.mplstyle):
fig, axs = plt.subplots(ncols=3, figsize=(3.5 * 2, 2.625))
im0 = axs[0].hexbin(mass[mask], mu[mask], mincnt=1, bins="log",
gridsize=50,)
im1 = axs[1].hexbin(mass[mask], std[mask], mincnt=1, bins="log",
gridsize=50)
im2 = axs[2].hexbin(1 - prob_nomatch[mask], mass[mask] - mu[mask],
gridsize=50, C=mass[mask],
reduce_C_function=numpy.nanmedian)
axs[2].axhline(0, color="red", linestyle="--", alpha=0.5)
axs[0].set_xlabel(r"True $\log M_{\rm tot} ~ [M_\odot / h]$")
axs[0].set_ylabel(r"Expected $\log M_{\rm tot} ~ [M_\odot / h]$")
axs[1].set_xlabel(r"True $\log M_{\rm tot} ~ [M_\odot / h]$")
axs[1].set_ylabel(r"Std. of $\sigma_{\log M_{\rm tot}}$")
axs[2].set_xlabel(r"1 - median prob. of no match")
axs[2].set_ylabel(r"$\log M_{\rm tot} - \log M_{\rm tot, exp}$")
t = numpy.linspace(*numpy.percentile(mass[mask], [0, 100]), 1000)
axs[0].plot(t, t, color="red", linestyle="--")
axs[0].plot(t, t + 0.2, color="red", linestyle="--", alpha=0.5)
axs[0].plot(t, t - 0.2, color="red", linestyle="--", alpha=0.5)
ims = [im0, im1, im2]
labels = ["Bin counts", "Bin counts",
r"$\log M_{\rm tot} ~ [M_\odot / h]$"]
for i in range(3):
axins = inset_axes(axs[i], width="100%", height="5%",
loc='upper center', borderpad=-0.75)
fig.colorbar(ims[i], cax=axins, orientation="horizontal",
label=labels[i])
axins.xaxis.tick_top()
axins.xaxis.set_tick_params(labeltop=True)
axins.xaxis.set_label_position("top")
fig.tight_layout()
for ext in ["png"]:
fout = join(plt_utils.fout,
f"mass_vs_expmass_{nsim0}_{max_prob_nomatch}.{ext}")
print(f"Saving to `{fout}`.")
fig.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
###############################################################################
# Nearest neighbour plotting #
###############################################################################
def read_dist(simname, run, kind, kwargs): def read_dist(simname, run, kind, kwargs):
"""
Read PDF/CDF of a nearest neighbour distribution.
Parameters
----------
simname : str
Simulation name. Must be either `csiborg` or `quijote`.
run : str
Run name.
kind : str
Kind of distribution. Must be either `pdf` or `cdf`.
kwargs : dict
Nearest neighbour reader keyword arguments.
Returns
-------
dist : 2-dimensional array
Distribution of distances in radial and neighbour bins.
"""
paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
@ -707,26 +70,6 @@ def read_dist(simname, run, kind, kwargs):
def pull_cdf(x, fid_cdf, test_cdf): def pull_cdf(x, fid_cdf, test_cdf):
"""
Pull a CDF so that it matches the fiducial CDF at 0.5. Rescales the x-axis,
while keeping the corresponding CDF values fixed.
Parameters
----------
x : 1-dimensional array
The x-axis of the CDF.
fid_cdf : 1-dimensional array
The fiducial CDF.
test_cdf : 1-dimensional array
The test CDF to be pulled.
Returns
-------
xnew : 1-dimensional array
The new x-axis of the test CDF.
test_cdf : 1-dimensional array
The new test CDF.
"""
xnew = x * numpy.interp(0.5, fid_cdf, x) / numpy.interp(0.5, test_cdf, x) xnew = x * numpy.interp(0.5, fid_cdf, x) / numpy.interp(0.5, test_cdf, x)
return xnew, test_cdf return xnew, test_cdf
@ -1360,7 +703,7 @@ def plot_kl_vs_overlap(runs, nsim, kwargs, runs_to_mass, plot_std=True,
for run in runs: for run in runs:
nn_data = nn_reader.read_single("csiborg", run, nsim, nobs=None) nn_data = nn_reader.read_single("csiborg", run, nsim, nobs=None)
nn_hindxs = nn_data["ref_hindxs"] nn_hindxs = nn_data["ref_hindxs"]
mass, overlap_hindxs, __, summed_overlap, prob_nomatch = get_overlap("csiborg", nsim) # noqa mass, overlap_hindxs, __, summed_overlap, prob_nomatch = get_overlap_summary("csiborg", nsim) # noqa
# We need to match the hindxs between the two. # We need to match the hindxs between the two.
hind2overlap_array = {hind: i for i, hind in enumerate(overlap_hindxs)} hind2overlap_array = {hind: i for i, hind in enumerate(overlap_hindxs)}
@ -1457,8 +800,8 @@ if __name__ == "__main__":
"mass009": (14.0, 14.4), # There is no upper limit. "mass009": (14.0, 14.4), # There is no upper limit.
} }
# cached_funcs = ["get_overlap", "read_dist", "make_kl", "make_ks"] # cached_funcs = ["get_overlap_summary", "read_dist", "make_kl", "make_ks"]
cached_funcs = ["get_max_key"] cached_funcs = ["get_property_maxoverlap"]
if args.clean: if args.clean:
for func in cached_funcs: for func in cached_funcs:
print(f"Cleaning cache for function {func}.") print(f"Cleaning cache for function {func}.")

File diff suppressed because it is too large Load diff

View file

@ -15,6 +15,7 @@
import numpy import numpy
from scipy.stats import binned_statistic from scipy.stats import binned_statistic
from scipy.special import erf
dpi = 600 dpi = 600
fout = "../plots/" fout = "../plots/"
@ -56,38 +57,74 @@ def latex_float(*floats, n=2):
return latex_floats return latex_floats
def binned_trend(x, y, weights, bins): def nan_weighted_average(arr, weights=None, axis=None):
""" if weights is None:
Calculate the weighted mean and standard deviation of `y` in bins of `x`. weights = numpy.ones_like(arr)
Parameters valid_entries = ~numpy.isnan(arr)
----------
x : 1-dimensional array
The x-coordinates of the data points.
y : 1-dimensional array
The y-coordinates of the data points.
weights : 1-dimensional array
The weights of the data points.
bins : 1-dimensional array
The bin edges.
Returns # Set NaN entries in arr to 0 for computation
------- arr = numpy.where(valid_entries, arr, 0)
stat_x : 1-dimensional array
The x-coordinates of the binned data points.
stat_mu : 1-dimensional array
The weighted mean of `y` in bins of `x`.
stat_std : 1-dimensional array
The weighted standard deviation of `y` in bins of `x`.
"""
stat_mu, __, __ = binned_statistic(x, y * weights, bins=bins,
statistic="sum")
stat_std, __, __ = binned_statistic(x, y * weights, bins=bins,
statistic=numpy.var)
stat_w, __, __ = binned_statistic(x, weights, bins=bins, statistic="sum")
stat_x = (bins[1:] + bins[:-1]) / 2 # Set weights of NaN entries to 0
stat_mu /= stat_w weights = numpy.where(valid_entries, weights, 0)
stat_std /= stat_w
stat_std = numpy.sqrt(stat_std) # Compute the weighted sum and the sum of weights along the axis
return stat_x, stat_mu, stat_std weighted_sum = numpy.sum(arr * weights, axis=axis)
sum_weights = numpy.sum(weights, axis=axis)
return weighted_sum / sum_weights
def nan_weighted_std(arr, weights=None, axis=None, ddof=0):
if weights is None:
weights = numpy.ones_like(arr)
valid_entries = ~numpy.isnan(arr)
# Set NaN entries in arr to 0 for computation
arr = numpy.where(valid_entries, arr, 0)
# Set weights of NaN entries to 0
weights = numpy.where(valid_entries, weights, 0)
# Calculate weighted mean
weighted_mean = numpy.sum(
arr * weights, axis=axis) / numpy.sum(weights, axis=axis)
# Calculate the weighted variance
variance = numpy.sum(
weights * (arr - numpy.expand_dims(weighted_mean, axis))**2, axis=axis)
variance /= numpy.sum(weights, axis=axis) - ddof
return numpy.sqrt(variance)
def compute_error_bars(x, y, xbins, sigma):
bin_indices = numpy.digitize(x, xbins)
y_medians = numpy.array([numpy.median(y[bin_indices == i])
for i in range(1, len(xbins))])
lower_pct = 100 * 0.5 * (1 - erf(sigma / numpy.sqrt(2)))
upper_pct = 100 - lower_pct
y_lower = numpy.full(len(y_medians), numpy.nan)
y_upper = numpy.full(len(y_medians), numpy.nan)
for i in range(len(y_medians)):
if numpy.sum(bin_indices == i + 1) == 0:
continue
y_lower[i] = numpy.percentile(y[bin_indices == i + 1], lower_pct)
y_upper[i] = numpy.percentile(y[bin_indices == i + 1], upper_pct)
yerr = (y_medians - numpy.array(y_lower), numpy.array(y_upper) - y_medians)
return y_medians, yerr
def normalize_hexbin(hb):
hexagon_counts = hb.get_array()
normalized_counts = hexagon_counts / hexagon_counts.sum()
hb.set_array(normalized_counts)
hb.set_clim(normalized_counts.min(), normalized_counts.max())