New plots (#85)

* Update verbosity messages

* Update verbosity messages

* Update more verbosity flags

* Update the iterator settings

* Add basic plots

* Update verbosity flags

* Update arg parser

* Update plots

* Remove some older code

* Fix some definitions

* Update plots

* Update plotting

* Update plots

* Add support functions

* Update nb

* Improve plots, move back to scripts

* Update plots

* pep8

* Add max overlap plot

* Add blank line

* Upload changes

* Update changes

* Add weighted stats

* Remove

* Add import

* Add Max's matching

* Edit submission

* Add paths to Max's matching

* Fix matching

* Edit submission

* Edit plot

* Add max overlap separation plot

* Add periodic distance

* Update overlap summaries

* Add nsim0 for Max matching

* Add Max's agreement plot

* Add Quijote for Max method

* Update plotting

* Update name
This commit is contained in:
Richard Stiskalek 2023-08-18 19:20:47 +01:00 committed by GitHub
parent ca3772ac6f
commit 8e3127f4d9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 1343 additions and 2100 deletions

View file

@ -14,7 +14,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from csiborgtools import clustering, field, match, read # noqa
from .utils import (center_of_mass, delta2ncells, number_counts,
from .utils import (center_of_mass, delta2ncells, number_counts, # noqa
periodic_distance, periodic_distance_two_points) # noqa
# Arguments to csiborgtools.read.Paths.

View file

@ -14,4 +14,5 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import (ParticleOverlap, RealisationsMatcher, # noqa
calculate_overlap, calculate_overlap_indxs, pos2cell, # noqa
cosine_similarity, find_neighbour, get_halo_cell_limits) # noqa
cosine_similarity, find_neighbour, get_halo_cell_limits, # noqa
matching_max) # noqa

View file

@ -236,8 +236,6 @@ class RealisationsMatcher(BaseMatcher):
# We begin by querying the kNN for the nearest neighbours of each halo
# in the reference simulation from the cross simulation in the initial
# snapshot.
if verbose:
print(f"{datetime.now()}: querying the KNN.", flush=True)
match_indxs = radius_neighbours(
catx.knn(in_initial=True, subtract_observer=False, periodic=True),
cat0.position(in_initial=True),
@ -261,11 +259,13 @@ class RealisationsMatcher(BaseMatcher):
return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx,
nshift=0, ncells=self.box_size)
if verbose:
print(f"{datetime.now()}: calculating overlaps.", flush=True)
iterator = tqdm(
cat0["index"],
desc=f"{datetime.now()}: calculating NGP overlaps",
disable=not verbose
)
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
indxs = cat0["index"]
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
for i, k0 in enumerate(iterator):
# If we have no matches continue to the next halo.
matches = match_indxs[i]
if matches.size == 0:
@ -347,12 +347,13 @@ class RealisationsMatcher(BaseMatcher):
return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx,
nshift=nshift, ncells=self.box_size)
if verbose:
print(f"{datetime.now()}: calculating smoothed overlaps.",
flush=True)
iterator = tqdm(
cat0["index"],
desc=f"{datetime.now()}: calculating smoothed overlaps",
disable=not verbose
)
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
indxs = cat0["index"]
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
for i, k0 in enumerate(iterator):
pos0, mass0, __, mins0, maxs0 = load_processed_halo(
k0, particles0, halo_map0, hid2map0, nshift=nshift,
ncells=self.box_size)
@ -434,7 +435,12 @@ class ParticleOverlap(BaseMatcher):
assert ((delta.shape == (ncells,) * 3)
& (delta.dtype == numpy.float32))
for hid in tqdm(halo_cat["index"]) if verbose else halo_cat["index"]:
iterator = tqdm(
halo_cat["index"],
desc=f"{datetime.now()} Calculating the background field",
disable=not verbose
)
for hid in iterator:
pos = load_halo_particles(hid, particles, halo_map, hid2map)
if pos is None:
continue
@ -993,11 +999,13 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,
if radiusKNN.size != knn.n_samples_fit_:
raise ValueError("Mismatch in shape of `radiusKNN` or `knn`")
nsamples = len(X)
indxs = [None] * nsamples
patchknn_max = numpy.max(radiusKNN)
for i in trange(nsamples) if verbose else range(nsamples):
iterator = trange(len(X),
desc=f"{datetime.now()}: querying the kNN",
disable=not verbose)
indxs = [None] * len(X)
for i in iterator:
dist, indx = knn.radius_neighbors(
X[i].reshape(1, -1), radiusX[i] + patchknn_max,
sort_results=True)
@ -1082,3 +1090,107 @@ def cosine_similarity(x, y):
out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
return out[0] if out.size == 1 else out
def matching_max(cat0, catx, mass_kind, mult, periodic, overlap=None,
                 match_indxs=None, verbose=True):
    """
    Halo matching algorithm based on [1].

    Reference haloes are processed in order of decreasing mass. For each one,
    the cross haloes within `mult * r200c` of its final-snapshot position are
    ranked by the absolute difference in log-mass and the first one that has
    not already been assigned to another reference halo is taken as the match.

    Parameters
    ----------
    cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue`
        Halo catalogue of the reference simulation.
    catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
        Halo catalogue of the cross simulation.
    mass_kind : str
        Name of the mass column.
    mult : float
        Multiple of R200c below which to consider a match.
    periodic : bool
        Whether to account for periodic boundary conditions.
    overlap : array of 1-dimensional arrays, optional
        Overlap of halos from `cat0` with halos from `catx`. If `overlap` or
        `match_indxs` is not provided, then the overlap of the identified halos
        is not calculated.
    match_indxs : array of 1-dimensional arrays, optional
        Indicies of halos from `catx` having a non-zero overlap with halos
        from `cat0`.
    verbose : bool, optional
        Verbosity flag.

    Returns
    -------
    out : structured array
        Array of matches with one row per halo of `cat0`. Columns are `hid0`,
        `hidx`, `mass0`, `massx`, `dist`, `success`, `match_overlap` and
        `max_overlap`. Rows with `success == False` have no counterpart and
        their remaining columns (other than `hid0` and `mass0`) keep the
        initial fill value.

    Raises
    ------
    ValueError
        If the logarithm of any mass in `cat0` or `catx` is not finite.

    References
    ----------
    [1] Maxwell L Hutt, Harry Desmond, Julien Devriendt, Adrianne Slyz; The
    effect of local Universe constraints on halo abundance and clustering;
    Monthly Notices of the Royal Astronomical Society, Volume 516, Issue 3,
    November 2022, Pages 3592–3601, https://doi.org/10.1093/mnras/stac2407
    """
    pos0 = cat0.position(in_initial=False)
    knnx = catx.knn(in_initial=False, subtract_observer=False,
                    periodic=periodic)
    rad0 = cat0["r200c"]

    mass0 = numpy.log10(cat0[mass_kind])
    massx = numpy.log10(catx[mass_kind])

    # Explicit check instead of `assert`, which is removed under `python -O`.
    if not (numpy.all(numpy.isfinite(mass0))
            and numpy.all(numpy.isfinite(massx))):
        raise ValueError(
            f"Non-finite log-mass found in column `{mass_kind}`.")

    # Loop-invariant catalogue columns.
    hids0 = cat0["index"]
    hidsx = catx["index"]
    with_overlap = overlap is not None and match_indxs is not None

    # `True` while a cross halo is still available to be matched.
    maskx = numpy.ones(len(catx), dtype=numpy.bool_)

    dtypes = [("hid0", numpy.int32),
              ("hidx", numpy.int32),
              ("mass0", numpy.float32),
              ("massx", numpy.float32),
              ("dist", numpy.float32),
              ("success", numpy.bool_),
              ("match_overlap", numpy.float32),
              ("max_overlap", numpy.float32),
              ]
    # NOTE(review): the NaN fill is also cast into the integer fields, so
    # `hidx` of an unmatched row is not meaningful -- filter on `success`.
    out = numpy.full(len(cat0), numpy.nan, dtype=dtypes)
    out["success"] = False

    # Most massive reference haloes get to pick their counterpart first.
    for i in tqdm(numpy.argsort(mass0)[::-1], desc="Matching haloes",
                  disable=not verbose):
        out[i]["hid0"] = hids0[i]
        out[i]["mass0"] = 10**mass0[i]

        neigh_dists, neigh_inds = knnx.radius_neighbors(
            pos0[i].reshape(1, -1), mult * rad0[i])
        neigh_dists, neigh_inds = neigh_dists[0], neigh_inds[0]

        if neigh_dists.size == 0:
            continue

        # Sort the neighbours by mass difference
        sort_order = numpy.argsort(numpy.abs(mass0[i] - massx[neigh_inds]))
        neigh_dists = neigh_dists[sort_order]
        neigh_inds = neigh_inds[sort_order]

        for j, neigh_ind in enumerate(neigh_inds):
            # Skip cross haloes already claimed by a more massive halo.
            if not maskx[neigh_ind]:
                continue

            out[i]["hidx"] = hidsx[neigh_ind]
            out[i]["dist"] = neigh_dists[j]
            out[i]["massx"] = 10**massx[neigh_ind]
            out[i]["success"] = True
            maskx[neigh_ind] = False

            if with_overlap:
                # Position of the matched halo among the overlapping ones,
                # if it overlaps with the reference halo at all.
                hits = numpy.flatnonzero(
                    numpy.asarray(match_indxs[i]) == neigh_ind)
                if hits.size > 0:
                    out[i]["match_overlap"] = overlap[i][hits[0]]
                if len(overlap[i]) > 0:
                    out[i]["max_overlap"] = numpy.max(overlap[i])

            break

    return out

View file

@ -21,6 +21,8 @@ from os.path import isfile
import numpy
from tqdm import tqdm, trange
from ..utils import periodic_distance
###############################################################################
# Overlap of two simulations #
###############################################################################
@ -47,6 +49,7 @@ class PairOverlap:
_cat0 = None
_catx = None
_data = None
_paths = None
def __init__(self, cat0, catx, paths, min_logmass, maxdist=None):
if cat0.simname != catx.simname:
@ -55,6 +58,7 @@ class PairOverlap:
self._cat0 = cat0
self._catx = catx
self._paths = paths
self.load(cat0, catx, paths, min_logmass, maxdist)
def load(self, cat0, catx, paths, min_logmass, maxdist=None):
@ -257,6 +261,8 @@ class PairOverlap:
for i in range(len(overlap)):
if len(overlap[i]) > 0:
out[i] = numpy.sum(overlap[i])
else:
out[i] = 0
return out
def prob_nomatch(self, from_smoothed):
@ -279,9 +285,11 @@ class PairOverlap:
for i in range(len(overlap)):
if len(overlap[i]) > 0:
out[i] = numpy.product(numpy.subtract(1, overlap[i]))
else:
out[i] = 1
return out
def dist(self, in_initial, norm_kind=None):
def dist(self, in_initial, boxsize, norm_kind=None):
"""
Pair distances of matched halos between the reference and cross
simulations.
@ -290,6 +298,8 @@ class PairOverlap:
----------
in_initial : bool
Whether to calculate separation in the initial or final snapshot.
boxsize : float
The size of the simulation box.
norm_kind : str, optional
The kind of normalisation to apply to the distances.
Can be `r200c`, `ref_patch` or `sum_patch`.
@ -320,8 +330,7 @@ class PairOverlap:
# Now calculate distances
dist = [None] * len(self)
for i, ind in enumerate(self["match_indxs"]):
# n refers to the reference halo catalogue position
dist[i] = numpy.linalg.norm(pos0[i, :] - posx[ind, :], axis=1)
dist[i] = periodic_distance(posx[ind, :], pos0[i, :], boxsize)
if norm_kind is not None:
dist[i] /= norm[i]
@ -358,7 +367,7 @@ class PairOverlap:
ratio[i] = numpy.abs(ratio[i])
return numpy.array(ratio, dtype=object)
def max_overlap_key(self, key, from_smoothed):
def max_overlap_key(self, key, min_overlap, from_smoothed):
"""
Calculate the maximum overlap mass of each halo in the reference
simulation from the cross simulation.
@ -367,10 +376,10 @@ class PairOverlap:
----------
key : str
Key to the maximum overlap statistic to calculate.
min_overlap : float
Minimum pair overlap to consider.
from_smoothed : bool
Whether to use the smoothed overlap or not.
mass_kind : str, optional
The mass kind whose ratio is to be calculated.
Returns
-------
@ -384,11 +393,15 @@ class PairOverlap:
# Skip if no match
if len(match_ind) == 0:
continue
out[i] = y[match_ind][numpy.argmax(overlap[i])]
k = numpy.argmax(overlap[i])
if overlap[i][k] > min_overlap:
out[i] = y[match_ind][k]
return out
def counterpart_mass(self, from_smoothed, overlap_threshold=0.,
in_log=False, mass_kind="totpartmass"):
mass_kind="totpartmass"):
"""
Calculate the expected counterpart mass of each halo in the reference
simulation from the crossed simulation.
@ -400,9 +413,6 @@ class PairOverlap:
overlap_threshold : float, optional
Minimum overlap required for a halo to be considered a match. By
default 0.0, i.e. no threshold.
in_log : bool, optional
Whether to calculate the expectation value in log space. By default
`False`.
mass_kind : str, optional
The mass kind whose ratio is to be calculated. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
@ -434,15 +444,11 @@ class PairOverlap:
massx_ = massx_[mask]
overlap_ = overlap_[mask]
massx_ = numpy.log10(massx_) if in_log else massx_
massx_ = numpy.log10(massx_)
# Weighted average and *biased* standard deviation
mean_ = numpy.average(massx_, weights=overlap_)
std_ = numpy.average((massx_ - mean_)**2, weights=overlap_)**0.5
# If in log, convert back to linear
mean_ = 10**mean_ if in_log else mean_
std_ = mean_ * std_ * numpy.log(10) if in_log else std_
mean[i] = mean_
std[i] = std_
@ -544,7 +550,7 @@ def weighted_stats(x, weights, min_weight=0, verbose=False):
"""
out = numpy.full((x.size, 2), numpy.nan, dtype=numpy.float32)
for i in trange(len(x)) if verbose else range(len(x)):
for i in trange(len(x), disable=not verbose):
x_, w_ = numpy.asarray(x[i]), numpy.asarray(weights[i])
mask = w_ > min_weight
x_ = x_[mask]
@ -574,27 +580,30 @@ class NPairsOverlap:
List of cross simulation halo catalogues.
paths : py:class`csiborgtools.read.Paths`
CSiBORG paths object.
min_logmass : float
Minimum log mass of halos to consider.
verbose : bool, optional
Verbosity flag for loading the overlap objects.
"""
_pairs = None
def __init__(self, cat0, catxs, paths, verbose=True):
def __init__(self, cat0, catxs, paths, min_logmass, verbose=True):
pairs = [None] * len(catxs)
if verbose:
print("Loading individual overlap objects...", flush=True)
for i, catx in enumerate(tqdm(catxs) if verbose else catxs):
pairs[i] = PairOverlap(cat0, catx, paths)
for i, catx in enumerate(tqdm(catxs, desc="Loading overlap objects",
disable=not verbose)):
pairs[i] = PairOverlap(cat0, catx, paths, min_logmass)
self._pairs = pairs
def max_overlap(self, from_smoothed, verbose=True):
def max_overlap(self, min_overlap, from_smoothed, verbose=True):
"""
Calculate maximum overlap of each halo in the reference simulation with
the cross simulations.
Parameters
----------
min_overlap : float
Minimum pair overlap to consider.
from_smoothed : bool
Whether to use the smoothed overlap or not.
verbose : bool, optional
@ -604,21 +613,24 @@ class NPairsOverlap:
-------
max_overlap : 2-dimensional array of shape `(nhalos, ncatxs)`
"""
out = [None] * len(self)
if verbose:
print("Calculating maximum overlap...", flush=True)
def get_max(y_):
if len(y_) == 0:
return numpy.nan
return numpy.max(y_)
return 0
out = numpy.max(y_)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
return out if out >= min_overlap else 0
iterator = tqdm(self.pairs,
desc="Calculating maximum overlap",
disable=not verbose
)
out = [None] * len(self)
for i, pair in enumerate(iterator):
out[i] = numpy.asanyarray([get_max(y_)
for y_ in pair.overlap(from_smoothed)])
return numpy.vstack(out).T
def max_overlap_key(self, key, from_smoothed, verbose=True):
def max_overlap_key(self, key, min_overlap, from_smoothed, verbose=True):
"""
Calculate maximum overlap mass of each halo in the reference
simulation with the cross simulations.
@ -627,6 +639,8 @@ class NPairsOverlap:
----------
key : str
Key to the maximum overlap statistic to calculate.
min_overlap : float
Minimum pair overlap to consider.
from_smoothed : bool
Whether to use the smoothed overlap or not.
verbose : bool, optional
@ -636,12 +650,13 @@ class NPairsOverlap:
-------
out : 2-dimensional array of shape `(nhalos, ncatxs)`
"""
iterator = tqdm(self.pairs,
desc=f"Calculating maximum overlap {key}",
disable=not verbose
)
out = [None] * len(self)
if verbose:
print(f"Calculating maximum overlap {key}...", flush=True)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
out[i] = pair.max_overlap_key(key, from_smoothed)
for i, pair in enumerate(iterator):
out[i] = pair.max_overlap_key(key, min_overlap, from_smoothed)
return numpy.vstack(out).T
@ -661,10 +676,11 @@ class NPairsOverlap:
-------
summed_overlap : 2-dimensional array of shape `(nhalos, ncatxs)`
"""
iterator = tqdm(self.pairs,
desc="Calculating summed overlap",
disable=not verbose)
out = [None] * len(self)
if verbose:
print("Calculating summed overlap...", flush=True)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
for i, pair in enumerate(iterator):
out[i] = pair.summed_overlap(from_smoothed)
return numpy.vstack(out).T
@ -684,16 +700,18 @@ class NPairsOverlap:
-------
prob_nomatch : 2-dimensional array of shape `(nhalos, ncatxs)`
"""
iterator = tqdm(self.pairs,
desc="Calculating probability of no match",
disable=not verbose
)
out = [None] * len(self)
if verbose:
print("Calculating probability of no match...", flush=True)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
for i, pair in enumerate(iterator):
out[i] = pair.prob_nomatch(from_smoothed)
return numpy.vstack(out).T
def counterpart_mass(self, from_smoothed, overlap_threshold=0.,
in_log=False, mass_kind="totpartmass",
return_full=False, verbose=True):
mass_kind="totpartmass", return_full=False,
verbose=True):
"""
Calculate the expected counterpart mass of each halo in the reference
simulation from the crossed simulation.
@ -705,9 +723,6 @@ class NPairsOverlap:
overlap_threshold : float, optional
Minimum overlap required for a halo to be considered a match. By
default 0.0, i.e. no threshold.
in_log : bool, optional
Whether to calculate the expectation value in log space. By default
`False`.
mass_kind : str, optional
The mass kind whose ratio is to be calculated. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
@ -727,26 +742,31 @@ class NPairsOverlap:
Expected mass and standard deviation from each cross simulation.
Returned only if `return_full` is `True`.
"""
iterator = tqdm(self.pairs,
desc="Calculating counterpart masses",
disable=not verbose)
mus, stds = [None] * len(self), [None] * len(self)
if verbose:
print("Calculating counterpart masses...", flush=True)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
for i, pair in enumerate(iterator):
mus[i], stds[i] = pair.counterpart_mass(
from_smoothed=from_smoothed,
overlap_threshold=overlap_threshold, in_log=in_log,
mass_kind=mass_kind)
overlap_threshold=overlap_threshold, mass_kind=mass_kind)
mus, stds = numpy.vstack(mus).T, numpy.vstack(stds).T
probmatch = 1 - self.prob_nomatch(from_smoothed) # Prob of > 0 matches
# Prob of > 0 matches
probmatch = 1 - self.prob_nomatch(from_smoothed)
# Normalise it for weighted sums etc.
norm_probmatch = numpy.apply_along_axis(
lambda x: x / numpy.sum(x), axis=1, arr=probmatch)
# Mean and standard deviation of weighted stacked Gaussians
mu = numpy.sum(norm_probmatch * mus, axis=1)
std = numpy.sum(norm_probmatch * (mus**2 + stds**2), axis=1) - mu**2
mu = numpy.sum((norm_probmatch * mus), axis=1)
std = numpy.sum((norm_probmatch * (mus**2 + stds**2)), axis=1) - mu**2
std **= 0.5
mask = mu <= 0
mu[mask] = numpy.nan
std[mask] = numpy.nan
if return_full:
return mu, std, mus, stds
return mu, std
@ -766,6 +786,11 @@ class NPairsOverlap:
def cat0(self):
return self.pairs[0].cat0 # All pairs have the same ref catalogue
def __getitem__(self, key):
if not isinstance(key, int):
raise TypeError("Key must be an integer.")
return self.pairs[key]
def __len__(self):
return len(self.pairs)
@ -794,7 +819,7 @@ def get_cross_sims(simname, nsim0, paths, min_logmass, smoothed):
Whether to use the smoothed overlap or not.
"""
nsimxs = []
for nsimx in paths.get_ics("csiborg"):
for nsimx in paths.get_ics(simname):
if nsimx == nsim0:
continue
f1 = paths.overlap(simname, nsim0, nsimx, min_logmass, smoothed)

View file

@ -501,6 +501,50 @@ class Paths:
fname = fname.replace("overlap", "overlap_smoothed")
return join(fdir, fname)
def match_max(self, simname, nsim0, nsimx, min_logmass, mult):
    """
    Path to the files containing matching based on [1].

    Parameters
    ----------
    simname : str
        Simulation name. Either `csiborg` or `quijote`.
    nsim0 : int
        IC realisation index of the first simulation.
    nsimx : int
        IC realisation index of the second simulation.
    min_logmass : float
        Minimum log mass of halos to consider.
    mult : float
        Multiplicative search radius factor.

    Returns
    -------
    path : str

    Raises
    ------
    ValueError
        If `simname` is not a recognised simulation name.

    References
    ----------
    [1] Maxwell L Hutt, Harry Desmond, Julien Devriendt, Adrianne Slyz; The
    effect of local Universe constraints on halo abundance and clustering;
    Monthly Notices of the Royal Astronomical Society, Volume 516, Issue 3,
    November 2022, Pages 3592–3601, https://doi.org/10.1093/mnras/stac2407
    """
    if simname == "csiborg":
        fdir = join(self.postdir, "match_max")
    elif simname == "quijote":
        fdir = join(self.quijote_dir, "match_max")
    else:
        # The exception must actually be raised; otherwise `fdir` is unbound
        # below and the caller gets an unrelated error.
        raise ValueError(f"Unknown simulation name `{simname}`.")

    try_create_directory(fdir)

    # Zero-pad the realisation indices to five characters.
    nsim0 = str(nsim0).zfill(5)
    nsimx = str(nsimx).zfill(5)
    # Keep four significant figures of the mass threshold in the file name.
    min_logmass = float('%.4g' % min_logmass)

    fname = f"match_max_{nsim0}_{nsimx}_{min_logmass}_{str(mult)}.npz"
    return join(fdir, fname)
def field(self, kind, MAS, grid, nsim, in_rsp, smooth_scale=None):
r"""
Path to the files containing the calculated density fields in CSiBORG.