Matching paper plots (#91)

* Fix calculations of expected mass

* Add paper plots

* Edits to plots

* Add overlap summary

* Add imports

* Add import

* Add binned stat

* Add fit

* Add more plots

* Add basic env

* Add histogram mode

* Edit expected mass

* Improve expected plots

* Clean up plot

* Improve separation plot

* Update plots

* Edit expected calculation

* Update plotting

* Update plots

* Update plots

* Update plots

* Add conc fraction

* Add halo maker sorting

* Renaming

* Add import

* Add NaN treatment

* add import

* Move cosine sim

* Update plots

* Move similarity

* Fix little bugs

* Shorten documentation

* Update plots
Richard Stiskalek 2023-10-17 12:11:15 +01:00 committed by GitHub
parent 136c552369
commit 5500fbd2b9
11 changed files with 2193 additions and 294 deletions

View File

@@ -12,10 +12,12 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from csiborgtools import clustering, field, match, read, summary # noqa
from .utils import (center_of_mass, delta2ncells, number_counts, # noqa
periodic_distance, periodic_distance_two_points, # noqa
binned_statistic, cosine_similarity) # noqa
from .utils import (center_of_mass, delta2ncells, number_counts, # noqa
periodic_distance, periodic_distance_two_points) # noqa
# Arguments to csiborgtools.read.Paths.
paths_glamdring = {"srcdir": "/mnt/extraspace/hdesmond/",

View File

@@ -14,5 +14,5 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import (ParticleOverlap, RealisationsMatcher, # noqa
calculate_overlap, calculate_overlap_indxs, pos2cell, # noqa
cosine_similarity, find_neighbour, get_halo_cell_limits, # noqa
find_neighbour, get_halo_cell_limits, # noqa
matching_max) # noqa

View File

@@ -660,19 +660,12 @@ class ParticleOverlap(BaseMatcher):
def pos2cell(pos, ncells):
"""
Convert position to cell number if there are `ncells` cells along the axis.
"""
if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos
return numpy.floor(pos * ncells).astype(numpy.int32)
def read_nshift(smooth_kwargs):
"""
Determine the number of cells to pad the density field if smoothing is
applied. Defaults to the ceiling of three times the smoothing scale.
"""
return 0 if smooth_kwargs is None else ceil(3 * smooth_kwargs["sigma"])
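For orientation, a minimal sketch of what these two helpers compute (assuming positions are normalised to [0, 1) box units; all numbers invented):

import numpy
from math import ceil

ncells = 512
pos = numpy.array([[0.1, 0.5, 0.999]], dtype=numpy.float32)
numpy.floor(pos * ncells).astype(numpy.int32)  # pos2cell -> [[51, 256, 511]]

smooth_kwargs = {"sigma": 1.5}  # hypothetical smoothing scale in cell units
ceil(3 * smooth_kwargs["sigma"])  # read_nshift -> 5 cells of padding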
@@ -774,33 +767,26 @@ def get_halo_cell_limits(pos, ncells, nshift=0):
return mins, maxs
@jit(nopython=True)
@jit(nopython=True, boundscheck=False)
def calculate_overlap(delta1, delta2, cellmins, delta_bckg, box_size,
bckg_halfsize):
r"""
Overlap between two halos whose density fields are evaluated on the
same grid. This is a JIT implementation, hence it is outside of the main
class.
"""
Calculate overlap between two halos' density fields on the same grid.
Parameters
----------
delta1: 3-dimensional array
Density field of the first halo.
delta2 : 3-dimensional array
Density field of the second halo.
cellmins : len-3 tuple
Tuple of lower cell ID in the full box.
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross simulations
calculated with particles assigned to halos at the final snapshot.
Calculated on a grid determined by `bckg_halfsize`.
delta1, delta2 : 3D array
Density fields of the first and second halos, respectively.
cellmins : tuple (len=3)
Lower cell ID in the full box.
delta_bckg : 3D array
Combined background density field of reference and cross simulations
on `bckg_halfsize` grid.
box_size : int
Number of cells in the box.
Cell count in the box.
bckg_halfsize : int
Background half-size for density field calculation. This is the
grid distance from the center of the box to each side over which to
evaluate the background density field. Must be less than or equal to
half the box size.
Grid distance from box center for background density; must be at
most 0.5 * box_size.
Returns
-------
@@ -834,39 +820,29 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg, box_size,
return intersect / (totmass - intersect)
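Note the returned statistic: assuming `totmass` is the combined mass of the two haloes, `intersect / (totmass - intersect)` equals intersect / (mass1 + mass2 - intersect), an intersection-over-union style overlap bounded between 0 and 1.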
@jit(nopython=True)
@jit(nopython=True, boundscheck=False)
def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
mass1, mass2, box_size, bckg_halfsize):
r"""
Overlap between two haloes whose density fields are evaluated on the
same grid, where `nonzero` enumerates the non-zero cells of `delta1`. This is
a JIT implementation, hence it is outside of the main class.
"""
Calculate overlap of two halos' density fields on the same grid.
Parameters
----------
delta1: 3-dimensional array
Density field of the first halo.
delta2 : 3-dimensional array
Density field of the second halo.
cellmins : len-3 tuple
Tuple of lower cell ID in the full box.
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross simulations
calculated with particles assigned to halos at the final snapshot.
Calculated on a grid determined by `bckg_halfsize`.
nonzero : 2-dimensional array of shape `(n_cells, 3)`
Indices of cells that are non-zero of the lower mass halo. Expected to
be precomputed from `fill_delta_indxs`.
mass1, mass2 : floats, optional
Total masses of the two haloes, respectively. If not provided,
calculated directly from the density field.
delta1, delta2 : 3D array
Density fields of the first and second halos, respectively.
cellmins : tuple (len=3)
Lower cell ID in the full box.
delta_bckg : 3D array
Combined background density from reference and cross simulations
on `bckg_halfsize` grid.
nonzero : 2D array (shape: (n_cells, 3))
Non-zero cells for the lower mass halo (from `fill_delta_indxs`).
mass1, mass2 : float, optional
Halos' total masses. Calculated from density if not provided.
box_size : int
Number of cells in the box.
Cell count in the box.
bckg_halfsize : int
Background half-size for density field calculation. This is the
grid distance from the center of the box to each side over which to
evaluate the background density field. Must be less than or equal to
half the box size.
Grid distance from box center for background density; must be at most 0.5 * box_size.
Returns
-------
@@ -1039,35 +1015,6 @@ def find_neighbour(nsim0, cats):
return dists, cross_hindxs
def cosine_similarity(x, y):
r"""
Calculate the cosine similarity between two Cartesian vectors. Defined
as :math:`\sum_i x_i y_i / (|x| |y|)`.
Parameters
----------
x : 1-dimensional array
The first vector.
y : 1- or 2-dimensional array
The second vector. Can be 2-dimensional of shape `(n_samples, 3)`,
in which case the calculation is broadcasted.
Returns
-------
out : float or 1-dimensional array
"""
if x.ndim != 1:
raise ValueError("`x` must be a 1-dimensional array.")
if y.ndim == 1:
y = y.reshape(1, -1)
out = numpy.sum(x * y, axis=1)
out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
return out[0] if out.size == 1 else out
def matching_max(cat0, catx, mass_kind, mult, periodic, overlap=None,
match_indxs=None, verbose=True):
"""

View File

@@ -16,7 +16,11 @@
from .knn_summary import kNNCDFReader # noqa
from .nearest_neighbour_summary import NearestNeighbourReader # noqa
from .overlap_summary import weighted_stats # noqa
from .overlap_summary import NPairsOverlap, PairOverlap, get_cross_sims # noqa
from .overlap_summary import (NPairsOverlap, PairOverlap, get_cross_sims, # noqa
max_overlap_agreement, max_overlap_agreements, # noqa
find_peak) # noqa
from .pk_summary import PKReader # noqa
from .tpcf_summary import TPCFReader # noqa
from .field_interp import read_interpolated_field # noqa
from .field_interp import (read_interpolated_field, # noqa
bayesian_bootstrap_correlation, # noqa
correlate_at_fixed_smoothing) # noqa

View File

@@ -15,6 +15,12 @@
import numpy
from tqdm import tqdm, trange
from numba import jit
###############################################################################
# Read in the field values at the galaxy positions #
###############################################################################
def read_interpolated_field(survey_name, kind, galaxy_index, paths, MAS, grid,
@@ -75,3 +81,142 @@ def read_interpolated_field(survey_name, kind, galaxy_index, paths, MAS, grid,
ks[i] = j
return out[:, ks, :]
###############################################################################
# Calculate the Bayesian bootstrapped correlation #
###############################################################################
@jit(nopython=True, fastmath=True, boundscheck=False)
def dot_product(x, y):
tot = 0.0
for i in range(len(x)):
tot += x[i] * y[i]
return tot
@jit(nopython=True, fastmath=True, boundscheck=False)
def cov(x, y, mean_x, mean_y, weights):
tot = 0.0
for i in range(len(x)):
tot += (x[i] - mean_x) * (y[i] - mean_y) * weights[i]
return tot
@jit(nopython=True, fastmath=True, boundscheck=False)
def var(x, mean_x, weights):
tot = 0.0
for i in range(len(x)):
tot += (x[i] - mean_x)**2 * weights[i]
return tot
@jit(nopython=True, fastmath=True, boundscheck=False)
def weighted_correlation(x, y, weights):
mean_x = dot_product(x, weights)
mean_y = dot_product(y, weights)
cov_xy = cov(x, y, mean_x, mean_y, weights)
var_x = var(x, mean_x, weights)
var_y = var(y, mean_y, weights)
return cov_xy / numpy.sqrt(var_x * var_y)
@jit(nopython=True, fastmath=True, boundscheck=False)
def _bayesian_bootstrap_correlation(x, y, weights):
nweights = len(weights)
bootstrapped_correlations = numpy.full(nweights, numpy.nan, dtype=x.dtype)
for i in range(nweights):
bootstrapped_correlations[i] = weighted_correlation(x, y, weights[i])
return bootstrapped_correlations
@jit(nopython=True, fastmath=True, boundscheck=False)
def rank(x):
order = numpy.argsort(x)
ranks = order.argsort()
return ranks
@jit(nopython=True, fastmath=True, boundscheck=False)
def bayesian_bootstrap_correlation(x, y, kind="spearman", n_bootstrap=10000):
"""
Calculate the Bayesian bootstrapped correlation between two arrays.
Parameters
----------
x, y : 1-dimensional arrays
The two arrays to calculate the correlation between.
kind : str, optional
The type of correlation to calculate. Either `spearman` or `pearson`.
n_bootstrap : int, optional
The number of bootstrap samples to use.
Returns
-------
corr : 1-dimensional array of shape `(n_bootstrap,)`
"""
if len(x) != len(y):
raise ValueError("Input arrays must have the same length")
if kind not in ["spearman", "pearson"]:
raise ValueError("kind must be either `spearman` or `pearson`")
if kind == "spearman":
dtype = x.dtype
x = rank(x).astype(dtype)
y = rank(y).astype(dtype)
alphas = numpy.ones(len(x), dtype=x.dtype)
weights = numpy.random.dirichlet(alphas, size=n_bootstrap)
return _bayesian_bootstrap_correlation(x, y, weights)
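A hypothetical usage sketch (the import path assumes the re-export shown in the `__init__` diff above; data invented):

import numpy
from csiborgtools.summary import bayesian_bootstrap_correlation

rng = numpy.random.default_rng(42)
x = rng.normal(size=500)
y = 0.5 * x + rng.normal(scale=0.5, size=500)
corr = bayesian_bootstrap_correlation(x, y, kind="spearman", n_bootstrap=1000)
corr.mean(), corr.std()  # posterior mean and spread of the rank correlation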
###############################################################################
# Distribution disagreement #
###############################################################################
def distribution_disagreement(x, y):
"""
Think about this more when stacking non-Gaussian distributions.
"""
delta = x - y
return numpy.abs(delta.mean()) / delta.std()
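A toy example of the statistic (numbers invented): for two roughly Gaussian samples with means 0.30 and 0.25 and equal standard deviations of 0.05, `delta = x - y` has mean ~0.05 and std ~0.07, so the disagreement is ~0.7 sigma:

import numpy
rng = numpy.random.default_rng(1)
x = rng.normal(0.30, 0.05, 10000)
y = rng.normal(0.25, 0.05, 10000)
distribution_disagreement(x, y)  # ~ 0.05 / (0.05 * sqrt(2)) ~ 0.7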
"""
field will be of value (nsims, ngal, nsmooth)
Calculate the correlation for each sim and smoothing scale (nsims, nsmooth)
For each of the above stack the distributions?
"""
def correlate_at_fixed_smoothing(field_values, galaxy_property,
kind="spearman", n_bootstrap=1000):
galaxy_property = galaxy_property.astype(field_values.dtype)
nsims = len(field_values)
distributions = numpy.empty((nsims, n_bootstrap), dtype=field_values.dtype)
for i in trange(nsims):
distributions[i] = bayesian_bootstrap_correlation(
field_values[i], galaxy_property, kind=kind, n_bootstrap=n_bootstrap)
return distributions
def do_something(field_values, galaxy_property):
pass

View File

@@ -23,6 +23,30 @@ from tqdm import tqdm, trange
from ..utils import periodic_distance
###############################################################################
# Utility functions #
###############################################################################
def find_peak(x, weights, shrink=0.95, min_obs=5):
"""
Find the peak of a 1D distribution using a shrinking window.
"""
assert shrink <= 1.
xmin, xmax = numpy.min(x), numpy.max(x)
xpos = (xmax + xmin) / 2
rad = (xmax - xmin) / 2
while True:
mask = numpy.abs(x - xpos) < rad
if mask.sum() < min_obs:
return xpos
xpos = numpy.average(x[mask], weights=weights[mask])
rad *= shrink
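A quick sketch of `find_peak` on toy data (values invented); for a roughly symmetric sample the shrinking window settles near the mode:

import numpy
rng = numpy.random.default_rng(0)
x = rng.normal(2.0, 0.5, 2000)
weights = rng.random(2000)
find_peak(x, weights)  # ~ 2.0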
###############################################################################
# Overlap of two simulations #
###############################################################################
@@ -251,42 +275,16 @@ class PairOverlap:
----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
Returns
-------
summed_overlap : 1-dimensional array of shape `(nhalos, )`
"""
overlap = self.overlap(from_smoothed)
out = numpy.full(len(overlap), numpy.nan, dtype=numpy.float32)
out = numpy.zeros(len(overlap), dtype=numpy.float32)
for i in range(len(overlap)):
if len(overlap[i]) > 0:
out[i] = numpy.sum(overlap[i])
else:
out[i] = 0
return out
def prob_nomatch(self, from_smoothed):
"""
Probability of no match for each halo in the reference simulation with
the cross simulation. Defined as a product of 1 - overlap with other
halos.
Parameters
----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
Returns
-------
prob_nomatch : 1-dimensional array of shape `(nhalos, )`
"""
overlap = self.overlap(from_smoothed)
out = numpy.full(len(overlap), numpy.nan, dtype=numpy.float32)
for i in range(len(overlap)):
if len(overlap[i]) > 0:
out[i] = numpy.product(numpy.subtract(1, overlap[i]))
else:
out[i] = 1
return out
def dist(self, in_initial, boxsize, norm_kind=None):
@@ -308,8 +306,7 @@
-------
dist : array of 1-dimensional arrays of shape `(nhalos, )`
"""
assert (norm_kind is None
or norm_kind in ("r200c", "ref_patch", "sum_patch"))
assert (norm_kind is None or norm_kind in ("r200c", "ref_patch", "sum_patch")) # noqa
# Get positions either in the initial or final snapshot
pos0 = self.cat0().position(in_initial=in_initial)
posx = self.catx().position(in_initial=in_initial)
@@ -400,60 +397,6 @@ class PairOverlap:
return out
def counterpart_mass(self, from_smoothed, overlap_threshold=0.,
mass_kind="totpartmass"):
"""
Calculate the expected counterpart mass of each halo in the reference
simulation from the crossed simulation.
Parameters
----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
overlap_threshold : float, optional
Minimum overlap required for a halo to be considered a match. By
default 0.0, i.e. no threshold.
mass_kind : str, optional
The mass kind whose ratio is to be calculated. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
Returns
-------
mean, std : 1-dimensional arrays of shape `(nhalos, )`
"""
mean = numpy.full(len(self), numpy.nan, dtype=numpy.float32)
std = numpy.full(len(self), numpy.nan, dtype=numpy.float32)
massx = self.catx(mass_kind) # Create references to speed
overlap = self.overlap(from_smoothed) # up the loop below
for i, match_ind in enumerate(self["match_indxs"]):
# Skip if no match
if len(match_ind) == 0:
continue
massx_ = massx[match_ind] # Again just create references
overlap_ = overlap[i] # to the appropriate elements
# Optionally apply overlap threshold
if overlap_threshold > 0.:
mask = overlap_ > overlap_threshold
if numpy.sum(mask) == 0:
continue
massx_ = massx_[mask]
overlap_ = overlap_[mask]
massx_ = numpy.log10(massx_)
# Weighted average and *biased* standard deviation
mean_ = numpy.average(massx_, weights=overlap_)
std_ = numpy.average((massx_ - mean_)**2, weights=overlap_)**0.5
mean[i] = mean_
std[i] = std_
return mean, std
def copy_per_match(self, par):
"""
Make an array like `self.match_indxs` where each of its element is an
@@ -525,6 +468,82 @@ class PairOverlap:
return self["match_indxs"].size
###############################################################################
# Support functions for pair overlaps #
###############################################################################
def max_overlap_agreement(cat0, catx, min_logmass, maxdist, paths):
r"""
For each halo `A` from catalogue `cat0`, determine whether the halo `B`
from catalogue `catx` with which `A` has maximum overlap also has its
maximum overlap with `A`.
Parameters
----------
cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue corresponding to the reference simulation.
catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue corresponding to the cross simulation.
min_logmass : float
Minimum halo mass in :math:`\log_{10} M_\odot / h` to consider.
maxdist : float
Maximum halo distance in :math:`\mathrm{Mpc} / h` from the centre
of the high-resolution region.
paths : :py:class:`csiborgtools.read.Paths`
CSiBORG paths object.
Returns
-------
agreement : 1-dimensional array of shape `(nhalos, )`
"""
kwargs = {"paths": paths, "min_logmass": min_logmass, "maxdist": maxdist}
pair_forward = PairOverlap(cat0, catx, **kwargs)
pair_backward = PairOverlap(catx, cat0, **kwargs)
nhalos = len(pair_forward.cat0())
agreement = numpy.full(nhalos, numpy.nan, dtype=numpy.float32)
for i in range(nhalos):
match_indxs_forward = pair_forward["match_indxs"][i]
if len(match_indxs_forward) == 0:
continue
overlap_forward = pair_forward["smoothed_overlap"][i]
kmax = match_indxs_forward[numpy.argmax(overlap_forward)]
match_indxs_backward = pair_backward["match_indxs"][kmax]
overlap_backward = pair_backward["smoothed_overlap"][kmax]
imatch = match_indxs_backward[numpy.argmax(overlap_backward)]
agreement[i] = imatch == i
return agreement
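The reciprocity test in isolation, on toy match arrays rather than the catalogue machinery (all indices and overlaps invented):

import numpy
# Forward: matches of cat0 halos 0 and 1 in catx, with their overlaps.
match_forward = [numpy.array([3, 7]), numpy.array([2])]
overlap_forward = [numpy.array([0.6, 0.1]), numpy.array([0.9])]
# Backward: matches of catx halos 3 and 2 back in cat0.
match_backward = {3: numpy.array([5, 0]), 2: numpy.array([1])}
overlap_backward = {3: numpy.array([0.7, 0.2]), 2: numpy.array([0.8])}

for i in range(2):
    k = match_forward[i][numpy.argmax(overlap_forward[i])]
    imatch = match_backward[k][numpy.argmax(overlap_backward[k])]
    print(i, imatch == i)  # 0 False (halo 3 prefers halo 5), 1 True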
def max_overlap_agreements(cat0, catxs, min_logmass, maxdist, paths,
verbose=True):
"""
Repeat `max_overlap_agreement` for many cross simulations.
Parameters
----------
...
Returns
-------
agreements : 2-dimensional array of shape `(ncatxs, nhalos)`
"""
agreements = [None] * len(catxs)
desc = "Calculating maximum overlap agreement"
for i, catx in enumerate(tqdm(catxs, desc=desc, disable=not verbose)):
agreements[i] = max_overlap_agreement(cat0, catx, min_logmass,
maxdist, paths)
return numpy.asanyarray(agreements)
def weighted_stats(x, weights, min_weight=0, verbose=False):
"""
Calculate the weighted mean and standard deviation of `x` using `weights`
@@ -544,11 +563,10 @@ def weighted_stats(x, weights, min_weight=0, verbose=False):
Returns
-------
stat : 2-dimensional array of shape `(len(x), 2)`
The first column is the weighted mean and the second column is the
weighted standard deviation.
mu, std : 1-dimensional arrays of shape `(len(x), )`
"""
out = numpy.full((x.size, 2), numpy.nan, dtype=numpy.float32)
mu = numpy.full(x.size, numpy.nan, dtype=numpy.float32)
std = numpy.full(x.size, numpy.nan, dtype=numpy.float32)
for i in trange(len(x), disable=not verbose):
x_, w_ = numpy.asarray(x[i]), numpy.asarray(weights[i])
@@ -557,9 +575,9 @@ def weighted_stats(x, weights, min_weight=0, verbose=False):
w_ = w_[mask]
if len(w_) == 0:
continue
out[i, 0] = numpy.average(x_, weights=w_)
out[i, 1] = numpy.average((x_ - out[i, 0])**2, weights=w_)**0.5
return out
mu[i] = numpy.average(x_, weights=w_)
std[i] = numpy.average((x_ - mu[i])**2, weights=w_)**0.5
return mu, std
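Usage sketch with ragged per-halo arrays (values invented; the inputs are object arrays so that `x.size` counts halos):

import numpy
x = numpy.empty(3, dtype=object)
w = numpy.empty(3, dtype=object)
x[0], w[0] = numpy.array([13.1, 13.3]), numpy.array([0.7, 0.3])
x[1], w[1] = numpy.array([14.0]), numpy.array([0.9])
x[2], w[2] = numpy.array([]), numpy.array([])
mu, std = weighted_stats(x, w)
# mu -> [13.16, 14.0, nan]; halos with no weights above `min_weight` stay NaN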
###############################################################################
@@ -684,92 +702,87 @@ class NPairsOverlap:
out[i] = pair.summed_overlap(from_smoothed)
return numpy.vstack(out).T
def prob_nomatch(self, from_smoothed, verbose=True):
"""
Probability of no match for each halo in the reference simulation with
the cross simulation.
Parameters
----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
verbose : bool, optional
Verbosity flag.
Returns
-------
prob_nomatch : 2-dimensional array of shape `(nhalos, ncatxs)`
"""
iterator = tqdm(self.pairs,
desc="Calculating probability of no match",
disable=not verbose
)
out = [None] * len(self)
for i, pair in enumerate(iterator):
out[i] = pair.prob_nomatch(from_smoothed)
return numpy.vstack(out).T
def expected_property_single(self, k, key, from_smoothed, in_log=True):
ys = [None] * len(self)
overlaps = [None] * len(self)
for i, pair in enumerate(self):
overlap = pair.overlap(from_smoothed)
if len(overlap[k]) == 0:
ys[i] = numpy.nan
overlaps[i] = numpy.nan
continue
match_indxs = pair["match_indxs"]
j = numpy.argmax(overlap[k])
ys[i] = pair.catx(key)[match_indxs[k][j]]
if in_log:
ys[i] = numpy.log10(ys[i])
overlaps[i] = overlap[k][j]
return ys, overlaps
def counterpart_mass(self, from_smoothed, overlap_threshold=0.,
mass_kind="totpartmass", return_full=False,
verbose=True):
def expected_property(self, key, from_smoothed, min_logmass,
in_log=True, mass_kind="totpartmass", verbose=True):
"""
Calculate the expected counterpart mass of each halo in the reference
simulation from the crossed simulation.
Parameters
----------
key : str
Property key.
from_smoothed : bool
Whether to use the smoothed overlap or not.
overlap_threshold : float, optional
Minimum overlap required for a halo to be considered a match. By
default 0.0, i.e. no threshold.
min_logmass : float
Minimum log mass of reference halos to consider.
in_log : bool, optional
Whether to calculate the expected property in log10.
mass_kind : str, optional
The mass kind whose ratio is to be calculated. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
return_full : bool, optional
Whether to return the full results of matching each pair or
calculate summary statistics by Gaussian averaging.
verbose : bool, optional
Verbosity flag.
Returns
-------
mu, std : 1-dimensional arrays of shape `(nhalos,)`
Summary expected mass and standard deviation from all cross
simulations.
mus, stds : 2-dimensional arrays of shape `(nhalos, ncatx)`, optional
Expected mass and standard deviation from each cross simulation.
Returned only if `return_full` is `True`.
mean_expected : 1-dimensional array of shape `(nhalos, )`
Expected property from all cross simulations.
std_expected : 1-dimensional array of shape `(nhalos, )`
Standard deviation of the expected property.
"""
iterator = tqdm(self.pairs,
desc="Calculating counterpart masses",
disable=not verbose)
mus, stds = [None] * len(self), [None] * len(self)
for i, pair in enumerate(iterator):
mus[i], stds[i] = pair.counterpart_mass(
from_smoothed=from_smoothed,
overlap_threshold=overlap_threshold, mass_kind=mass_kind)
mus, stds = numpy.vstack(mus).T, numpy.vstack(stds).T
log_mass0 = numpy.log10(self.cat0(mass_kind))
ntot = len(log_mass0)
mean_expected = numpy.full(ntot, numpy.nan, dtype=numpy.float32)
std_expected = numpy.full(ntot, numpy.nan, dtype=numpy.float32)
# Prob of > 0 matches
probmatch = 1 - self.prob_nomatch(from_smoothed)
# Normalise it for weighted sums etc.
norm_probmatch = numpy.apply_along_axis(
lambda x: x / numpy.sum(x), axis=1, arr=probmatch)
indxs = numpy.where(log_mass0 > min_logmass)[0]
for i in tqdm(indxs, disable=not verbose,
desc="Calculating expectation"):
ys = numpy.full(len(self), numpy.nan, dtype=numpy.float32)
weights = numpy.full(len(self), numpy.nan, dtype=numpy.float32)
for j, pair in enumerate(self):
overlap = pair.overlap(from_smoothed)
if len(overlap[i]) == 0:
continue
# Mean and standard deviation of weighted stacked Gaussians
mu = numpy.sum((norm_probmatch * mus), axis=1)
std = numpy.sum((norm_probmatch * (mus**2 + stds**2)), axis=1) - mu**2
std **= 0.5
k = numpy.argmax(overlap[i])
ys[j] = pair.catx(key)[pair["match_indxs"][i][k]]
weights[j] = overlap[i][k]
mask = mu <= 0
mu[mask] = numpy.nan
std[mask] = numpy.nan
if in_log:
ys[j] = numpy.log10(ys[j])
if return_full:
return mu, std, mus, stds
return mu, std
mask = numpy.isfinite(ys) & numpy.isfinite(weights)
if numpy.sum(mask) <= 2:
continue
mean_expected[i] = find_peak(ys[mask], weights=weights[mask])
std_expected[i] = numpy.average((ys[mask] - mean_expected[i])**2,
weights=weights[mask])**0.5
print(log_mass0[i], mean_expected[i], std_expected[i])
return mean_expected, std_expected
@property
def pairs(self):

View File

@@ -150,6 +150,35 @@ def radec_to_cartesian(X):
]).T
def cosine_similarity(x, y):
r"""
Calculate the cosine similarity between two Cartesian vectors. Defined
as :math:`\sum_i x_i y_i / (|x| |y|)`.
Parameters
----------
x : 1-dimensional array
The first vector.
y : 1- or 2-dimensional array
The second vector. Can be 2-dimensional of shape `(n_samples, 3)`,
in which case the calculation is broadcasted.
Returns
-------
out : float or 1-dimensional array
"""
if x.ndim != 1:
raise ValueError("`x` must be a 1-dimensional array.")
if y.ndim == 1:
y = y.reshape(1, -1)
out = numpy.sum(x * y, axis=1)
out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
return out[0] if out.size == 1 else out
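Behaviour sketch (toy vectors): broadcasting one reference vector against a batch returns one similarity per row:

import numpy
x = numpy.array([1.0, 0.0, 0.0])
y = numpy.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 0.0]])
cosine_similarity(x, y)  # -> [1.0, 0.0, 0.7071]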
def real2redshift(pos, vel, observer_location, observer_velocity, box,
periodic_wrap=True, make_copy=True):
r"""
@@ -219,3 +248,17 @@ def number_counts(x, bin_edges):
for i in range(bin_edges.size - 1):
out[i] = numpy.sum((x >= bin_edges[i]) & (x < bin_edges[i + 1]))
return out
def binned_statistic(x, y, left_edges, bin_width, statistic):
"""
Calculate a binned `statistic` of `y` in bins of `x` given by
`left_edges` and a fixed `bin_width`.
"""
out = numpy.full(left_edges.size, numpy.nan, dtype=x.dtype)
for i in range(left_edges.size):
mask = (x >= left_edges[i]) & (x < left_edges[i] + bin_width)
if numpy.any(mask):
out[i] = statistic(y[mask])
return out
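Usage sketch (toy data; any reduction works as `statistic`, e.g. `numpy.median`):

import numpy
x = numpy.array([0.1, 0.4, 0.6, 1.2, 1.3])
y = numpy.array([1.0, 3.0, 2.0, 10.0, 20.0])
left_edges = numpy.arange(0.0, 2.0, 0.5)
binned_statistic(x, y, left_edges, bin_width=0.5, statistic=numpy.median)
# -> [2.0, 2.0, 15.0, nan]  (last bin [1.5, 2.0) is empty)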

scripts/sort_halomaker.py (new file, 100 lines)
View File

@@ -0,0 +1,100 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to sort the HaloMaker's `particle_membership` file to match the ordering
of particles in the simulation snapshot.
"""
from argparse import ArgumentParser
from datetime import datetime
from glob import iglob
import h5py
import numpy
import pynbody
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange
import csiborgtools
def sort_particle_membership(nsim, nsnap, method):
"""
Read the HaloMaker particle halo membership and sort the halo IDs to
match the ordering of particles in the simulation snapshot.
Parameters
----------
nsim : int
IC realisation index.
nsnap : int
Snapshot index.
method : str
HaloMaker method.
"""
print(f"{datetime.now()}: starting simulation {nsim}, snapshot {nsnap} and method {method}.") # noqa
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fpath = next(iglob(f"/mnt/extraspace/rstiskalek/CSiBORG/halo_maker/ramses_{nsim}/output_{str(nsnap).zfill(5)}/**/*particle_membership*", recursive=True), None) # noqa
print(f"{datetime.now()}: loading particle membership `{fpath}`.")
# Columns are halo ID, particle ID
membership = numpy.genfromtxt(fpath, dtype=int)
print(f"{datetime.now()}: loading particle IDs from the snapshot.")
sim = pynbody.load(paths.snapshot(nsnap, nsim, "csiborg"))
pids = numpy.asanyarray(sim["iord"])
print(f"{datetime.now()}: mapping particle IDs to their indices.")
pids_idx = {pid: i for i, pid in enumerate(pids)}
print(f"{datetime.now()}: mapping HIDs to their array indices.")
# Unassigned particle IDs are assigned a halo ID of 0.
hids = numpy.zeros(pids.size, dtype=numpy.int32)
for i in trange(membership.shape[0]):
hid, pid = membership[i]
hids[pids_idx[pid]] = hid
fout = fpath + "_sorted.hdf5"
print(f"{datetime.now()}: saving the sorted data to ... `{fout}`")
header = """
This dataset represents halo indices for each particle.
- The particles are ordered as they appear in the simulation snapshot.
- Unassigned particles are given a halo index of 0.
"""
with h5py.File(fout, 'w') as hdf:
dset = hdf.create_dataset('hids_dataset', data=hids)
dset.attrs['header'] = header
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--method", type=str, required=True,
help="HaloMaker method")
parser.add_argument("--nsim", type=int, required=False, default=None,
help="IC index. If not set process all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if args.nsim is None:
ics = paths.get_ics("csiborg")
else:
ics = [args.nsim]
snaps = numpy.array([max(paths.get_snapshots(nsim, "csiborg"))
for nsim in ics])
def main(n):
sort_particle_membership(ics[n], snaps[n], args.method)
work_delegation(main, list(range(len(ics))), MPI.COMM_WORLD)
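A typical invocation under MPI might look like the following (method name and simulation index are hypothetical):

mpirun -n 4 python sort_halomaker.py --method FOF --nsim 7444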

View File

@@ -29,16 +29,9 @@ import numpy
from mpi4py import MPI
from taskmaster import work_delegation
import csiborgtools
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def _main(nsim, simname, verbose):
"""
@@ -51,9 +44,7 @@ def _main(nsim, simname, verbose):
else:
partreader = csiborgtools.read.QuijoteReader(paths)
if verbose:
print(f"{datetime.now()}: reading and processing simulation `{nsim}`.",
flush=True)
print(f"{datetime.now()}: processing simulation `{nsim}`.", flush=True)
# We first load the particle IDs in the final snapshot.
pidf = csiborgtools.read.read_h5(paths.particles(nsim, simname))
pidf = pidf["particle_ids"]

scripts_plots/paper_match.py (new file, 1632 lines)

File diff suppressed because it is too large.

View File

@@ -66,6 +66,12 @@ def plot_mass_vs_ncells(nsim, pdf=False):
cat = open_csiborg(nsim)
mpart = 4.38304044e+09
x = numpy.log10(cat["totpartmass"])
y = numpy.log10(cat["lagpatch_ncells"])
p = numpy.polyfit(x, y, 1)
print("Fitted parameters are: ", p)
with plt.style.context(plt_utils.mplstyle):
plt.figure()
plt.scatter(cat["totpartmass"], cat["lagpatch_ncells"], s=0.25,
@@ -105,9 +111,9 @@ def plot_hmf(pdf=False):
csiborg_counts = numpy.full((len(csiborg_nsims), len(bins) - 1),
numpy.nan, dtype=numpy.float32)
csiborg_counts[i, :] = data["counts"]
# csiborg_counts /= numpy.diff(bins).reshape(1, -1)
csiborg_counts /= numpy.diff(bins).reshape(1, -1)
csiborg5511 = numpy.load(paths.halo_counts("csiborg", 5511))["counts"]
# csiborg5511 = numpy.load(paths.halo_counts("csiborg", 5511))["counts"]
# csiborg5511 /= numpy.diff(data["bins"])
print("Loading Quijote halo counts.", flush=True)
@@ -121,73 +127,89 @@
(len(quijote_nsims) * nmax, len(bins) - 1), numpy.nan,
dtype=numpy.float32)
quijote_counts[i * nmax:(i + 1) * nmax, :] = data["counts"]
# quijote_counts /= numpy.diff(bins).reshape(1, -1)
quijote_counts /= numpy.diff(bins).reshape(1, -1)
# vol = 155.5**3
# csiborg_counts /= vol
# quijote_counts /= vol
vol = 4 * numpy.pi / 3 * 155.5**3
csiborg_counts /= vol
quijote_counts /= vol
# csiborg5511 /= vol
x = 10**(0.5 * (bins[1:] + bins[:-1]))
# Edit lower limits
csiborg_counts[:, x < 1e12] = numpy.nan
csiborg_counts[:, x < 10**13.1] = numpy.nan
quijote_counts[:, x < 10**(13.1)] = numpy.nan
# Edit upper limits
csiborg_counts[:, x > 3e15] = numpy.nan
quijote_counts[:, x > 3e15] = numpy.nan
csiborg5511[x > 3e15] = numpy.nan
# csiborg5511[x > 3e15] = numpy.nan
with plt.style.context(plt_utils.mplstyle):
cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]
fig, ax = plt.subplots(nrows=2, sharex=True,
figsize=(3.5, 2.625 * 1.25),
gridspec_kw={"height_ratios": [1, 0.45]})
fig.subplots_adjust(hspace=0, wspace=0)
fig, ax = plt.subplots(nrows=1, sharex=True,
figsize=(3.5, 2.625))
ax = [ax]
# fig, ax = plt.subplots(nrows=2, sharex=True,
# figsize=(3.5, 2.625 * 1.25),
# gridspec_kw={"height_ratios": [1, 0.25]})
# fig.subplots_adjust(hspace=0, wspace=0)
# Upper panel data
mean_csiborg = numpy.mean(csiborg_counts, axis=0)
std_csiborg = numpy.std(csiborg_counts, axis=0)
ax[0].plot(x, mean_csiborg, label="CSiBORG", c=cols[0])
ax[0].fill_between(x, mean_csiborg - std_csiborg,
mean_csiborg + std_csiborg,
alpha=0.5, color=cols[0])
for i in range(len(csiborg_counts)):
ax[0].plot(x, csiborg_counts[i, :], c="cornflowerblue", lw=0.5, zorder=0)
ax[0].plot(x, mean_csiborg, label="CSiBORG", c="mediumblue", zorder=1)
# ax[0].fill_between(x, mean_csiborg - std_csiborg,
# mean_csiborg + std_csiborg,
# alpha=0.5, color=cols[0])
mean_quijote = numpy.mean(quijote_counts, axis=0)
std_quijote = numpy.std(quijote_counts, axis=0)
ax[0].plot(x, mean_quijote, label="Quijote", c=cols[1])
ax[0].fill_between(x, mean_quijote - std_quijote,
mean_quijote + std_quijote, alpha=0.5,
color=cols[1])
ax[0].plot(x, csiborg5511, label="CSiBORG 5511", c="k", ls="--")
std5511 = numpy.sqrt(csiborg5511)
ax[0].fill_between(x, csiborg5511 - std_csiborg, csiborg5511 + std5511,
alpha=0.2, color="k")
# Lower panel data
log_y = numpy.log10(mean_csiborg / mean_quijote)
err = numpy.sqrt((std_csiborg / mean_csiborg / numpy.log(10))**2
+ (std_quijote / mean_quijote / numpy.log(10))**2)
ax[1].plot(x, 10**log_y, c=cols[0])
ax[1].fill_between(x, 10**(log_y - err), 10**(log_y + err), alpha=0.5,
color=cols[0])
for i in range(len(quijote_counts)):
ax[0].plot(x, quijote_counts[i, :], c="palegreen", lw=0.5, zorder=-1)
ax[1].plot(x, csiborg5511 / mean_quijote, c="k", ls="--")
ax[0].plot(x, mean_quijote, label="Quijote", c="darkgreen", zorder=1)
# ax[0].fill_between(x, mean_quijote - std_quijote,
# mean_quijote + std_quijote, alpha=0.5,
# color=cols[1])
# ax[0].plot(x, csiborg5511, label="CSiBORG 5511", c="k", ls="--")
# std5511 = numpy.sqrt(csiborg5511)
# ax[0].fill_between(x, csiborg5511 - std_csiborg, csiborg5511 + std5511,
# alpha=0.2, color="k")
# # Lower panel data
# log_y = numpy.log10(mean_csiborg / mean_quijote)
# err = numpy.sqrt((std_csiborg / mean_csiborg / numpy.log(10))**2
# + (std_quijote / mean_quijote / numpy.log(10))**2)
# ax[1].plot(x, 10**log_y, c=cols[0])
# ax[1].fill_between(x, 10**(log_y - err), 10**(log_y + err), alpha=0.5,
# color="k")
# ax[1].plot(x, csiborg5511 / mean_quijote, c="k", ls="--")
# Labels and accesories
ax[1].axhline(1, color="k", ls="--",
lw=0.5 * plt.rcParams["lines.linewidth"], zorder=0)
# ax[1].axhline(1, color="k", ls="--",
# lw=0.5 * plt.rcParams["lines.linewidth"], zorder=0)
# ax[0].set_ylabel(r"$\frac{\mathrm{d}^2 N}{\mathrm{d} V \mathrm{d}\log M_{\rm tot}}~[\mathrm{dex}^{-1} (\mathrm{Mpc} / h)^{-3}]$", # noqa
# fontsize="small")
ax[0].set_ylabel("Counts in bins")
ax[1].set_xlabel(r"$M_{\rm tot}~[M_\odot / h]$", fontsize="small")
ax[1].set_ylabel(r"$\mathrm{CSiBORG} / \mathrm{Quijote}$",
fontsize="small")
m = numpy.isfinite(mean_quijote)
ax[0].set_xlim(x[m].min(), x[m].max())
ax[0].set_ylabel(r"$\mathrm{HMF}~[\mathrm{dex}^{-1} (\mathrm{Mpc} / h)^{-3}]$")
ax[0].set_xlabel(r"$M_{\rm tot}~[M_\odot / h]$", fontsize="small")
# ax[1].set_ylabel(r"$\mathrm{CSiBORG} / \mathrm{Quijote}$",
# fontsize="small")
ax[0].set_xscale("log")
ax[0].set_yscale("log")
ax[1].set_ylim(0.5, 2.0)
# ax[1].set_ylim(0.5, 1.5)
# ax[1].set_yscale("log")
ax[0].legend(fontsize="small")
ax[0].legend()
fig.tight_layout(h_pad=0, w_pad=0)
for ext in ["png"] if pdf is False else ["png", "pdf"]:
@@ -556,8 +578,8 @@ if __name__ == "__main__":
if False:
plot_mass_vs_ncells(7444, pdf=False)
if False:
plot_hmf(pdf=False)
if True:
plot_hmf(pdf=True)
if False:
plot_hmf_quijote_full(pdf=False)
@@ -569,7 +591,7 @@
plot_groups=False, dmin=45, dmax=60,
plot_halos=5e13, volume_weight=True)
if True:
if False:
kind = "environment"
grid = 512
smooth_scale = 8.0