Add a shared reader (#32)

* add import

* Rename object

* Simplify how catalogs are handled

* Move functions around

* Add NPair reader

* Add counterpart Gaussian average

* Change what is returned in exp mass

* small bug

* Simplify stat calcu

* Add mptebppl
This commit is contained in:
Richard Stiskalek 2023-03-16 18:02:21 +00:00 committed by GitHub
parent 153f1c0002
commit 9b524db617
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 3428 additions and 132 deletions

View file

@ -18,4 +18,4 @@ from .make_cat import (HaloCatalogue, concatenate_clumps, clumps_pos2cell) # no
from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups, SDSS) # noqa TwoMPPGroups, SDSS) # noqa
from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa
from .summaries import (PKReader, OverlapReader, binned_resample_mean) # noqa from .summaries import (PKReader, PairOverlap, NPairsOverlap, binned_resample_mean) # noqa

View file

@ -169,7 +169,7 @@ class PKReader:
return ks, xpks return ks, xpks
class OverlapReader: class PairOverlap:
r""" r"""
A shortcut object for reading in the results of matching two simulations. A shortcut object for reading in the results of matching two simulations.
@ -182,14 +182,15 @@ class OverlapReader:
Path to the overlap. By default `None`, i.e. Path to the overlap. By default `None`, i.e.
`/mnt/extraspace/rstiskalek/csiborg/overlap/cross_{}_{}.npz`. `/mnt/extraspace/rstiskalek/csiborg/overlap/cross_{}_{}.npz`.
min_mass : float, optional min_mass : float, optional
The minimum :math:`M_{\rm tot} / M_\odot` mass. By default no Minimum :math:`M_{\rm tot} / M_\odot` mass in the reference catalogue.
threshold. By default no threshold.
max_dist : float, optional max_dist : float, optional
The maximum comoving distance of a halo. By default no upper limit. Maximum comoving distance in the reference catalogue. By default upper
limit.
""" """
_cat0 = None _cat0 = None
_catx = None _catx = None
_refmask = None _data = None
def __init__(self, cat0, catx, fskel=None, min_mass=None, max_dist=None): def __init__(self, cat0, catx, fskel=None, min_mass=None, max_dist=None):
self._cat0 = cat0 self._cat0 = cat0
@ -229,28 +230,6 @@ class OverlapReader:
self._make_refmask(min_mass, max_dist) self._make_refmask(min_mass, max_dist)
@property
def cat0(self):
"""
The reference halo catalogue.
Returns
-------
cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
"""
return self._cat0
@property
def catx(self):
"""
The cross halo catalogue.
Returns
-------
catx : :py:class:`csiborgtools.read.HaloCatalogue`
"""
return self._catx
@staticmethod @staticmethod
def _invert_match(match_indxs, overlap, cross_size): def _invert_match(match_indxs, overlap, cross_size):
""" """
@ -322,58 +301,37 @@ class OverlapReader:
# Enforce a cut on the reference catalogue # Enforce a cut on the reference catalogue
min_mass = 0 if min_mass is None else min_mass min_mass = 0 if min_mass is None else min_mass
max_dist = numpy.infty if max_dist is None else max_dist max_dist = numpy.infty if max_dist is None else max_dist
m = ((self.cat0["totpartmass"] > min_mass) m = ((self.cat0()["totpartmass"] > min_mass)
& (self.cat0["dist"] < max_dist)) & (self.cat0()["dist"] < max_dist))
# Now remove indices that are below this cut # Now remove indices that are below this cut
self._data["index"] = self._data["index"][m] self._data["index"] = self._data["index"][m]
self._data["match_indxs"] = self._data["match_indxs"][m] self._data["match_indxs"] = self._data["match_indxs"][m]
self._data["overlap"] = self._data["overlap"][m] self._data["overlap"] = self._data["overlap"][m]
self._data["refmask"] = m
self._refmask = m def summed_overlap(self):
@property
def indxs(self):
""" """
Indices of halos from the reference catalogue. Summed overlap of each halo in the reference simulation with the cross
simulation.
Returns Returns
------- -------
indxs : 1-dimensional array summed_overlap : 1-dimensional array of shape `(nhalos, )`
""" """
return self._data["index"] return numpy.array([numpy.sum(cross) for cross in self["overlap"]])
@property def prob_nomatch(self):
def match_indxs(self):
""" """
Indices of halos from the cross catalogue. Probability of no match for each halo in the reference simulation with
the cross simulation. Defined as a product of 1 - overlap with other
halos.
Returns Returns
------- -------
match_indxs : array of 1-dimensional arrays of shape `(nhalos, )` prob_nomatch : 1-dimensional array of shape `(nhalos, )`
""" """
return self._data["match_indxs"] return numpy.array(
[numpy.product(1 - overlap) for overlap in self["overlap"]])
@property
def overlap(self):
"""
Pair overlap of halos between the reference and cross simulations.
Returns
-------
overlap : array of 1-dimensional arrays of shape `(nhalos, )`
"""
return self._data["overlap"]
@property
def refmask(self):
"""
Mask of the reference catalogue to match the calculated overlaps.
Returns
-------
refmask : 1-dimensional boolean array
"""
return self._refmask
def dist(self, in_initial, norm_kind=None): def dist(self, in_initial, norm_kind=None):
""" """
@ -396,27 +354,27 @@ class OverlapReader:
or norm_kind in ("r200", "ref_patch", "sum_patch")) or norm_kind in ("r200", "ref_patch", "sum_patch"))
# Get positions either in the initial or final snapshot # Get positions either in the initial or final snapshot
if in_initial: if in_initial:
pos0, posx = self.cat0.positions0, self.catx.positions0 pos0, posx = self.cat0().positions0, self.catx().positions0
else: else:
pos0, posx = self.cat0.positions, self.catx.positions pos0, posx = self.cat0().positions, self.catx().positions
pos0 = pos0[self.refmask, :] # Apply the reference catalogue mask pos0 = pos0[self["refmask"], :] # Apply the reference catalogue mask
# Get the normalisation array if applicable # Get the normalisation array if applicable
if norm_kind == "r200": if norm_kind == "r200":
norm = self.cat0["r200"][self.refmask] norm = self.cat0("r200")
if norm_kind == "ref_patch": if norm_kind == "ref_patch":
norm = self.cat0["lagpatch"][self.refmask] norm = self.cat0("lagpatch")
if norm_kind == "sum_patch": if norm_kind == "sum_patch":
patch0 = self.cat0["lagpatch"][self.refmask] patch0 = self.cat0("lagpatch")
patchx = self.catx["lagpatch"] patchx = self.catx("lagpatch")
norm = [None] * self.indxs.size norm = [None] * len(self)
for i, ind in enumerate(self.match_indxs): for i, ind in enumerate(self["match_indxs"]):
norm[i] = patch0[i] + patchx[ind] norm[i] = patch0[i] + patchx[ind]
norm = numpy.array(norm, dtype=object) norm = numpy.array(norm, dtype=object)
# Now calculate distances # Now calculate distances
dist = [None] * self.indxs.size dist = [None] * len(self)
for i, ind in enumerate(self.match_indxs): for i, ind in enumerate(self["match_indxs"]):
# n refers to the reference halo catalogue position # n refers to the reference halo catalogue position
dist[i] = numpy.linalg.norm(pos0[i, :] - posx[ind, :], axis=1) dist[i] = numpy.linalg.norm(pos0[i, :] - posx[ind, :], axis=1)
@ -445,11 +403,10 @@ class OverlapReader:
------- -------
ratio : array of 1-dimensional arrays of shape `(nhalos, )` ratio : array of 1-dimensional arrays of shape `(nhalos, )`
""" """
mass0 = self.cat0[mass_kind][self.refmask] mass0, massx = self.cat0(mass_kind), self.catx(mass_kind)
massx = self.catx[mass_kind]
ratio = [None] * self.indxs.size ratio = [None] * len(self)
for i, ind in enumerate(self.match_indxs): for i, ind in enumerate(self["match_indxs"]):
ratio[i] = mass0[i] / massx[ind] ratio[i] = mass0[i] / massx[ind]
if in_log: if in_log:
ratio[i] = numpy.log10(ratio[i]) ratio[i] = numpy.log10(ratio[i])
@ -457,52 +414,7 @@ class OverlapReader:
ratio[i] = numpy.abs(ratio[i]) ratio[i] = numpy.abs(ratio[i])
return numpy.array(ratio, dtype=object) return numpy.array(ratio, dtype=object)
def summed_overlap(self): def counterpart_mass(self, overlap_threshold=0., in_log=False,
"""
Summed overlap of each halo in the reference simulation with the cross
simulation.
Returns
-------
summed_overlap : 1-dimensional array of shape `(nhalos, )`
"""
return numpy.array([numpy.sum(cross) for cross in self.overlap])
def copy_per_match(self, par):
"""
Make an array like `self.match_indxs` where each of its element is an
equal value array of the pair clump property from the reference
catalogue.
Parameters
----------
par : str
Property to be copied over.
Returns
-------
out : 1-dimensional array of shape `(nhalos, )`
"""
vals = self.cat0[par][self.refmask]
out = [None] * self.indxs.size
for i, ind in enumerate(self.match_indxs):
out[i] = numpy.ones(ind.size) * vals[i]
return numpy.array(out, dtype=object)
def prob_nomatch(self):
"""
Probability of no match for each halo in the reference simulation with
the cross simulation. Defined as a product of 1 - overlap with other
halos.
Returns
-------
out : 1-dimensional array of shape `(nhalos, )`
"""
return numpy.array(
[numpy.product(1 - overlap) for overlap in self.overlap])
def expected_counterpart_mass(self, overlap_threshold=0., in_log=False,
mass_kind="totpartmass"): mass_kind="totpartmass"):
""" """
Calculate the expected counterpart mass of each halo in the reference Calculate the expected counterpart mass of each halo in the reference
@ -525,14 +437,13 @@ class OverlapReader:
------- -------
mean, std : 1-dimensional arrays of shape `(nhalos, )` mean, std : 1-dimensional arrays of shape `(nhalos, )`
""" """
nhalos = self.indxs.size mean = numpy.full(len(self), numpy.nan, dtype=numpy.float32)
mean = numpy.full(nhalos, numpy.nan) # Preallocate output arrays std = numpy.full(len(self), numpy.nan, dtype=numpy.float32)
std = numpy.full(nhalos, numpy.nan)
massx = self.catx[mass_kind] # Create references to the arrays here massx = self.catx(mass_kind) # Create references to the arrays here
overlap = self.overlap # to speed up the loop below. overlap = self["overlap"] # to speed up the loop below.
for i, match_ind in enumerate(self.match_indxs): for i, match_ind in enumerate(self["match_indxs"]):
# Skip if no match # Skip if no match
if match_ind.size == 0: if match_ind.size == 0:
continue continue
@ -562,6 +473,215 @@ class OverlapReader:
return mean, std return mean, std
def copy_per_match(self, par):
"""
Make an array like `self.match_indxs` where each of its element is an
equal value array of the pair clump property from the reference
catalogue.
Parameters
----------
par : str
Property to be copied over.
Returns
-------
out : 1-dimensional array of shape `(nhalos, )`
"""
vals = self.cat0(par)
out = [None] * len(self)
for i, ind in enumerate(self["match_indxs"]):
out[i] = numpy.ones(ind.size) * vals[i]
return numpy.array(out, dtype=object)
def cat0(self, key=None, index=None):
"""
Return the reference halo catalogue if `key` is `None`, otherwise
return values from the reference catalogue and apply `refmask`.
Parameters
----------
key : str, optional
Key to get. If `None` return the whole catalogue.
index : int or array, optional
Indices to get, if `None` return all.
Returns
-------
out : :py:class:`csiborgtools.read.HaloCatalogue` or array
"""
if key is None:
return self._cat0
out = self._cat0[key][self["refmask"]]
return out if index is None else out[index]
def catx(self, key=None, index=None):
"""
Return the cross halo catalogue if `key` is `None`, otherwise
return values from the reference catalogue.
Parameters
----------
key : str, optional
Key to get. If `None` return the whole catalogue.
index : int or array, optional
Indices to get, if `None` return all.
Returns
-------
out : :py:class:`csiborgtools.read.HaloCatalogue` or array
"""
if key is None:
return self._catx
out = self._catx[key]
return out if index is None else out[index]
def __getitem__(self, key):
"""
Must be one of `index`, `match_indxs`, `overlap` or `refmask`.
"""
assert key in ("index", "match_indxs", "overlap", "refmask")
return self._data[key]
def __len__(self):
return self["index"].size
class NPairsOverlap:
r"""
A shortcut object for reading in the results of matching a reference
simulation with many cross simulations.
Parameters
----------
cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
Reference simulation halo catalogue.
catxs : list of :py:class:`csiborgtools.read.HaloCatalogue`
List of cross simulation halo catalogues.
fskel : str, optional
Path to the overlap. By default `None`, i.e.
`/mnt/extraspace/rstiskalek/csiborg/overlap/cross_{}_{}.npz`.
min_mass : float, optional
Minimum :math:`M_{\rm tot} / M_\odot` mass in the reference catalogue.
By default no threshold.
max_dist : float, optional
Maximum comoving distance in the reference catalogue. By default upper
limit.
"""
_pairs = None
def __init__(self, cat0, catxs, fskel=None, min_mass=None, max_dist=None):
self._pairs = [PairOverlap(cat0, catx, fskel=fskel, min_mass=min_mass,
max_dist=max_dist) for catx in catxs]
def summed_overlap(self, verbose=False):
"""
Summed overlap of each halo in the reference simulation with the cross
simulations.
Parameters
----------
verbose : bool, optional
Returns
-------
summed_overlap : 2-dimensional array of shape `(nhalos, ncatxs)`
"""
out = [None] * len(self)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
out[i] = pair.summed_overlap()
return numpy.vstack(out).T
def prob_nomatch(self, verbose=False):
"""
Probability of no match for each halo in the reference simulation with
the cross simulation.
Parameters
----------
verbose : bool, optional
Returns
-------
prob_nomatch : 2-dimensional array of shape `(nhalos, ncatxs)`
"""
out = [None] * len(self)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
out[i] = pair.prob_nomatch()
return numpy.vstack(out).T
def counterpart_mass(self, overlap_threshold=0., in_log=False,
mass_kind="totpartmass", return_full=True,
verbose=False):
"""
Calculate the expected counterpart mass of each halo in the reference
simulation from the crossed simulation.
Parameters
-----------
overlap_threshold : float, optional
Minimum overlap required for a halo to be considered a match. By
default 0.0, i.e. no threshold.
in_log : bool, optional
Whether to calculate the expectation value in log space. By default
`False`.
mass_kind : str, optional
The mass kind whose ratio is to be calculated. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
return_full : bool, optional
Whether to return the full results of matching each pair or
calculate summary statistics by Gaussian averaging.
verbose : bool, optional
Verbosity flag. By default `False`.
Returns
-------
mu, std : 1-dimensional arrays of shape `(nhalos,)`
Summary expected mass and standard deviation from all cross
simulations.
mus, stds : 2-dimensional arrays of shape `(nhalos, ncatx)`, optional
Expected mass and standard deviation from each cross simulation.
Returned only if `return_full` is `True`.
"""
mus, stds = [None] * len(self), [None] * len(self)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
mus[i], stds[i] = pair.counterpart_mass(
overlap_threshold=overlap_threshold, in_log=in_log,
mass_kind=mass_kind)
mus, stds = numpy.vstack(mus).T, numpy.vstack(stds).T
probmatch = 1 - self.prob_nomatch() # Prob of > 0 matches
# Normalise it for weighted sums etc.
norm_probmatch = numpy.apply_along_axis(
lambda x: x / numpy.sum(x), axis=1, arr=probmatch)
# Mean and standard deviation of weighted stacked Gaussians
mu = numpy.sum(norm_probmatch * mus, axis=1)
std = numpy.sum(norm_probmatch * (mus**2 + stds**2), axis=1) - mu**2
if return_full:
return mu, std, mus, stds
return mu, std
@property
def pairs(self):
"""
List of `PairOverlap` objects in this reader.
Returns
-------
pairs : list of :py:class:`csiborgtools.read.PairOverlap`
"""
return self._pairs
@property
def cat0(self):
return self.pairs[0].cat0 # All pairs have the same ref catalogue
def __len__(self):
return len(self.pairs)
def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42): def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42):
""" """

File diff suppressed because one or more lines are too long