mirror of
https://github.com/Richard-Sti/csiborgtools.git
synced 2025-04-18 04:10:53 +00:00
* Add seperate autoknn script & config file * edit ics * Edit submission script * Add threshold values * Edit batch sizign * Remove print * edit * Rename files * Rename * Update nb * edit runs * Edit submit * Add median threshold * add new auto reader * editt submit * edit submit * Edit submit * Add mean prk * Edit runs * Remove correlation file * Move split to clutering * Add init * Remove import * Add the file * Add correlation reading * Edit scripts * Add below and above median permutation for cross * Update imports * Move rvs_in_sphere * Create utils * Split * Add import * Add normalised marks * Add import * Edit readme * Clean up submission file * Stop tracking submit files * Update gitignore * Add poisson field analytical expression * Add abstract generators * Add generators * Pass in the generator * Add a check for if there are any files * Start saving average density * Update nb * Update readme * Update units * Edit jobs * Update submits * Update reader * Add random crossing * Update crossing script * Add crossing with random * Update readme * Update notebook
1012 lines
35 KiB
Python
1012 lines
35 KiB
Python
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
|
|
# This program is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by the
|
|
# Free Software Foundation; either version 3 of the License, or (at your
|
|
# option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
|
# Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
"""
|
|
Tools for summarising various results.
|
|
"""
|
|
from os.path import (join, isfile)
|
|
from glob import glob
|
|
import numpy
|
|
from scipy.special import factorial
|
|
import joblib
|
|
from tqdm import tqdm
|
|
|
|
|
|
###############################################################################
|
|
# PKReader #
|
|
###############################################################################
|
|
|
|
|
|
class PKReader:
    """
    A shortcut object for reading in the power spectrum files.

    Parameters
    ----------
    ic_ids : list of int
        IC IDs to be read.
    hw : float
        Box half-width.
    fskel : str, optional
        The skeleton path. By default
        `/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p`, where
        the formatting options are `ic0, ic1, hw`.
    dtype : dtype, optional
        Output precision. By default `numpy.float32`.
    """
    def __init__(self, ic_ids, hw, fskel=None, dtype=numpy.float32):
        self.ic_ids = ic_ids
        self.hw = hw
        if fskel is None:
            fskel = "/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p"
        self.fskel = fskel
        self.dtype = dtype

    @staticmethod
    def _set_klim(kmin, kmax):
        """
        Set limits on the wavenumber to 0 and infinity if `None`s provided.

        Parameters
        ----------
        kmin : float or None
            The minimum wavenumber.
        kmax : float or None
            The maximum wavenumber.

        Returns
        -------
        kmin, kmax : floats
        """
        if kmin is None:
            kmin = 0
        if kmax is None:
            # `numpy.infty` was removed in NumPy 2.0, `numpy.inf` always works
            kmax = numpy.inf
        return kmin, kmax

    def read_autos(self, kmin=None, kmax=None):
        """
        Read in the autocorrelation power spectra.

        Parameters
        ----------
        kmin : float, optional
            The minimum wavenumber. By default `None`, i.e. 0.
        kmax : float, optional
            The maximum wavenumber. By default `None`, i.e. infinity.

        Returns
        -------
        ks : 1-dimensional array
            Array of wavenumbers strictly between `kmin` and `kmax`.
        pks : 2-dimensional array of shape `(len(self.ic_ids), ks.size)`
            Autocorrelation of each simulation.
        """
        kmin, kmax = self._set_klim(kmin, kmax)
        ks, pks, sel = None, None, None
        for i, nsim in enumerate(self.ic_ids):
            pk = joblib.load(self.fskel.format(nsim, nsim, self.hw))
            # The wavenumber cut is derived from the first file only and then
            # reused for all the remaining simulations.
            if i == 0:
                x = pk.k3D
                sel = (kmin < x) & (x < kmax)
                ks = x[sel].astype(self.dtype)
                pks = numpy.full((len(self.ic_ids), numpy.sum(sel)), numpy.nan,
                                 dtype=self.dtype)
            pks[i, :] = pk.Pk[sel, 0, 0]

        return ks, pks

    def read_single_cross(self, ic0, ic1, kmin=None, kmax=None):
        """
        Read cross-correlation between IC IDs `ic0` and `ic1`.

        Parameters
        ----------
        ic0 : int
            The first IC ID.
        ic1 : int
            The second IC ID.
        kmin : float, optional
            The minimum wavenumber. By default `None`, i.e. 0.
        kmax : float, optional
            The maximum wavenumber. By default `None`, i.e. infinity.

        Returns
        -------
        ks : 1-dimensional array
            Array of wavenumbers.
        xpk : 1-dimensional array of shape `(ks.size, )`
            Cross-correlation.

        Raises
        ------
        ValueError
            If `ic0` and `ic1` are the same.
        """
        if ic0 == ic1:
            raise ValueError("Requested cross correlation for the same ICs.")
        kmin, kmax = self._set_klim(kmin, kmax)
        # The file name is formatted with the smaller IC ID first.
        ics = (ic1, ic0) if ic0 > ic1 else (ic0, ic1)

        pk = joblib.load(self.fskel.format(*ics, self.hw))
        ks = pk.k3D
        sel = (kmin < ks) & (ks < kmax)
        ks = ks[sel].astype(self.dtype)
        xpk = pk.XPk[sel, 0, 0].astype(self.dtype)

        return ks, xpk

    def read_cross(self, kmin=None, kmax=None):
        """
        Read cross-correlation between all IC pairs.

        Parameters
        ----------
        kmin : float, optional
            The minimum wavenumber. By default `None`, i.e. 0.
        kmax : float, optional
            The maximum wavenumber. By default `None`, i.e. infinity.

        Returns
        -------
        ks : 1-dimensional array
            Array of wavenumbers.
        xpks : 3-dimensional array of shape (`nics, nics - 1, ks.size`)
            Cross-correlations. The first axis is the IC that is being
            cross-correlated with the remaining ICs, which are indexed along
            the second axis in the order of `self.ic_ids`.
        """
        nics = len(self.ic_ids)

        ks, xpks = None, None
        for i, ic0 in enumerate(tqdm(self.ic_ids)):
            k = 0  # Position of `ic1` among the ICs other than `ic0`
            for ic1 in self.ic_ids:
                # We don't want the auto-correlation
                if ic0 == ic1:
                    continue
                x, y = self.read_single_cross(ic0, ic1, kmin, kmax)
                # If in the first iteration pre-allocate arrays
                if ks is None:
                    ks = x
                    xpks = numpy.full((nics, nics - 1, ks.size), numpy.nan,
                                      dtype=self.dtype)
                xpks[i, k, :] = y
                k += 1

        return ks, xpks
|
|
|
|
|
|
###############################################################################
|
|
#                              kNNCDFReader                                   #
|
|
###############################################################################
|
|
|
|
|
|
class kNNCDFReader:
    """
    Shortcut object to read in the kNN CDF data.
    """
    def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
        """
        Read the auto- or cross-correlation kNN-CDF data. Infers the type from
        the data files.

        Parameters
        ----------
        run : str
            Run ID to read in.
        folder : str
            Path to the folder where the auto-correlation kNN-CDF is stored.
        rmin : float, optional
            Minimum separation. If `None` only separations `rs > 0` are kept.
        rmax : float, optional
            Maximum separation. By default ignored.
        to_clip : bool, optional
            Whether to clip the auto-correlation CDF. Ignored for
            cross-correlation.

        Returns
        -------
        rs : 1-dimensional array of shape `(neval, )`
            Separations where the CDF is evaluated.
        out : 3-dimensional array of shape `(len(files), len(ks), neval)`
            Array of CDFs or cross-correlations.

        Raises
        ------
        RuntimeError
            If no file in `folder` matches the run.
        """
        suffix = run + ".p"
        files = [f for f in glob(join(folder, "*")) if suffix in f]
        if not files:
            raise RuntimeError("No files found for run `{}`.".format(run))

        for i, file in enumerate(files):
            data = joblib.load(file)
            if i == 0:  # Initialise the array
                # The presence of the `corr` key marks a cross-correlation
                isauto = "corr" not in data
                kind = "cdf" if isauto else "corr"
                out = numpy.full((len(files), *data[kind].shape), numpy.nan,
                                 dtype=numpy.float32)
                rs = data["rs"]
            out[i, ...] = data[kind]

            if isauto and to_clip:
                out[i, ...] = self.clipped_cdf(out[i, ...])

        # Apply separation cuts
        mask = (rs >= rmin if rmin is not None else rs > 0)
        mask &= (rs <= rmax if rmax is not None else rs < numpy.inf)
        rs = rs[mask]
        out = out[..., mask]

        return rs, out

    @staticmethod
    def peaked_cdf(cdf, make_copy=True):
        """
        Transform the CDF to a peaked CDF, i.e. replace values above 0.5 with
        one minus the value.

        Parameters
        ----------
        cdf : 1- or 2- or 3-dimensional array
            CDF to be transformed along the last axis.
        make_copy : bool, optional
            Whether to make a copy of the CDF before transforming it to avoid
            overwriting it.

        Returns
        -------
        peaked_cdf : 1- or 2- or 3-dimensional array
        """
        cdf = numpy.copy(cdf) if make_copy else cdf
        upper = cdf > 0.5
        cdf[upper] = 1 - cdf[upper]
        return cdf

    @staticmethod
    def clipped_cdf(cdf):
        """
        Clip the CDF, setting values where the CDF is either 0 or after the
        first occurence of 1 to `numpy.nan`. The input is not modified.

        Parameters
        ----------
        cdf : 2- or 3-dimensional array
            CDF to be clipped.

        Returns
        -------
        clipped_cdf : 2- or 3-dimensional array
            The clipped CDF, of the same shape as the input.
        """
        cdf = numpy.copy(cdf)
        # Remember whether a leading axis was added so that only then it is
        # removed again; a 3-dimensional input of shape `(1, k, n)` must keep
        # its shape.
        added_axis = cdf.ndim == 2
        if added_axis:
            cdf = cdf.reshape(1, *cdf.shape)
        nknns, nneighbours, __ = cdf.shape

        for i in range(nknns):
            for k in range(nneighbours):
                ns = numpy.where(cdf[i, k, :] == 1.)[0]
                # Keep the first 1, blank everything from the second one on
                if ns.size > 1:
                    cdf[i, k, ns[1]:] = numpy.nan
        cdf[cdf == 0] = numpy.nan

        return cdf[0, ...] if added_axis else cdf

    @staticmethod
    def prob_k(cdf):
        r"""
        Calculate the PDF that a spherical volume of radius :math:`r` contains
        :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`.

        Parameters
        ----------
        cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
            Array of CDFs

        Returns
        -------
        pk : 3-dimensional array of shape `(len(files), len(ks)- 1, len(rs))`
            The first entry along the second axis is `1 - cdf[..., 0, :]`, the
            following ones are differences of consecutive CDFs.
        """
        out = numpy.full_like(cdf[..., 1:, :], numpy.nan, dtype=numpy.float32)
        out[..., 0, :] = 1 - cdf[..., 0, :]
        # out[k] = cdf[k - 1] - cdf[k] for k = 1, ..., len(ks) - 2
        out[..., 1:, :] = cdf[..., :-2, :] - cdf[..., 1:-1, :]
        return out

    def mean_prob_k(self, cdf):
        r"""
        Calculate the mean PDF that a spherical volume of radius :math:`r`
        contains :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`,
        averaged over the IC realisations.

        Parameters
        ----------
        cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
            Array of CDFs

        Returns
        -------
        out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
            Mean :math:`P(k | V = 4 \pi r^3 / 3)` and its standard deviation,
            stored along the last dimension, respectively.
        """
        pk = self.prob_k(cdf)
        return numpy.stack([numpy.mean(pk, axis=0), numpy.std(pk, axis=0)],
                           axis=-1)

    def poisson_prob_k(self, rs, k, ndensity):
        r"""
        Calculate the analytical PDF that a spherical volume of
        radius :math:`r` contains :math:`k` objects, i.e.
        :math:`P(k | V = 4 \pi r^3 / 3)`, assuming a Poisson field (uniform
        distribution of points).

        Parameters
        ----------
        rs : 1-dimensional array
            Array of separations.
        k : int
            Number of objects.
        ndensity : float
            Number density of objects.

        Returns
        -------
        pk : 1-dimensional array
            The PDF that a spherical volume of radius :math:`r` contains
            :math:`k` objects.
        """
        V = 4 * numpy.pi / 3 * rs**3
        return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)

    @staticmethod
    def cross_files(ic, folder):
        """
        Return the file paths corresponding to the cross-correlation of a given
        IC.

        Parameters
        ----------
        ic : int
            The desired IC.
        folder : str
            The folder containing the cross-correlation files.

        Returns
        -------
        filepath : list of str
        """
        # NOTE(review): this is a plain substring match on the full path, so
        # e.g. `ic=74` also selects files of IC 7444 and any digits in
        # `folder` can match too. Confirm the file naming before tightening.
        return [file for file in glob(join(folder, "*")) if str(ic) in file]
|
|
|
|
|
|
###############################################################################
|
|
#                       PairOverlap & NPairsOverlap                           #
|
|
###############################################################################
|
|
|
|
|
|
class PairOverlap:
    r"""
    A shortcut object for reading in the results of matching two simulations.

    Parameters
    ----------
    cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue`
        Halo catalogues corresponding to the reference and cross
        simulations.
    fskel : str, optional
        Path to the overlap. By default `None`, i.e.
        `/mnt/extraspace/rstiskalek/csiborg/overlap/cross_{}_{}.npz`.
    min_mass : float, optional
        Minimum :math:`M_{\rm tot} / M_\odot` mass in the reference catalogue.
        By default no threshold.
    max_dist : float, optional
        Maximum comoving distance in the reference catalogue. By default upper
        limit.
    """
    _cat0 = None
    _catx = None
    _data = None

    def __init__(self, cat0, catx, fskel=None, min_mass=None, max_dist=None):
        self._cat0 = cat0
        self._catx = catx

        if fskel is None:
            fskel = join("/mnt/extraspace/rstiskalek/csiborg/overlap",
                         "cross_{}_{}.npz")

        # The overlap may have been calculated with the two simulations in
        # either order, so try both file names.
        fpath = fskel.format(cat0.n_sim, catx.n_sim)
        fpath_inv = fskel.format(catx.n_sim, cat0.n_sim)
        if isfile(fpath):
            is_inverted = False
        elif isfile(fpath_inv):
            fpath = fpath_inv
            is_inverted = True
        else:
            raise FileNotFoundError(
                "No overlap file found for combination `{}` and `{}`."
                .format(cat0.n_sim, catx.n_sim))

        # NOTE: `allow_pickle=True` unpickles the object arrays in the file,
        # only load overlap files from a trusted source. The context manager
        # closes the archive once the arrays have been read.
        with numpy.load(fpath, allow_pickle=True) as d:
            ngp_overlap = d["ngp_overlap"]
            smoothed_overlap = d["smoothed_overlap"]
            match_indxs = d["match_indxs"]
            indxs = d["cross_indxs"] if is_inverted else d["ref_indxs"]

        if is_inverted:
            # Invert the matches
            match_indxs, ngp_overlap, smoothed_overlap = self._invert_match(
                match_indxs, ngp_overlap, smoothed_overlap, indxs.size,)

        self._data = {
            "index": indxs,
            "match_indxs": match_indxs,
            "ngp_overlap": ngp_overlap,
            "smoothed_overlap": smoothed_overlap,
            }

        self._make_refmask(min_mass, max_dist)

    @staticmethod
    def _ragged(items):
        """
        Pack a sequence of arrays into a 1-dimensional object array with one
        array per element. Unlike `numpy.array(items, dtype=object)` this does
        not collapse into a 2-dimensional array when all the arrays happen to
        have the same length.
        """
        out = numpy.empty(len(items), dtype=object)
        for i, item in enumerate(items):
            out[i] = item
        return out

    @staticmethod
    def _invert_match(match_indxs, ngp_overlap, smoothed_overlap, cross_size):
        """
        Invert reference and cross matching, possible since the overlap
        definition is symmetric.

        Parameters
        ----------
        match_indxs : array of 1-dimensional arrays
            Indices of halos from the original cross catalogue matched to the
            reference catalogue.
        ngp_overlap : array of 1-dimensional arrays
            NGP pair overlap of halos between the original reference and cross
            simulations.
        smoothed_overlap : array of 1-dimensional arrays
            Smoothed pair overlap of halos between the original reference and
            cross simulations.
        cross_size : int
            The size of the cross catalogue.

        Returns
        -------
        inv_match_indxs : array of 1-dimensional arrays
            The inverted match indices.
        inv_ngp_overlap : array of 1-dimensional arrays
            The corresponding NGP overlaps to `inv_match_indxs`.
        inv_smoothed_overlap : array of 1-dimensional arrays
            The corresponding smoothed overlaps to `inv_match_indxs`.
        """
        # 1. Invert the match. Each reference halo has a list of counterparts
        # so loop over those to each counterpart assign a reference halo
        # and at the same time also add the overlaps
        inv_match_indxs = [[] for __ in range(cross_size)]
        inv_ngp_overlap = [[] for __ in range(cross_size)]
        inv_smoothed_overlap = [[] for __ in range(cross_size)]
        for ref_id in range(match_indxs.size):
            matches = zip(match_indxs[ref_id], ngp_overlap[ref_id],
                          smoothed_overlap[ref_id])
            for cross_id, ngp_cross, smoothed_cross in matches:
                inv_match_indxs[cross_id].append(ref_id)
                inv_ngp_overlap[cross_id].append(ngp_cross)
                inv_smoothed_overlap[cross_id].append(smoothed_cross)

        # 2. Convert the cross matches and overlaps to proper numpy arrays
        # and ensure that the overlaps are ordered (decreasing NGP overlap).
        for n in range(cross_size):
            indxs = numpy.asanyarray(inv_match_indxs[n], dtype=numpy.int32)
            ngp = numpy.asanyarray(inv_ngp_overlap[n], dtype=numpy.float32)
            smoothed = numpy.asanyarray(inv_smoothed_overlap[n],
                                        dtype=numpy.float32)

            ordering = numpy.argsort(ngp)[::-1]
            inv_match_indxs[n] = indxs[ordering]
            inv_ngp_overlap[n] = ngp[ordering]
            inv_smoothed_overlap[n] = smoothed[ordering]

        # Always 1-dimensional object arrays, even if every halo has the same
        # number of matches, so that the index arrays keep their int dtype.
        return (PairOverlap._ragged(inv_match_indxs),
                PairOverlap._ragged(inv_ngp_overlap),
                PairOverlap._ragged(inv_smoothed_overlap))

    def _make_refmask(self, min_mass, max_dist):
        r"""
        Create a mask for the reference catalogue that accounts for the mass
        and distance cuts. Note that *no* masking is applied to the cross
        catalogue. The mask is applied to the stored data and saved under the
        `refmask` key.

        Parameters
        ----------
        min_mass : float, optional
            The minimum :math:`M_{\rm tot} / M_\odot` mass.
        max_dist : float, optional
            The maximum comoving distance of a halo.

        Returns
        -------
        None
        """
        # Enforce a cut on the reference catalogue
        min_mass = 0 if min_mass is None else min_mass
        max_dist = numpy.inf if max_dist is None else max_dist
        m = ((self.cat0()["totpartmass"] > min_mass)
             & (self.cat0()["dist"] < max_dist))
        # Now remove indices that are below this cut
        for p in ("index", "match_indxs", "ngp_overlap", "smoothed_overlap"):
            self._data[p] = self._data[p][m]

        self._data["refmask"] = m

    def overlap(self, from_smoothed):
        """
        Pair overlap of matched halos between the reference and cross
        simulations.

        Parameters
        ----------
        from_smoothed : bool
            Whether to use the smoothed overlap.

        Returns
        -------
        overlap : 1-dimensional array of arrays
        """
        if from_smoothed:
            return self["smoothed_overlap"]
        return self["ngp_overlap"]

    def summed_overlap(self, from_smoothed):
        """
        Summed overlap of each halo in the reference simulation with the cross
        simulation.

        Parameters
        ----------
        from_smoothed : bool
            Whether to use the smoothed overlap or not.

        Returns
        -------
        summed_overlap : 1-dimensional array of shape `(nhalos, )`
        """
        overlap = self.overlap(from_smoothed)
        return numpy.array([numpy.sum(cross) for cross in overlap])

    def prob_nomatch(self, from_smoothed):
        """
        Probability of no match for each halo in the reference simulation with
        the cross simulation. Defined as a product of 1 - overlap with other
        halos.

        Parameters
        ----------
        from_smoothed : bool
            Whether to use the smoothed overlap or not.

        Returns
        -------
        prob_nomatch : 1-dimensional array of shape `(nhalos, )`
        """
        overlap = self.overlap(from_smoothed)
        # `numpy.product` was removed in NumPy 2.0, `numpy.prod` is equivalent
        return numpy.array([numpy.prod(1 - cross) for cross in overlap])

    def dist(self, in_initial, norm_kind=None):
        """
        Pair distances of matched halos between the reference and cross
        simulations.

        Parameters
        ----------
        in_initial : bool
            Whether to calculate separation in the initial or final snapshot.
        norm_kind : str, optional
            The kind of normalisation to apply to the distances. Can be `r200`,
            `ref_patch` or `sum_patch`.

        Returns
        -------
        dist : array of 1-dimensional arrays of shape `(nhalos, )`
        """
        assert (norm_kind is None
                or norm_kind in ("r200", "ref_patch", "sum_patch"))
        # Get positions either in the initial or final snapshot
        if in_initial:
            pos0, posx = self.cat0().positions0, self.catx().positions0
        else:
            pos0, posx = self.cat0().positions, self.catx().positions
        pos0 = pos0[self["refmask"], :]  # Apply the reference catalogue mask

        # Get the normalisation if applicable. For `sum_patch` it is kept as
        # a list with one array per reference halo.
        norm = None
        if norm_kind == "r200":
            norm = self.cat0("r200")
        elif norm_kind == "ref_patch":
            norm = self.cat0("lagpatch")
        elif norm_kind == "sum_patch":
            patch0 = self.cat0("lagpatch")
            patchx = self.catx("lagpatch")
            norm = [patch0[i] + patchx[ind]
                    for i, ind in enumerate(self["match_indxs"])]

        # Now calculate distances
        dist = [None] * len(self)
        for i, ind in enumerate(self["match_indxs"]):
            # i is the position in the masked reference halo catalogue
            dist[i] = numpy.linalg.norm(pos0[i, :] - posx[ind, :], axis=1)

            if norm is not None:
                dist[i] /= norm[i]

        return self._ragged(dist)

    def mass_ratio(self, mass_kind="totpartmass", in_log=True, in_abs=True):
        """
        Pair mass ratio of matched halos between the reference and cross
        simulations.

        Parameters
        ----------
        mass_kind : str, optional
            The mass kind whose ratio is to be calculated. Must be a valid
            catalogue key. By default `totpartmass`, i.e. the total particle
            mass associated with a halo.
        in_log : bool, optional
            Whether to return logarithm of the ratio. By default `True`.
        in_abs : bool, optional
            Whether to return absolute value of the ratio. By default `True`.

        Returns
        -------
        ratio : array of 1-dimensional arrays of shape `(nhalos, )`
        """
        mass0, massx = self.cat0(mass_kind), self.catx(mass_kind)

        ratio = [None] * len(self)
        for i, ind in enumerate(self["match_indxs"]):
            ratio[i] = mass0[i] / massx[ind]
            if in_log:
                ratio[i] = numpy.log10(ratio[i])
            if in_abs:
                ratio[i] = numpy.abs(ratio[i])
        return self._ragged(ratio)

    def counterpart_mass(self, from_smoothed, overlap_threshold=0.,
                         in_log=False, mass_kind="totpartmass"):
        """
        Calculate the expected counterpart mass of each halo in the reference
        simulation from the crossed simulation.

        Parameters
        -----------
        from_smoothed : bool
            Whether to use the smoothed overlap or not.
        overlap_threshold : float, optional
            Minimum overlap required for a halo to be considered a match. By
            default 0.0, i.e. no threshold.
        in_log : bool, optional
            Whether to calculate the expectation value in log space. By default
            `False`.
        mass_kind : str, optional
            The mass kind whose ratio is to be calculated. Must be a valid
            catalogue key. By default `totpartmass`, i.e. the total particle
            mass associated with a halo.

        Returns
        -------
        mean, std : 1-dimensional arrays of shape `(nhalos, )`
            Overlap-weighted mean and standard deviation. Halos without a
            match (above the threshold) are `numpy.nan`.
        """
        mean = numpy.full(len(self), numpy.nan, dtype=numpy.float32)
        std = numpy.full(len(self), numpy.nan, dtype=numpy.float32)

        massx = self.catx(mass_kind)            # Create references to speed
        overlap = self.overlap(from_smoothed)   # up the loop below

        for i, match_ind in enumerate(self["match_indxs"]):
            # Skip if no match
            if match_ind.size == 0:
                continue

            massx_ = massx[match_ind]  # Again just create references
            overlap_ = overlap[i]      # to the appropriate elements

            # Optionally apply overlap threshold
            if overlap_threshold > 0.:
                mask = overlap_ > overlap_threshold
                if not mask.any():
                    continue
                massx_ = massx_[mask]
                overlap_ = overlap_[mask]

            massx_ = numpy.log10(massx_) if in_log else massx_
            # Weighted average and *biased* standard deviation
            mean_ = numpy.average(massx_, weights=overlap_)
            std_ = numpy.average((massx_ - mean_)**2, weights=overlap_)**0.5

            # If in log, convert back to linear
            mean_ = 10**mean_ if in_log else mean_
            std_ = mean_ * std_ * numpy.log(10) if in_log else std_

            mean[i] = mean_
            std[i] = std_

        return mean, std

    def copy_per_match(self, par):
        """
        Make an array like `self.match_indxs` where each of its element is an
        equal value array of the pair clump property from the reference
        catalogue.

        Parameters
        ----------
        par : str
            Property to be copied over.

        Returns
        -------
        out : 1-dimensional array of shape `(nhalos, )`
        """
        vals = self.cat0(par)
        out = [None] * len(self)
        for i, ind in enumerate(self["match_indxs"]):
            out[i] = numpy.ones(ind.size) * vals[i]
        return self._ragged(out)

    def cat0(self, key=None, index=None):
        """
        Return the reference halo catalogue if `key` is `None`, otherwise
        return values from the reference catalogue and apply `refmask`.

        Parameters
        ----------
        key : str, optional
            Key to get. If `None` return the whole catalogue.
        index : int or array, optional
            Indices to get, if `None` return all.

        Returns
        -------
        out : :py:class:`csiborgtools.read.HaloCatalogue` or array
        """
        if key is None:
            return self._cat0
        out = self._cat0[key][self["refmask"]]
        return out if index is None else out[index]

    def catx(self, key=None, index=None):
        """
        Return the cross halo catalogue if `key` is `None`, otherwise
        return values from the cross catalogue. No mask is applied.

        Parameters
        ----------
        key : str, optional
            Key to get. If `None` return the whole catalogue.
        index : int or array, optional
            Indices to get, if `None` return all.

        Returns
        -------
        out : :py:class:`csiborgtools.read.HaloCatalogue` or array
        """
        if key is None:
            return self._catx
        out = self._catx[key]
        return out if index is None else out[index]

    def __getitem__(self, key):
        """
        Must be one of `index`, `match_indxs`, `ngp_overlap`,
        `smoothed_overlap` or `refmask`.
        """
        assert key in ("index", "match_indxs", "ngp_overlap",
                       "smoothed_overlap", "refmask")
        return self._data[key]

    def __len__(self):
        """Number of reference halos that passed the mask."""
        return self["index"].size
|
|
|
|
|
|
class NPairsOverlap:
    r"""
    A shortcut object for reading in the results of matching a reference
    simulation with many cross simulations.

    Parameters
    ----------
    cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
        Reference simulation halo catalogue.
    catxs : list of :py:class:`csiborgtools.read.HaloCatalogue`
        List of cross simulation halo catalogues.
    fskel : str, optional
        Path to the overlap. By default `None`, i.e.
        `/mnt/extraspace/rstiskalek/csiborg/overlap/cross_{}_{}.npz`.
    min_mass : float, optional
        Minimum :math:`M_{\rm tot} / M_\odot` mass in the reference catalogue.
        By default no threshold.
    max_dist : float, optional
        Maximum comoving distance in the reference catalogue. By default upper
        limit.
    """
    _pairs = None

    def __init__(self, cat0, catxs, fskel=None, min_mass=None, max_dist=None):
        self._pairs = [PairOverlap(cat0, catx, fskel=fskel, min_mass=min_mass,
                                   max_dist=max_dist) for catx in catxs]

    def summed_overlap(self, from_smoothed, verbose=False):
        """
        Summed overlap of each halo in the reference simulation with the cross
        simulations.

        Parameters
        ----------
        from_smoothed : bool
            Whether to use the smoothed overlap or not.
        verbose : bool, optional
            Verbosity flag.

        Returns
        -------
        summed_overlap : 2-dimensional array of shape `(nhalos, ncatxs)`
        """
        out = [None] * len(self)
        for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
            out[i] = pair.summed_overlap(from_smoothed)
        return numpy.vstack(out).T

    def prob_nomatch(self, from_smoothed, verbose=False):
        """
        Probability of no match for each halo in the reference simulation with
        the cross simulation.

        Parameters
        ----------
        from_smoothed : bool
            Whether to use the smoothed overlap or not.
        verbose : bool, optional
            Verbosity flag.

        Returns
        -------
        prob_nomatch : 2-dimensional array of shape `(nhalos, ncatxs)`
        """
        out = [None] * len(self)
        for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
            out[i] = pair.prob_nomatch(from_smoothed)
        return numpy.vstack(out).T

    def counterpart_mass(self, from_smoothed, overlap_threshold=0.,
                         in_log=False, mass_kind="totpartmass",
                         return_full=True, verbose=False):
        """
        Calculate the expected counterpart mass of each halo in the reference
        simulation from the crossed simulation.

        Cross simulations in which a halo has no counterpart (its per-pair
        mean or standard deviation is not finite) are left out of that halo's
        weighted average. A halo without a counterpart in any cross simulation
        gets `numpy.nan`.

        Parameters
        -----------
        from_smoothed : bool
            Whether to use the smoothed overlap or not.
        overlap_threshold : float, optional
            Minimum overlap required for a halo to be considered a match. By
            default 0.0, i.e. no threshold.
        in_log : bool, optional
            Whether to calculate the expectation value in log space. By default
            `False`.
        mass_kind : str, optional
            The mass kind whose ratio is to be calculated. Must be a valid
            catalogue key. By default `totpartmass`, i.e. the total particle
            mass associated with a halo.
        return_full : bool, optional
            Whether to additionally return the full results of matching each
            pair. By default `True`.
        verbose : bool, optional
            Verbosity flag. By default `False`.

        Returns
        -------
        mu, std : 1-dimensional arrays of shape `(nhalos,)`
            Summary expected mass and standard deviation from all cross
            simulations.
        mus, stds : 2-dimensional arrays of shape `(nhalos, ncatx)`, optional
            Expected mass and standard deviation from each cross simulation.
            Returned only if `return_full` is `True`.
        """
        mus, stds = [None] * len(self), [None] * len(self)
        for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
            mus[i], stds[i] = pair.counterpart_mass(
                from_smoothed=from_smoothed,
                overlap_threshold=overlap_threshold, in_log=in_log,
                mass_kind=mass_kind)
        mus, stds = numpy.vstack(mus).T, numpy.vstack(stds).T

        # Weights are the probabilities of > 0 matches. Entries without a
        # counterpart are NaN and `0 * NaN` would poison the sums below, so
        # give them zero weight and replace them by 0 inside the sums.
        valid = numpy.isfinite(mus) & numpy.isfinite(stds)
        weights = 1 - self.prob_nomatch(from_smoothed)
        weights = numpy.where(valid, weights, 0.)
        # Normalise it for weighted sums etc. A halo with no valid entry has
        # 0 / 0 = NaN weights and hence NaN summary statistics.
        with numpy.errstate(divide="ignore", invalid="ignore"):
            weights = weights / numpy.sum(weights, axis=1, keepdims=True)

        # Mean and standard deviation of weighted stacked Gaussians
        mu = numpy.sum(weights * numpy.where(valid, mus, 0.), axis=1)
        second = numpy.where(valid, mus**2 + stds**2, 0.)
        var = numpy.sum(weights * second, axis=1) - mu**2
        # Round-off may push a vanishing variance slightly below zero
        std = numpy.maximum(var, 0.)**0.5

        if return_full:
            return mu, std, mus, stds
        return mu, std

    @property
    def pairs(self):
        """
        List of `PairOverlap` objects in this reader.

        Returns
        -------
        pairs : list of :py:class:`csiborgtools.read.PairOverlap`
        """
        return self._pairs

    @property
    def cat0(self):
        """
        The `cat0` accessor of the first pair. This is a bound method: call
        it without arguments for the catalogue or with a key for its values.
        """
        return self.pairs[0].cat0  # All pairs have the same ref catalogue

    def __len__(self):
        """Number of reference-cross pairs."""
        return len(self.pairs)
|
|
|
|
|
|
def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42):
    """
    Calculate binned average of `y` by MC resampling. Each point is kept with
    probability `prob`.

    Parameters
    ----------
    x : 1-dimensional array
        Independent variable.
    y : 1-dimensional array
        Dependent variable.
    prob : 1-dimensional array
        Sample probability.
    bins : 1-dimensional array
        Bin edges to bin `x`. A point falls into bin `i` if
        `bins[i] < x <= bins[i + 1]`.
    nresample : int, optional
        Number of MC resamples. By default 50.
    seed : int, optional
        Random seed.

    Returns
    -------
    bin_centres : 1-dimensional array
        Bin centres.
    stat : 2-dimensional array
        Mean and its standard deviation from MC resampling. A bin is
        `numpy.nan` if it is empty or if any of its resamples kept no point.
    """
    assert (x.ndim == 1) & (x.shape == y.shape == prob.shape)

    gen = numpy.random.RandomState(seed)

    loop_stat = numpy.full(nresample, numpy.nan)      # Preallocate loop arr
    stat = numpy.full((bins.size - 1, 2), numpy.nan)  # Preallocate output

    for i in range(bins.size - 1):
        mask = (x > bins[i]) & (x <= bins[i + 1])
        nsamples = numpy.sum(mask)
        # An empty bin stays NaN. Drawing zero random numbers would not
        # advance the generator, so skipping keeps the random stream intact.
        if nsamples == 0 or nresample == 0:
            continue

        # These do not change between the resamples, select them only once
        y_bin, prob_bin = y[mask], prob[mask]

        loop_stat[:] = numpy.nan  # Clear it
        for j in range(nresample):
            kept = y_bin[gen.rand(nsamples) < prob_bin]
            # Mean of an empty selection is NaN, leave it without a warning
            if kept.size > 0:
                loop_stat[j] = numpy.mean(kept)

        stat[i, 0] = numpy.mean(loop_stat)
        stat[i, 1] = numpy.std(loop_stat)

    bin_centres = (bins[1:] + bins[:-1]) / 2

    return bin_centres, stat
|