Overlap reader (#28)

* Edit output path

* remove matching of many sims

* Add a class skeleton

* Add a setter for only nsim

* Deletei mport

* Simplify cross and delete combined halo catalogue

* Simplify reading in catalogues

* Add distance calculation

* Add mass ratio

* Clean up the submission script

* Add the summer overlap

* Docs

* Add array copying

* Add import

* Add resampling mean
This commit is contained in:
Richard Stiskalek 2023-02-08 12:05:32 +00:00 committed by GitHub
parent 8dea3da4de
commit 1c859dbdac
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 473 additions and 402 deletions

View file

@ -16,10 +16,10 @@
import numpy import numpy
from scipy.ndimage import gaussian_filter from scipy.ndimage import gaussian_filter
from tqdm import (tqdm, trange) from tqdm import (tqdm, trange)
from datetime import datetime
from astropy.coordinates import SkyCoord from astropy.coordinates import SkyCoord
from numba import jit from numba import jit
from gc import collect from ..read import (concatenate_clumps, clumps_pos2cell)
from ..read import (CombinedHaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa
def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False): def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
@ -77,58 +77,118 @@ class RealisationsMatcher:
neighbours in all remaining IC realisations that are within some mass neighbours in all remaining IC realisations that are within some mass
range of it. range of it.
Parameters Parameters
---------- ----------
cats : :py:class`csiborgtools.read.CombinedHaloCatalogue` nmult : float or int, optional
Combined halo catalogue to search. Multiple of the sum of pair initial Lagrangian patch sizes
""" within which to return neighbours. By default 1.
_cats = None dlogmass : float, optional
Tolerance on the absolute logarithmic mass difference of potential
matches. By default 2.
mass_kind : str, optional
The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`.
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default `True`.
def __init__(self, cats): """
self.cats = cats _nmult = None
_dlogmass = None
_mass_kind = None
_overlapper = None
_remove_nooverlap = None
def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass",
overlapper_kwargs={}, remove_nooverlap=True):
self.nmult = nmult
self.dlogmass = dlogmass
self.mass_kind = mass_kind
self._overlapper = ParticleOverlap(**overlapper_kwargs)
self.remove_nooverlap = remove_nooverlap
@property @property
def cats(self): def nmult(self):
""" """
Combined catalogues. Multiple of the sum of pair initial Lagrangian patch sizes within which
to return neighbours.
Returns Returns
------- -------
cats : :py:class`csiborgtools.read.CombinedHaloCatalogue` nmult : float
Combined halo catalogue.
""" """
return self._cats return self._nmult
@cats.setter @nmult.setter
def cats(self, cats): def nmult(self, nmult):
""" """Set `nmult`."""
Sets `cats`, ensures that this is a `CombinedHaloCatalogue` object. assert nmult > 0
""" self._nmult = nmult
if not isinstance(cats, CombinedHaloCatalogue):
raise TypeError("`cats` must be of type `CombinedHaloCatalogue`.")
self._cats = cats
def search_sim_indices(self, n_sim): @property
def dlogmass(self):
""" """
Return indices of all other IC realisations but of `n_sim`. Tolerance on the absolute logarithmic mass difference of potential
matches.
Parameters
----------
n_sim : int
IC realisation index of `self.cats` to be skipped.
Returns Returns
------- -------
indxs : list of int dlogmass : float
The remaining IC indices.
""" """
return [i for i in range(self.cats.N) if i != n_sim] return self._dlogmass
def _check_masskind(self, mass_kind): @dlogmass.setter
"""Check that `mass_kind` is a valid key.""" def dlogmass(self, dlogmass):
if mass_kind not in self.cats[0].keys: """Set `dlogmass`."""
raise ValueError("Invalid mass kind `{}`.".format(mass_kind)) assert dlogmass > 0
self._dlogmass = dlogmass
@property
def mass_kind(self):
"""
The mass kind whose similarity is to be checked.
Returns
-------
mass_kind : str
"""
return self._mass_kind
@mass_kind.setter
def mass_kind(self, mass_kind):
"""Set `mass_kind`."""
assert isinstance(mass_kind, str)
self._mass_kind = mass_kind
@property
def remove_nooverlap(self):
"""
Whether to remove pairs with exactly zero overlap.
Returns
-------
remove_nooverlap : bool
"""
return self._remove_nooverlap
@remove_nooverlap.setter
def remove_nooverlap(self, remove_nooverlap):
"""Set `remove_nooverlap`."""
assert isinstance(remove_nooverlap, bool)
self._remove_nooverlap = remove_nooverlap
@property
def overlapper(self):
"""
The overlapper object.
Returns
-------
overlapper : :py:class:`csiborgtools.match.ParticleOverlap`
"""
return self._overlapper
@staticmethod @staticmethod
def _cat2clump_mapping(cat_indxs, clump_indxs): def _cat2clump_mapping(cat_indxs, clump_indxs):
@ -154,264 +214,111 @@ class RealisationsMatcher:
mapping[ind2] = ind1 mapping[ind2] = ind1
return mapping return mapping
def cross_knn_position_single(self, n_sim, nmult=1, dlogmass=None, def cross(self, nsim0, nsimx, cat0, catx, overlap=False, verbose=True):
mass_kind="totpartmass", overlap=False,
overlapper_kwargs={}, select_initial=True,
remove_nooverlap=True, fast_neighbours=False,
verbose=True):
r""" r"""
Find all neighbours within a multiple of the sum of either the initial Find all neighbours whose CM separation is less than `nmult` times the
Lagrangian patch sizes (distance at :math:`z = 70`) or :math:`R_{200c}` sum of their initial Lagrangian patch sizes. Enforces that the
(distance at :math:`z = 0`). Enforces that the neighbours' are similar neighbours' are similar in mass up to `dlogmass` dex.
in mass up to `dlogmass` dex and optionally calculates their overlap.
Parameters Parameters
---------- ----------
n_sim : int nsim0, nsimx : int
Index of an IC realisation in `self.cats` whose halos' neighbours The reference and cross simulation IDs.
in the remaining simulations to search for. cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue`
nmult : float or int, optional Halo catalogues corresponding to `nsim0` and `nsimx`, respectively.
Multiple of the sum of pair Lagrangian patch sizes or
:math:`R_{200c}` within which to return neighbours. By default 1.
dlogmass : float, optional
Tolerance on mass logarithmic mass difference. By default `None`.
mass_kind : str, optional
The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
overlap : bool, optional overlap : bool, optional
Whether to calculate overlap between clumps in the initial whether to calculate overlap between clumps in the initial
snapshot. By default `False`. This operation is slow. snapshot. by default `false`. this operation is slow.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`.
select_initial : bool, optional
Whether to select nearest neighbour at the initial or final
snapshot. By default `True`, i.e. at the initial snapshot.
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default
`True`.
fast_neighbours : bool, optional
Whether to calculate neighbours within a fixed radius of each
clump. Note that this will result in missing some matches. If
`True` then `nmult` is a multiple of either the initial patch size
of :math:`R_{200c}`.
verbose : bool, optional verbose : bool, optional
Iterator verbosity flag. By default `True`. iterator verbosity flag. by default `true`.
Returns Returns
------- -------
matches : composite array indxs : 1-dimensional array of shape `(nhalos, )`
Array, indices are `(n_sims - 1, 5, n_halos, n_matches)`. The Indices of halos in the reference catalogue.
2nd axis is `index` of the neighbouring halo in its catalogue, match_indxs : 1-dimensional array of arrays
`dist` is the 3D distance to the halo whose neighbours are Indices of halo counterparts in the cross catalogue.
searched, `dist0` is the separation of the initial CMs, and overlaps : 1-dimensional array of arrays
`overlap` is the overlap over the initial clumps, respectively. Overlaps with the cross catalogue.
""" """
self._check_masskind(mass_kind) assert (nsim0 == cat0.paths.n_sim) & (nsimx == catx.paths.n_sim)
# Halo properties of this simulation
logmass = numpy.log10(self.cats[n_sim][mass_kind])
pos = self.cats[n_sim].positions # Grav potential minimum
pos0 = self.cats[n_sim].positions0 # CM positions
if select_initial:
R = self.cats[n_sim]["lagpatch"] # Initial Lagrangian patch size
else:
R = self.cats[n_sim]["r200"] # R200c at z = 0
if overlap: # Query the KNN
overlapper = ParticleOverlap(**overlapper_kwargs)
if verbose:
print("Loading initial clump particles for `n_sim = {}`."
.format(n_sim), flush=True)
# Grab a paths object. What it is set to is unimportant
paths = self.cats[0].paths
with open(paths.clump0_path(self.cats.n_sims[n_sim]), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True)
clumps_pos2cell(clumps0, overlapper)
# Precalculate min and max cell along each axis
mins0, maxs0 = get_clumplims(clumps0,
ncells=overlapper.inv_clength,
nshift=overlapper.nshift)
cat2clumps0 = self._cat2clump_mapping(self.cats[n_sim]["index"],
clumps0["ID"])
matches = [None] * (self.cats.N - 1)
# Verbose iterator
if verbose: if verbose:
iters = enumerate(tqdm(self.search_sim_indices(n_sim))) print("{}: querying the KNN.".format(datetime.now()), flush=True)
else: match_indxs = radius_neighbours(
iters = enumerate(self.search_sim_indices(n_sim)) catx.knn0, cat0.positions0, radiusX=cat0["lagpatch"],
iters = enumerate(self.search_sim_indices(n_sim)) radiusKNN=catx["lagpatch"], nmult=self.nmult, enforce_in32=True,
# Search for neighbours in the other simulations at z = 70 verbose=verbose)
for count, i in iters:
# Remove neighbours whose mass is too large/small
if self.dlogmass is not None:
for j, indx in enumerate(match_indxs):
# |log(M1 / M2)|
p = self.mass_kind
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][j]))
match_indxs[j] = match_indxs[j][aratio < self.dlogmass]
# Initialise the array outside in case `overlap` is `False`
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
if overlap:
if verbose: if verbose:
print("Querying the KNN for `n_sim = {}`.".format(n_sim), print("Loading the clump particles", flush=True)
flush=True) with open(cat0.paths.clump0_path(nsim0), "rb") as f:
# Query the KNN either fast (miss some) or slow (get all) clumps0 = numpy.load(f, allow_pickle=True)
if select_initial: with open(catx.paths.clump0_path(nsimx), 'rb') as f:
if fast_neighbours: clumpsx = numpy.load(f, allow_pickle=True)
dist0, indxs = self.cats[i].radius_neigbours(
pos0, R * nmult, select_initial=True)
else:
dist0, indxs = radius_neighbours(
self.cats[i].knn0, pos0, radiusX=R,
radiusKNN=self.cats[i]["lagpatch"], nmult=nmult,
verbose=verbose)
else:
# Will switch dist0 <-> dist at the end
if fast_neighbours:
dist0, indxs = self.cats[i].radius_neigbours(
pos, R * nmult, select_initial=False)
else:
dist0, indxs = radius_neighbours(
self.cats[i].knn, pos, radiusX=R,
radiusKNN=self.cats[i]["r200"], nmult=nmult,
verbose=verbose)
# Enforce int32 and float32
for n in range(dist0.size):
dist0[n] = dist0[n].astype(numpy.float32)
indxs[n] = indxs[n].astype(numpy.int32)
# Get rid of neighbors whose mass is too off # Convert 3D positions to particle IDs
if dlogmass is not None: clumps_pos2cell(clumps0, self.overlapper)
for j, indx in enumerate(indxs): clumps_pos2cell(clumpsx, self.overlapper)
match_logmass = numpy.log10(self.cats[i][mass_kind][indx])
mask = numpy.abs(match_logmass - logmass[j]) < dlogmass
dist0[j] = dist0[j][mask]
indxs[j] = indx[mask]
# Find the distance at z = 0 (or z = 70 dep. on `search_initial``) # Calculate the particle field
dist = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size if verbose:
with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0] print("Creating and smoothing the crossed field.", flush=True)
# Fill the pre-allocated array on positions with neighbours delta = self.overlapper.make_delta(concatenate_clumps(clumpsx),
for k in with_neigbours: to_smooth=False)
if select_initial: delta = self.overlapper.smooth_highres(delta)
dist[k] = numpy.linalg.norm(
pos[k] - self.cats[i].positions[indxs[k]], axis=1)
else:
dist[k] = numpy.linalg.norm(
pos0[k] - self.cats[i].positions0[indxs[k]], axis=1)
# Calculate the initial snapshot overlap # Min and max cells along each axis for each halo
cross = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size limkwargs = {"ncells": self.overlapper.inv_clength,
if overlap: "nshift": self.overlapper.nshift}
if verbose: mins0, maxs0 = get_clumplims(clumps0, **limkwargs)
print("Loading initial clump particles for `n_sim = {}` " minsx, maxsx = get_clumplims(clumpsx, **limkwargs)
"to compare against `n_sim = {}`.".format(i, n_sim),
flush=True)
with open(paths.clump0_path(self.cats.n_sims[i]), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True)
clumps_pos2cell(clumpsx, overlapper)
# Calculate the particle field # Mapping from a catalogue halo index to the list of clumps
if verbose: cat2clumps0 = self._cat2clump_mapping(cat0["index"], clumps0["ID"])
print("Loading and smoothing the crossed total field.", cat2clumpsx = self._cat2clump_mapping(catx["index"], clumpsx["ID"])
flush=True)
particles = concatenate_clumps(clumpsx)
delta = overlapper.make_delta(particles, to_smooth=False)
del particles; collect() # noqa - no longer needed
delta = overlapper.smooth_highres(delta)
if verbose:
print("Smoothed up the field.", flush=True)
# Precalculate min and max cell along each axis
minsx, maxsx = get_clumplims(clumpsx,
ncells=overlapper.inv_clength,
nshift=overlapper.nshift)
cat2clumpsx = self._cat2clump_mapping(self.cats[i]["index"], # Loop only over halos that have neighbours
clumpsx["ID"]) wneigbours = numpy.where([ii.size > 0 for ii in match_indxs])[0]
# Loop only over halos that have neighbours for k in tqdm(wneigbours) if verbose else wneigbours:
for k in tqdm(with_neigbours) if verbose else with_neigbours: match0 = cat2clumps0[k] # Clump pos matching this halo
# Find which clump matches index of this halo from cat # The clump, its mass and mins & maxs
match0 = cat2clumps0[k] cl0 = clumps0["clump"][match0]
mass0 = numpy.sum(cl0['M'])
mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Unpack this clum, its mamss and mins and maxs # Array to store overlaps of this halo
cl0 = clumps0["clump"][match0] crosses = numpy.full(match_indxs[k].size, numpy.nan,
mass0 = numpy.sum(cl0['M']) numpy.float32)
mins0_current, maxs0_current = mins0[match0], maxs0[match0] # Loop over matches of this halo from the other simulation
# Preallocate this array. for ii, ind in enumerate(match_indxs[k]):
crosses = numpy.full(indxs[k].size, numpy.nan, matchx = cat2clumpsx[ind] # Clump pos matching this halo
numpy.float32) clx = clumpsx["clump"][matchx]
crosses[ii] = self.overlapper(
cl0, clx, delta, mins0_current, maxs0_current,
minsx[matchx], maxsx[matchx],
mass1=mass0, mass2=numpy.sum(clx['M']))
cross[k] = crosses
# Loop over the ones we cross-correlate with # Optionally remove matches with exactly 0 overlap
for ii, ind in enumerate(indxs[k]): if self.remove_nooverlap:
# Again which cross clump to this index mask = cross[k] > 0
matchx = cat2clumpsx[ind] match_indxs[k] = match_indxs[k][mask]
clx = clumpsx["clump"][matchx] cross[k] = cross[k][mask]
crosses[ii] = overlapper(
cl0, clx, delta, mins0_current, maxs0_current,
minsx[matchx], maxsx[matchx],
mass1=mass0, mass2=numpy.sum(clx['M']))
cross[k] = crosses return cat0["index"], match_indxs, cross
# Optionally remove points whose overlap is exaclt zero
if remove_nooverlap:
mask = cross[k] > 0
indxs[k] = indxs[k][mask]
dist[k] = dist[k][mask]
dist0[k] = dist0[k][mask]
cross[k] = cross[k][mask]
# Append as a composite array. Flip dist order if not select_init
if select_initial:
matches[count] = numpy.asarray(
[indxs, dist, dist0, cross], dtype=object)
else:
matches[count] = numpy.asarray(
[indxs, dist0, dist, cross], dtype=object)
return numpy.asarray(matches, dtype=object)
def cross_knn_position_all(self, nmult=1, dlogmass=None,
mass_kind="totpartmass", overlap=False,
overlapper_kwargs={},
select_initial=True, remove_nooverlap=True,
verbose=True):
r"""
Find all counterparts of halos in all simulations listed in
`self.cats`. See `self.cross_knn_position_single` for more details.
Parameters
----------
nmult : float or int, optional
Multiple of the sum of pair Lagrangian patch sizes or
:math:`R_{200c}` within which to return neighbours. By default 1.
dlogmass : float, optional
Tolerance on mass logarithmic mass difference. By default `None`.
mass_kind : str, optional
The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
overlap : bool, optional
Whether to calculate overlap between clumps in the initial
snapshot. By default `False`. Note that this operation is
substantially slower.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`.
select_initial : bool, optional
Whether to select nearest neighbour at the initial or final
snapshot. By default `True`, i.e. at the initial snapshot.
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default
`True`.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
Returns
-------
matches : list of composite arrays
List whose length is `n_sims`. For description of its elements see
`self.cross_knn_position_single(...)`.
"""
N = self.cats.N # Number of catalogues
matches = [None] * N
# Loop over each catalogue
for i in trange(N) if verbose else range(N):
matches[i] = self.cross_knn_position_single(
i, nmult, dlogmass, mass_kind=mass_kind, overlap=overlap,
overlapper_kwargs=overlapper_kwargs,
select_initial=select_initial,
remove_nooverlap=remove_nooverlap, verbose=verbose)
return matches
############################################################################### ###############################################################################
@ -1002,7 +909,8 @@ def dist_percentile(dist, qs, distmax=0.075):
return x return x
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True): def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., enforce_in32=False,
verbose=True):
""" """
Find all neigbours of a trained KNN model whose center of mass separation Find all neigbours of a trained KNN model whose center of mass separation
is less than `nmult` times the sum of their respective radii. is less than `nmult` times the sum of their respective radii.
@ -1020,13 +928,14 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True):
Patch radii corresponding to clumps used to train `knn`. Patch radii corresponding to clumps used to train `knn`.
nmult : float, optional nmult : float, optional
Multiple of the sum of two radii below which to consider a match. Multiple of the sum of two radii below which to consider a match.
enforce_int32 : bool, optional
Whether to enforce 32-bit integer precision of the indices. By default
`False`.
verbose : bool, optional verbose : bool, optional
Verbosity flag. Verbosity flag.
Returns Returns
------- -------
dists : 1-dimensional array `(n_samples,)` of arrays
Distance from `X` to matches from `knn`.
indxs : 1-dimensional array `(n_samples,)` of arrays indxs : 1-dimensional array `(n_samples,)` of arrays
Matches to `X` from `knn`. Matches to `X` from `knn`.
""" """
@ -1035,7 +944,6 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True):
assert radiusKNN.size == knn.n_samples_fit_ # patchknn matches the knn? assert radiusKNN.size == knn.n_samples_fit_ # patchknn matches the knn?
nsamples = X.shape[0] nsamples = X.shape[0]
dists = [None] * nsamples # Initiate lists
indxs = [None] * nsamples indxs = [None] * nsamples
patchknn_max = numpy.max(radiusKNN) # Maximum for completeness patchknn_max = numpy.max(radiusKNN) # Maximum for completeness
@ -1046,9 +954,8 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True):
# Note that `dist` and `indx` are wrapped in 1-element arrays # Note that `dist` and `indx` are wrapped in 1-element arrays
# so we take the first item where appropriate # so we take the first item where appropriate
mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
dists[i] = dist[0][mask]
indxs[i] = indx[0][mask] indxs[i] = indx[0][mask]
if enforce_in32:
indxs[i] = indxs[i].astype(numpy.int32)
dists = numpy.asarray(dists, dtype=object) # Turn into array of arrays return numpy.asarray(indxs, dtype=object)
indxs = numpy.asarray(indxs, dtype=object)
return dists, indxs

View file

@ -14,9 +14,8 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa
from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps, # noqa from .make_cat import (HaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa
clumps_pos2cell) # noqa
from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups, SDSS) # noqa TwoMPPGroups, SDSS) # noqa
from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa
from .summaries import PKReader # noqa from .summaries import (PKReader, OverlapReader, binned_resample_mean) # noqa

View file

@ -17,10 +17,8 @@ Functions to read in the particle and clump files.
""" """
import numpy import numpy
from os.path import join from os.path import join
from tqdm import trange
from copy import deepcopy
from sklearn.neighbors import NearestNeighbors from sklearn.neighbors import NearestNeighbors
from .readsim import (read_mmain, read_initcm) from .readsim import (CSiBORGPaths, read_mmain, read_initcm)
from ..utils import (flip_cols, add_columns) from ..utils import (flip_cols, add_columns)
from ..units import (BoxUnits, cartesian_to_radec) from ..units import (BoxUnits, cartesian_to_radec)
@ -47,7 +45,11 @@ class HaloCatalogue:
_positions = None _positions = None
_positions0 = None _positions0 = None
def __init__(self, paths, min_m500=None, max_dist=None): def __init__(self, nsim, min_m500=None, max_dist=None):
# Set up paths
paths = CSiBORGPaths(n_sim=nsim)
paths.n_snap = paths.get_maximum_snapshot()
self._paths = paths
self._box = BoxUnits(paths) self._box = BoxUnits(paths)
min_m500 = 0 if min_m500 is None else min_m500 min_m500 = 0 if min_m500 is None else min_m500
max_dist = numpy.infty if max_dist is None else max_dist max_dist = numpy.infty if max_dist is None else max_dist
@ -378,99 +380,6 @@ class HaloCatalogue:
return self._data[key] return self._data[key]
class CombinedHaloCatalogue:
r"""
A combined halo catalogue, containing `HaloCatalogue` for each IC
realisation at the latest redshift.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths-handling object. Doest not have to have set set `n_sim`
and `n_snap`.
min_m500 : float, optional
The minimum :math:`M_{rm 500c} / M_\odot` mass. By default no
threshold.
max_dist : float, optional
The maximum comoving distance of a halo. By default no upper limit.
verbose : bool, optional
Verbosity flag for reading the catalogues.
"""
_n_sims = None
_n_snaps = None
_cats = None
def __init__(self, paths, min_m500=None, max_dist=None, verbose=True):
# Read simulations and their maximum snapshots
# NOTE later change this back to all simulations
# self._n_sims = [7468, 7588, 8020, 8452, 8836]
self._n_sims = [7468, 7588]
# self._n_sims = paths.ic_ids
n_snaps = [paths.get_maximum_snapshot(i) for i in self._n_sims]
self._n_snaps = numpy.asanyarray(n_snaps)
cats = [None] * self.N
for i in trange(self.N) if verbose else range(self.N):
paths = deepcopy(paths)
paths.set_info(self.n_sims[i], self.n_snaps[i])
cats[i] = HaloCatalogue(paths, min_m500, max_dist)
self._cats = cats
@property
def N(self):
"""
Number of IC realisations in this combined catalogue.
Returns
-------
N : int
Number of catalogues.
"""
return len(self.n_sims)
@property
def n_sims(self):
"""
IC realisations CSiBORG identifiers.
Returns
-------
ids : 1-dimensional array
Array of IDs.
"""
return self._n_sims
@property
def n_snaps(self):
"""
Snapshot numbers corresponding to `self.n_sims`.
Returns
-------
n_snaps : 1-dimensional array
Array of snapshot numbers.
"""
return self._n_snaps
@property
def cats(self):
"""
Catalogues associated with this object.
Returns
-------
cats : list of `HaloCatalogue`
Catalogues.
"""
return self._cats
def __getitem__(self, n):
if n > self.N:
raise ValueError("Catalogue count is {}, requested catalogue {}."
.format(self.N, n))
return self.cats[n]
def concatenate_clumps(clumps): def concatenate_clumps(clumps):
""" """
Concatenate an array of clumps to a single array containing all particles. Concatenate an array of clumps to a single array containing all particles.

View file

@ -88,7 +88,8 @@ class CSiBORGPaths:
self.to_new = to_new self.to_new = to_new
if n_sim is not None and n_snap is not None: if n_sim is not None and n_snap is not None:
self.set_info(n_sim, n_snap) self.set_info(n_sim, n_snap)
# "/mnt/extraspace/rstiskalek/csiborg/initmatch/clump_cm_7468.npy" if n_sim is not None:
self.n_sim = n_sim
@property @property
def srcdir(self): def srcdir(self):

View file

@ -166,3 +166,268 @@ class PKReader:
k += 1 k += 1
return ks, xpks return ks, xpks
class OverlapReader:
"""
TODO: docs
"""
def __init__(self, nsim0, nsimx, cat0, catx, fskel=None):
if fskel is None:
fskel = "/mnt/extraspace/rstiskalek/csiborg/overlap/"
fskel += "cross_{}_{}.npz"
self._data = numpy.load(fskel.format(nsim0, nsimx), allow_pickle=True)
self._set_cats(nsim0, nsimx, cat0, catx)
@property
def nsim0(self):
"""
The reference simulation ID.
Returns
-------
nsim0 : int
"""
return self._nsim0
@property
def nsimx(self):
"""
The cross simulation ID.
Returns
-------
nsimx : int
"""
return self._nsimx
@property
def cat0(self):
"""
The reference halo catalogue.
Returns
-------
cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
"""
return self._cat0
@property
def catx(self):
"""
The cross halo catalogue.
Returns
-------
catx : :py:class:`csiborgtools.read.HaloCatalogue`
"""
return self._catx
def _set_cats(self, nsim0, nsimx, cat0, catx):
"""
Set the simulation IDs and catalogues.
Parameters
----------
nsim0, nsimx : int
The reference and cross simulation IDs.
cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogues corresponding to `nsim0` and `nsimx`, respectively.
Returns
-------
None
"""
assert (nsim0 == cat0.paths.n_sim) & (nsimx == catx.paths.n_sim)
self._nsim0 = nsim0
self._nsimx = nsimx
self._cat0 = cat0
self._catx = catx
@property
def indxs(self):
"""
Indices of halos from the reference catalogue.
Returns
-------
indxs : 1-dimensional array
"""
return self._data["indxs"]
@property
def match_indxs(self):
"""
Indices of halos from the cross catalogue.
Returns
-------
match_indxs : array of 1-dimensional arrays of shape `(nhalos, )`
"""
return self._data["match_indxs"]
@property
def overlap(self):
"""
Pair overlap of halos between the reference and cross simulations.
Returns
-------
ovelap : array of 1-dimensional arrays of shape `(nhalos, )`
"""
return self._data["cross"]
def dist(self, in_initial, norm=None):
"""
Final snapshot pair distances.
Parameters
----------
in_initial : bool
Whether to calculate separation in the initial or final snapshot.
Returns
-------
dist : array of 1-dimensional arrays of shape `(nhalos, )`
"""
assert norm is None or norm in ("r200", "ref_patch", "sum_patch")
# Positions either in the initial or final snapshot
if in_initial:
pos0 = self.cat0.positions0
posx = self.catx.positions0
else:
pos0 = self.cat0.positions
posx = self.catx.positions
dist = [None] * self.indxs.size
for n, ind in enumerate(self.match_indxs):
dist[n] = numpy.linalg.norm(pos0[n, :] - posx[ind, :], axis=1)
# Normalisation
if norm == "r200":
dist[n] /= self.cat0["r200"][n]
if norm == "ref_patch":
dist[n] /= self.cat0["lagpatch"][n]
if norm == "sum_patch":
dist[n] /= (self.cat0["lagpatch"][n]
+ self.catx["lagpatch"][ind])
return numpy.array(dist, dtype=object)
def mass_ratio(self, mass_kind="totpartmass", in_log=True, in_abs=True):
"""
Pair mass ratio.
Parameters
----------
mass_kind : str, optional
The mass kind whose ratio is to be calculated. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
in_log : bool, optional
Whether to return logarithm of the ratio. By default `True`.
in_abs : bool, optional
Whether to return absolute value of the ratio. By default `True`.
Returns
-------
ratio : array of 1-dimensional arrays of shape `(nhalos, )`
"""
mass0 = self.cat0[mass_kind]
massx = self.catx[mass_kind]
ratio = [None] * self.indxs.size
for n, ind in enumerate(self.match_indxs):
ratio[n] = mass0[n] / massx[ind]
if in_log:
ratio[n] = numpy.log10(ratio[n])
if in_abs:
ratio[n] = numpy.abs(ratio[n])
return numpy.array(ratio, dtype=object)
def summed_overlap(self):
"""
Summed overlap of each halo in the reference simulation with the cross
simulation.
Parameters
----------
None
Returns
-------
summed_overlap : 1-dimensional array of shape `(nhalos, )`
"""
return numpy.array([numpy.sum(cross) for cross in self._data["cross"]])
def copy_per_match(self, par):
"""
Make an array like `self.match_indxs` where each of its element is an
equal value array of the pair clump property from the reference
catalogue.
Parameters
----------
par : str
Property to be copied over.
Returns
-------
out : 1-dimensional array of shape `(nhalos, )`
"""
out = [None] * self.indxs.size
for n, ind in enumerate(self.match_indxs):
out[n] = numpy.ones(ind.size) * self.cat0[par][n]
return numpy.array(out, dtype=object)
def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42):
"""
Calculate binned average of `y` by MC resampling. Each point is kept with
probability `prob`.
Parameters
----------
x : 1-dimensional array
Independent variable.
y : 1-dimensional array
Dependent variable.
prob : 1-dimensional array
Sample probability.
bins : 1-dimensional array
Bin edges to bin `x`.
nresample : int, optional
Number of MC resamples. By default 50.
seed : int, optional
Random seed.
Returns
-------
bin_centres : 1-dimensional array
Bin centres.
stat : 2-dimensional array
Mean and its standard deviation from MC resampling.
"""
assert (x.ndim == 1) & (x.shape == y.shape == prob.shape)
gen = numpy.random.RandomState(seed)
loop_stat = numpy.full(nresample, numpy.nan) # Preallocate loop arr
stat = numpy.full((bins.size - 1, 2), numpy.nan) # Preallocate output
for i in range(bins.size - 1):
mask = (x > bins[i]) & (x <= bins[i + 1])
nsamples = numpy.sum(mask)
loop_stat[:] = numpy.nan # Clear it
for j in range(nresample):
loop_stat[j] = numpy.mean(y[mask][gen.rand(nsamples) < prob[mask]])
stat[i, 0] = numpy.mean(loop_stat)
stat[i, 1] = numpy.std(loop_stat)
bin_centres = (bins[1:] + bins[:-1]) / 2
return bin_centres, stat

View file

@ -32,35 +32,25 @@ import utils
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--nmult", type=float) parser.add_argument("--nmult", type=float)
parser.add_argument("--overlap", type=lambda x: bool(strtobool(x))) parser.add_argument("--overlap", type=lambda x: bool(strtobool(x)))
parser.add_argument("--select_initial", type=lambda x: bool(strtobool(x)))
parser.add_argument("--fast_neighbours", type=lambda x: bool(strtobool(x)))
args = parser.parse_args() args = parser.parse_args()
# File paths # File paths
ic = 7468 nsim0 = 7468
fperm = join(utils.dumpdir, "overlap", "cross_{}.npy") nsimx = 7588
fperm = join(utils.dumpdir, "overlap", "cross_{}_{}.npy")
paths = csiborgtools.read.CSiBORGPaths(to_new=False)
paths.set_info(ic, paths.get_maximum_snapshot(ic))
print("{}: loading catalogues.".format(datetime.now()), flush=True) print("{}: loading catalogues.".format(datetime.now()), flush=True)
cat = csiborgtools.read.CombinedHaloCatalogue(paths) cat0 = csiborgtools.read.HaloCatalogue(nsim0)
catx = csiborgtools.read.HaloCatalogue(nsimx)
matcher = csiborgtools.match.RealisationsMatcher(cat)
nsim0 = cat.n_sims[0]
nsimx = cat.n_sims[1]
matcher = csiborgtools.match.RealisationsMatcher()
print("{}: crossing the simulations.".format(datetime.now()), flush=True) print("{}: crossing the simulations.".format(datetime.now()), flush=True)
indxs, match_indxs, cross = matcher.cross(
out = matcher.cross_knn_position_single( nsim0, nsimx, cat0, catx, overlap=False)
0, nmult=args.nmult, dlogmass=2., overlap=args.overlap,
select_initial=args.select_initial, fast_neighbours=args.fast_neighbours)
# Dump the result # Dump the result
fout = fperm.format(nsim0) fout = fperm.format(nsim0, nsimx)
print("Saving results to `{}`.".format(fout), flush=True) print("Saving results to `{}`.".format(fout), flush=True)
with open(fout, "wb") as f: with open(fout, "wb") as f:
numpy.save(fout, out) numpy.savez(fout, indxs=indxs, match_indxs=match_indxs, cross=cross)
print("All finished.", flush=True) print("All finished.", flush=True)