Overlap reader (#28)

* Edit output path

* remove matching of many sims

* Add a class skeleton

* Add a setter for only nsim

* Deletei mport

* Simplify cross and delete combined halo catalogue

* Simplify reading in catalogues

* Add distance calculation

* Add mass ratio

* Clean up the submission script

* Add the summer overlap

* Docs

* Add array copying

* Add import

* Add resampling mean
This commit is contained in:
Richard Stiskalek 2023-02-08 12:05:32 +00:00 committed by GitHub
parent 8dea3da4de
commit 1c859dbdac
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 473 additions and 402 deletions

View file

@ -16,10 +16,10 @@
import numpy
from scipy.ndimage import gaussian_filter
from tqdm import (tqdm, trange)
from datetime import datetime
from astropy.coordinates import SkyCoord
from numba import jit
from gc import collect
from ..read import (CombinedHaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa
from ..read import (concatenate_clumps, clumps_pos2cell)
def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
@ -77,58 +77,118 @@ class RealisationsMatcher:
neighbours in all remaining IC realisations that are within some mass
range of it.
Parameters
----------
cats : :py:class`csiborgtools.read.CombinedHaloCatalogue`
Combined halo catalogue to search.
"""
_cats = None
nmult : float or int, optional
Multiple of the sum of pair initial Lagrangian patch sizes
within which to return neighbours. By default 1.
dlogmass : float, optional
Tolerance on the absolute logarithmic mass difference of potential
matches. By default 2.
mass_kind : str, optional
The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`.
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default `True`.
def __init__(self, cats):
self.cats = cats
"""
_nmult = None
_dlogmass = None
_mass_kind = None
_overlapper = None
_remove_nooverlap = None
def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass",
overlapper_kwargs={}, remove_nooverlap=True):
self.nmult = nmult
self.dlogmass = dlogmass
self.mass_kind = mass_kind
self._overlapper = ParticleOverlap(**overlapper_kwargs)
self.remove_nooverlap = remove_nooverlap
@property
def cats(self):
def nmult(self):
"""
Combined catalogues.
Multiple of the sum of pair initial Lagrangian patch sizes within which
to return neighbours.
Returns
-------
cats : :py:class`csiborgtools.read.CombinedHaloCatalogue`
Combined halo catalogue.
nmult : float
"""
return self._cats
return self._nmult
@cats.setter
def cats(self, cats):
"""
Sets `cats`, ensures that this is a `CombinedHaloCatalogue` object.
"""
if not isinstance(cats, CombinedHaloCatalogue):
raise TypeError("`cats` must be of type `CombinedHaloCatalogue`.")
self._cats = cats
@nmult.setter
def nmult(self, nmult):
"""Set `nmult`."""
assert nmult > 0
self._nmult = nmult
def search_sim_indices(self, n_sim):
@property
def dlogmass(self):
"""
Return indices of all other IC realisations but of `n_sim`.
Parameters
----------
n_sim : int
IC realisation index of `self.cats` to be skipped.
Tolerance on the absolute logarithmic mass difference of potential
matches.
Returns
-------
indxs : list of int
The remaining IC indices.
dlogmass : float
"""
return [i for i in range(self.cats.N) if i != n_sim]
return self._dlogmass
def _check_masskind(self, mass_kind):
"""Check that `mass_kind` is a valid key."""
if mass_kind not in self.cats[0].keys:
raise ValueError("Invalid mass kind `{}`.".format(mass_kind))
@dlogmass.setter
def dlogmass(self, dlogmass):
"""Set `dlogmass`."""
assert dlogmass > 0
self._dlogmass = dlogmass
@property
def mass_kind(self):
"""
The mass kind whose similarity is to be checked.
Returns
-------
mass_kind : str
"""
return self._mass_kind
@mass_kind.setter
def mass_kind(self, mass_kind):
"""Set `mass_kind`."""
assert isinstance(mass_kind, str)
self._mass_kind = mass_kind
@property
def remove_nooverlap(self):
"""
Whether to remove pairs with exactly zero overlap.
Returns
-------
remove_nooverlap : bool
"""
return self._remove_nooverlap
@remove_nooverlap.setter
def remove_nooverlap(self, remove_nooverlap):
"""Set `remove_nooverlap`."""
assert isinstance(remove_nooverlap, bool)
self._remove_nooverlap = remove_nooverlap
@property
def overlapper(self):
"""
The overlapper object.
Returns
-------
overlapper : :py:class:`csiborgtools.match.ParticleOverlap`
"""
return self._overlapper
@staticmethod
def _cat2clump_mapping(cat_indxs, clump_indxs):
@ -154,264 +214,111 @@ class RealisationsMatcher:
mapping[ind2] = ind1
return mapping
def cross_knn_position_single(self, n_sim, nmult=1, dlogmass=None,
mass_kind="totpartmass", overlap=False,
overlapper_kwargs={}, select_initial=True,
remove_nooverlap=True, fast_neighbours=False,
verbose=True):
def cross(self, nsim0, nsimx, cat0, catx, overlap=False, verbose=True):
r"""
Find all neighbours within a multiple of the sum of either the initial
Lagrangian patch sizes (distance at :math:`z = 70`) or :math:`R_{200c}`
(distance at :math:`z = 0`). Enforces that the neighbours' are similar
in mass up to `dlogmass` dex and optionally calculates their overlap.
Find all neighbours whose CM separation is less than `nmult` times the
sum of their initial Lagrangian patch sizes. Enforces that the
neighbours' are similar in mass up to `dlogmass` dex.
Parameters
----------
n_sim : int
Index of an IC realisation in `self.cats` whose halos' neighbours
in the remaining simulations to search for.
nmult : float or int, optional
Multiple of the sum of pair Lagrangian patch sizes or
:math:`R_{200c}` within which to return neighbours. By default 1.
dlogmass : float, optional
Tolerance on mass logarithmic mass difference. By default `None`.
mass_kind : str, optional
The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
nsim0, nsimx : int
The reference and cross simulation IDs.
cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogues corresponding to `nsim0` and `nsimx`, respectively.
overlap : bool, optional
Whether to calculate overlap between clumps in the initial
snapshot. By default `False`. This operation is slow.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`.
select_initial : bool, optional
Whether to select nearest neighbour at the initial or final
snapshot. By default `True`, i.e. at the initial snapshot.
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default
`True`.
fast_neighbours : bool, optional
Whether to calculate neighbours within a fixed radius of each
clump. Note that this will result in missing some matches. If
`True` then `nmult` is a multiple of either the initial patch size
of :math:`R_{200c}`.
whether to calculate overlap between clumps in the initial
snapshot. by default `false`. this operation is slow.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
iterator verbosity flag. by default `true`.
Returns
-------
matches : composite array
Array, indices are `(n_sims - 1, 5, n_halos, n_matches)`. The
2nd axis is `index` of the neighbouring halo in its catalogue,
`dist` is the 3D distance to the halo whose neighbours are
searched, `dist0` is the separation of the initial CMs, and
`overlap` is the overlap over the initial clumps, respectively.
indxs : 1-dimensional array of shape `(nhalos, )`
Indices of halos in the reference catalogue.
match_indxs : 1-dimensional array of arrays
Indices of halo counterparts in the cross catalogue.
overlaps : 1-dimensional array of arrays
Overlaps with the cross catalogue.
"""
self._check_masskind(mass_kind)
# Halo properties of this simulation
logmass = numpy.log10(self.cats[n_sim][mass_kind])
pos = self.cats[n_sim].positions # Grav potential minimum
pos0 = self.cats[n_sim].positions0 # CM positions
if select_initial:
R = self.cats[n_sim]["lagpatch"] # Initial Lagrangian patch size
else:
R = self.cats[n_sim]["r200"] # R200c at z = 0
assert (nsim0 == cat0.paths.n_sim) & (nsimx == catx.paths.n_sim)
if overlap:
overlapper = ParticleOverlap(**overlapper_kwargs)
if verbose:
print("Loading initial clump particles for `n_sim = {}`."
.format(n_sim), flush=True)
# Grab a paths object. What it is set to is unimportant
paths = self.cats[0].paths
with open(paths.clump0_path(self.cats.n_sims[n_sim]), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True)
clumps_pos2cell(clumps0, overlapper)
# Precalculate min and max cell along each axis
mins0, maxs0 = get_clumplims(clumps0,
ncells=overlapper.inv_clength,
nshift=overlapper.nshift)
cat2clumps0 = self._cat2clump_mapping(self.cats[n_sim]["index"],
clumps0["ID"])
matches = [None] * (self.cats.N - 1)
# Verbose iterator
# Query the KNN
if verbose:
iters = enumerate(tqdm(self.search_sim_indices(n_sim)))
else:
iters = enumerate(self.search_sim_indices(n_sim))
iters = enumerate(self.search_sim_indices(n_sim))
# Search for neighbours in the other simulations at z = 70
for count, i in iters:
print("{}: querying the KNN.".format(datetime.now()), flush=True)
match_indxs = radius_neighbours(
catx.knn0, cat0.positions0, radiusX=cat0["lagpatch"],
radiusKNN=catx["lagpatch"], nmult=self.nmult, enforce_in32=True,
verbose=verbose)
# Remove neighbours whose mass is too large/small
if self.dlogmass is not None:
for j, indx in enumerate(match_indxs):
# |log(M1 / M2)|
p = self.mass_kind
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][j]))
match_indxs[j] = match_indxs[j][aratio < self.dlogmass]
# Initialise the array outside in case `overlap` is `False`
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
if overlap:
if verbose:
print("Querying the KNN for `n_sim = {}`.".format(n_sim),
flush=True)
# Query the KNN either fast (miss some) or slow (get all)
if select_initial:
if fast_neighbours:
dist0, indxs = self.cats[i].radius_neigbours(
pos0, R * nmult, select_initial=True)
else:
dist0, indxs = radius_neighbours(
self.cats[i].knn0, pos0, radiusX=R,
radiusKNN=self.cats[i]["lagpatch"], nmult=nmult,
verbose=verbose)
else:
# Will switch dist0 <-> dist at the end
if fast_neighbours:
dist0, indxs = self.cats[i].radius_neigbours(
pos, R * nmult, select_initial=False)
else:
dist0, indxs = radius_neighbours(
self.cats[i].knn, pos, radiusX=R,
radiusKNN=self.cats[i]["r200"], nmult=nmult,
verbose=verbose)
# Enforce int32 and float32
for n in range(dist0.size):
dist0[n] = dist0[n].astype(numpy.float32)
indxs[n] = indxs[n].astype(numpy.int32)
print("Loading the clump particles", flush=True)
with open(cat0.paths.clump0_path(nsim0), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True)
with open(catx.paths.clump0_path(nsimx), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True)
# Get rid of neighbors whose mass is too off
if dlogmass is not None:
for j, indx in enumerate(indxs):
match_logmass = numpy.log10(self.cats[i][mass_kind][indx])
mask = numpy.abs(match_logmass - logmass[j]) < dlogmass
dist0[j] = dist0[j][mask]
indxs[j] = indx[mask]
# Convert 3D positions to particle IDs
clumps_pos2cell(clumps0, self.overlapper)
clumps_pos2cell(clumpsx, self.overlapper)
# Find the distance at z = 0 (or z = 70 dep. on `search_initial``)
dist = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size
with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0]
# Fill the pre-allocated array on positions with neighbours
for k in with_neigbours:
if select_initial:
dist[k] = numpy.linalg.norm(
pos[k] - self.cats[i].positions[indxs[k]], axis=1)
else:
dist[k] = numpy.linalg.norm(
pos0[k] - self.cats[i].positions0[indxs[k]], axis=1)
# Calculate the particle field
if verbose:
print("Creating and smoothing the crossed field.", flush=True)
delta = self.overlapper.make_delta(concatenate_clumps(clumpsx),
to_smooth=False)
delta = self.overlapper.smooth_highres(delta)
# Calculate the initial snapshot overlap
cross = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size
if overlap:
if verbose:
print("Loading initial clump particles for `n_sim = {}` "
"to compare against `n_sim = {}`.".format(i, n_sim),
flush=True)
with open(paths.clump0_path(self.cats.n_sims[i]), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True)
clumps_pos2cell(clumpsx, overlapper)
# Min and max cells along each axis for each halo
limkwargs = {"ncells": self.overlapper.inv_clength,
"nshift": self.overlapper.nshift}
mins0, maxs0 = get_clumplims(clumps0, **limkwargs)
minsx, maxsx = get_clumplims(clumpsx, **limkwargs)
# Calculate the particle field
if verbose:
print("Loading and smoothing the crossed total field.",
flush=True)
particles = concatenate_clumps(clumpsx)
delta = overlapper.make_delta(particles, to_smooth=False)
del particles; collect() # noqa - no longer needed
delta = overlapper.smooth_highres(delta)
if verbose:
print("Smoothed up the field.", flush=True)
# Precalculate min and max cell along each axis
minsx, maxsx = get_clumplims(clumpsx,
ncells=overlapper.inv_clength,
nshift=overlapper.nshift)
# Mapping from a catalogue halo index to the list of clumps
cat2clumps0 = self._cat2clump_mapping(cat0["index"], clumps0["ID"])
cat2clumpsx = self._cat2clump_mapping(catx["index"], clumpsx["ID"])
cat2clumpsx = self._cat2clump_mapping(self.cats[i]["index"],
clumpsx["ID"])
# Loop only over halos that have neighbours
for k in tqdm(with_neigbours) if verbose else with_neigbours:
# Find which clump matches index of this halo from cat
match0 = cat2clumps0[k]
# Loop only over halos that have neighbours
wneigbours = numpy.where([ii.size > 0 for ii in match_indxs])[0]
for k in tqdm(wneigbours) if verbose else wneigbours:
match0 = cat2clumps0[k] # Clump pos matching this halo
# The clump, its mass and mins & maxs
cl0 = clumps0["clump"][match0]
mass0 = numpy.sum(cl0['M'])
mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Unpack this clum, its mamss and mins and maxs
cl0 = clumps0["clump"][match0]
mass0 = numpy.sum(cl0['M'])
mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Preallocate this array.
crosses = numpy.full(indxs[k].size, numpy.nan,
numpy.float32)
# Array to store overlaps of this halo
crosses = numpy.full(match_indxs[k].size, numpy.nan,
numpy.float32)
# Loop over matches of this halo from the other simulation
for ii, ind in enumerate(match_indxs[k]):
matchx = cat2clumpsx[ind] # Clump pos matching this halo
clx = clumpsx["clump"][matchx]
crosses[ii] = self.overlapper(
cl0, clx, delta, mins0_current, maxs0_current,
minsx[matchx], maxsx[matchx],
mass1=mass0, mass2=numpy.sum(clx['M']))
cross[k] = crosses
# Loop over the ones we cross-correlate with
for ii, ind in enumerate(indxs[k]):
# Again which cross clump to this index
matchx = cat2clumpsx[ind]
clx = clumpsx["clump"][matchx]
crosses[ii] = overlapper(
cl0, clx, delta, mins0_current, maxs0_current,
minsx[matchx], maxsx[matchx],
mass1=mass0, mass2=numpy.sum(clx['M']))
# Optionally remove matches with exactly 0 overlap
if self.remove_nooverlap:
mask = cross[k] > 0
match_indxs[k] = match_indxs[k][mask]
cross[k] = cross[k][mask]
cross[k] = crosses
# Optionally remove points whose overlap is exaclt zero
if remove_nooverlap:
mask = cross[k] > 0
indxs[k] = indxs[k][mask]
dist[k] = dist[k][mask]
dist0[k] = dist0[k][mask]
cross[k] = cross[k][mask]
# Append as a composite array. Flip dist order if not select_init
if select_initial:
matches[count] = numpy.asarray(
[indxs, dist, dist0, cross], dtype=object)
else:
matches[count] = numpy.asarray(
[indxs, dist0, dist, cross], dtype=object)
return numpy.asarray(matches, dtype=object)
def cross_knn_position_all(self, nmult=1, dlogmass=None,
mass_kind="totpartmass", overlap=False,
overlapper_kwargs={},
select_initial=True, remove_nooverlap=True,
verbose=True):
r"""
Find all counterparts of halos in all simulations listed in
`self.cats`. See `self.cross_knn_position_single` for more details.
Parameters
----------
nmult : float or int, optional
Multiple of the sum of pair Lagrangian patch sizes or
:math:`R_{200c}` within which to return neighbours. By default 1.
dlogmass : float, optional
Tolerance on mass logarithmic mass difference. By default `None`.
mass_kind : str, optional
The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
overlap : bool, optional
Whether to calculate overlap between clumps in the initial
snapshot. By default `False`. Note that this operation is
substantially slower.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`.
select_initial : bool, optional
Whether to select nearest neighbour at the initial or final
snapshot. By default `True`, i.e. at the initial snapshot.
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default
`True`.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
Returns
-------
matches : list of composite arrays
List whose length is `n_sims`. For description of its elements see
`self.cross_knn_position_single(...)`.
"""
N = self.cats.N # Number of catalogues
matches = [None] * N
# Loop over each catalogue
for i in trange(N) if verbose else range(N):
matches[i] = self.cross_knn_position_single(
i, nmult, dlogmass, mass_kind=mass_kind, overlap=overlap,
overlapper_kwargs=overlapper_kwargs,
select_initial=select_initial,
remove_nooverlap=remove_nooverlap, verbose=verbose)
return matches
return cat0["index"], match_indxs, cross
###############################################################################
@ -1002,7 +909,8 @@ def dist_percentile(dist, qs, distmax=0.075):
return x
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True):
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., enforce_in32=False,
verbose=True):
"""
Find all neigbours of a trained KNN model whose center of mass separation
is less than `nmult` times the sum of their respective radii.
@ -1020,13 +928,14 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True):
Patch radii corresponding to clumps used to train `knn`.
nmult : float, optional
Multiple of the sum of two radii below which to consider a match.
enforce_int32 : bool, optional
Whether to enforce 32-bit integer precision of the indices. By default
`False`.
verbose : bool, optional
Verbosity flag.
Returns
-------
dists : 1-dimensional array `(n_samples,)` of arrays
Distance from `X` to matches from `knn`.
indxs : 1-dimensional array `(n_samples,)` of arrays
Matches to `X` from `knn`.
"""
@ -1035,7 +944,6 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True):
assert radiusKNN.size == knn.n_samples_fit_ # patchknn matches the knn?
nsamples = X.shape[0]
dists = [None] * nsamples # Initiate lists
indxs = [None] * nsamples
patchknn_max = numpy.max(radiusKNN) # Maximum for completeness
@ -1046,9 +954,8 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True):
# Note that `dist` and `indx` are wrapped in 1-element arrays
# so we take the first item where appropriate
mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
dists[i] = dist[0][mask]
indxs[i] = indx[0][mask]
if enforce_in32:
indxs[i] = indxs[i].astype(numpy.int32)
dists = numpy.asarray(dists, dtype=object) # Turn into array of arrays
indxs = numpy.asarray(indxs, dtype=object)
return dists, indxs
return numpy.asarray(indxs, dtype=object)

View file

@ -14,9 +14,8 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa
from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps, # noqa
clumps_pos2cell) # noqa
from .make_cat import (HaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa
from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups, SDSS) # noqa
from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa
from .summaries import PKReader # noqa
from .summaries import (PKReader, OverlapReader, binned_resample_mean) # noqa

View file

@ -17,10 +17,8 @@ Functions to read in the particle and clump files.
"""
import numpy
from os.path import join
from tqdm import trange
from copy import deepcopy
from sklearn.neighbors import NearestNeighbors
from .readsim import (read_mmain, read_initcm)
from .readsim import (CSiBORGPaths, read_mmain, read_initcm)
from ..utils import (flip_cols, add_columns)
from ..units import (BoxUnits, cartesian_to_radec)
@ -47,7 +45,11 @@ class HaloCatalogue:
_positions = None
_positions0 = None
def __init__(self, paths, min_m500=None, max_dist=None):
def __init__(self, nsim, min_m500=None, max_dist=None):
# Set up paths
paths = CSiBORGPaths(n_sim=nsim)
paths.n_snap = paths.get_maximum_snapshot()
self._paths = paths
self._box = BoxUnits(paths)
min_m500 = 0 if min_m500 is None else min_m500
max_dist = numpy.infty if max_dist is None else max_dist
@ -378,99 +380,6 @@ class HaloCatalogue:
return self._data[key]
class CombinedHaloCatalogue:
r"""
A combined halo catalogue, containing `HaloCatalogue` for each IC
realisation at the latest redshift.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths-handling object. Doest not have to have set set `n_sim`
and `n_snap`.
min_m500 : float, optional
The minimum :math:`M_{rm 500c} / M_\odot` mass. By default no
threshold.
max_dist : float, optional
The maximum comoving distance of a halo. By default no upper limit.
verbose : bool, optional
Verbosity flag for reading the catalogues.
"""
_n_sims = None
_n_snaps = None
_cats = None
def __init__(self, paths, min_m500=None, max_dist=None, verbose=True):
# Read simulations and their maximum snapshots
# NOTE later change this back to all simulations
# self._n_sims = [7468, 7588, 8020, 8452, 8836]
self._n_sims = [7468, 7588]
# self._n_sims = paths.ic_ids
n_snaps = [paths.get_maximum_snapshot(i) for i in self._n_sims]
self._n_snaps = numpy.asanyarray(n_snaps)
cats = [None] * self.N
for i in trange(self.N) if verbose else range(self.N):
paths = deepcopy(paths)
paths.set_info(self.n_sims[i], self.n_snaps[i])
cats[i] = HaloCatalogue(paths, min_m500, max_dist)
self._cats = cats
@property
def N(self):
"""
Number of IC realisations in this combined catalogue.
Returns
-------
N : int
Number of catalogues.
"""
return len(self.n_sims)
@property
def n_sims(self):
"""
IC realisations CSiBORG identifiers.
Returns
-------
ids : 1-dimensional array
Array of IDs.
"""
return self._n_sims
@property
def n_snaps(self):
"""
Snapshot numbers corresponding to `self.n_sims`.
Returns
-------
n_snaps : 1-dimensional array
Array of snapshot numbers.
"""
return self._n_snaps
@property
def cats(self):
"""
Catalogues associated with this object.
Returns
-------
cats : list of `HaloCatalogue`
Catalogues.
"""
return self._cats
def __getitem__(self, n):
if n > self.N:
raise ValueError("Catalogue count is {}, requested catalogue {}."
.format(self.N, n))
return self.cats[n]
def concatenate_clumps(clumps):
"""
Concatenate an array of clumps to a single array containing all particles.

View file

@ -88,7 +88,8 @@ class CSiBORGPaths:
self.to_new = to_new
if n_sim is not None and n_snap is not None:
self.set_info(n_sim, n_snap)
# "/mnt/extraspace/rstiskalek/csiborg/initmatch/clump_cm_7468.npy"
if n_sim is not None:
self.n_sim = n_sim
@property
def srcdir(self):

View file

@ -166,3 +166,268 @@ class PKReader:
k += 1
return ks, xpks
class OverlapReader:
"""
TODO: docs
"""
def __init__(self, nsim0, nsimx, cat0, catx, fskel=None):
if fskel is None:
fskel = "/mnt/extraspace/rstiskalek/csiborg/overlap/"
fskel += "cross_{}_{}.npz"
self._data = numpy.load(fskel.format(nsim0, nsimx), allow_pickle=True)
self._set_cats(nsim0, nsimx, cat0, catx)
@property
def nsim0(self):
"""
The reference simulation ID.
Returns
-------
nsim0 : int
"""
return self._nsim0
@property
def nsimx(self):
"""
The cross simulation ID.
Returns
-------
nsimx : int
"""
return self._nsimx
@property
def cat0(self):
"""
The reference halo catalogue.
Returns
-------
cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
"""
return self._cat0
@property
def catx(self):
"""
The cross halo catalogue.
Returns
-------
catx : :py:class:`csiborgtools.read.HaloCatalogue`
"""
return self._catx
def _set_cats(self, nsim0, nsimx, cat0, catx):
"""
Set the simulation IDs and catalogues.
Parameters
----------
nsim0, nsimx : int
The reference and cross simulation IDs.
cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogues corresponding to `nsim0` and `nsimx`, respectively.
Returns
-------
None
"""
assert (nsim0 == cat0.paths.n_sim) & (nsimx == catx.paths.n_sim)
self._nsim0 = nsim0
self._nsimx = nsimx
self._cat0 = cat0
self._catx = catx
@property
def indxs(self):
"""
Indices of halos from the reference catalogue.
Returns
-------
indxs : 1-dimensional array
"""
return self._data["indxs"]
@property
def match_indxs(self):
"""
Indices of halos from the cross catalogue.
Returns
-------
match_indxs : array of 1-dimensional arrays of shape `(nhalos, )`
"""
return self._data["match_indxs"]
@property
def overlap(self):
"""
Pair overlap of halos between the reference and cross simulations.
Returns
-------
ovelap : array of 1-dimensional arrays of shape `(nhalos, )`
"""
return self._data["cross"]
def dist(self, in_initial, norm=None):
"""
Final snapshot pair distances.
Parameters
----------
in_initial : bool
Whether to calculate separation in the initial or final snapshot.
Returns
-------
dist : array of 1-dimensional arrays of shape `(nhalos, )`
"""
assert norm is None or norm in ("r200", "ref_patch", "sum_patch")
# Positions either in the initial or final snapshot
if in_initial:
pos0 = self.cat0.positions0
posx = self.catx.positions0
else:
pos0 = self.cat0.positions
posx = self.catx.positions
dist = [None] * self.indxs.size
for n, ind in enumerate(self.match_indxs):
dist[n] = numpy.linalg.norm(pos0[n, :] - posx[ind, :], axis=1)
# Normalisation
if norm == "r200":
dist[n] /= self.cat0["r200"][n]
if norm == "ref_patch":
dist[n] /= self.cat0["lagpatch"][n]
if norm == "sum_patch":
dist[n] /= (self.cat0["lagpatch"][n]
+ self.catx["lagpatch"][ind])
return numpy.array(dist, dtype=object)
def mass_ratio(self, mass_kind="totpartmass", in_log=True, in_abs=True):
"""
Pair mass ratio.
Parameters
----------
mass_kind : str, optional
The mass kind whose ratio is to be calculated. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
in_log : bool, optional
Whether to return logarithm of the ratio. By default `True`.
in_abs : bool, optional
Whether to return absolute value of the ratio. By default `True`.
Returns
-------
ratio : array of 1-dimensional arrays of shape `(nhalos, )`
"""
mass0 = self.cat0[mass_kind]
massx = self.catx[mass_kind]
ratio = [None] * self.indxs.size
for n, ind in enumerate(self.match_indxs):
ratio[n] = mass0[n] / massx[ind]
if in_log:
ratio[n] = numpy.log10(ratio[n])
if in_abs:
ratio[n] = numpy.abs(ratio[n])
return numpy.array(ratio, dtype=object)
def summed_overlap(self):
"""
Summed overlap of each halo in the reference simulation with the cross
simulation.
Parameters
----------
None
Returns
-------
summed_overlap : 1-dimensional array of shape `(nhalos, )`
"""
return numpy.array([numpy.sum(cross) for cross in self._data["cross"]])
def copy_per_match(self, par):
"""
Make an array like `self.match_indxs` where each of its element is an
equal value array of the pair clump property from the reference
catalogue.
Parameters
----------
par : str
Property to be copied over.
Returns
-------
out : 1-dimensional array of shape `(nhalos, )`
"""
out = [None] * self.indxs.size
for n, ind in enumerate(self.match_indxs):
out[n] = numpy.ones(ind.size) * self.cat0[par][n]
return numpy.array(out, dtype=object)
def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42):
"""
Calculate binned average of `y` by MC resampling. Each point is kept with
probability `prob`.
Parameters
----------
x : 1-dimensional array
Independent variable.
y : 1-dimensional array
Dependent variable.
prob : 1-dimensional array
Sample probability.
bins : 1-dimensional array
Bin edges to bin `x`.
nresample : int, optional
Number of MC resamples. By default 50.
seed : int, optional
Random seed.
Returns
-------
bin_centres : 1-dimensional array
Bin centres.
stat : 2-dimensional array
Mean and its standard deviation from MC resampling.
"""
assert (x.ndim == 1) & (x.shape == y.shape == prob.shape)
gen = numpy.random.RandomState(seed)
loop_stat = numpy.full(nresample, numpy.nan) # Preallocate loop arr
stat = numpy.full((bins.size - 1, 2), numpy.nan) # Preallocate output
for i in range(bins.size - 1):
mask = (x > bins[i]) & (x <= bins[i + 1])
nsamples = numpy.sum(mask)
loop_stat[:] = numpy.nan # Clear it
for j in range(nresample):
loop_stat[j] = numpy.mean(y[mask][gen.rand(nsamples) < prob[mask]])
stat[i, 0] = numpy.mean(loop_stat)
stat[i, 1] = numpy.std(loop_stat)
bin_centres = (bins[1:] + bins[:-1]) / 2
return bin_centres, stat

View file

@ -32,35 +32,25 @@ import utils
parser = ArgumentParser()
parser.add_argument("--nmult", type=float)
parser.add_argument("--overlap", type=lambda x: bool(strtobool(x)))
parser.add_argument("--select_initial", type=lambda x: bool(strtobool(x)))
parser.add_argument("--fast_neighbours", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()
# File paths
ic = 7468
fperm = join(utils.dumpdir, "overlap", "cross_{}.npy")
paths = csiborgtools.read.CSiBORGPaths(to_new=False)
paths.set_info(ic, paths.get_maximum_snapshot(ic))
nsim0 = 7468
nsimx = 7588
fperm = join(utils.dumpdir, "overlap", "cross_{}_{}.npy")
print("{}: loading catalogues.".format(datetime.now()), flush=True)
cat = csiborgtools.read.CombinedHaloCatalogue(paths)
matcher = csiborgtools.match.RealisationsMatcher(cat)
nsim0 = cat.n_sims[0]
nsimx = cat.n_sims[1]
cat0 = csiborgtools.read.HaloCatalogue(nsim0)
catx = csiborgtools.read.HaloCatalogue(nsimx)
matcher = csiborgtools.match.RealisationsMatcher()
print("{}: crossing the simulations.".format(datetime.now()), flush=True)
out = matcher.cross_knn_position_single(
0, nmult=args.nmult, dlogmass=2., overlap=args.overlap,
select_initial=args.select_initial, fast_neighbours=args.fast_neighbours)
indxs, match_indxs, cross = matcher.cross(
nsim0, nsimx, cat0, catx, overlap=False)
# Dump the result
fout = fperm.format(nsim0)
fout = fperm.format(nsim0, nsimx)
print("Saving results to `{}`.".format(fout), flush=True)
with open(fout, "wb") as f:
numpy.save(fout, out)
numpy.savez(fout, indxs=indxs, match_indxs=match_indxs, cross=cross)
print("All finished.", flush=True)