csiborgtools/csiborgtools/match/match.py
Richard Stiskalek e972f8e3f2
Add pynbody and other support (#92)
* Simplify box units

* Move old scripts

* Add printing

* Update readers

* Disable boundscheck

* Add new ordering

* Clean up imports

* Enforce dtype and add mass to quijote

* Simplify print statements

* Fix little typos

* Fix key bug

* Bug fixing

* Delete boring comments

* Improve ultimate clumps for PHEW

* Delete boring comments

* Add basic reading

* Remove 0th index HID

* Add flipping of X and Z

* Updates to halo catalogues

* Add ordered caching

* Fix flipping

* Add new flags

* Fix PHEW empty clumps

* Stop over-wrriting

* Little improvements to angular neighbours

* Add catalogue masking

* Change if-else statements

* Cache only filtered data

* Add PHEW cats

* Add comments

* Sort imports

* Get Quijote workign

* Docs

* Add HMF calculation

* Move to old

* Fix angular

* Add great circle distance

* Update imports

* Update impotrts

* Update docs

* Remove unused import

* Fix a quick bug

* Update compatibility

* Rename files

* Renaming

* Improve compatiblity

* Rename snapsht

* Fix snapshot bug

* Update interface

* Finish updating interface

* Update all paths

* Add old scripts

* Add basic halo

* Update imports

* Improve snapshot processing

* Update ordering

* Fix how CM positions accessed

* Add merger paths

* Add imports

* Add merger reading

* Add making a merger tree

* Add a basic merger tree reader

* Add imports

* Add main branch walking + comments + debuggin

* Get tree running

* Add working merger tree walking along main branch

* Add units conversion for merger data

* Add hid_to_array_index

* Update merger tree

* Add mergertree mass to PHEWcat

* Edit comments

* Add this to track changes...

* Fix a little bug

* Add mergertree mass

* Add cache clearing

* Improve summing substructure code

* Littbe bug

* Little updates to the merger tree reader

* Update .giignore

* Add box selection

* Add optional deletingf of a group

* add to keep track of changes

* Update changes

* Remove

* Add manual tracker

* Fix bug

* Add m200c_to_r200c

* Add manual halo tracking

* Remove skipped snapshots

* update cosmo params to match csiborg

* remove old comments

* Add SDSSxALFALFA

* Fix bugs

* Rename

* Edit paths

* Updates

* Add comments

* Add comment

* Add hour conversion

* Add imports

* Add new observation class

* Add selection

* Add imports

* Fix small bug

* Add field copying for safety

* Add matching to survey without masking

* Add P(k) calculation

* Add nb

* Edit comment

* Move files

* Remove merger import

* Edit setup.yp

* Fix typo

* Edit import warnigns

* update nb

* Update README

* Update README

* Update README

* Add skeleton

* Add skeleton
2023-12-07 14:23:32 +00:00

1048 lines
38 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Support for matching halos between CSiBORG IC realisations based on their
Lagrangian patch overlap.
"""
from abc import ABC
from datetime import datetime
from functools import lru_cache
from math import ceil
import numpy
from numba import jit
from scipy.ndimage import gaussian_filter
from tqdm import tqdm, trange
class BaseMatcher(ABC):
"""Base class for `RealisationsMatcher` and `ParticleOverlap`."""
_box_size = None
_bckg_halfsize = None
@property
def box_size(self):
"""Number of cells in the box."""
if self._box_size is None:
raise RuntimeError("`box_size` has not been set.")
return self._box_size
@box_size.setter
def box_size(self, value):
if not (isinstance(value, int) and value > 0):
raise ValueError("`box_size` must be a positive integer.")
if not value != 0 and (value & (value - 1) == 0):
raise ValueError("`box_size` must be a power of 2.")
self._box_size = value
@property
def bckg_halfsize(self):
"""
Background half-size for density field calculation. This is the
grid distance from the center of the box to each side over which to
evaluate the background density field. Must be less than or equal to
half the box size.
"""
if self._bckg_halfsize is None:
raise RuntimeError("`bckg_halfsize` has not been set.")
return self._bckg_halfsize
@bckg_halfsize.setter
def bckg_halfsize(self, value):
if not (isinstance(value, int) and value > 0):
raise ValueError("`bckg_halfsize` must be a positive integer.")
if value > self.box_size // 2:
raise ValueError("`bckg_halfsize` must be <= half the box size.")
self._bckg_halfsize = value
###############################################################################
# Realisations matcher for calculating overlaps #
###############################################################################
class RealisationsMatcher(BaseMatcher):
"""
Matches haloes between IC realisations.
Parameters
----------
box_size : int
Number of cells in the box.
bckg_halfsize : int
Background half-size for density field calculation. This is the
grid distance from the center of the box to each side over which to
evaluate the background density field. Must be less than or equal to
half the box size.
nmult : float or int, optional
Multiplier of the sum of the initial Lagrangian patch sizes of a halo
pair. Determines the range within which neighbors are returned.
dlogmass : float, optional
Tolerance on the absolute logarithmic mass difference of potential
matches.
mass_kind : str, optional
Mass kind whose similarity is to be checked. Must be a valid key in the
halo catalogue.
"""
_nmult = None
_dlogmass = None
_mass_kind = None
_overlapper = None
def __init__(self, box_size, bckg_halfsize, nmult=1.0, dlogmass=2.0,
mass_kind="totpartmass"):
self.box_size = box_size
self.bckg_halfsize = bckg_halfsize
self.nmult = nmult
self.dlogmass = dlogmass
self.mass_kind = mass_kind
self._overlapper = ParticleOverlap(box_size, bckg_halfsize)
@property
def nmult(self):
"""
Multiplier of the sum of the initial Lagrangian patch sizes of a halo
pair. Determines the range within which neighbors are returned.
"""
return self._nmult
@nmult.setter
def nmult(self, value):
if not (value > 0 and isinstance(value, (int, float))):
raise ValueError("`nmult` must be a positive integer or float.")
self._nmult = float(value)
@property
def dlogmass(self):
"""
Tolerance on the absolute logarithmic mass difference of potential
matches.
"""
return self._dlogmass
@dlogmass.setter
def dlogmass(self, value):
if not (value > 0 and isinstance(value, (float, int))):
raise ValueError("`dlogmass` must be a positive float.")
self._dlogmass = float(value)
@property
def mass_kind(self):
"""
Mass kind whose similarity is to be checked. Must be a valid key in the
halo catalogue.
"""
return self._mass_kind
@mass_kind.setter
def mass_kind(self, value):
if not isinstance(value, str):
raise ValueError("`mass_kind` must be a string.")
self._mass_kind = value
@property
def overlapper(self):
"""The overlapper object."""
return self._overlapper
def cross(self, cat0, catx, delta_bckg, cache_size=10000, verbose=True):
r"""
Find all neighbours whose CM separation is less than `nmult` times the
sum of their initial Lagrangian patch sizes and calculate their
overlap. Enforces that the neighbours are similar in mass up to
`dlogmass` dex.
Parameters
----------
cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the reference simulation.
catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the cross simulation.
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Calculated on a grid determined by `bckg_halfsize`.
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional
Iterator verbosity flag. By default `true`.
Returns
-------
match_indxs : 1-dimensional array of arrays
The outer array corresponds to halos in the reference catalogue,
the inner array corresponds to the array positions of matches in
the cross catalogue.
overlaps : 1-dimensional array of arrays
Overlaps with the cross catalogue. Follows similar pattern as
`match_indxs`.
"""
# We begin by querying the kNN for the nearest neighbours of each halo
# in the reference simulation from the cross simulation in the initial
# snapshot.
match_indxs = radius_neighbours(
catx.knn(in_initial=True, subtract_observer=False, periodic=True),
cat0.position(in_initial=True),
radiusX=cat0["lagpatch_size"], radiusKNN=catx["lagpatch_size"],
nmult=self.nmult, enforce_int32=True, verbose=verbose)
# We next remove neighbours whose mass is too large/small.
if self.dlogmass is not None:
for i, indx in enumerate(match_indxs):
# |log(M1 / M2)|
p = self.mass_kind
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
match_indxs[i] = match_indxs[i][aratio < self.dlogmass]
# We will cache the halos from the cross simulation to speed up the I/O
@lru_cache(maxsize=cache_size)
def load_cached_halox(hid):
return load_processed_halo(hid, catx, nshift=0,
ncells=self.box_size)
iterator = tqdm(
cat0["index"],
desc=f"{datetime.now()}: calculating NGP overlaps",
disable=not verbose
)
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
for i, k0 in enumerate(iterator):
# If we have no matches continue to the next halo.
matches = match_indxs[i]
if matches.size == 0:
continue
# Next, we find this halo's particles, total mass, minimum and
# maximum cells and convert positions to cells.
pos0, mass0, totmass0, mins0, maxs0 = load_processed_halo(
k0, cat0, nshift=0, ncells=self.box_size)
# We now loop over matches of this halo and calculate their
# overlap, storing them in `_cross`.
_cross = numpy.full(matches.size, numpy.nan, dtype=numpy.float32)
for j, kx in enumerate(catx["index"][matches]):
posx, massx, totmassx, minsx, maxsx = load_cached_halox(kx)
_cross[j] = self.overlapper(
pos0, posx, mass0, massx, delta_bckg, mins0, maxs0,
minsx, maxsx, totmass1=totmass0, totmass2=totmassx)
cross[i] = _cross
# We remove all matches that have zero overlap to save space.
mask = cross[i] > 0
match_indxs[i] = match_indxs[i][mask]
cross[i] = cross[i][mask]
# And finally we sort the matches by their overlap.
ordering = numpy.argsort(cross[i])[::-1]
match_indxs[i] = match_indxs[i][ordering]
cross[i] = cross[i][ordering]
cross = numpy.asanyarray(cross, dtype=object)
return match_indxs, cross
def smoothed_cross(self, cat0, catx, delta_bckg, match_indxs,
smooth_kwargs, cache_size=10000, verbose=True):
r"""
Calculate the smoothed overlaps for pairs previously identified via
`self.cross(...)` to have a non-zero NGP overlap.
Parameters
----------
cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the reference simulation.
catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the cross simulation.
delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Calculated on a grid determined by `bckg_halfsize`.
match_indxs : 1-dimensional array of arrays
Indices of halo counterparts in the cross catalogue.
smooth_kwargs : kwargs
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional
Iterator verbosity flag. By default `true`.
Returns
-------
overlaps : 1-dimensional array of arrays
"""
nshift = read_nshift(smooth_kwargs)
@lru_cache(maxsize=cache_size)
def load_cached_halox(hid):
return load_processed_halo(hid, catx, nshift=nshift,
ncells=self.box_size)
iterator = tqdm(
cat0["index"],
desc=f"{datetime.now()}: calculating smoothed overlaps",
disable=not verbose
)
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
for i, k0 in enumerate(iterator):
pos0, mass0, __, mins0, maxs0 = load_processed_halo(
k0, cat0, nshift=nshift, ncells=self.box_size)
# Now loop over the matches and calculate the smoothed overlap.
_cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32)
for j, kx in enumerate(catx["index"][match_indxs[i]]):
posx, massx, __, minsx, maxsx = load_cached_halox(kx)
_cross[j] = self.overlapper(pos0, posx, mass0, massx,
delta_bckg, mins0, maxs0, minsx,
maxsx, smooth_kwargs=smooth_kwargs)
cross[i] = _cross
return numpy.asanyarray(cross, dtype=object)
###############################################################################
# Overlap calculator #
###############################################################################
class ParticleOverlap(BaseMatcher):
r"""
Halo overlaps calculator. The density field calculation is based on the
nearest grid position particle assignment scheme, with optional Gaussian
smoothing.
Parameters
----------
box_size : int
Number of cells in the box.
bckg_halfsize : int
Background half-size for density field calculation. This is the
grid distance from the center of the box to each side over which to
evaluate the background density field. Must be less than or equal to
half the box size.
"""
def __init__(self, box_size, bckg_halfsize):
self.box_size = box_size
self.bckg_halfsize = bckg_halfsize
def make_bckg_delta(self, cat, delta=None, verbose=False):
"""
Calculate a NGP density field of particles belonging to halos of a
halo catalogue `halo_cat`. Particles are only counted within the
high-resolution region of the simulation. Smoothing must be applied
separately.
Parameters
----------
cat : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the reference simulation.
delta : 3-dimensional array, optional
Array to store the density field. If `None` a new array is
created.
verbose : bool, optional
Verbosity flag for loading the halos' particles.
Returns
-------
delta : 3-dimensional array
"""
cellmin = self.box_size // 2 - self.bckg_halfsize
cellmax = self.box_size // 2 + self.bckg_halfsize
ncells = cellmax - cellmin
# We then pre-allocate the density field/check it is of the right shape
if delta is None:
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
else:
assert ((delta.shape == (ncells,) * 3)
& (delta.dtype == numpy.float32))
iterator = tqdm(
cat["index"],
desc=f"{datetime.now()} Calculating the background field",
disable=not verbose
)
for hid in iterator:
pos = cat.halo_particles(hid, "pos", in_initial=True)
if pos is None:
continue
mass = cat.halo_particles(hid, "mass", in_initial=True)
pos = pos2cell(pos, self.box_size)
# We mask out particles outside the cubical high-resolution region
mask = numpy.all((cellmin <= pos) & (pos < cellmax), axis=1)
pos = pos[mask]
fill_delta(delta, pos[:, 0], pos[:, 1], pos[:, 2],
*(cellmin,) * 3, mass[mask])
return delta
def make_delta(self, pos, mass, mins=None, maxs=None, subbox=False,
smooth_kwargs=None):
"""
Calculate a NGP density field of a halo on a regular grid. Optionally
can be smoothed with a Gaussian kernel.
Parameters
----------
pos : 2-dimensional array
Halo's particles position array.
mass : 1-dimensional array
Halo's particles mass array.
mins, maxs : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension.
subbox : bool, optional
Whether to calculate the field on a grid enclosing the halo.
smooth_kwargs : kwargs, optional
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
If `None` no smoothing is applied.
Returns
-------
delta : 3-dimensional array
"""
nshift = read_nshift(smooth_kwargs)
cells = pos2cell(pos, self.box_size)
if not (mins is None and maxs is None):
assert mins.dtype.char in numpy.typecodes["AllInteger"]
assert maxs.dtype.char in numpy.typecodes["AllInteger"]
if subbox:
if mins is None or maxs is None:
mins, maxs = get_halo_cell_limits(cells, self.box_size, nshift)
ncells = maxs - mins + 1
else:
mins = [0, 0, 0]
ncells = (self.box_size, ) * 3
delta = numpy.zeros(ncells, dtype=numpy.float32)
fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
if smooth_kwargs is not None:
gaussian_filter(delta, output=delta, **smooth_kwargs)
return delta
def make_deltas(self, pos1, pos2, mass1, mass2, mins1=None, maxs1=None,
mins2=None, maxs2=None, smooth_kwargs=None):
"""
Calculate a NGP density fields of two halos on a grid that encloses
them both. Optionally can be smoothed with a Gaussian kernel.
Parameters
----------
pos1 : 2-dimensional array
Particle positions of the first halo.
pos2 : 2-dimensional array
Particle positions of the second halo.
mass1 : 1-dimensional array
Particle masses of the first halo.
mass2 : 1-dimensional array
Particle masses of the second halo.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `halo1`.
Optional.
mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `halo2`.
Optional.
smooth_kwargs : kwargs, optional
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
If `None` no smoothing is applied.
Returns
-------
delta1, delta2 : 3-dimensional arrays
Density arrays of `halo1` and `halo2`, respectively.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
nonzero : 2-dimensional array
Indices where the lower mass halo has a non-zero density.
Calculated only if no smoothing is applied, otherwise `None`.
"""
nshift = read_nshift(smooth_kwargs)
pos1 = pos2cell(pos1, self.box_size)
pos2 = pos2cell(pos2, self.box_size)
xc1, yc1, zc1 = [pos1[:, i] for i in range(3)]
xc2, yc2, zc2 = [pos2[:, i] for i in range(3)]
if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)):
# Minimum cell number of the two halos along each dimension
xmin = min(numpy.min(xc1), numpy.min(xc2)) - nshift
ymin = min(numpy.min(yc1), numpy.min(yc2)) - nshift
zmin = min(numpy.min(zc1), numpy.min(zc2)) - nshift
# Make sure shifting does not go beyond boundaries
xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)]
# Maximum cell number of the two halos along each dimension
xmax = max(numpy.max(xc1), numpy.max(xc2)) + nshift
ymax = max(numpy.max(yc1), numpy.max(yc2)) + nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + nshift
# Make sure shifting does not go beyond boundaries
xmax, ymax, zmax = [min(px, self.box_size - 1)
for px in (xmax, ymax, zmax)]
else:
xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
cellmins = (xmin, ymin, zmin)
ncells = (xmax - xmin + 1, ymax - ymin + 1, zmax - zmin + 1,)
delta1 = numpy.zeros(ncells, dtype=numpy.float32)
delta2 = numpy.zeros(ncells, dtype=numpy.float32)
# If no smoothing figure out the nonzero indices of the smaller halo
if smooth_kwargs is None:
if pos1.shape[0] > pos2.shape[0]:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
nonzero = fill_delta_indxs(delta2, xc2, yc2, zc2, *cellmins,
mass2)
else:
nonzero = fill_delta_indxs(delta1, xc1, yc1, zc1, *cellmins,
mass1)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
else:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
nonzero = None
if smooth_kwargs is not None:
gaussian_filter(delta1, output=delta1, **smooth_kwargs)
gaussian_filter(delta2, output=delta2, **smooth_kwargs)
return delta1, delta2, cellmins, nonzero
def __call__(self, pos1, pos2, mass1, mass2, delta_bckg,
mins1=None, maxs1=None, mins2=None, maxs2=None,
totmass1=None, totmass2=None, smooth_kwargs=None):
"""
Calculate overlap between `halo1` and `halo2`. See
`calculate_overlap(...)` for further information. Be careful so that
the background density field is calculated with the same
`smooth_kwargs`. If any smoothing is applied then loops over the full
density fields, otherwise only over the non-zero cells of the lower
mass halo.
Parameters
----------
pos1 : 2-dimensional array
Particle positions of the first halo.
pos2 : 2-dimensional array
Particle positions of the second halo.
mass1 : 1-dimensional array
Particle masses of the first halo.
mass2 : 1-dimensional array
Particle masses of the second halo.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `halo1`.
Optional.
mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `halo2`,
optional.
totmass1, totmass2 : floats, optional
Total mass of `halo1` and `halo2`, respectively. Must be provided
if `loop_nonzero` is `True`.
smooth_kwargs : kwargs, optional
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
If `None` no smoothing is applied.
Returns
-------
overlap : float
"""
delta1, delta2, cellmins, nonzero = self.make_deltas(
pos1, pos2, mass1, mass2, mins1, maxs1, mins2, maxs2,
smooth_kwargs=smooth_kwargs)
if smooth_kwargs is not None:
return calculate_overlap(delta1, delta2, cellmins, delta_bckg,
self.box_size, self.bckg_halfsize)
totmass1 = numpy.sum(mass1) if totmass1 is None else totmass1
totmass2 = numpy.sum(mass2) if totmass2 is None else totmass2
return calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg,
nonzero, totmass1, totmass2,
self.box_size, self.bckg_halfsize)
###############################################################################
# Halo matching supplementary functions #
###############################################################################
def pos2cell(pos, ncells):
if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos
return numpy.floor(pos * ncells).astype(numpy.int32)
def read_nshift(smooth_kwargs):
return 0 if smooth_kwargs is None else ceil(3 * smooth_kwargs["sigma"])
@jit(nopython=True)
def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
"""
Fill array `delta` by adding `weights` to the specified cells.
Parameters
----------
delta : 3-dimensional array
Grid to be filled with weights.
xcell, ycell, zcell : 1-dimensional arrays
Indices where to assign `weights`.
xmin, ymin, zmin : ints
Minimum cell IDs of particles.
weights : 1-dimensional arrays
Weights
Returns
-------
None
"""
n_particles = xcell.size
for n in range(n_particles):
i, j, k = xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin
delta[i, j, k] += weights[n]
@jit(nopython=True)
def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
"""
Fill array `delta` by adding `weights` to the specified cells. Returns
the indices where `delta` was assigned a value.
Parameters
----------
delta : 3-dimensional array
Grid to be filled with weights.
xcell, ycell, zcell : 1-dimensional arrays
Indices where to assign `weights`.
xmin, ymin, zmin : ints
Minimum cell IDs of particles.
weights : 1-dimensional arrays
Weights.
Returns
-------
cells : 1-dimensional array
Indices where `delta` was assigned a value.
"""
n_particles = xcell.size
cells = numpy.full((n_particles, 3), numpy.nan, numpy.int32)
count_nonzero = 0
for n in range(n_particles):
i, j, k = xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin
if delta[i, j, k] == 0:
cells[count_nonzero] = i, j, k
count_nonzero += 1
delta[i, j, k] += weights[n]
return cells[:count_nonzero]
@jit(nopython=True)
def get_halo_cell_limits(pos, ncells, nshift=0):
"""
Get the lower and upper limit of a halo's cell numbers. Optionally,
floating point positions are also supported. However, in this case `nshift`
must be 0. Be careful, no error will be raised.
Parameters
----------
pos : 2-dimensional array
Halo particle array. The first three columns must be the cell numbers
corresponding to `x`, `y`, `z`.
ncells : int
Number of grid cells of the box along a single dimension.
nshift : int, optional
Lower and upper shift of the halo limits.
Returns
-------
mins, maxs : 1-dimensional arrays of shape `(3, )`
"""
dtype = pos.dtype
mins = numpy.full(3, numpy.nan, dtype=dtype)
maxs = numpy.full(3, numpy.nan, dtype=dtype)
for i in range(3):
mins[i] = max(numpy.min(pos[:, i]) - nshift, 0)
maxs[i] = min(numpy.max(pos[:, i]) + nshift, ncells - 1)
return mins, maxs
@jit(nopython=True, boundscheck=False)
def calculate_overlap(delta1, delta2, cellmins, delta_bckg, box_size,
bckg_halfsize):
"""
Calculate overlap between two halos' density fields on the same grid.
Parameters
----------
delta1, delta2 : 3D array
Density fields of the first and second halos, respectively.
cellmins : tuple (len=3)
Lower cell ID in the full box.
delta_bckg : 3D array
Combined background density field of reference and cross simulations
on `bckg_halfsize` grid.
box_size : int
Cell count in the box.
bckg_halfsize : int
Grid distance from box center for background density.
≤ 0.5 * box_size.
Returns
-------
overlap : float
"""
totmass = 0.0
intersect = 0.0
bckg_size = 2 * bckg_halfsize
bckg_offset = box_size // 2 - bckg_halfsize
i0, j0, k0 = cellmins
imax, jmax, kmax = delta1.shape
for i in range(imax):
ii = i0 + i - bckg_offset
ishighres = 0 <= ii < bckg_size
for j in range(jmax):
jj = j0 + j - bckg_offset
ishighres &= 0 <= jj < bckg_size
for k in range(kmax):
kk = k0 + k - bckg_offset
ishighres &= 0 <= kk < bckg_size
m1, m2 = delta1[i, j, k], delta2[i, j, k]
totmass += m1 + m2
prod = 2 * m1 * m2
if prod > 0: # If both cells are non-zero
bcgk = delta_bckg[ii, jj, kk] if ishighres else m1 + m2
intersect += prod / bcgk if bcgk > 0 else prod / (m1 + m2)
return intersect / (totmass - intersect)
@jit(nopython=True, boundscheck=False)
def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
mass1, mass2, box_size, bckg_halfsize):
"""
Calculate overlap of two halos' density fields on the same grid.
Parameters
----------
delta1, delta2 : 3D array
Density fields of the first and second halos, respectively.
cellmins : tuple (len=3)
Lower cell ID in the full box.
delta_bckg : 3D array
Combined background density from reference and cross simulations
on `bckg_halfsize` grid.
nonzero : 2D array (shape: (n_cells, 3))
Non-zero cells for the lower mass halo (from `fill_delta_indxs`).
mass1, mass2 : float, optional
Halos' total masses. Calculated from density if not provided.
box_size : int
Cell count in the box.
bckg_halfsize : int
Grid distance from box center for background density; ≤ 0.5 * box_size.
Returns
-------
overlap : float
"""
intersect = 0.0
bckg_size = 2 * bckg_halfsize
bckg_offset = box_size // 2 - bckg_halfsize
i0, j0, k0 = cellmins
for n in range(nonzero.shape[0]):
i, j, k = nonzero[n, :]
m1, m2 = delta1[i, j, k], delta2[i, j, k]
prod = 2 * m1 * m2
if prod > 0:
ii = i0 + i - bckg_offset # Indices of this cell in the
jj = j0 + j - bckg_offset # background density field.
kk = k0 + k - bckg_offset
ishighres = 0 <= ii < bckg_size # Is this cell is in the high
ishighres &= 0 <= jj < bckg_size # resolution region for which the
ishighres &= 0 <= kk < bckg_size # background field is calculated.
bckg = delta_bckg[ii, jj, kk] if ishighres else m1 + m2
intersect += prod / bckg if bckg > 0 else prod / (m1 + m2)
return intersect / (mass1 + mass2 - intersect)
def load_processed_halo(hid, cat, ncells, nshift):
"""
Load a processed halo from the `.h5` file. This is to be wrapped by a
cacher.
Parameters
----------
hid : int
Halo ID.
cat : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue.
ncells : int
Number of cells in the box density field.
nshift : int
Cell padding for the density field.
Returns
-------
pos : 2-dimensional array
Array of cell particle positions.
mass : 1-dimensional array
Array of particle masses.
totmass : float
Total mass of the halo.
mins : len-3 tuple
Minimum cell indices of the halo.
maxs : len-3 tuple
Maximum cell indices of the halo.
"""
pos = cat.halo_particles(hid, "pos", in_initial=True)
mass = cat.halo_particles(hid, "mass", in_initial=True)
pos = pos2cell(pos, ncells)
mins, maxs = get_halo_cell_limits(pos, ncells=ncells, nshift=nshift)
return pos, mass, numpy.sum(mass), mins, maxs
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,
enforce_int32=False, verbose=True):
"""
Find all neigbours of a fitted kNN model whose center of mass separation
is less than `nmult` times the sum of their respective radii.
Parameters
----------
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
Fitted nearest neighbour search.
X : 2-dimensional array of shape `(n_samples, 3)`
Array of halo positions from the cross simulation.
radiusX: 1-dimensional array of shape `(n_samples, )`
Lagrangian patch radii corresponding to haloes in `X`.
radiusKNN : 1-dimensional array
Lagrangian patch radii corresponding to haloes used to train the kNN.
nmult : float, optional
Multiple of the sum of two radii below which to consider a match.
enforce_int32 : bool, optional
Whether to enforce 32-bit integer precision of the indices. By default
`False`.
verbose : bool, optional
Verbosity flag.
Returns
-------
indxs : 1-dimensional array `(n_samples,)` of arrays
Matches to `X` from `knn`.
"""
if X.shape != (radiusX.size, 3):
raise ValueError("Mismatch in shape of `X` or `radiusX`")
if radiusKNN.size != knn.n_samples_fit_:
raise ValueError("Mismatch in shape of `radiusKNN` or `knn`")
patchknn_max = numpy.max(radiusKNN)
iterator = trange(len(X),
desc=f"{datetime.now()}: querying the kNN",
disable=not verbose)
indxs = [None] * len(X)
for i in iterator:
dist, indx = knn.radius_neighbors(
X[i].reshape(1, -1), radiusX[i] + patchknn_max,
sort_results=True)
# Note that `dist` and `indx` are wrapped in 1-element arrays
# so we take the first item where appropriate
mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
indxs[i] = indx[0][mask]
if enforce_int32:
indxs[i] = indxs[i].astype(numpy.int32)
return numpy.asarray(indxs, dtype=object)
def find_neighbour(nsim0, cats):
"""
Find the nearest neighbour of halos from a reference catalogue indexed
`nsim0` in the remaining simulations. Note that this must be the same
simulation suite.
Parameters
----------
nsim0 : int
Index of the reference simulation.
cats : dict
Dictionary of halo catalogues. Keys must be the simulation indices.
Returns
-------
dists : 2-dimensional array of shape `(nhalos, len(cats) - 1)`
Distances to the nearest neighbour.
cross_hindxs : 2-dimensional array of shape `(nhalos, len(cats) - 1)`
Halo indices of the nearest neighbour.
"""
assert all(isinstance(cat, type(cats[nsim0])) for cat in cats.values())
cat0 = cats[nsim0]
X = cat0.position(in_initial=False)
nhalos = X.shape[0]
num_cats = len(cats) - 1
dists = numpy.full((nhalos, num_cats), numpy.nan, dtype=numpy.float32)
cross_hindxs = numpy.full((nhalos, num_cats), numpy.nan, dtype=numpy.int32)
# Filter out the reference simulation from the dictionary
filtered_cats = {k: v for k, v in cats.items() if k != nsim0}
for i, catx in enumerate(filtered_cats):
dist, ind = catx.nearest_neighbours(X, radius=1, in_initial=False,
knearest=True)
dists[:, i] = numpy.ravel(dist)
cross_hindxs[:, i] = catx["index"][numpy.ravel(ind)]
return dists, cross_hindxs
def matching_max(cat0, catx, mass_kind, mult, periodic, overlap=None,
match_indxs=None, verbose=True):
"""
Halo matching algorithm based on [1].
Parameters
----------
cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the reference simulation.
catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the cross simulation.
mass_kind : str
Name of the mass column.
mult : float
Multiple of R200c below which to consider a match.
periodic : bool
Whether to account for periodic boundary conditions.
overlap : array of 1-dimensional arrays, optional
Overlap of halos from `cat0` with halos from `catx`. If `overlap` or
`match_indxs` is not provided, then the overlap of the identified halos
is not calculated.
match_indxs : array of 1-dimensional arrays, optional
Indicies of halos from `catx` having a non-zero overlap with halos
from `cat0`.
verbose : bool, optional
Verbosity flag.
Returns
-------
out : structured array
Array of matches. Columns are `hid0`, `hidx`, `dist`, `success`.
References
----------
[1] Maxwell L Hutt, Harry Desmond, Julien Devriendt, Adrianne Slyz; The
effect of local Universe constraints on halo abundance and clustering;
Monthly Notices of the Royal Astronomical Society, Volume 516, Issue 3,
November 2022, Pages 35923601, https://doi.org/10.1093/mnras/stac2407
"""
pos0 = cat0.position(in_initial=False)
knnx = catx.knn(in_initial=False, subtract_observer=False,
periodic=periodic)
rad0 = cat0["r200c"]
mass0 = numpy.log10(cat0[mass_kind])
massx = numpy.log10(catx[mass_kind])
assert numpy.all(numpy.isfinite(mass0)) & numpy.all(numpy.isfinite(massx))
maskx = numpy.ones(len(catx), dtype=numpy.bool_)
dtypes = [("hid0", numpy.int32),
("hidx", numpy.int32),
("mass0", numpy.float32),
("massx", numpy.float32),
("dist", numpy.float32),
("success", numpy.bool_),
("match_overlap", numpy.float32),
("max_overlap", numpy.float32),
]
out = numpy.full(len(cat0), numpy.nan, dtype=dtypes)
out["success"] = False
for i in tqdm(numpy.argsort(mass0)[::-1], desc="Matching haloes",
disable=not verbose):
hid0 = cat0["index"][i]
out[i]["hid0"] = hid0
out[i]["mass0"] = 10**mass0[i]
if not numpy.isfinite(rad0[i]):
continue
neigh_dists, neigh_inds = knnx.radius_neighbors(pos0[i].reshape(1, -1),
mult * rad0[i])
neigh_dists, neigh_inds = neigh_dists[0], neigh_inds[0]
if neigh_dists.size == 0:
continue
# Sort the neighbours by mass difference
sort_order = numpy.argsort(numpy.abs(mass0[i] - massx[neigh_inds]))
neigh_dists = neigh_dists[sort_order]
neigh_inds = neigh_inds[sort_order]
for j, neigh_ind in enumerate(neigh_inds):
if maskx[neigh_ind]:
out[i]["hidx"] = catx["index"][neigh_ind]
out[i]["dist"] = neigh_dists[j]
out[i]["massx"] = 10**massx[neigh_ind]
out[i]["success"] = True
maskx[neigh_ind] = False
if overlap is not None and match_indxs is not None:
if neigh_ind in match_indxs[i]:
k = numpy.where(neigh_ind == match_indxs[i])[0][0]
out[i]["match_overlap"] = overlap[i][k]
if len(overlap[i]) > 0:
out[i]["max_overlap"] = numpy.max(overlap[i])
break
return out