Overlapper improvements (#53)

* Store indices as f32

* Fix init sorting

* Organise imports

* Rename pathing

* Add particle loading

* Improve particle reading

* Add h5py reader

* edit particle path

* Update particles loading

* update particles loading

* Fix particle dumping

* Add init fitting

* Fix bug due to insufficient precision

* Add comment

* Add comment

* Add clumps catalogue to halo cat

* Add comment

* Make sure PIDS never forced to float32

* fix pid reading

* fix pid reading

* Update matching to work with new arrays

* Stop using cubical sub boxes, turn off nshift if no smoothing

* Improve caching

* Move function definitions

* Simplify calculation

* Add import

* Small updates to the halo

* Simplify calculation

* Simplify looping calculation

* fix tonew

* Add initial data

* Add skip condition

* Add unit conversion

* Add loading background in batches

* Rename mmain index

* Switch overlaps to h5

* Add finite lagpatch check

* fix column name

* Add verbosity flags

* Save halo IDs instead.

* Switch back to npz

* Delete nbs

* Reduce size of the box

* Load correct bckg of halos being matched

* Remove verbosity

* verbosity edits

* Change lower thresholds
This commit is contained in:
Richard Stiskalek 2023-05-06 16:52:48 +01:00 committed by GitHub
parent 1c9dacfde5
commit 56e39a8b1d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 864 additions and 3816 deletions

View file

@ -15,13 +15,9 @@
from warnings import warn
from csiborgtools.clustering.knn import kNN_CDF # noqa
from csiborgtools.clustering.utils import ( # noqa
BaseRVS,
RVSinbox,
RVSinsphere,
RVSonsphere,
normalised_marks,
)
from csiborgtools.clustering.utils import (BaseRVS, RVSinbox, # noqa
RVSinsphere, RVSonsphere,
normalised_marks)
try:
import Corrfunc # noqa

View file

@ -12,6 +12,6 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .halo import Clump, Halo # noqa
from .halo import Clump, Halo, dist_centmass # noqa
from .haloprofile import NFWPosterior, NFWProfile # noqa
from .utils import split_jobs # noqa

View file

@ -15,6 +15,7 @@
"""A clump object."""
from abc import ABC
from numba import jit
import numpy
@ -101,16 +102,21 @@ class BaseStructure(ABC):
"""
return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
@property
def r(self):
"""
Calculate the radial separation of the particles from the centre of the
object.
Radial separation of particles from the centre of the object.
Returns
-------
r : 1-dimensional array of shape `(n_particles, )`.
"""
return numpy.linalg.norm(self.pos, axis=1)
return self._get_r(self.pos)
@staticmethod
@jit(nopython=True)
def _get_r(pos):
    """
    Euclidean length of every row of `pos`, using its first three columns.

    Parameters
    ----------
    pos : 2-dimensional array
        Array whose columns 0, 1 and 2 hold the three coordinates.

    Returns
    -------
    r : 1-dimensional array
    """
    xsq = pos[:, 0]**2
    ysq = pos[:, 1]**2
    zsq = pos[:, 2]**2
    return (xsq + ysq + zsq)**0.5
def cmass(self, rmax, rmin):
"""
@ -130,7 +136,7 @@ class BaseStructure(ABC):
-------
cm : 1-dimensional array of shape `(3, )`
"""
r = self.r()
r = self.r
mask = (r >= rmin) & (r <= rmax)
return numpy.average(self.pos[mask], axis=0, weights=self["M"][mask])
@ -149,7 +155,7 @@ class BaseStructure(ABC):
-------
J : 1-dimensional array or shape `(3, )`
"""
r = self.r()
r = self.r
mask = (r >= rmin) & (r <= rmax)
pos = self.pos[mask] - self.cmass(rmax, rmin)
# Velocitities in the object CM frame
@ -172,17 +178,17 @@ class BaseStructure(ABC):
-------
enclosed_mass : float
"""
r = self.r()
r = self.r
return numpy.sum(self["M"][(r >= rmin) & (r <= rmax)])
def lambda_bullock(self, radius, npart_min=10):
def lambda_bullock(self, radmax, npart_min=10):
r"""
Bullock spin, see Eq. 5 in [1], in a radius of `radius`, which should
define to some overdensity radius.
Parameters
----------
radius : float
radmax : float
Radius in which to calculate the spin.
npart_min : int
Minimum number of enclosed particles for a radius to be
@ -198,14 +204,13 @@ class BaseStructure(ABC):
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R.
"""
mask = self.r() <= radius
mask = self.r <= radmax
if numpy.sum(mask) < npart_min:
return numpy.nan
mass = self.enclosed_mass(radius)
V = numpy.sqrt(self.box.box_G * mass / radius)
out = numpy.linalg.norm(self.angular_momentum(radius))
out /= numpy.sqrt(2) * mass * V * radius
return out
mass = self.enclosed_mass(radmax)
circvel = numpy.sqrt(self.box.box_G * mass / radmax)
angmom_norm = numpy.linalg.norm(self.angular_momentum(radmax))
return angmom_norm / (numpy.sqrt(2) * mass * circvel * radmax)
def spherical_overdensity_mass(self, delta_mult, npart_min=10,
kind="crit"):
@ -236,18 +241,18 @@ class BaseStructure(ABC):
assert kind in ["crit", "matter"]
# We first sort the particles in an increasing separation
rs = self.r()
rs = self.r
order = numpy.argsort(rs)
rs = rs[order]
cmass = numpy.cumsum(self["M"][order]) # Cumulative mass
# We calculate the enclosed volume and indices where it is above target
vol = 4 * numpy.pi / 3 * (rs**3 - rs[0] ** 3)
vol = 4 * numpy.pi / 3 * rs**3
target_density = delta_mult * self.box.box_rhoc
if kind == "matter":
target_density *= self.box.cosmo.Om0
with numpy.errstate(divide="ignore"):
ks = numpy.where(cmass / vol > target_density)[0]
ks = numpy.where(cmass > target_density * vol)[0]
if ks.size == 0: # Never above the threshold?
return numpy.nan, numpy.nan
k = numpy.max(ks)
@ -257,7 +262,7 @@ class BaseStructure(ABC):
def __getitem__(self, key):
keys = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
if key not in self.keys:
if key not in keys:
raise RuntimeError(f"Invalid key `{key}`!")
return self.particles[:, keys.index(key)]
@ -304,3 +309,31 @@ class Halo(BaseStructure):
self.particles = particles
self.info = info
self.box = box
###############################################################################
# Other, supplementary functions #
###############################################################################
@jit(nopython=True)
def dist_centmass(clump):
    """
    Compute how far each particle of a clump (or halo) lies from the
    mass-weighted centre of the particle distribution.

    Parameters
    ----------
    clump : 2-dimensional array of shape (n_particles, 7)
        Particle array. The first four columns must be `x`, `y`, `z` and `M`.

    Returns
    -------
    dist : 1-dimensional array of shape `(n_particles, )`
        Separation of every particle from the centre of mass.
    cm : len-3 list
        Centre of mass coordinates, ordered as `[x, y, z]`.
    """
    weights = clump[:, 3]
    xpos = clump[:, 0]
    ypos = clump[:, 1]
    zpos = clump[:, 2]

    # Mass-weighted mean position along each axis
    cmx = numpy.average(xpos, weights=weights)
    cmy = numpy.average(ypos, weights=weights)
    cmz = numpy.average(zpos, weights=weights)

    sep2 = (xpos - cmx)**2 + (ypos - cmy)**2 + (zpos - cmz)**2
    return sep2**0.5, [cmx, cmy, cmz]

View file

@ -348,7 +348,7 @@ class NFWPosterior(NFWProfile):
Best fit NFW central density.
"""
assert isinstance(clump, Clump)
r = clump.r()
r = clump.r
rmin = numpy.min(r[r > 0]) # First particle that is not at r = 0
rmax, mtot = clump.spherical_overdensity_mass(200)
mask = (rmin <= r) & (r <= rmax)

View file

@ -12,14 +12,8 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import ( # noqa
ParticleOverlap,
RealisationsMatcher,
calculate_overlap,
calculate_overlap_indxs,
cosine_similarity,
dist_centmass,
dist_percentile,
)
from .match import (ParticleOverlap, RealisationsMatcher, # noqa
calculate_overlap, calculate_overlap_indxs,
cosine_similarity)
from .num_density import binned_counts, number_density # noqa
from .utils import concatenate_parts # noqa

View file

@ -16,12 +16,19 @@
Support for matching halos between CSiBORG IC realisations.
"""
from datetime import datetime
from functools import lru_cache
from math import ceil
import numpy
from numba import jit
from scipy.ndimage import gaussian_filter
from tqdm import tqdm, trange
from ..read import load_parent_particles
BCKG_HALFSIZE = 475
BOX_SIZE = 2048
###############################################################################
# Realisations matcher for calculating overlaps #
###############################################################################
@ -105,8 +112,8 @@ class RealisationsMatcher:
"""
return self._overlapper
def cross(self, cat0, catx, halos0_archive, halosx_archive, delta_bckg,
verbose=True):
def cross(self, cat0, catx, particles0, particlesx, clump_map0, clump_mapx,
delta_bckg, cache_size=10000, verbose=True):
r"""
Find all neighbours whose CM separation is less than `nmult` times the
sum of their initial Lagrangian patch sizes and calculate their
@ -119,19 +126,23 @@ class RealisationsMatcher:
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogue of the cross simulation.
halos0_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
halosx_archive : `NpzFile` object
Archive of halos' particles of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
particles0 : 2-dimensional array
Array of particles in box units in the reference simulation.
The columns must be `x`, `y`, `z` and `M`.
particlesx : 2-dimensional array
Array of particles in box units in the cross simulation.
The columns must be `x`, `y`, `z` and `M`.
clump_map0 : 2-dimensional array
Clump map of the reference simulation.
clump_mapx : 2-dimensional array
Clump map of the cross simulation.
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional
iterator verbosity flag. by default `true`.
@ -149,12 +160,12 @@ class RealisationsMatcher:
# in the reference simulation from the cross simulation in the initial
# snapshot.
if verbose:
now = datetime.now()
print(f"{now}: querying the KNN.", flush=True)
print(f"{datetime.now()}: querying the KNN.", flush=True)
match_indxs = radius_neighbours(
catx.knn(select_initial=True), cat0.positions(in_initial=True),
catx.knn(in_initial=True), cat0.position(in_initial=True),
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
nmult=self.nmult, enforce_int32=True, verbose=verbose)
# We next remove neighbours whose mass is too large/small.
if self.dlogmass is not None:
for i, indx in enumerate(match_indxs):
@ -163,12 +174,18 @@ class RealisationsMatcher:
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
match_indxs[i] = match_indxs[i][aratio < self.dlogmass]
# We will make a dictionary to keep in memory the halos' particles from
# the cross simulations so that they are not loaded in several times
# and we only convert their positions to cells once. Possibly make an
# option to not do this to lower memory requirements?
cross_halos = {}
cross_lims = {}
clid2map0 = {clid: i for i, clid in enumerate(clump_map0[:, 0])}
clid2mapx = {clid: i for i, clid in enumerate(clump_mapx[:, 0])}
# We will cache the halos from the cross simulation to speed up the I/O
@lru_cache(maxsize=cache_size)
def load_cached_halox(hid):
return load_processed_halo(hid, particlesx, clump_mapx, clid2mapx,
catx.clumps_cat, nshift=0,
ncells=BOX_SIZE)
if verbose:
print(f"{datetime.now()}: calculating overlaps.", flush=True)
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
indxs = cat0["index"]
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
@ -178,36 +195,18 @@ class RealisationsMatcher:
continue
# Next, we find this halo's particles, total mass, minimum and
# maximum cells and convert positions to cells.
halo0 = halos0_archive[str(k0)]
mass0 = numpy.sum(halo0["M"])
mins0, maxs0 = get_halolims(halo0,
ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
for p in ("x", "y", "z"):
halo0[p] = self.overlapper.pos2cell(halo0[p])
pos0, mass0, totmass0, mins0, maxs0 = load_processed_halo(
k0, particles0, clump_map0, clid2map0, cat0.clumps_cat,
nshift=0, ncells=BOX_SIZE)
# We now loop over matches of this halo and calculate their
# overlap, storing them in `_cross`.
_cross = numpy.full(matches.size, numpy.nan, dtype=numpy.float32)
for j, kf in enumerate(catx["index"][matches]):
# Attempt to load this cross halo from memory, if it fails get
# it from from the halo archive (and similarly for the limits)
# and convert the particle positions to cells.
try:
halox = cross_halos[kf]
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox, ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
for p in ("x", "y", "z"):
halox[p] = self.overlapper.pos2cell(halox[p])
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
massx = numpy.sum(halox["M"])
_cross[j] = self.overlapper(halo0, halox, delta_bckg, mins0,
maxs0, minsx, maxsx, mass1=mass0,
mass2=massx)
for j, kx in enumerate(catx["index"][matches]):
posx, massx, totmassx, minsx, maxsx = load_cached_halox(kx)
_cross[j] = self.overlapper(
pos0, posx, mass0, massx, delta_bckg, mins0, maxs0,
minsx, maxsx, totmass1=totmass0, totmass2=totmassx)
cross[i] = _cross
# We remove all matches that have zero overlap to save space.
@ -222,8 +221,9 @@ class RealisationsMatcher:
cross = numpy.asanyarray(cross, dtype=object)
return match_indxs, cross
def smoothed_cross(self, cat0, catx, halos0_archive, halosx_archive,
delta_bckg, match_indxs, smooth_kwargs, verbose=True):
def smoothed_cross(self, cat0, catx, particles0, particlesx, clump_map0,
clump_mapx, delta_bckg, match_indxs, smooth_kwargs,
cache_size=10000, verbose=True):
r"""
Calculate the smoothed overlaps for pair previously identified via
`self.cross(...)` to have a non-zero overlap.
@ -234,27 +234,27 @@ class RealisationsMatcher:
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the cross simulation.
halos0_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
halosx_archive : `NpzFile` object
Archive of halos' particles of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
particles0 : 2-dimensional array
Array of particles in box units in the reference simulation.
The columns must be `x`, `y`, `z` and `M`.
particlesx : 2-dimensional array
Array of particles in box units in the cross simulation.
The columns must be `x`, `y`, `z` and `M`.
clump_map0 : 2-dimensional array
Clump map of the reference simulation.
clump_mapx : 2-dimensional array
Clump map of the cross simulation.
delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
ref_indxs : 1-dimensional array
Halo IDs in the reference catalogue.
cross_indxs : 1-dimensional array
Halo IDs in the cross catalogue.
match_indxs : 1-dimensional array of arrays
Indices of halo counterparts in the cross catalogue.
smooth_kwargs : kwargs
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
@ -262,37 +262,33 @@ class RealisationsMatcher:
-------
overlaps : 1-dimensional array of arrays
"""
nshift = read_nshift(smooth_kwargs)
clid2map0 = {clid: i for i, clid in enumerate(clump_map0[:, 0])}
clid2mapx = {clid: i for i, clid in enumerate(clump_mapx[:, 0])}
cross_halos = {}
cross_lims = {}
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
@lru_cache(maxsize=cache_size)
def load_cached_halox(hid):
return load_processed_halo(hid, particlesx, clump_mapx, clid2mapx,
catx.clumps_cat, nshift=nshift,
ncells=BOX_SIZE)
if verbose:
print(f"{datetime.now()}: calculating smoothed overlaps.",
flush=True)
indxs = cat0["index"]
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
halo0 = halos0_archive[str(k0)]
mins0, maxs0 = get_halolims(halo0,
ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
pos0, mass0, __, mins0, maxs0 = load_processed_halo(
k0, particles0, clump_map0, clid2map0, cat0.clumps_cat,
nshift=nshift, ncells=BOX_SIZE)
# Now loop over the matches and calculate the smoothed overlap.
_cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32)
for j, kf in enumerate(catx["index"][match_indxs[i]]):
# Attempt to load this cross halo from memory, if it fails get
# it from from the halo archive (and similarly for the limits).
try:
halox = cross_halos[kf]
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox, ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
_cross[j] = self.overlapper(halo0, halox, delta_bckg, mins0,
maxs0, minsx, maxsx,
smooth_kwargs=smooth_kwargs)
for j, kx in enumerate(catx["index"][match_indxs[i]]):
posx, massx, __, minsx, maxsx = load_cached_halox(kx)
_cross[j] = self.overlapper(pos0, posx, mass0, massx,
delta_bckg, mins0, maxs0, minsx,
maxsx, smooth_kwargs=smooth_kwargs)
cross[i] = _cross
return numpy.asanyarray(cross, dtype=object)
@ -341,57 +337,37 @@ class ParticleOverlap:
Gaussian smoothing.
"""
def __init__(self):
# Inverse cell length in box units. By default :math:`2^11`, which
# matches the initial RAMSES grid resolution.
self.inv_clength = 2**11
self.nshift = 5 # Hardcode this too to force consistency
self._clength = 1 / self.inv_clength
def pos2cell(self, pos):
def make_bckg_delta(self, particles, clump_map, clid2map, halo_cat,
delta=None, verbose=False):
"""
Convert position to cell number. If `pos` is in
`numpy.typecodes["AllInteger"]` assumes it to already be the cell
number.
Calculate a NGP density field of particles belonging to halos of a
halo catalogue `halo_cat`. Particles are only counted within the
high-resolution region of the simulation. Smoothing must be applied
separately.
Parameters
----------
pos : 1-dimensional array
Array of positions along an axis in the box.
Returns
-------
cells : 1-dimensional array
"""
# Check whether this is already the cell
if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos
return numpy.floor(pos * self.inv_clength).astype(numpy.int32)
def make_bckg_delta(self, halo_archive, delta=None, verbose=False):
"""
Calculate a NGP density field of particles belonging to halos within
the central :math:`1/2^3` high-resolution region of the simulation.
Smoothing must be applied separately.
Parameters
----------
halo_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`.
particles : 2-dimensional array
Array of particles.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
halo_cat: :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogue.
delta : 3-dimensional array, optional
Array to store the density field in. If `None` a new array is
created.
verbose : bool, optional
Verbosity flag for loading the files.
Verbosity flag for loading the halos' particles.
Returns
-------
delta : 3-dimensional array
"""
# We obtain the minimum/maximum cell IDs and number of cells
cellmin = self.inv_clength // 4 # The minimum cell ID
cellmax = 3 * self.inv_clength // 4 # The maximum cell ID
cellmin = BOX_SIZE // 2 - BCKG_HALFSIZE
cellmax = BOX_SIZE // 2 + BCKG_HALFSIZE
ncells = cellmax - cellmin
# We then pre-allocate the density field/check it is of the right shape
if delta is None:
@ -399,28 +375,25 @@ class ParticleOverlap:
else:
assert ((delta.shape == (ncells,) * 3)
& (delta.dtype == numpy.float32))
from tqdm import tqdm
# We now loop one-by-one over the halos fill the density field.
files = halo_archive.files
for file in tqdm(files) if verbose else files:
parts = halo_archive[file]
cells = [self.pos2cell(parts[p]) for p in ("x", "y", "z")]
mass = parts["M"]
clumps_cat = halo_cat.clumps_cat
for hid in tqdm(halo_cat["index"]) if verbose else halo_cat["index"]:
pos = load_parent_particles(hid, particles, clump_map, clid2map,
clumps_cat)
if pos is None:
continue
pos, mass = pos[:, :3], pos[:, 3]
pos = pos2cell(pos, BOX_SIZE)
# We mask out particles outside the cubical high-resolution region
mask = ((cellmin <= cells[0])
& (cells[0] < cellmax)
& (cellmin <= cells[1])
& (cells[1] < cellmax)
& (cellmin <= cells[2])
& (cells[2] < cellmax))
cells = [c[mask] for c in cells]
mass = mass[mask]
fill_delta(delta, *cells, *(cellmin,) * 3, mass)
mask = numpy.all((cellmin <= pos) & (pos < cellmax), axis=1)
pos = pos[mask]
fill_delta(delta, pos[:, 0], pos[:, 1], pos[:, 2],
*(cellmin,) * 3, mass[mask])
return delta
def make_delta(self, clump, mins=None, maxs=None, subbox=False,
def make_delta(self, pos, mass, mins=None, maxs=None, subbox=False,
smooth_kwargs=None):
"""
Calculate a NGP density field of a halo on a cubic grid. Optionally can
@ -428,8 +401,10 @@ class ParticleOverlap:
Parameters
----------
clump : structurered arrays
Clump structured array, keys must include `x`, `y`, `z` and `M`.
pos : 2-dimensional array
Halo particle position array.
mass : 1-dimensional array
Halo particle mass array.
mins, maxs : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension.
subbox : bool, optional
@ -443,50 +418,45 @@ class ParticleOverlap:
-------
delta : 3-dimensional array
"""
cells = [self.pos2cell(clump[p]) for p in ("x", "y", "z")]
nshift = read_nshift(smooth_kwargs)
cells = self.pos2cell(pos)
# Check that minima and maxima are integers
if not (mins is None and maxs is None):
assert mins.dtype.char in numpy.typecodes["AllInteger"]
assert maxs.dtype.char in numpy.typecodes["AllInteger"]
if subbox:
# Minimum xcell, ycell and zcell of this clump
if mins is None or maxs is None:
mins = numpy.asanyarray(
[max(numpy.min(cell) - self.nshift, 0) for cell in cells]
)
maxs = numpy.asanyarray(
[
min(numpy.max(cell) + self.nshift, self.inv_clength)
for cell in cells
]
)
mins, maxs = get_halolims(cells, BOX_SIZE, nshift)
ncells = numpy.max(maxs - mins) + 1 # To get the number of cells
ncells = maxs - mins + 1 # To get the number of cells
else:
mins = [0, 0, 0]
ncells = self.inv_clength
ncells = BOX_SIZE
# Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
fill_delta(delta, *cells, *mins, clump["M"])
fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
if smooth_kwargs is not None:
gaussian_filter(delta, output=delta, **smooth_kwargs)
return delta
def make_deltas(self, clump1, clump2, mins1=None, maxs1=None, mins2=None,
maxs2=None, smooth_kwargs=None):
def make_deltas(self, pos1, pos2, mass1, mass2, mins1=None, maxs1=None,
mins2=None, maxs2=None, smooth_kwargs=None):
"""
Calculate a NGP density fields of two halos on a grid that encloses
them both. Optionally can be smoothed with a Gaussian kernel.
Parameters
----------
clump1, clump2 : structurered arrays
Particle structured array of the two clumps. Keys must include `x`,
`y`, `z` and `M`.
pos1 : 2-dimensional array
Particle positions of the first halo.
pos2 : 2-dimensional array
Particle positions of the second halo.
mass1 : 1-dimensional array
Particle masses of the first halo.
mass2 : 1-dimensional array
Particle masses of the second halo.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump1`.
Optional.
@ -507,51 +477,51 @@ class ParticleOverlap:
Indices where the lower mass clump has a non-zero density.
Calculated only if no smoothing is applied, otherwise `None`.
"""
xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ("x", "y", "z"))
xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ("x", "y", "z"))
nshift = read_nshift(smooth_kwargs)
pos1 = pos2cell(pos1, BOX_SIZE)
pos2 = pos2cell(pos2, BOX_SIZE)
xc1, yc1, zc1 = [pos1[:, i] for i in range(3)]
xc2, yc2, zc2 = [pos2[:, i] for i in range(3)]
if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)):
# Minimum cell number of the two halos along each dimension
xmin = min(numpy.min(xc1), numpy.min(xc2)) - self.nshift
ymin = min(numpy.min(yc1), numpy.min(yc2)) - self.nshift
zmin = min(numpy.min(zc1), numpy.min(zc2)) - self.nshift
xmin = min(numpy.min(xc1), numpy.min(xc2)) - nshift
ymin = min(numpy.min(yc1), numpy.min(yc2)) - nshift
zmin = min(numpy.min(zc1), numpy.min(zc2)) - nshift
# Make sure shifting does not go beyond boundaries
xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)]
# Maximum cell number of the two halos along each dimension
xmax = max(numpy.max(xc1), numpy.max(xc2)) + self.nshift
ymax = max(numpy.max(yc1), numpy.max(yc2)) + self.nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + self.nshift
xmax = max(numpy.max(xc1), numpy.max(xc2)) + nshift
ymax = max(numpy.max(yc1), numpy.max(yc2)) + nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + nshift
# Make sure shifting does not go beyond boundaries
xmax, ymax, zmax = [
min(px, self.inv_clength - 1) for px in (xmax, ymax, zmax)
]
xmax, ymax, zmax = [min(px, BOX_SIZE - 1)
for px in (xmax, ymax, zmax)]
else:
xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
cellmins = (xmin, ymin, zmin) # Cell minima
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1 # Num cells
ncells = xmax - xmin + 1, ymax - ymin + 1, zmax - zmin + 1 # Num cells
# Preallocate and fill the arrays
delta1 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
delta2 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
delta1 = numpy.zeros(ncells, dtype=numpy.float32)
delta2 = numpy.zeros(ncells, dtype=numpy.float32)
# If no smoothing figure out the nonzero indices of the smaller clump
if smooth_kwargs is None:
if clump1.size > clump2.size:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
nonzero = fill_delta_indxs(
delta2, xc2, yc2, zc2, *cellmins, clump2["M"]
)
if pos1.shape[0] > pos2.shape[0]:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
nonzero = fill_delta_indxs(delta2, xc2, yc2, zc2, *cellmins,
mass2)
else:
nonzero = fill_delta_indxs(
delta1, xc1, yc1, zc1, *cellmins, clump1["M"]
)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
nonzero = fill_delta_indxs(delta1, xc1, yc1, zc1, *cellmins,
mass1)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
else:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
nonzero = None
if smooth_kwargs is not None:
@ -559,9 +529,9 @@ class ParticleOverlap:
gaussian_filter(delta2, output=delta2, **smooth_kwargs)
return delta1, delta2, cellmins, nonzero
def __call__(self, clump1, clump2, delta_bckg, mins1=None, maxs1=None,
mins2=None, maxs2=None, mass1=None, mass2=None,
smooth_kwargs=None):
def __call__(self, pos1, pos2, mass1, mass2, delta_bckg,
mins1=None, maxs1=None, mins2=None, maxs2=None,
totmass1=None, totmass2=None, smooth_kwargs=None):
"""
Calculate overlap between `clump1` and `clump2`. See
`calculate_overlap(...)` for further information. Be careful so that
@ -572,9 +542,14 @@ class ParticleOverlap:
Parameters
----------
clump1, clump2 : structurered arrays
Structured arrays containing the particles of a given clump. Keys
must include `x`, `y`, `z` and `M`.
pos1 : 2-dimensional array
Particle positions of the first halo.
pos2 : 2-dimensional array
Particle positions of the second halo.
mass1 : 1-dimensional array
Particle masses of the first halo.
mass2 : 1-dimensional array
Particle masses of the second halo.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta_bckg : 3-dimensional array
@ -588,7 +563,7 @@ class ParticleOverlap:
mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `clump2`,
optional.
mass1, mass2 : floats, optional
totmass1, totmass2 : floats, optional
Total mass of `clump1` and `clump2`, respectively. Must be provided
if `loop_nonzero` is `True`.
smooth_kwargs : kwargs, optional
@ -600,16 +575,16 @@ class ParticleOverlap:
overlap : float
"""
delta1, delta2, cellmins, nonzero = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2,
pos1, pos2, mass1, mass2, mins1, maxs1, mins2, maxs2,
smooth_kwargs=smooth_kwargs)
if smooth_kwargs is not None:
return calculate_overlap(delta1, delta2, cellmins, delta_bckg)
# Calculate masses not given
mass1 = numpy.sum(clump1["M"]) if mass1 is None else mass1
mass2 = numpy.sum(clump2["M"]) if mass2 is None else mass2
totmass1 = numpy.sum(mass1) if totmass1 is None else totmass1
totmass2 = numpy.sum(mass2) if totmass2 is None else totmass2
return calculate_overlap_indxs(
delta1, delta2, cellmins, delta_bckg, nonzero, mass1, mass2)
delta1, delta2, cellmins, delta_bckg, nonzero, totmass1, totmass2)
###############################################################################
@ -617,6 +592,49 @@ class ParticleOverlap:
###############################################################################
def pos2cell(pos, ncells):
    """
    Convert positions to cell numbers by multiplying them by `ncells` and
    taking the floor. If `pos` is already of an integer type (one of
    `numpy.typecodes["AllInteger"]`) it is assumed to hold cell numbers and
    is returned unchanged.

    Parameters
    ----------
    pos : array-like of arbitrary shape
        Positions in the box, e.g. a 1-dimensional array of positions along
        a single axis or a 2-dimensional array of shape `(n_particles, 3)`.
    ncells : int
        Number of cells along the axis.

    Returns
    -------
    cells : array of the same shape as `pos`
        Cell numbers of type `numpy.int32`, unless `pos` was already of an
        integer type, in which case its type is preserved.
    """
    # Accept plain sequences as well; existing arrays are not copied, so an
    # integer array is handed back as the very same object.
    pos = numpy.asanyarray(pos)
    if pos.dtype.char in numpy.typecodes["AllInteger"]:
        return pos
    return numpy.floor(pos * ncells).astype(numpy.int32)
def read_nshift(smooth_kwargs):
    """
    Read off the number of cells to pad the density field if smoothing is
    applied. Defaults to the ceiling of twice the smoothing scale. If the
    smoothing scale is given per axis, the largest value is used so that the
    padding is sufficient along every axis.

    Parameters
    ----------
    smooth_kwargs : kwargs, optional
        Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`. Must
        contain the key `sigma`, either a scalar or a sequence of scalars.
        If `None` no smoothing is applied.

    Returns
    -------
    nshift : int
        Number of padding cells, 0 if `smooth_kwargs` is `None`.

    Raises
    ------
    KeyError
        If `smooth_kwargs` is given but has no `sigma` entry.
    """
    if smooth_kwargs is None:
        return 0
    # `sigma` may be a scalar or a per-axis sequence; `2 * sequence` would
    # repeat the sequence rather than scale it, so reduce to the maximum.
    sigma_max = float(numpy.max(smooth_kwargs["sigma"]))
    return ceil(2 * sigma_max)
@jit(nopython=True)
def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
"""
@ -679,15 +697,14 @@ def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
return cells[:count_nonzero, :] # Cutoff unassigned places
def get_halolims(halo, ncells, nshift=None):
def get_halolims(pos, ncells, nshift=None):
"""
Get the lower and upper limit of a halo's positions or cell numbers.
Parameters
----------
halo : structured array
Structured array containing the particles of a given halo. Keys must
`x`, `y`, `z`.
pos : 2-dimensional array
Halo particle array. Columns must be `x`, `y`, `z`.
ncells : int
Number of grid cells of the box along a single dimension.
nshift : int, optional
@ -699,17 +716,16 @@ def get_halolims(halo, ncells, nshift=None):
Minimum and maximum along each axis.
"""
# Check that in case of `nshift` we have integer positions.
dtype = halo["x"].dtype
dtype = pos.dtype
if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
raise TypeError("`nshift` supported only positions are cells.")
nshift = 0 if nshift is None else nshift # To simplify code below
mins = numpy.full(3, numpy.nan, dtype=dtype)
maxs = numpy.full(3, numpy.nan, dtype=dtype)
for i, p in enumerate(["x", "y", "z"]):
mins[i] = max(numpy.min(halo[p]) - nshift, 0)
maxs[i] = min(numpy.max(halo[p]) + nshift, ncells - 1)
for i in range(3):
mins[i] = max(numpy.min(pos[:, i]) - nshift, 0)
maxs[i] = min(numpy.max(pos[:, i]) + nshift, ncells - 1)
return mins, maxs
@ -741,8 +757,8 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
totmass = 0.0 # Total mass of clump 1 and clump 2
intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack things
bckg_offset = 512 # Offset of the background density field
bckg_size = 1024
bckg_size = 2 * BCKG_HALFSIZE
bckg_offset = BOX_SIZE // 2 - BCKG_HALFSIZE
imax, jmax, kmax = delta1.shape
for i in range(imax):
@ -798,8 +814,8 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
"""
intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack cell minimas
bckg_offset = 512 # Offset of the background density field
bckg_size = 1024 # Size of the background density field array
bckg_size = 2 * BCKG_HALFSIZE
bckg_offset = BOX_SIZE // 2 - BCKG_HALFSIZE
for n in range(nonzero.shape[0]):
i, j, k = nonzero[n, :]
@ -821,47 +837,51 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
return intersect / (mass1 + mass2 - intersect)
def dist_centmass(clump):
def load_processed_halo(hid, particles, clump_map, clid2map, clumps_cat,
ncells, nshift):
"""
Calculate the clump (or halo) particles' distance from the centre of mass.
Load a processed halo from the `.h5` file. This is to be wrapped by a
cacher.
Parameters
----------
clump : 2-dimensional array of shape (n_particles, 7)
Particle array. The first four columns must be `x`, `y`, `z` and `M`.
hid : int
Halo ID.
particles : 2-dimensional array
Array of particles in box units. The columns must be `x`, `y`, `z`
and `M`.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
Clumps catalogue.
ncells : int
Number of cells in the original density field. Typically 2048.
nshift : int
Number of cells to pad the density field.
Returns
-------
dist : 1-dimensional array of shape `(n_particles, )`
Particle distance from the centre of mass.
cm : 1-dimensional array of shape `(3,)`
Center of mass coordinates.
pos : 2-dimensional array
Array of cell particle positions.
mass : 1-dimensional array
Array of particle masses.
totmass : float
Total mass of the halo.
mins : len-3 tuple
Minimum cell indices of the halo.
maxs : len-3 tuple
Maximum cell indices of the halo.
"""
# CM along each dimension
cm = numpy.average(clump[:, :3], weights=clump[:, 3], axis=0)
return numpy.linalg.norm(clump[:, :3] - cm, axis=1), cm
def dist_percentile(dist, qs, distmax=0.075):
"""
Calculate q-th percentiles of `dist`, with an upper limit of `distmax`.
Parameters
----------
dist : 1-dimensional array
Array of distances.
qs : 1-dimensional array
Percentiles to compute.
distmax : float, optional
The maximum distance. By default 0.075.
Returns
-------
x : 1-dimensional array
"""
x = numpy.percentile(dist, qs)
x[x > distmax] = distmax # Enforce the upper limit
return x
pos = load_parent_particles(hid, particles, clump_map, clid2map,
clumps_cat)
pos, mass = pos[:, :3], pos[:, 3]
pos = pos2cell(pos, ncells)
totmass = numpy.sum(mass)
mins, maxs = get_halolims(pos, ncells=ncells, nshift=nshift)
return pos, mass, totmass, mins, maxs
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,

View file

@ -15,16 +15,14 @@
from .box_units import BoxUnits # noqa
from .halo_cat import ClumpsCatalogue, HaloCatalogue # noqa
from .knn_summary import kNNCDFReader # noqa
from .obs import ( # noqa
SDSS,
MCXCClusters,
PlanckClusters,
TwoMPPGalaxies,
TwoMPPGroups,
)
from .overlap_summary import NPairsOverlap, PairOverlap, binned_resample_mean # noqa
from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups)
from .overlap_summary import (NPairsOverlap, PairOverlap, # noqa
binned_resample_mean)
from .paths import CSiBORGPaths # noqa
from .pk_summary import PKReader # noqa
from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm # noqa
from .readsim import (MmainReader, ParticleReader, halfwidth_select, # noqa
load_clump_particles, load_parent_particles, read_initcm)
from .tpcf_summary import TPCFReader # noqa
from .utils import cartesian_to_radec, cols_to_structured, radec_to_cartesian # noqa
from .utils import (cartesian_to_radec, cols_to_structured, # noqa
radec_to_cartesian, read_h5)

View file

@ -12,7 +12,7 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""CSiBORG halo catalogue."""
"""CSiBORG halo and clumps catalogues."""
from abc import ABC
import numpy
@ -177,7 +177,7 @@ class BaseCatalogue(ABC):
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
"""
knn = NearestNeighbors()
return knn.fit(self.positions(in_initial))
return knn.fit(self.position(in_initial))
def radius_neigbours(self, X, radius, in_initial):
r"""
@ -368,6 +368,8 @@ class HaloCatalogue(BaseCatalogue):
minmass : len-2 tuple
Minimum mass. The first element is the catalogue key and the second is
the value.
with_lagpatch : bool, optional
Whether to only load halos with a resolved Lagrangian patch.
load_fitted : bool, optional
Whether to load fitted quantities.
load_initial : bool, optional
@ -378,22 +380,39 @@ class HaloCatalogue(BaseCatalogue):
"""
def __init__(self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12),
load_fitted=True, load_initial=False, rawdata=False):
with_lagpatch=True, load_fitted=True, load_initial=True,
rawdata=False):
self.nsim = nsim
self.paths = paths
# Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
self._data = mmain["mmain"]
# We will also need the clumps catalogue
self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
if load_fitted:
fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos"))
cols = [col for col in fits.dtype.names if col != "index"]
X = [fits[col] for col in cols]
self._data = add_columns(self._data, X, cols)
# TODO: load initial positions
if load_initial:
fits = numpy.load(paths.initmatch_path(nsim, "fit"))
X, cols = [], []
for col in fits.dtype.names:
if col == "index":
continue
if col in ['x', 'y', 'z']:
cols.append(col + "0")
else:
cols.append(col)
X.append(fits[col])
self._data = add_columns(self._data, X, cols)
if not rawdata:
if with_lagpatch:
self._data = self._data[numpy.isfinite(self['lagpatch'])]
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(self._data, "x", "z")
for p in ("x", "y", "z"):
@ -402,9 +421,24 @@ class HaloCatalogue(BaseCatalogue):
"r500c", "m200c", "m500c", "r200m", "m200m"]
self._data = self.box.convert_from_boxunits(self._data, names)
if load_initial:
names = ["x0", "y0", "z0", "lagpatch"]
self._data = self.box.convert_from_boxunits(self._data, names)
if maxdist is not None:
dist = numpy.sqrt(self._data["x"]**2 + self._data["y"]**2
+ self._data["z"]**2)
self._data = self._data[dist < maxdist]
if minmass is not None:
self._data = self._data[self._data[minmass[0]] > minmass[1]]
@property
def clumps_cat(self):
    """
    The raw clumps catalogue held by this halo catalogue, i.e. the
    `ClumpsCatalogue` stored in `_clumps_cat`.

    Returns
    -------
    clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
    """
    return self._clumps_cat

View file

@ -260,7 +260,7 @@ class CSiBORGPaths:
fname = f"{kind}_out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
return join(fdir, fname)
def overlap_path(self, nsim0, nsimx):
def overlap_path(self, nsim0, nsimx, smoothed):
"""
Path to the overlap files between two simulations.
@ -270,6 +270,8 @@ class CSiBORGPaths:
IC realisation index of the first simulation.
nsimx : int
IC realisation index of the second simulation.
smoothed : bool
Whether the overlap is smoothed or not.
Returns
-------
@ -280,6 +282,8 @@ class CSiBORGPaths:
mkdir(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
fname = f"overlap_{str(nsim0).zfill(5)}_{str(nsimx).zfill(5)}.npz"
if smoothed:
fname = fname.replace("overlap", "overlap_smoothed")
return join(fdir, fname)
def radpos_path(self, nsnap, nsim):
@ -305,37 +309,24 @@ class CSiBORGPaths:
fname = f"radpos_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz"
return join(fdir, fname)
def particle_h5py_path(self, nsim, kind=None, dtype="float32"):
def particles_path(self, nsim):
"""
Path to the file containing all particles in a `.h5` file.
Path to the files containing all particles.
Parameters
----------
nsim : int
IC realisation index.
kind : str
Type of output. Must be one of `[None, 'pos', 'clumpmap']`.
dtype : str
Data type. Must be one of `['float32', 'float64']`.
Returns
-------
path : str
"""
assert kind in [None, "pos", "clumpmap"]
assert dtype in ["float32", "float64"]
fdir = join(self.postdir, "particles")
if not isdir(fdir):
makedirs(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
if kind is None:
fname = f"parts_{str(nsim).zfill(5)}.h5"
else:
fname = f"parts_{kind}_{str(nsim).zfill(5)}.h5"
if dtype == "float64":
fname = fname.replace(".h5", "_f64.h5")
fname = f"parts_{str(nsim).zfill(5)}.h5"
return join(fdir, fname)
def density_field_path(self, mas, nsim):

View file

@ -215,7 +215,10 @@ class ParticleReader:
Returns
-------
out : array
out : structured array or 2-dimensional array
Particle information.
pids : 1-dimensional array
Particle IDs.
"""
# Open the particle files
nparts, partfiles = self.open_particle(nsnap, nsim, verbose=verbose)
@ -233,6 +236,8 @@ class ParticleReader:
# Check there are no strange parameters
if isinstance(pars_extract, str):
pars_extract = [pars_extract]
if "ID" in pars_extract:
pars_extract.remove("ID")
for p in pars_extract:
if p not in fnames:
raise ValueError(f"Undefined parameter `{p}`.")
@ -250,6 +255,7 @@ class ParticleReader:
par2arrpos = {par: i for i, par in enumerate(pars_extract)}
out = numpy.full((npart_tot, len(pars_extract)), numpy.nan,
dtype=numpy.float32)
pids = numpy.full(npart_tot, numpy.nan, dtype=numpy.int32)
start_ind = self.nparts_to_start_ind(nparts)
iters = tqdm(range(ncpu)) if verbose else range(ncpu)
@ -257,19 +263,21 @@ class ParticleReader:
i = start_ind[cpu]
j = nparts[cpu]
for (fname, fdtype) in zip(fnames, fdtypes):
if fname in pars_extract:
single_part = self.read_sp(fdtype, partfiles[cpu])
single_part = self.read_sp(fdtype, partfiles[cpu])
if fname == "ID":
pids[i:i + j] = single_part
elif fname in pars_extract:
if return_structured:
out[fname][i:i + j] = single_part
else:
out[i:i + j, par2arrpos[fname]] = single_part
else:
dum[i:i + j] = self.read_sp(fdtype, partfiles[cpu])
dum[i:i + j] = single_part
# Close the fortran files
for partfile in partfiles:
partfile.close()
return out
return out, pids
def open_unbinding(self, nsnap, nsim, cpu):
"""
@ -389,11 +397,16 @@ class ParticleReader:
class MmainReader:
"""
Object to generate the summed substructure catalogue.
Parameters
----------
paths : :py:class:`csiborgtools.read.CSiBORGPaths`
Paths objects.
"""
_paths = None
def __init__(self, paths):
assert isinstance(paths, CSiBORGPaths) # REMOVE
assert isinstance(paths, CSiBORGPaths)
self._paths = paths
@property
@ -444,7 +457,7 @@ class MmainReader:
def make_mmain(self, nsim, verbose=False):
"""
Make the summed substructure catalogue for a final snapshot. Includes
the position of the paren, the summed mass and the fraction of mass in
the position of the parent, the summed mass and the fraction of mass in
substructure.
Parameters
@ -472,10 +485,10 @@ class MmainReader:
nmain = numpy.sum(mask_main)
# Preallocate already the output array
out = cols_to_structured(
nmain, [("ID", numpy.int32), ("x", numpy.float32),
nmain, [("index", numpy.int32), ("x", numpy.float32),
("y", numpy.float32), ("z", numpy.float32),
("M", numpy.float32), ("subfrac", numpy.float32)])
out["ID"] = clumparr["index"][mask_main]
out["index"] = clumparr["index"][mask_main]
# Because for these index == parent
for p in ('x', 'y', 'z'):
out[p] = clumparr[p][mask_main]
@ -483,7 +496,7 @@ class MmainReader:
for i in range(nmain):
# Should include the main halo itself, i.e. its own ultimate parent
out["M"][i] = numpy.sum(
clumparr["mass_cl"][ultimate_parent == out["ID"][i]])
clumparr["mass_cl"][ultimate_parent == out["index"][i]])
out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"]
return out, ultimate_parent
@ -549,3 +562,69 @@ def halfwidth_select(hw, particles):
for p in ('x', 'y', 'z'):
particles[p] = (particles[p] - 0.5 + hw) / (2 * hw)
return particles
def load_clump_particles(clid, particles, clump_map, clid2map):
    """
    Load a clump's particles from a particle array.

    Parameters
    ----------
    clid : int
        Clump ID.
    particles : 2-dimensional array
        Array of particles.
    clump_map : 2-dimensional array
        Array containing start and end indices in the particle array
        corresponding to each clump. The start and end indices are read from
        the second and third column, respectively.
    clid2map : dict
        Dictionary mapping clump IDs to `clump_map` array positions.

    Returns
    -------
    clump_particles : 2-dimensional array or None
        Particle array of this clump. `None` if `clid` is not in `clid2map`,
        i.e. the clump has no associated particles.
    """
    # Only the ID lookup is expected to fail. Keep it alone in the `try` so
    # that a `KeyError` raised anywhere else is not silently turned into
    # `None`.
    try:
        row = clid2map[clid]
    except KeyError:
        return None

    k0, kf = clump_map[row, 1:]
    # The end index is inclusive, hence the `+ 1`.
    return particles[k0:kf + 1, :]
def load_parent_particles(hid, particles, clump_map, clid2map, clumps_cat):
    """
    Gather the particles of every clump whose parent is the halo `hid` and
    stack them into a single particle array.

    Parameters
    ----------
    hid : int
        Halo ID.
    particles : 2-dimensional array
        Array of particles.
    clump_map : 2-dimensional array
        Array containing start and end indices in the particle array
        corresponding to each clump.
    clid2map : dict
        Dictionary mapping clump IDs to `clump_map` array positions.
    clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
        Clumps catalogue. Its `index` and `parent` columns are used.

    Returns
    -------
    halo : 2-dimensional array or None
        Particle array of this halo. `None` if none of its clumps has any
        particles.
    """
    # IDs of all clumps that list this halo as their parent.
    indices = clumps_cat["index"]
    member_ids = indices[clumps_cat["parent"] == hid]

    # Fetch each member clump lazily and keep only those that have particles.
    loaded = (load_clump_particles(cid, particles, clump_map, clid2map)
              for cid in member_ids)
    found = [parts for parts in loaded if parts is not None]

    if not found:
        return None
    return numpy.concatenate(found)

View file

@ -15,7 +15,10 @@
"""
Various coordinate transformations.
"""
from os.path import isfile
import numpy
from h5py import File
###############################################################################
# Coordinate transforms #
@ -291,14 +294,35 @@ def extract_from_structured(arr, cols):
cols = [cols] if isinstance(cols, str) else cols
for col in cols:
if col not in arr.dtype.names:
raise ValueError("Invalid column `{}`!".format(col))
raise ValueError(f"Invalid column `{col}`!")
# Preallocate an array and populate it
out = numpy.zeros((arr.size, len(cols)), dtype=arr[cols[0]].dtype)
for i, col in enumerate(cols):
out[:, i] = arr[col]
# Optionally flatten
if len(cols) == 1:
return out.reshape(
-1,
)
return out.reshape(-1, )
return out
###############################################################################
# h5py functions #
###############################################################################
def read_h5(path):
    """
    Open an HDF5 file in read-only mode and return the open `h5py.File`
    object. The file is not closed here, the caller is responsible for
    closing it.

    Parameters
    ----------
    path : str
        Path to the file.

    Returns
    -------
    file : `h5py.File`

    Raises
    ------
    FileNotFoundError
        If `path` is not an existing file. This is a subclass of
        `IOError`/`OSError`, so handlers catching those still work.
    """
    if not isfile(path):
        raise FileNotFoundError(f"File `{path}` does not exist!")
    return File(path, "r")