Overlapper improvements ()

* Store indices as f32

* Fix init sorting

* Organise imports

* Rename pathing

* Add particle loading

* Improve particle reading

* Add h5py reader

* edit particle path

* Update particles loading

* update particles loading

* Fix particle dumping

* Add init fitting

* Fix bug due to insufficient precision

* Add commnet

* Add comment

* Add clumps catalogue to halo cat

* Add comment

* Make sure PIDS never forced to float32

* fix pid reading

* fix pid reading

* Update matching to work with new arrays

* Stop using cubical sub boxes, turn off nshift if no smoothing

* Improve caching

* Move function definitions

* Simplify calculation

* Add import

* Small updates to the halo

* Simplify calculation

* Simplify looping calculation

* fix tonew

* Add initial data

* Add skip condition

* Add unit conversion

* Add loading background in batches

* Rename mmain index

* Switch overlaps to h5

* Add finite lagpatch check

* fix column name

* Add verbosity flags

* Save halo IDs instead.

* Switch back to npz

* Delte nbs

* Reduce size of the box

* Load correct bckg of halos being matched

* Remove verbosity

* verbosity edits

* Change lower thresholds
This commit is contained in:
Richard Stiskalek 2023-05-06 16:52:48 +01:00 committed by GitHub
parent 1c9dacfde5
commit 56e39a8b1d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 864 additions and 3816 deletions

View file

@ -15,13 +15,9 @@
from warnings import warn from warnings import warn
from csiborgtools.clustering.knn import kNN_CDF # noqa from csiborgtools.clustering.knn import kNN_CDF # noqa
from csiborgtools.clustering.utils import ( # noqa from csiborgtools.clustering.utils import (BaseRVS, RVSinbox, # noqa
BaseRVS, RVSinsphere, RVSonsphere,
RVSinbox, normalised_marks)
RVSinsphere,
RVSonsphere,
normalised_marks,
)
try: try:
import Corrfunc # noqa import Corrfunc # noqa

View file

@ -12,6 +12,6 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .halo import Clump, Halo # noqa from .halo import Clump, Halo, dist_centmass # noqa
from .haloprofile import NFWPosterior, NFWProfile # noqa from .haloprofile import NFWPosterior, NFWProfile # noqa
from .utils import split_jobs # noqa from .utils import split_jobs # noqa

View file

@ -15,6 +15,7 @@
"""A clump object.""" """A clump object."""
from abc import ABC from abc import ABC
from numba import jit
import numpy import numpy
@ -101,16 +102,21 @@ class BaseStructure(ABC):
""" """
return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
@property
def r(self): def r(self):
""" """
Calculate the radial separation of the particles from the centre of the Radial separation of particles from the centre of the object.
object.
Returns Returns
------- -------
r : 1-dimensional array of shape `(n_particles, )`. r : 1-dimensional array of shape `(n_particles, )`.
""" """
return numpy.linalg.norm(self.pos, axis=1) return self._get_r(self.pos)
@staticmethod
@jit(nopython=True)
def _get_r(pos):
return (pos[:, 0]**2 + pos[:, 1]**2 + pos[:, 2]**2)**0.5
def cmass(self, rmax, rmin): def cmass(self, rmax, rmin):
""" """
@ -130,7 +136,7 @@ class BaseStructure(ABC):
------- -------
cm : 1-dimensional array of shape `(3, )` cm : 1-dimensional array of shape `(3, )`
""" """
r = self.r() r = self.r
mask = (r >= rmin) & (r <= rmax) mask = (r >= rmin) & (r <= rmax)
return numpy.average(self.pos[mask], axis=0, weights=self["M"][mask]) return numpy.average(self.pos[mask], axis=0, weights=self["M"][mask])
@ -149,7 +155,7 @@ class BaseStructure(ABC):
------- -------
J : 1-dimensional array or shape `(3, )` J : 1-dimensional array or shape `(3, )`
""" """
r = self.r() r = self.r
mask = (r >= rmin) & (r <= rmax) mask = (r >= rmin) & (r <= rmax)
pos = self.pos[mask] - self.cmass(rmax, rmin) pos = self.pos[mask] - self.cmass(rmax, rmin)
# Velocitities in the object CM frame # Velocitities in the object CM frame
@ -172,17 +178,17 @@ class BaseStructure(ABC):
------- -------
enclosed_mass : float enclosed_mass : float
""" """
r = self.r() r = self.r
return numpy.sum(self["M"][(r >= rmin) & (r <= rmax)]) return numpy.sum(self["M"][(r >= rmin) & (r <= rmax)])
def lambda_bullock(self, radius, npart_min=10): def lambda_bullock(self, radmax, npart_min=10):
r""" r"""
Bullock spin, see Eq. 5 in [1], in a radius of `radius`, which should Bullock spin, see Eq. 5 in [1], in a radius of `radius`, which should
define to some overdensity radius. define to some overdensity radius.
Parameters Parameters
---------- ----------
radius : float radmax : float
Radius in which to calculate the spin. Radius in which to calculate the spin.
npart_min : int npart_min : int
Minimum number of enclosed particles for a radius to be Minimum number of enclosed particles for a radius to be
@ -198,14 +204,13 @@ class BaseStructure(ABC):
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.; Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R. Klypin, A. A.; Porciani, C.; Primack, J. R.
""" """
mask = self.r() <= radius mask = self.r <= radmax
if numpy.sum(mask) < npart_min: if numpy.sum(mask) < npart_min:
return numpy.nan return numpy.nan
mass = self.enclosed_mass(radius) mass = self.enclosed_mass(radmax)
V = numpy.sqrt(self.box.box_G * mass / radius) circvel = numpy.sqrt(self.box.box_G * mass / radmax)
out = numpy.linalg.norm(self.angular_momentum(radius)) angmom_norm = numpy.linalg.norm(self.angular_momentum(radmax))
out /= numpy.sqrt(2) * mass * V * radius return angmom_norm / (numpy.sqrt(2) * mass * circvel * radmax)
return out
def spherical_overdensity_mass(self, delta_mult, npart_min=10, def spherical_overdensity_mass(self, delta_mult, npart_min=10,
kind="crit"): kind="crit"):
@ -236,18 +241,18 @@ class BaseStructure(ABC):
assert kind in ["crit", "matter"] assert kind in ["crit", "matter"]
# We first sort the particles in an increasing separation # We first sort the particles in an increasing separation
rs = self.r() rs = self.r
order = numpy.argsort(rs) order = numpy.argsort(rs)
rs = rs[order] rs = rs[order]
cmass = numpy.cumsum(self["M"][order]) # Cumulative mass cmass = numpy.cumsum(self["M"][order]) # Cumulative mass
# We calculate the enclosed volume and indices where it is above target # We calculate the enclosed volume and indices where it is above target
vol = 4 * numpy.pi / 3 * (rs**3 - rs[0] ** 3) vol = 4 * numpy.pi / 3 * rs**3
target_density = delta_mult * self.box.box_rhoc target_density = delta_mult * self.box.box_rhoc
if kind == "matter": if kind == "matter":
target_density *= self.box.cosmo.Om0 target_density *= self.box.cosmo.Om0
with numpy.errstate(divide="ignore"): ks = numpy.where(cmass > target_density * vol)[0]
ks = numpy.where(cmass / vol > target_density)[0]
if ks.size == 0: # Never above the threshold? if ks.size == 0: # Never above the threshold?
return numpy.nan, numpy.nan return numpy.nan, numpy.nan
k = numpy.max(ks) k = numpy.max(ks)
@ -257,7 +262,7 @@ class BaseStructure(ABC):
def __getitem__(self, key): def __getitem__(self, key):
keys = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M'] keys = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
if key not in self.keys: if key not in keys:
raise RuntimeError(f"Invalid key `{key}`!") raise RuntimeError(f"Invalid key `{key}`!")
return self.particles[:, keys.index(key)] return self.particles[:, keys.index(key)]
@ -304,3 +309,31 @@ class Halo(BaseStructure):
self.particles = particles self.particles = particles
self.info = info self.info = info
self.box = box self.box = box
###############################################################################
# Other, supplementary functions #
###############################################################################
@jit(nopython=True)
def dist_centmass(clump):
"""
Calculate the clump (or halo) particles' distance from the centre of mass.
Parameters
----------
clump : 2-dimensional array of shape (n_particles, 7)
Particle array. The first four columns must be `x`, `y`, `z` and `M`.
Returns
-------
dist : 1-dimensional array of shape `(n_particles, )`
Particle distance from the centre of mass.
cm : len-3 list
Center of mass coordinates.
"""
mass = clump[:, 3]
x, y, z = clump[:, 0], clump[:, 1], clump[:, 2]
cmx, cmy, cmz = [numpy.average(xi, weights=mass) for xi in (x, y, z)]
dist = ((x - cmx)**2 + (y - cmy)**2 + (z - cmz)**2)**0.5
return dist, [cmx, cmy, cmz]

View file

@ -348,7 +348,7 @@ class NFWPosterior(NFWProfile):
Best fit NFW central density. Best fit NFW central density.
""" """
assert isinstance(clump, Clump) assert isinstance(clump, Clump)
r = clump.r() r = clump.r
rmin = numpy.min(r[r > 0]) # First particle that is not at r = 0 rmin = numpy.min(r[r > 0]) # First particle that is not at r = 0
rmax, mtot = clump.spherical_overdensity_mass(200) rmax, mtot = clump.spherical_overdensity_mass(200)
mask = (rmin <= r) & (r <= rmax) mask = (rmin <= r) & (r <= rmax)

View file

@ -12,14 +12,8 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import ( # noqa from .match import (ParticleOverlap, RealisationsMatcher, # noqa
ParticleOverlap, calculate_overlap, calculate_overlap_indxs,
RealisationsMatcher, cosine_similarity)
calculate_overlap,
calculate_overlap_indxs,
cosine_similarity,
dist_centmass,
dist_percentile,
)
from .num_density import binned_counts, number_density # noqa from .num_density import binned_counts, number_density # noqa
from .utils import concatenate_parts # noqa from .utils import concatenate_parts # noqa

View file

@ -16,12 +16,19 @@
Support for matching halos between CSiBORG IC realisations. Support for matching halos between CSiBORG IC realisations.
""" """
from datetime import datetime from datetime import datetime
from functools import lru_cache
from math import ceil
import numpy import numpy
from numba import jit from numba import jit
from scipy.ndimage import gaussian_filter from scipy.ndimage import gaussian_filter
from tqdm import tqdm, trange from tqdm import tqdm, trange
from ..read import load_parent_particles
BCKG_HALFSIZE = 475
BOX_SIZE = 2048
############################################################################### ###############################################################################
# Realisations matcher for calculating overlaps # # Realisations matcher for calculating overlaps #
############################################################################### ###############################################################################
@ -105,8 +112,8 @@ class RealisationsMatcher:
""" """
return self._overlapper return self._overlapper
def cross(self, cat0, catx, halos0_archive, halosx_archive, delta_bckg, def cross(self, cat0, catx, particles0, particlesx, clump_map0, clump_mapx,
verbose=True): delta_bckg, cache_size=10000, verbose=True):
r""" r"""
Find all neighbours whose CM separation is less than `nmult` times the Find all neighbours whose CM separation is less than `nmult` times the
sum of their initial Lagrangian patch sizes and calculate their sum of their initial Lagrangian patch sizes and calculate their
@ -119,19 +126,23 @@ class RealisationsMatcher:
Halo catalogue of the reference simulation. Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.HaloCatalogue` catx : :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogue of the cross simulation. Halo catalogue of the cross simulation.
halos0_archive : `NpzFile` object particles0 : 2-dimensional array
Archive of halos' particles of the reference simulation, keys must Array of particles in box units in the reference simulation.
include `x`, `y`, `z` and `M`. The positions must already be The columns must be `x`, `y`, `z` and `M`.
converted to cell numbers. particlesx : 2-dimensional array
halosx_archive : `NpzFile` object Array of particles in box units in the cross simulation.
Archive of halos' particles of the cross simulation, keys must The columns must be `x`, `y`, `z` and `M`.
include `x`, `y`, `z` and `M`. The positions must already be clump_map0 : 2-dimensional array
converted to cell numbers. Clump map of the reference simulation.
clump_mapx : 2-dimensional array
Clump map of the cross simulation.
delta_bckg : 3-dimensional array delta_bckg : 3-dimensional array
Summed background density field of the reference and cross Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`. :math:`[512, 1536)^3`.
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional verbose : bool, optional
iterator verbosity flag. by default `true`. iterator verbosity flag. by default `true`.
@ -149,12 +160,12 @@ class RealisationsMatcher:
# in the reference simulation from the cross simulation in the initial # in the reference simulation from the cross simulation in the initial
# snapshot. # snapshot.
if verbose: if verbose:
now = datetime.now() print(f"{datetime.now()}: querying the KNN.", flush=True)
print(f"{now}: querying the KNN.", flush=True)
match_indxs = radius_neighbours( match_indxs = radius_neighbours(
catx.knn(select_initial=True), cat0.positions(in_initial=True), catx.knn(in_initial=True), cat0.position(in_initial=True),
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"], radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
nmult=self.nmult, enforce_int32=True, verbose=verbose) nmult=self.nmult, enforce_int32=True, verbose=verbose)
# We next remove neighbours whose mass is too large/small. # We next remove neighbours whose mass is too large/small.
if self.dlogmass is not None: if self.dlogmass is not None:
for i, indx in enumerate(match_indxs): for i, indx in enumerate(match_indxs):
@ -163,12 +174,18 @@ class RealisationsMatcher:
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i])) aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
match_indxs[i] = match_indxs[i][aratio < self.dlogmass] match_indxs[i] = match_indxs[i][aratio < self.dlogmass]
# We will make a dictionary to keep in memory the halos' particles from clid2map0 = {clid: i for i, clid in enumerate(clump_map0[:, 0])}
# the cross simulations so that they are not loaded in several times clid2mapx = {clid: i for i, clid in enumerate(clump_mapx[:, 0])}
# and we only convert their positions to cells once. Possibly make an
# option to not do this to lower memory requirements? # We will cache the halos from the cross simulation to speed up the I/O
cross_halos = {} @lru_cache(maxsize=cache_size)
cross_lims = {} def load_cached_halox(hid):
return load_processed_halo(hid, particlesx, clump_mapx, clid2mapx,
catx.clumps_cat, nshift=0,
ncells=BOX_SIZE)
if verbose:
print(f"{datetime.now()}: calculating overlaps.", flush=True)
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
indxs = cat0["index"] indxs = cat0["index"]
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs): for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
@ -178,36 +195,18 @@ class RealisationsMatcher:
continue continue
# Next, we find this halo's particles, total mass, minimum and # Next, we find this halo's particles, total mass, minimum and
# maximum cells and convert positions to cells. # maximum cells and convert positions to cells.
halo0 = halos0_archive[str(k0)] pos0, mass0, totmass0, mins0, maxs0 = load_processed_halo(
mass0 = numpy.sum(halo0["M"]) k0, particles0, clump_map0, clid2map0, cat0.clumps_cat,
mins0, maxs0 = get_halolims(halo0, nshift=0, ncells=BOX_SIZE)
ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
for p in ("x", "y", "z"):
halo0[p] = self.overlapper.pos2cell(halo0[p])
# We now loop over matches of this halo and calculate their # We now loop over matches of this halo and calculate their
# overlap, storing them in `_cross`. # overlap, storing them in `_cross`.
_cross = numpy.full(matches.size, numpy.nan, dtype=numpy.float32) _cross = numpy.full(matches.size, numpy.nan, dtype=numpy.float32)
for j, kf in enumerate(catx["index"][matches]): for j, kx in enumerate(catx["index"][matches]):
# Attempt to load this cross halo from memory, if it fails get posx, massx, totmassx, minsx, maxsx = load_cached_halox(kx)
# it from from the halo archive (and similarly for the limits) _cross[j] = self.overlapper(
# and convert the particle positions to cells. pos0, posx, mass0, massx, delta_bckg, mins0, maxs0,
try: minsx, maxsx, totmass1=totmass0, totmass2=totmassx)
halox = cross_halos[kf]
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox, ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
for p in ("x", "y", "z"):
halox[p] = self.overlapper.pos2cell(halox[p])
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
massx = numpy.sum(halox["M"])
_cross[j] = self.overlapper(halo0, halox, delta_bckg, mins0,
maxs0, minsx, maxsx, mass1=mass0,
mass2=massx)
cross[i] = _cross cross[i] = _cross
# We remove all matches that have zero overlap to save space. # We remove all matches that have zero overlap to save space.
@ -222,8 +221,9 @@ class RealisationsMatcher:
cross = numpy.asanyarray(cross, dtype=object) cross = numpy.asanyarray(cross, dtype=object)
return match_indxs, cross return match_indxs, cross
def smoothed_cross(self, cat0, catx, halos0_archive, halosx_archive, def smoothed_cross(self, cat0, catx, particles0, particlesx, clump_map0,
delta_bckg, match_indxs, smooth_kwargs, verbose=True): clump_mapx, delta_bckg, match_indxs, smooth_kwargs,
cache_size=10000, verbose=True):
r""" r"""
Calculate the smoothed overlaps for pair previously identified via Calculate the smoothed overlaps for pair previously identified via
`self.cross(...)` to have a non-zero overlap. `self.cross(...)` to have a non-zero overlap.
@ -234,27 +234,27 @@ class RealisationsMatcher:
Halo catalogue of the reference simulation. Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.ClumpsCatalogue` catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the cross simulation. Halo catalogue of the cross simulation.
halos0_archive : `NpzFile` object particles0 : 2-dimensional array
Archive of halos' particles of the reference simulation, keys must Array of particles in box units in the reference simulation.
include `x`, `y`, `z` and `M`. The positions must already be The columns must be `x`, `y`, `z` and `M`.
converted to cell numbers. particlesx : 2-dimensional array
halosx_archive : `NpzFile` object Array of particles in box units in the cross simulation.
Archive of halos' particles of the cross simulation, keys must The columns must be `x`, `y`, `z` and `M`.
include `x`, `y`, `z` and `M`. The positions must already be clump_map0 : 2-dimensional array
converted to cell numbers. Clump map of the reference simulation.
clump_mapx : 2-dimensional array
Clump map of the cross simulation.
delta_bckg : 3-dimensional array delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`. :math:`[512, 1536)^3`.
ref_indxs : 1-dimensional array
Halo IDs in the reference catalogue.
cross_indxs : 1-dimensional array
Halo IDs in the cross catalogue.
match_indxs : 1-dimensional array of arrays match_indxs : 1-dimensional array of arrays
Indices of halo counterparts in the cross catalogue. Indices of halo counterparts in the cross catalogue.
smooth_kwargs : kwargs smooth_kwargs : kwargs
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`. Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional verbose : bool, optional
Iterator verbosity flag. By default `True`. Iterator verbosity flag. By default `True`.
@ -262,37 +262,33 @@ class RealisationsMatcher:
------- -------
overlaps : 1-dimensional array of arrays overlaps : 1-dimensional array of arrays
""" """
nshift = read_nshift(smooth_kwargs)
clid2map0 = {clid: i for i, clid in enumerate(clump_map0[:, 0])}
clid2mapx = {clid: i for i, clid in enumerate(clump_mapx[:, 0])}
cross_halos = {} @lru_cache(maxsize=cache_size)
cross_lims = {} def load_cached_halox(hid):
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size return load_processed_halo(hid, particlesx, clump_mapx, clid2mapx,
catx.clumps_cat, nshift=nshift,
ncells=BOX_SIZE)
if verbose:
print(f"{datetime.now()}: calculating smoothed overlaps.",
flush=True)
indxs = cat0["index"] indxs = cat0["index"]
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs): for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
halo0 = halos0_archive[str(k0)] pos0, mass0, __, mins0, maxs0 = load_processed_halo(
mins0, maxs0 = get_halolims(halo0, k0, particles0, clump_map0, clid2map0, cat0.clumps_cat,
ncells=self.overlapper.inv_clength, nshift=nshift, ncells=BOX_SIZE)
nshift=self.overlapper.nshift)
# Now loop over the matches and calculate the smoothed overlap. # Now loop over the matches and calculate the smoothed overlap.
_cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32) _cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32)
for j, kf in enumerate(catx["index"][match_indxs[i]]): for j, kx in enumerate(catx["index"][match_indxs[i]]):
# Attempt to load this cross halo from memory, if it fails get posx, massx, __, minsx, maxsx = load_cached_halox(kx)
# it from from the halo archive (and similarly for the limits). _cross[j] = self.overlapper(pos0, posx, mass0, massx,
try: delta_bckg, mins0, maxs0, minsx,
halox = cross_halos[kf] maxsx, smooth_kwargs=smooth_kwargs)
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox, ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
_cross[j] = self.overlapper(halo0, halox, delta_bckg, mins0,
maxs0, minsx, maxsx,
smooth_kwargs=smooth_kwargs)
cross[i] = _cross cross[i] = _cross
return numpy.asanyarray(cross, dtype=object) return numpy.asanyarray(cross, dtype=object)
@ -341,57 +337,37 @@ class ParticleOverlap:
Gaussian smoothing. Gaussian smoothing.
""" """
def __init__(self): def make_bckg_delta(self, particles, clump_map, clid2map, halo_cat,
# Inverse cell length in box units. By default :math:`2^11`, which delta=None, verbose=False):
# matches the initial RAMSES grid resolution.
self.inv_clength = 2**11
self.nshift = 5 # Hardcode this too to force consistency
self._clength = 1 / self.inv_clength
def pos2cell(self, pos):
""" """
Convert position to cell number. If `pos` is in Calculate a NGP density field of particles belonging to halos of a
`numpy.typecodes["AllInteger"]` assumes it to already be the cell halo catalogue `halo_cat`. Particles are only counted within the
number. high-resolution region of the simulation. Smoothing must be applied
separately.
Parameters Parameters
---------- ----------
pos : 1-dimensional array particles : 2-dimensional array
Array of positions along an axis in the box. Array of particles.
clump_map : 2-dimensional array
Returns Array containing start and end indices in the particle array
------- corresponding to each clump.
cells : 1-dimensional array clid2map : dict
""" Dictionary mapping clump IDs to `clump_map` array positions.
# Check whether this is already the cell halo_cat: :py:class:`csiborgtools.read.HaloCatalogue`
if pos.dtype.char in numpy.typecodes["AllInteger"]: Halo catalogue.
return pos
return numpy.floor(pos * self.inv_clength).astype(numpy.int32)
def make_bckg_delta(self, halo_archive, delta=None, verbose=False):
"""
Calculate a NGP density field of particles belonging to halos within
the central :math:`1/2^3` high-resolution region of the simulation.
Smoothing must be applied separately.
Parameters
----------
halo_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`.
delta : 3-dimensional array, optional delta : 3-dimensional array, optional
Array to store the density field in. If `None` a new array is Array to store the density field in. If `None` a new array is
created. created.
verbose : bool, optional verbose : bool, optional
Verbosity flag for loading the files. Verbosity flag for loading the halos' particles.
Returns Returns
------- -------
delta : 3-dimensional array delta : 3-dimensional array
""" """
# We obtain the minimum/maximum cell IDs and number of cells cellmin = BOX_SIZE // 2 - BCKG_HALFSIZE
cellmin = self.inv_clength // 4 # The minimum cell ID cellmax = BOX_SIZE // 2 + BCKG_HALFSIZE
cellmax = 3 * self.inv_clength // 4 # The maximum cell ID
ncells = cellmax - cellmin ncells = cellmax - cellmin
# We then pre-allocate the density field/check it is of the right shape # We then pre-allocate the density field/check it is of the right shape
if delta is None: if delta is None:
@ -399,28 +375,25 @@ class ParticleOverlap:
else: else:
assert ((delta.shape == (ncells,) * 3) assert ((delta.shape == (ncells,) * 3)
& (delta.dtype == numpy.float32)) & (delta.dtype == numpy.float32))
from tqdm import tqdm
# We now loop one-by-one over the halos fill the density field. clumps_cat = halo_cat.clumps_cat
files = halo_archive.files for hid in tqdm(halo_cat["index"]) if verbose else halo_cat["index"]:
for file in tqdm(files) if verbose else files: pos = load_parent_particles(hid, particles, clump_map, clid2map,
parts = halo_archive[file] clumps_cat)
cells = [self.pos2cell(parts[p]) for p in ("x", "y", "z")] if pos is None:
mass = parts["M"] continue
pos, mass = pos[:, :3], pos[:, 3]
pos = pos2cell(pos, BOX_SIZE)
# We mask out particles outside the cubical high-resolution region # We mask out particles outside the cubical high-resolution region
mask = ((cellmin <= cells[0]) mask = numpy.all((cellmin <= pos) & (pos < cellmax), axis=1)
& (cells[0] < cellmax) pos = pos[mask]
& (cellmin <= cells[1]) fill_delta(delta, pos[:, 0], pos[:, 1], pos[:, 2],
& (cells[1] < cellmax) *(cellmin,) * 3, mass[mask])
& (cellmin <= cells[2])
& (cells[2] < cellmax))
cells = [c[mask] for c in cells]
mass = mass[mask]
fill_delta(delta, *cells, *(cellmin,) * 3, mass)
return delta return delta
def make_delta(self, clump, mins=None, maxs=None, subbox=False, def make_delta(self, pos, mass, mins=None, maxs=None, subbox=False,
smooth_kwargs=None): smooth_kwargs=None):
""" """
Calculate a NGP density field of a halo on a cubic grid. Optionally can Calculate a NGP density field of a halo on a cubic grid. Optionally can
@ -428,8 +401,10 @@ class ParticleOverlap:
Parameters Parameters
---------- ----------
clump : structurered arrays pos : 2-dimensional array
Clump structured array, keys must include `x`, `y`, `z` and `M`. Halo particle position array.
mass : 1-dimensional array
Halo particle mass array.
mins, maxs : 1-dimensional arrays of shape `(3,)` mins, maxs : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension. Minimun and maximum cell numbers along each dimension.
subbox : bool, optional subbox : bool, optional
@ -443,50 +418,45 @@ class ParticleOverlap:
------- -------
delta : 3-dimensional array delta : 3-dimensional array
""" """
cells = [self.pos2cell(clump[p]) for p in ("x", "y", "z")] nshift = read_nshift(smooth_kwargs)
cells = self.pos2cell(pos)
# Check that minima and maxima are integers # Check that minima and maxima are integers
if not (mins is None and maxs is None): if not (mins is None and maxs is None):
assert mins.dtype.char in numpy.typecodes["AllInteger"] assert mins.dtype.char in numpy.typecodes["AllInteger"]
assert maxs.dtype.char in numpy.typecodes["AllInteger"] assert maxs.dtype.char in numpy.typecodes["AllInteger"]
if subbox: if subbox:
# Minimum xcell, ycell and zcell of this clump
if mins is None or maxs is None: if mins is None or maxs is None:
mins = numpy.asanyarray( mins, maxs = get_halolims(cells, BOX_SIZE, nshift)
[max(numpy.min(cell) - self.nshift, 0) for cell in cells]
)
maxs = numpy.asanyarray(
[
min(numpy.max(cell) + self.nshift, self.inv_clength)
for cell in cells
]
)
ncells = numpy.max(maxs - mins) + 1 # To get the number of cells ncells = maxs - mins + 1 # To get the number of cells
else: else:
mins = [0, 0, 0] mins = [0, 0, 0]
ncells = self.inv_clength ncells = BOX_SIZE
# Preallocate and fill the array # Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32) delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
fill_delta(delta, *cells, *mins, clump["M"]) fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
if smooth_kwargs is not None: if smooth_kwargs is not None:
gaussian_filter(delta, output=delta, **smooth_kwargs) gaussian_filter(delta, output=delta, **smooth_kwargs)
return delta return delta
def make_deltas(self, clump1, clump2, mins1=None, maxs1=None, mins2=None, def make_deltas(self, pos1, pos2, mass1, mass2, mins1=None, maxs1=None,
maxs2=None, smooth_kwargs=None): mins2=None, maxs2=None, smooth_kwargs=None):
""" """
Calculate a NGP density fields of two halos on a grid that encloses Calculate a NGP density fields of two halos on a grid that encloses
them both. Optionally can be smoothed with a Gaussian kernel. them both. Optionally can be smoothed with a Gaussian kernel.
Parameters Parameters
---------- ----------
clump1, clump2 : structurered arrays pos1 : 2-dimensional array
Particle structured array of the two clumps. Keys must include `x`, Particle positions of the first halo.
`y`, `z` and `M`. pos2 : 2-dimensional array
Particle positions of the second halo.
mass1 : 1-dimensional array
Particle masses of the first halo.
mass2 : 1-dimensional array
Particle masses of the second halo.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)` mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump1`. Minimun and maximum cell numbers along each dimension of `clump1`.
Optional. Optional.
@ -507,51 +477,51 @@ class ParticleOverlap:
Indices where the lower mass clump has a non-zero density. Indices where the lower mass clump has a non-zero density.
Calculated only if no smoothing is applied, otherwise `None`. Calculated only if no smoothing is applied, otherwise `None`.
""" """
xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ("x", "y", "z")) nshift = read_nshift(smooth_kwargs)
xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ("x", "y", "z")) pos1 = pos2cell(pos1, BOX_SIZE)
pos2 = pos2cell(pos2, BOX_SIZE)
xc1, yc1, zc1 = [pos1[:, i] for i in range(3)]
xc2, yc2, zc2 = [pos2[:, i] for i in range(3)]
if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)): if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)):
# Minimum cell number of the two halos along each dimension # Minimum cell number of the two halos along each dimension
xmin = min(numpy.min(xc1), numpy.min(xc2)) - self.nshift xmin = min(numpy.min(xc1), numpy.min(xc2)) - nshift
ymin = min(numpy.min(yc1), numpy.min(yc2)) - self.nshift ymin = min(numpy.min(yc1), numpy.min(yc2)) - nshift
zmin = min(numpy.min(zc1), numpy.min(zc2)) - self.nshift zmin = min(numpy.min(zc1), numpy.min(zc2)) - nshift
# Make sure shifting does not go beyond boundaries # Make sure shifting does not go beyond boundaries
xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)] xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)]
# Maximum cell number of the two halos along each dimension # Maximum cell number of the two halos along each dimension
xmax = max(numpy.max(xc1), numpy.max(xc2)) + self.nshift xmax = max(numpy.max(xc1), numpy.max(xc2)) + nshift
ymax = max(numpy.max(yc1), numpy.max(yc2)) + self.nshift ymax = max(numpy.max(yc1), numpy.max(yc2)) + nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + self.nshift zmax = max(numpy.max(zc1), numpy.max(zc2)) + nshift
# Make sure shifting does not go beyond boundaries # Make sure shifting does not go beyond boundaries
xmax, ymax, zmax = [ xmax, ymax, zmax = [min(px, BOX_SIZE - 1)
min(px, self.inv_clength - 1) for px in (xmax, ymax, zmax) for px in (xmax, ymax, zmax)]
]
else: else:
xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)] xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)] xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
cellmins = (xmin, ymin, zmin) # Cell minima cellmins = (xmin, ymin, zmin) # Cell minima
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1 # Num cells ncells = xmax - xmin + 1, ymax - ymin + 1, zmax - zmin + 1 # Num cells
# Preallocate and fill the arrays # Preallocate and fill the arrays
delta1 = numpy.zeros((ncells,) * 3, dtype=numpy.float32) delta1 = numpy.zeros(ncells, dtype=numpy.float32)
delta2 = numpy.zeros((ncells,) * 3, dtype=numpy.float32) delta2 = numpy.zeros(ncells, dtype=numpy.float32)
# If no smoothing figure out the nonzero indices of the smaller clump # If no smoothing figure out the nonzero indices of the smaller clump
if smooth_kwargs is None: if smooth_kwargs is None:
if clump1.size > clump2.size: if pos1.shape[0] > pos2.shape[0]:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"]) fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
nonzero = fill_delta_indxs( nonzero = fill_delta_indxs(delta2, xc2, yc2, zc2, *cellmins,
delta2, xc2, yc2, zc2, *cellmins, clump2["M"] mass2)
)
else: else:
nonzero = fill_delta_indxs( nonzero = fill_delta_indxs(delta1, xc1, yc1, zc1, *cellmins,
delta1, xc1, yc1, zc1, *cellmins, clump1["M"] mass1)
) fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
else: else:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"]) fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"]) fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
nonzero = None nonzero = None
if smooth_kwargs is not None: if smooth_kwargs is not None:
@ -559,9 +529,9 @@ class ParticleOverlap:
gaussian_filter(delta2, output=delta2, **smooth_kwargs) gaussian_filter(delta2, output=delta2, **smooth_kwargs)
return delta1, delta2, cellmins, nonzero return delta1, delta2, cellmins, nonzero
def __call__(self, clump1, clump2, delta_bckg, mins1=None, maxs1=None, def __call__(self, pos1, pos2, mass1, mass2, delta_bckg,
mins2=None, maxs2=None, mass1=None, mass2=None, mins1=None, maxs1=None, mins2=None, maxs2=None,
smooth_kwargs=None): totmass1=None, totmass2=None, smooth_kwargs=None):
""" """
Calculate overlap between `clump1` and `clump2`. See Calculate overlap between `clump1` and `clump2`. See
`calculate_overlap(...)` for further information. Be careful so that `calculate_overlap(...)` for further information. Be careful so that
@ -572,9 +542,14 @@ class ParticleOverlap:
Parameters Parameters
---------- ----------
clump1, clump2 : structurered arrays pos1 : 2-dimensional array
Structured arrays containing the particles of a given clump. Keys Particle positions of the first halo.
must include `x`, `y`, `z` and `M`. pos2 : 2-dimensional array
Particle positions of the second halo.
mass1 : 1-dimensional array
Particle masses of the first halo.
mass2 : 1-dimensional array
Particle masses of the second halo.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
delta_bckg : 3-dimensional array delta_bckg : 3-dimensional array
@ -588,7 +563,7 @@ class ParticleOverlap:
mins2, maxs2 : 1-dimensional arrays of shape `(3,)` mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `clump2`, Minimum and maximum cell numbers along each dimension of `clump2`,
optional. optional.
mass1, mass2 : floats, optional totmass1, totmass2 : floats, optional
Total mass of `clump1` and `clump2`, respectively. Must be provided Total mass of `clump1` and `clump2`, respectively. Must be provided
if `loop_nonzero` is `True`. if `loop_nonzero` is `True`.
smooth_kwargs : kwargs, optional smooth_kwargs : kwargs, optional
@ -600,16 +575,16 @@ class ParticleOverlap:
overlap : float overlap : float
""" """
delta1, delta2, cellmins, nonzero = self.make_deltas( delta1, delta2, cellmins, nonzero = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2, pos1, pos2, mass1, mass2, mins1, maxs1, mins2, maxs2,
smooth_kwargs=smooth_kwargs) smooth_kwargs=smooth_kwargs)
if smooth_kwargs is not None: if smooth_kwargs is not None:
return calculate_overlap(delta1, delta2, cellmins, delta_bckg) return calculate_overlap(delta1, delta2, cellmins, delta_bckg)
# Calculate masses not given # Calculate masses not given
mass1 = numpy.sum(clump1["M"]) if mass1 is None else mass1 totmass1 = numpy.sum(mass1) if totmass1 is None else totmass1
mass2 = numpy.sum(clump2["M"]) if mass2 is None else mass2 totmass2 = numpy.sum(mass2) if totmass2 is None else totmass2
return calculate_overlap_indxs( return calculate_overlap_indxs(
delta1, delta2, cellmins, delta_bckg, nonzero, mass1, mass2) delta1, delta2, cellmins, delta_bckg, nonzero, totmass1, totmass2)
############################################################################### ###############################################################################
@ -617,6 +592,49 @@ class ParticleOverlap:
############################################################################### ###############################################################################
def pos2cell(pos, ncells):
"""
Convert position to cell number. If `pos` is in
`numpy.typecodes["AllInteger"]` assumes it to already be the cell
number.
Parameters
----------
pos : 1-dimensional array
Array of positions along an axis in the box.
ncells : int
Number of cells along the axis.
Returns
-------
cells : 1-dimensional array
"""
if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos
return numpy.floor(pos * ncells).astype(numpy.int32)
def read_nshift(smooth_kwargs):
"""
Read off the number of cells to pad the density field if smoothing is
applied. Defaults to the ceiling of twice of the smoothing scale.
Parameters
----------
smooth_kwargs : kwargs, optional
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
If `None` no smoothing is applied.
Returns
-------
nshift : int
"""
if smooth_kwargs is None:
return 0
else:
return ceil(2 * smooth_kwargs["sigma"])
@jit(nopython=True) @jit(nopython=True)
def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights): def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
""" """
@ -679,15 +697,14 @@ def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
return cells[:count_nonzero, :] # Cutoff unassigned places return cells[:count_nonzero, :] # Cutoff unassigned places
def get_halolims(halo, ncells, nshift=None): def get_halolims(pos, ncells, nshift=None):
""" """
Get the lower and upper limit of a halo's positions or cell numbers. Get the lower and upper limit of a halo's positions or cell numbers.
Parameters Parameters
---------- ----------
halo : structured array pos : 2-dimensional array
Structured array containing the particles of a given halo. Keys must Halo particle array. Columns must be `x`, `y`, `z`.
`x`, `y`, `z`.
ncells : int ncells : int
Number of grid cells of the box along a single dimension. Number of grid cells of the box along a single dimension.
nshift : int, optional nshift : int, optional
@ -699,17 +716,16 @@ def get_halolims(halo, ncells, nshift=None):
Minimum and maximum along each axis. Minimum and maximum along each axis.
""" """
# Check that in case of `nshift` we have integer positions. # Check that in case of `nshift` we have integer positions.
dtype = halo["x"].dtype dtype = pos.dtype
if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]: if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
raise TypeError("`nshift` supported only positions are cells.") raise TypeError("`nshift` supported only positions are cells.")
nshift = 0 if nshift is None else nshift # To simplify code below nshift = 0 if nshift is None else nshift # To simplify code below
mins = numpy.full(3, numpy.nan, dtype=dtype) mins = numpy.full(3, numpy.nan, dtype=dtype)
maxs = numpy.full(3, numpy.nan, dtype=dtype) maxs = numpy.full(3, numpy.nan, dtype=dtype)
for i in range(3):
for i, p in enumerate(["x", "y", "z"]): mins[i] = max(numpy.min(pos[:, i]) - nshift, 0)
mins[i] = max(numpy.min(halo[p]) - nshift, 0) maxs[i] = min(numpy.max(pos[:, i]) + nshift, ncells - 1)
maxs[i] = min(numpy.max(halo[p]) + nshift, ncells - 1)
return mins, maxs return mins, maxs
@ -741,8 +757,8 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
totmass = 0.0 # Total mass of clump 1 and clump 2 totmass = 0.0 # Total mass of clump 1 and clump 2
intersect = 0.0 # Weighted intersecting mass intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack things i0, j0, k0 = cellmins # Unpack things
bckg_offset = 512 # Offset of the background density field bckg_size = 2 * BCKG_HALFSIZE
bckg_size = 1024 bckg_offset = BOX_SIZE // 2 - BCKG_HALFSIZE
imax, jmax, kmax = delta1.shape imax, jmax, kmax = delta1.shape
for i in range(imax): for i in range(imax):
@ -798,8 +814,8 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
""" """
intersect = 0.0 # Weighted intersecting mass intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack cell minimas i0, j0, k0 = cellmins # Unpack cell minimas
bckg_offset = 512 # Offset of the background density field bckg_size = 2 * BCKG_HALFSIZE
bckg_size = 1024 # Size of the background density field array bckg_offset = BOX_SIZE // 2 - BCKG_HALFSIZE
for n in range(nonzero.shape[0]): for n in range(nonzero.shape[0]):
i, j, k = nonzero[n, :] i, j, k = nonzero[n, :]
@ -821,47 +837,51 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
return intersect / (mass1 + mass2 - intersect) return intersect / (mass1 + mass2 - intersect)
def dist_centmass(clump): def load_processed_halo(hid, particles, clump_map, clid2map, clumps_cat,
ncells, nshift):
""" """
Calculate the clump (or halo) particles' distance from the centre of mass. Load a processed halo from the `.h5` file. This is to be wrapped by a
cacher.
Parameters Parameters
---------- ----------
clump : 2-dimensional array of shape (n_particles, 7) hid : int
Particle array. The first four columns must be `x`, `y`, `z` and `M`. Halo ID.
particles : 2-dimensional array
Array of particles in box units. The columns must be `x`, `y`, `z`
and `M`.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
Clumps catalogue.
ncells : int
Number of cells in the original density field. Typically 2048.
nshift : int
Number of cells to pad the density field.
Returns Returns
------- -------
dist : 1-dimensional array of shape `(n_particles, )` pos : 2-dimensional array
Particle distance from the centre of mass. Array of cell particle positions.
cm : 1-dimensional array of shape `(3,)` mass : 1-dimensional array
Center of mass coordinates. Array of particle masses.
totmass : float
Total mass of the halo.
mins : len-3 tuple
Minimum cell indices of the halo.
maxs : len-3 tuple
Maximum cell indices of the halo.
""" """
# CM along each dimension pos = load_parent_particles(hid, particles, clump_map, clid2map,
cm = numpy.average(clump[:, :3], weights=clump[:, 3], axis=0) clumps_cat)
return numpy.linalg.norm(clump[:, :3] - cm, axis=1), cm pos, mass = pos[:, :3], pos[:, 3]
pos = pos2cell(pos, ncells)
totmass = numpy.sum(mass)
def dist_percentile(dist, qs, distmax=0.075): mins, maxs = get_halolims(pos, ncells=ncells, nshift=nshift)
""" return pos, mass, totmass, mins, maxs
Calculate q-th percentiles of `dist`, with an upper limit of `distmax`.
Parameters
----------
dist : 1-dimensional array
Array of distances.
qs : 1-dimensional array
Percentiles to compute.
distmax : float, optional
The maximum distance. By default 0.075.
Returns
-------
x : 1-dimensional array
"""
x = numpy.percentile(dist, qs)
x[x > distmax] = distmax # Enforce the upper limit
return x
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0, def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,

View file

@ -15,16 +15,14 @@
from .box_units import BoxUnits # noqa from .box_units import BoxUnits # noqa
from .halo_cat import ClumpsCatalogue, HaloCatalogue # noqa from .halo_cat import ClumpsCatalogue, HaloCatalogue # noqa
from .knn_summary import kNNCDFReader # noqa from .knn_summary import kNNCDFReader # noqa
from .obs import ( # noqa from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa
SDSS, TwoMPPGroups)
MCXCClusters, from .overlap_summary import (NPairsOverlap, PairOverlap, # noqa
PlanckClusters, binned_resample_mean)
TwoMPPGalaxies,
TwoMPPGroups,
)
from .overlap_summary import NPairsOverlap, PairOverlap, binned_resample_mean # noqa
from .paths import CSiBORGPaths # noqa from .paths import CSiBORGPaths # noqa
from .pk_summary import PKReader # noqa from .pk_summary import PKReader # noqa
from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm # noqa from .readsim import (MmainReader, ParticleReader, halfwidth_select, # noqa
load_clump_particles, load_parent_particles, read_initcm)
from .tpcf_summary import TPCFReader # noqa from .tpcf_summary import TPCFReader # noqa
from .utils import cartesian_to_radec, cols_to_structured, radec_to_cartesian # noqa from .utils import (cartesian_to_radec, cols_to_structured, # noqa
radec_to_cartesian, read_h5)

View file

@ -12,7 +12,7 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""CSiBORG halo catalogue.""" """CSiBORG halo and clumps catalogues."""
from abc import ABC from abc import ABC
import numpy import numpy
@ -177,7 +177,7 @@ class BaseCatalogue(ABC):
knn : :py:class:`sklearn.neighbors.NearestNeighbors` knn : :py:class:`sklearn.neighbors.NearestNeighbors`
""" """
knn = NearestNeighbors() knn = NearestNeighbors()
return knn.fit(self.positions(in_initial)) return knn.fit(self.position(in_initial))
def radius_neigbours(self, X, radius, in_initial): def radius_neigbours(self, X, radius, in_initial):
r""" r"""
@ -368,6 +368,8 @@ class HaloCatalogue(BaseCatalogue):
minmass : len-2 tuple minmass : len-2 tuple
Minimum mass. The first element is the catalogue key and the second is Minimum mass. The first element is the catalogue key and the second is
the value. the value.
with_lagpatch : bool, optional
Whether to only load halos with a resolved Lagrangian patch.
load_fitted : bool, optional load_fitted : bool, optional
Whether to load fitted quantities. Whether to load fitted quantities.
load_initial : bool, optional load_initial : bool, optional
@ -378,22 +380,39 @@ class HaloCatalogue(BaseCatalogue):
""" """
def __init__(self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12), def __init__(self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12),
load_fitted=True, load_initial=False, rawdata=False): with_lagpatch=True, load_fitted=True, load_initial=True,
rawdata=False):
self.nsim = nsim self.nsim = nsim
self.paths = paths self.paths = paths
# Read in the mmain catalogue of summed substructure # Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim)) mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
self._data = mmain["mmain"] self._data = mmain["mmain"]
# We will also need the clumps catalogue
self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
if load_fitted: if load_fitted:
fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos")) fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos"))
cols = [col for col in fits.dtype.names if col != "index"] cols = [col for col in fits.dtype.names if col != "index"]
X = [fits[col] for col in cols] X = [fits[col] for col in cols]
self._data = add_columns(self._data, X, cols) self._data = add_columns(self._data, X, cols)
# TODO: load initial positions if load_initial:
fits = numpy.load(paths.initmatch_path(nsim, "fit"))
X, cols = [], []
for col in fits.dtype.names:
if col == "index":
continue
if col in ['x', 'y', 'z']:
cols.append(col + "0")
else:
cols.append(col)
X.append(fits[col])
self._data = add_columns(self._data, X, cols)
if not rawdata: if not rawdata:
if with_lagpatch:
self._data = self._data[numpy.isfinite(self['lagpatch'])]
# Flip positions and convert from code units to cMpc. Convert M too # Flip positions and convert from code units to cMpc. Convert M too
flip_cols(self._data, "x", "z") flip_cols(self._data, "x", "z")
for p in ("x", "y", "z"): for p in ("x", "y", "z"):
@ -402,9 +421,24 @@ class HaloCatalogue(BaseCatalogue):
"r500c", "m200c", "m500c", "r200m", "m200m"] "r500c", "m200c", "m500c", "r200m", "m200m"]
self._data = self.box.convert_from_boxunits(self._data, names) self._data = self.box.convert_from_boxunits(self._data, names)
if load_initial:
names = ["x0", "y0", "z0", "lagpatch"]
self._data = self.box.convert_from_boxunits(self._data, names)
if maxdist is not None: if maxdist is not None:
dist = numpy.sqrt(self._data["x"]**2 + self._data["y"]**2 dist = numpy.sqrt(self._data["x"]**2 + self._data["y"]**2
+ self._data["z"]**2) + self._data["z"]**2)
self._data = self._data[dist < maxdist] self._data = self._data[dist < maxdist]
if minmass is not None: if minmass is not None:
self._data = self._data[self._data[minmass[0]] > minmass[1]] self._data = self._data[self._data[minmass[0]] > minmass[1]]
@property
def clumps_cat(self):
"""
The raw clumps catalogue.
Returns
-------
clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
"""
return self._clumps_cat

View file

@ -260,7 +260,7 @@ class CSiBORGPaths:
fname = f"{kind}_out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy" fname = f"{kind}_out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
return join(fdir, fname) return join(fdir, fname)
def overlap_path(self, nsim0, nsimx): def overlap_path(self, nsim0, nsimx, smoothed):
""" """
Path to the overlap files between two simulations. Path to the overlap files between two simulations.
@ -270,6 +270,8 @@ class CSiBORGPaths:
IC realisation index of the first simulation. IC realisation index of the first simulation.
nsimx : int nsimx : int
IC realisation index of the second simulation. IC realisation index of the second simulation.
smoothed : bool
Whether the overlap is smoothed or not.
Returns Returns
------- -------
@ -280,6 +282,8 @@ class CSiBORGPaths:
mkdir(fdir) mkdir(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1) warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
fname = f"overlap_{str(nsim0).zfill(5)}_{str(nsimx).zfill(5)}.npz" fname = f"overlap_{str(nsim0).zfill(5)}_{str(nsimx).zfill(5)}.npz"
if smoothed:
fname = fname.replace("overlap", "overlap_smoothed")
return join(fdir, fname) return join(fdir, fname)
def radpos_path(self, nsnap, nsim): def radpos_path(self, nsnap, nsim):
@ -305,37 +309,24 @@ class CSiBORGPaths:
fname = f"radpos_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz" fname = f"radpos_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz"
return join(fdir, fname) return join(fdir, fname)
def particle_h5py_path(self, nsim, kind=None, dtype="float32"): def particles_path(self, nsim):
""" """
Path to the file containing all particles in a `.h5` file. Path to the files containing all particles.
Parameters Parameters
---------- ----------
nsim : int nsim : int
IC realisation index. IC realisation index.
kind : str
Type of output. Must be one of `[None, 'pos', 'clumpmap']`.
dtype : str
Data type. Must be one of `['float32', 'float64']`.
Returns Returns
------- -------
path : str path : str
""" """
assert kind in [None, "pos", "clumpmap"]
assert dtype in ["float32", "float64"]
fdir = join(self.postdir, "particles") fdir = join(self.postdir, "particles")
if not isdir(fdir): if not isdir(fdir):
makedirs(fdir) makedirs(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1) warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
if kind is None:
fname = f"parts_{str(nsim).zfill(5)}.h5" fname = f"parts_{str(nsim).zfill(5)}.h5"
else:
fname = f"parts_{kind}_{str(nsim).zfill(5)}.h5"
if dtype == "float64":
fname = fname.replace(".h5", "_f64.h5")
return join(fdir, fname) return join(fdir, fname)
def density_field_path(self, mas, nsim): def density_field_path(self, mas, nsim):

View file

@ -215,7 +215,10 @@ class ParticleReader:
Returns Returns
------- -------
out : array out : structured array or 2-dimensional array
Particle information.
pids : 1-dimensional array
Particle IDs.
""" """
# Open the particle files # Open the particle files
nparts, partfiles = self.open_particle(nsnap, nsim, verbose=verbose) nparts, partfiles = self.open_particle(nsnap, nsim, verbose=verbose)
@ -233,6 +236,8 @@ class ParticleReader:
# Check there are no strange parameters # Check there are no strange parameters
if isinstance(pars_extract, str): if isinstance(pars_extract, str):
pars_extract = [pars_extract] pars_extract = [pars_extract]
if "ID" in pars_extract:
pars_extract.remove("ID")
for p in pars_extract: for p in pars_extract:
if p not in fnames: if p not in fnames:
raise ValueError(f"Undefined parameter `{p}`.") raise ValueError(f"Undefined parameter `{p}`.")
@ -250,6 +255,7 @@ class ParticleReader:
par2arrpos = {par: i for i, par in enumerate(pars_extract)} par2arrpos = {par: i for i, par in enumerate(pars_extract)}
out = numpy.full((npart_tot, len(pars_extract)), numpy.nan, out = numpy.full((npart_tot, len(pars_extract)), numpy.nan,
dtype=numpy.float32) dtype=numpy.float32)
pids = numpy.full(npart_tot, numpy.nan, dtype=numpy.int32)
start_ind = self.nparts_to_start_ind(nparts) start_ind = self.nparts_to_start_ind(nparts)
iters = tqdm(range(ncpu)) if verbose else range(ncpu) iters = tqdm(range(ncpu)) if verbose else range(ncpu)
@ -257,19 +263,21 @@ class ParticleReader:
i = start_ind[cpu] i = start_ind[cpu]
j = nparts[cpu] j = nparts[cpu]
for (fname, fdtype) in zip(fnames, fdtypes): for (fname, fdtype) in zip(fnames, fdtypes):
if fname in pars_extract:
single_part = self.read_sp(fdtype, partfiles[cpu]) single_part = self.read_sp(fdtype, partfiles[cpu])
if fname == "ID":
pids[i:i + j] = single_part
elif fname in pars_extract:
if return_structured: if return_structured:
out[fname][i:i + j] = single_part out[fname][i:i + j] = single_part
else: else:
out[i:i + j, par2arrpos[fname]] = single_part out[i:i + j, par2arrpos[fname]] = single_part
else: else:
dum[i:i + j] = self.read_sp(fdtype, partfiles[cpu]) dum[i:i + j] = single_part
# Close the fortran files # Close the fortran files
for partfile in partfiles: for partfile in partfiles:
partfile.close() partfile.close()
return out return out, pids
def open_unbinding(self, nsnap, nsim, cpu): def open_unbinding(self, nsnap, nsim, cpu):
""" """
@ -389,11 +397,16 @@ class ParticleReader:
class MmainReader: class MmainReader:
""" """
Object to generate the summed substructure catalogue. Object to generate the summed substructure catalogue.
Parameters
----------
paths : :py:class:`csiborgtools.read.CSiBORGPaths`
Paths objects.
""" """
_paths = None _paths = None
def __init__(self, paths): def __init__(self, paths):
assert isinstance(paths, CSiBORGPaths) # REMOVE assert isinstance(paths, CSiBORGPaths)
self._paths = paths self._paths = paths
@property @property
@ -444,7 +457,7 @@ class MmainReader:
def make_mmain(self, nsim, verbose=False): def make_mmain(self, nsim, verbose=False):
""" """
Make the summed substructure catalogue for a final snapshot. Includes Make the summed substructure catalogue for a final snapshot. Includes
the position of the paren, the summed mass and the fraction of mass in the position of the parent, the summed mass and the fraction of mass in
substructure. substructure.
Parameters Parameters
@ -472,10 +485,10 @@ class MmainReader:
nmain = numpy.sum(mask_main) nmain = numpy.sum(mask_main)
# Preallocate already the output array # Preallocate already the output array
out = cols_to_structured( out = cols_to_structured(
nmain, [("ID", numpy.int32), ("x", numpy.float32), nmain, [("index", numpy.int32), ("x", numpy.float32),
("y", numpy.float32), ("z", numpy.float32), ("y", numpy.float32), ("z", numpy.float32),
("M", numpy.float32), ("subfrac", numpy.float32)]) ("M", numpy.float32), ("subfrac", numpy.float32)])
out["ID"] = clumparr["index"][mask_main] out["index"] = clumparr["index"][mask_main]
# Because for these index == parent # Because for these index == parent
for p in ('x', 'y', 'z'): for p in ('x', 'y', 'z'):
out[p] = clumparr[p][mask_main] out[p] = clumparr[p][mask_main]
@ -483,7 +496,7 @@ class MmainReader:
for i in range(nmain): for i in range(nmain):
# Should include the main halo itself, i.e. its own ultimate parent # Should include the main halo itself, i.e. its own ultimate parent
out["M"][i] = numpy.sum( out["M"][i] = numpy.sum(
clumparr["mass_cl"][ultimate_parent == out["ID"][i]]) clumparr["mass_cl"][ultimate_parent == out["index"][i]])
out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"] out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"]
return out, ultimate_parent return out, ultimate_parent
@ -549,3 +562,69 @@ def halfwidth_select(hw, particles):
for p in ('x', 'y', 'z'): for p in ('x', 'y', 'z'):
particles[p] = (particles[p] - 0.5 + hw) / (2 * hw) particles[p] = (particles[p] - 0.5 + hw) / (2 * hw)
return particles return particles
def load_clump_particles(clid, particles, clump_map, clid2map):
"""
Load a clump's particles from a particle array. If it is not there, i.e
clump has no associated particles, return `None`.
Parameters
----------
clid : int
Clump ID.
particles : 2-dimensional array
Array of particles.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
Returns
-------
clump_particles : 2-dimensional array
Particle array of this clump.
"""
try:
k0, kf = clump_map[clid2map[clid], 1:]
return particles[k0:kf + 1, :]
except KeyError:
return None
def load_parent_particles(hid, particles, clump_map, clid2map, clumps_cat):
"""
Load a parent halo's particles from a particle array. If it is not there,
return `None`.
Parameters
----------
hid : int
Halo ID.
particles : 2-dimensional array
Array of particles.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
Clumps catalogue.
Returns
-------
halo : 2-dimensional array
Particle array of this halo.
"""
clids = clumps_cat["index"][clumps_cat["parent"] == hid]
# We first load the particles of each clump belonging to this parent
# and then concatenate them for further analysis.
clumps = []
for clid in clids:
parts = load_clump_particles(clid, particles, clump_map, clid2map)
if parts is not None:
clumps.append(parts)
if len(clumps) == 0:
return None
return numpy.concatenate(clumps)

View file

@ -15,7 +15,10 @@
""" """
Various coordinate transformations. Various coordinate transformations.
""" """
from os.path import isfile
import numpy import numpy
from h5py import File
############################################################################### ###############################################################################
# Coordinate transforms # # Coordinate transforms #
@ -291,14 +294,35 @@ def extract_from_structured(arr, cols):
cols = [cols] if isinstance(cols, str) else cols cols = [cols] if isinstance(cols, str) else cols
for col in cols: for col in cols:
if col not in arr.dtype.names: if col not in arr.dtype.names:
raise ValueError("Invalid column `{}`!".format(col)) raise ValueError(f"Invalid column `{col}`!")
# Preallocate an array and populate it # Preallocate an array and populate it
out = numpy.zeros((arr.size, len(cols)), dtype=arr[cols[0]].dtype) out = numpy.zeros((arr.size, len(cols)), dtype=arr[cols[0]].dtype)
for i, col in enumerate(cols): for i, col in enumerate(cols):
out[:, i] = arr[col] out[:, i] = arr[col]
# Optionally flatten # Optionally flatten
if len(cols) == 1: if len(cols) == 1:
return out.reshape( return out.reshape(-1, )
-1,
)
return out return out
###############################################################################
# h5py functions #
###############################################################################
def read_h5(path):
"""
Return and return and open `h5py.File` object.
Parameters
----------
path : str
Path to the file.
Returns
-------
file : `h5py.File`
"""
if not isfile(path):
raise IOError(f"File `{path}` does not exist!")
return File(path, "r")

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -18,9 +18,7 @@ realisation must have been split in advance by `runsplit_halos`.
""" """
from argparse import ArgumentParser from argparse import ArgumentParser
from datetime import datetime from datetime import datetime
from os.path import join
import h5py
import numpy import numpy
from mpi4py import MPI from mpi4py import MPI
from tqdm import tqdm from tqdm import tqdm
@ -33,20 +31,26 @@ except ModuleNotFoundError:
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
args = parser.parse_args()
# Get MPI things # Get MPI things
comm = MPI.COMM_WORLD comm = MPI.COMM_WORLD
rank = comm.Get_rank() rank = comm.Get_rank()
nproc = comm.Get_size() nproc = comm.Get_size()
verbose = nproc == 1
parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths) partreader = csiborgtools.read.ParticleReader(paths)
nfwpost = csiborgtools.fits.NFWPosterior() nfwpost = csiborgtools.fits.NFWPosterior()
ftemp = join(paths.temp_dumpdir, "fit_clump_{}_{}_{}.npy")
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=False)
else:
ics = args.ics
cols_collect = [ cols_collect = [
("index", numpy.int32), ("index", numpy.int32),
("npart", numpy.int32), ("npart", numpy.int32),
@ -95,46 +99,19 @@ def fit_clump(particles, clump_info, box):
return out return out
def load_clump_particles(clumpid, particles, clump_map): # We MPI loop over all simulations.
""" jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
Load a clump's particles. If it is not there, i.e clump has no associated for nsim in [ics[i] for i in jobs]:
particles, return `None`. print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
"""
try:
return particles[clump_map[clumpid], :]
except KeyError:
return None
def load_parent_particles(clumpid, particles, clump_map, clumps_cat):
"""
Load a parent halo's particles.
"""
indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
# We first load the particles of each clump belonging to this parent
# and then concatenate them for further analysis.
clumps = []
for ind in indxs:
parts = load_clump_particles(ind, particles, clump_map)
if parts is not None:
clumps.append(parts)
if len(clumps) == 0:
return None
return numpy.concatenate(clumps)
# We now start looping over all simulations
for i, nsim in enumerate(paths.get_ics(tonew=False)):
if rank == 0:
print(f"{datetime.now()}: calculating {i}th simulation `{nsim}`.",
flush=True) flush=True)
nsnap = max(paths.get_snapshots(nsim)) nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths) box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
# Particle archive # Particle archive
particles = h5py.File(paths.particle_h5py_path(nsim), 'r')["particles"] f = csiborgtools.read.read_h5(paths.particles_path(nsim))
clump_map = h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), 'r') particles = f["particles"]
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True, clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False) load_fitted=False)
# We check whether we fit halos or clumps, will be indexing over different # We check whether we fit halos or clumps, will be indexing over different
@ -143,66 +120,39 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
ismain = clumps_cat.ismain ismain = clumps_cat.ismain
else: else:
ismain = numpy.ones(len(clumps_cat), dtype=bool) ismain = numpy.ones(len(clumps_cat), dtype=bool)
ntasks = len(clumps_cat)
# We split the clumps among the processes. Each CPU calculates a fraction
# of them and dumps the results in a structured array. Even if we are
# calculating parent halo this index runs over all clumps.
jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]
out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect)
for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs):
clumpid = clumps_cat["index"][j]
out["index"][i] = clumpid
# Even if we are calculating parent halo this index runs over all clumps.
out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
indxs = clumps_cat["index"]
for i, clid in enumerate(tqdm(indxs)) if verbose else enumerate(indxs):
clid = clumps_cat["index"][i]
out["index"][i] = clid
# If we are fitting halos and this clump is not a main, then continue. # If we are fitting halos and this clump is not a main, then continue.
if args.kind == "halos" and not ismain[j]: if args.kind == "halos" and not ismain[i]:
continue continue
if args.kind == "halos": if args.kind == "halos":
part = load_parent_particles(clumpid, particles, clump_map, part = csiborgtools.read.load_parent_particles(
clumps_cat) clid, particles, clump_map, clid2map, clumps_cat)
else: else:
part = load_clump_particles(clumpid, particles, clump_map) part = csiborgtools.read.load_clump_particles(clid, particles,
clump_map, clid2map)
# We fit the particles if there are any. If not we assign the index, # We fit the particles if there are any. If not we assign the index,
# otherwise it would be NaN converted to integers (-2147483648) and # otherwise it would be NaN converted to integers (-2147483648) and
# yield an error further down. # yield an error further down.
if part is not None: if part is None:
_out = fit_clump(part, clumps_cat[j], box) continue
_out = fit_clump(part, clumps_cat[i], box)
for key in _out.keys(): for key in _out.keys():
out[key][i] = _out[key] out[key][i] = _out[key]
fout = ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), rank) # Finally, we save the results. If we were analysing main halos, then
if nproc == 0: # remove array indices that do not correspond to parent halos.
print(f"{datetime.now()}: rank {rank} saving to `{fout}`.", flush=True)
numpy.save(fout, out)
# We saved this CPU's results in a temporary file. Wait now for the other
# CPUs and then collect results from the 0th rank and save them.
comm.Barrier()
if rank == 0:
print(f"{datetime.now()}: collecting results for simulation `{nsim}`.",
flush=True)
# We write to the output array. Load data from each CPU and append to
# the output array.
out = csiborgtools.read.cols_to_structured(ntasks, cols_collect)
clumpid2outpos = {indx: i
for i, indx in enumerate(clumps_cat["index"])}
for i in range(nproc):
inp = numpy.load(ftemp.format(str(nsim).zfill(5),
str(nsnap).zfill(5), i))
for j, clumpid in enumerate(inp["index"]):
k = clumpid2outpos[clumpid]
for key in inp.dtype.names:
out[key][k] = inp[key][j]
# If we were analysing main halos, then remove array indices that do
# not correspond to parent halos.
if args.kind == "halos": if args.kind == "halos":
out = out[ismain] out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, args.kind) fout = paths.structfit_path(nsnap, nsim, args.kind)
print(f"Saving to `{fout}`.", flush=True) print(f"Saving to `{fout}`.", flush=True)
numpy.save(fout, out) numpy.save(fout, out)
# We now wait before moving on to another simulation.
comm.Barrier()

104
scripts/fit_init.py Normal file
View file

@ -0,0 +1,104 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass, Lagrangian patch size in the
initial snapshot. The initial snapshot particles are read from the sorted
files.
"""
from argparse import ArgumentParser
from datetime import datetime
import numpy
from mpi4py import MPI
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=True)
else:
ics = args.ics
cols_collect = [("index", numpy.int32),
("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("lagpatch", numpy.float32),]
# MPI loop over simulations
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for nsim in [ics[i] for i in jobs]:
nsnap = max(paths.get_snapshots(nsim))
print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
flush=True)
parts = csiborgtools.read.read_h5(paths.initmatch_path(nsim, "particles"))
parts = parts['particles']
clump_map = csiborgtools.read.read_h5(paths.particles_path(nsim))
clump_map = clump_map["clumpmap"]
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
ismain = clumps_cat.ismain
out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
indxs = clumps_cat["index"]
for i, hid in enumerate(tqdm(indxs) if verbose else indxs):
out["index"][i] = hid
if not ismain[i]:
continue
part = csiborgtools.read.load_parent_particles(hid, parts, clump_map,
clid2map, clumps_cat)
# Skip if the halo is too small.
if part is None or part.size < 100:
continue
dist, cm = csiborgtools.fits.dist_centmass(part)
# We enforce a maximum patchsize of 0.075 in box coordinates.
patchsize = min(numpy.percentile(dist, 99), 0.075)
out["x"][i], out["y"][i], out["z"][i] = cm
out["lagpatch"][i] = patchsize
out = out[ismain]
# Now save it
fout = paths.initmatch_path(nsim, "fit")
print(f"{datetime.now()}: dumping fits to .. `{fout}`.",
flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)

View file

@ -54,35 +54,6 @@ else:
nsims = args.ics nsims = args.ics
def load_clump_particles(clumpid, particles, clump_map):
"""
Load a clump's particles. If it is not there, i.e clump has no associated
particles, return `None`.
"""
try:
return particles[clump_map[clumpid], :]
except KeyError:
return None
def load_parent_particles(clumpid, particles, clump_map, clumps_cat):
"""
Load a parent halo's particles.
"""
indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
# We first load the particles of each clump belonging to this parent
# and then concatenate them for further analysis.
clumps = []
for ind in indxs:
parts = load_clump_particles(ind, particles, clump_map)
if parts is not None:
clumps.append(parts)
if len(clumps) == 0:
return None
return numpy.concatenate(clumps)
# We loop over simulations. Here later optionally add MPI. # We loop over simulations. Here later optionally add MPI.
for i, nsim in enumerate(nsims): for i, nsim in enumerate(nsims):
if rank == 0: if rank == 0:
@ -91,10 +62,11 @@ for i, nsim in enumerate(nsims):
nsnap = max(paths.get_snapshots(nsim)) nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths) box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
particles = h5py.File(paths.particle_h5py_path(nsim), 'r')["particles"] f = csiborgtools.read.read_h5(paths.particles_path(nsim))
clump_map = h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), 'r') particles = f["particles"]
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, maxdist=None, clump_map = f["clumpmap"]
minmass=None, rawdata=True, clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False) load_fitted=False)
ismain = clumps_cat.ismain ismain = clumps_cat.ismain
ntasks = len(clumps_cat) ntasks = len(clumps_cat)
@ -108,8 +80,8 @@ for i, nsim in enumerate(nsims):
continue continue
clumpid = clumps_cat["index"][j] clumpid = clumps_cat["index"][j]
parts = load_parent_particles(clumpid, particles, clump_map, parts = csiborgtools.read.load_parent_particles(
clumps_cat) clumpid, particles, clump_map, clid2map, clumps_cat)
# If we have no particles, then do not save anything. # If we have no particles, then do not save anything.
if parts is None: if parts is None:
continue continue
@ -124,8 +96,7 @@ for i, nsim in enumerate(nsims):
_out["r"] = r[mask] _out["r"] = r[mask]
_out["M"] = obj["M"][mask] _out["M"] = obj["M"][mask]
out[str(clumpid)] = _out
out[str(clumps_cat["index"][j])] = _out
# Finished, so we save everything. # Finished, so we save everything.
fout = paths.radpos_path(nsnap, nsim) fout = paths.radpos_path(nsnap, nsim)

View file

@ -13,6 +13,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate overlap between two CSiBORG realisations.""" """A script to calculate overlap between two CSiBORG realisations."""
from argparse import ArgumentParser from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime from datetime import datetime
from distutils.util import strtobool from distutils.util import strtobool
@ -26,13 +27,16 @@ except ModuleNotFoundError:
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
from csiborgtools.read import HaloCatalogue, read_h5
# Argument parser # Argument parser
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--nsim0", type=int) parser.add_argument("--nsim0", type=int)
parser.add_argument("--nsimx", type=int) parser.add_argument("--nsimx", type=int)
parser.add_argument("--nmult", type=float) parser.add_argument("--nmult", type=float)
parser.add_argument("--sigma", type=float) parser.add_argument("--sigma", type=float, default=None)
parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
default=None)
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)), parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False) default=False)
args = parser.parse_args() args = parser.parse_args()
@ -43,27 +47,52 @@ matcher = csiborgtools.match.RealisationsMatcher()
# Load the raw catalogues (i.e. no selection) including the initial CM # Load the raw catalogues (i.e. no selection) including the initial CM
# positions and the particle archives. # positions and the particle archives.
cat0 = csiborgtools.read.HaloCatalogue(args.nsim0, paths, load_initial=True, cat0 = HaloCatalogue(args.nsim0, paths, load_initial=True,
rawdata=True) minmass=("totpartmass", 1e12), with_lagpatch=True)
catx = csiborgtools.read.HaloCatalogue(args.nsimx, paths, load_initial=True, catx = HaloCatalogue(args.nsimx, paths, load_initial=True,
rawdata=True) minmass=("totpartmass", 1e12), with_lagpatch=True)
halos0_archive = paths.initmatch_path(args.nsim0, "particles")
halosx_archive = paths.initmatch_path(args.nsimx, "particles") clumpmap0 = read_h5(paths.particles_path(args.nsim0))["clumpmap"]
parts0 = read_h5(paths.initmatch_path(args.nsim0, "particles"))["particles"]
clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}
clumpmapx = read_h5(paths.particles_path(args.nsimx))["clumpmap"]
partsx = read_h5(paths.initmatch_path(args.nsimx, "particles"))["particles"]
clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}
# We generate the background density fields. Loads halos's particles one by one # We generate the background density fields. Loads halos's particles one by one
# from the archive, concatenates them and calculates the NGP density field. # from the archive, concatenates them and calculates the NGP density field.
if args.verbose: if args.verbose:
print(f"{datetime.now()}: generating the background density fields.", print(f"{datetime.now()}: generating the background density fields.",
flush=True) flush=True)
delta_bckg = overlapper.make_bckg_delta(halos0_archive, verbose=args.verbose) delta_bckg = overlapper.make_bckg_delta(parts0, clumpmap0, clid2map0, cat0,
delta_bckg = overlapper.make_bckg_delta(halosx_archive, delta=delta_bckg,
verbose=args.verbose) verbose=args.verbose)
delta_bckg = overlapper.make_bckg_delta(partsx, clumpmapx, clid2mapx, catx,
delta=delta_bckg, verbose=args.verbose)
# We calculate the overlap between the NGP fields. # We calculate the overlap between the NGP fields.
if args.verbose: if args.verbose:
print(f"{datetime.now()}: crossing the simulations.", flush=True) print(f"{datetime.now()}: crossing the simulations.", flush=True)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, halos0_archive, match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx, clumpmap0,
halosx_archive, delta_bckg) clumpmapx, delta_bckg,
verbose=args.verbose)
# We wish to store the halo IDs of the matches, not their array positions in
# the catalogues
match_hids = deepcopy(match_indxs)
for i, matches in enumerate(match_indxs):
for j, match in enumerate(matches):
match_hids[i][j] = catx["index"][match]
fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=False)
numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
ngp_overlap=ngp_overlap)
if args.verbose:
print(f"{datetime.now()}: calculated NGP overlap, saved to {fout}.",
flush=True)
if not args.smoothen:
quit()
# We now smoothen up the background density field for the smoothed overlap # We now smoothen up the background density field for the smoothed overlap
# calculation. # calculation.
@ -72,16 +101,12 @@ if args.verbose:
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs) gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0. # We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
if args.verbose: smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
print(f"{datetime.now()}: calculating smoothed overlaps.", flush=True) clumpmap0, clumpmapx, delta_bckg,
smoothed_overlap = matcher.smoothed_cross(cat0, catx, halos0_archive,
halosx_archive, delta_bckg,
match_indxs, smooth_kwargs) match_indxs, smooth_kwargs)
# We save the results at long last. fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=True)
fout = paths.overlap_path(args.nsim0, args.nsimx) numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=args.sigma)
if args.verbose: if args.verbose:
print(f"{datetime.now()}: saving results to `{fout}`.", flush=True) print(f"{datetime.now()}: calculated smoothed overlap, saved to {fout}.",
numpy.savez(fout, match_indxs=match_indxs, ngp_overlap=ngp_overlap, flush=True)
smoothed_overlap=smoothed_overlap, sigma=args.sigma)
print(f"{datetime.now()}: all finished.", flush=True)

View file

@ -12,18 +12,20 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
""" """
Script to load in the simulation particles and dump them to a HDF5 file. Script to load in the simulation particles, load them by their clump ID and
Creates a mapping to access directly particles of a single clump. dump into a HDF5 file. Stores the first and last index of each clump in the
particle array. This can be used for fast slicing of the array to acces
particles of a single clump.
""" """
from datetime import datetime from datetime import datetime
from distutils.util import strtobool
from gc import collect from gc import collect
import h5py import h5py
import numba
import numpy import numpy
from mpi4py import MPI from mpi4py import MPI
from tqdm import tqdm from tqdm import trange
try: try:
import csiborgtools import csiborgtools
@ -44,75 +46,109 @@ nproc = comm.Get_size()
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None, parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.") help="IC realisations. If `-1` processes all simulations.")
parser.add_argument("--pos_only", type=lambda x: bool(strtobool(x)),
help="Do we only dump positions?")
parser.add_argument("--dtype", type=str, choices=["float32", "float64"],
default="float32",)
args = parser.parse_args() args = parser.parse_args()
verbose = nproc == 1 verbose = nproc == 1
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths) partreader = csiborgtools.read.ParticleReader(paths)
# Keep "ID" as the last column!
if args.pos_only: pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
pars_extract = ['x', 'y', 'z', 'M']
else:
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
if args.ics is None or args.ics[0] == -1: if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=False) ics = paths.get_ics(tonew=False)
else: else:
ics = args.ics ics = args.ics
@numba.jit(nopython=True)
def minmax_clump(clid, clump_ids, start_loop=0):
"""
Find the start and end index of a clump in a sorted array of clump IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, clump_ids.size):
n = clump_ids[i]
if n == clid:
if start is None:
start = i
end = i
elif n > clid:
break
return start, end
# MPI loop over individual simulations. We read in the particles from RAMSES # MPI loop over individual simulations. We read in the particles from RAMSES
# files and dump them to a HDF5 file. # files and dump them to a HDF5 file.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank] jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs: for i in jobs:
nsim = ics[i] nsim = ics[i]
nsnap = max(paths.get_snapshots(nsim)) nsnap = max(paths.get_snapshots(nsim))
print(f"{datetime.now()}: Rank {rank} loading particles {nsim}.", fname = paths.particles_path(nsim)
# We first read in the clump IDs of the particles and infer the sorting.
# Right away we dump the clump IDs to a HDF5 file and clear up memory.
print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
flush=True) flush=True)
part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
sort_indxs = numpy.argsort(part_cids).astype(numpy.int32)
part_cids = part_cids[sort_indxs]
with h5py.File(fname, "w") as f:
f.create_dataset("clump_ids", data=part_cids)
f.close()
del part_cids
collect()
parts = partreader.read_particle(nsnap, nsim, pars_extract, # Next we read in the particles and sort them by their clump ID.
return_structured=False, verbose=verbose) # We cannot directly read this as an unstructured array because the float32
if args.dtype == "float64": # precision is insufficient to capture the clump IDs.
parts = parts.astype(numpy.float64) parts, pids = partreader.read_particle(
nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
kind = "pos" if args.pos_only else None # Now we in two steps save the particles and particle IDs.
print(f"{datetime.now()}: rank {rank} dumping particles from {nsim}.",
print(f"{datetime.now()}: Rank {rank} dumping particles from {nsim}.",
flush=True) flush=True)
parts = parts[sort_indxs]
pids = pids[sort_indxs]
del sort_indxs
collect()
with h5py.File(paths.particle_h5py_path(nsim, kind, args.dtype), "w") as f: with h5py.File(fname, "r+") as f:
f.create_dataset("particle_ids", data=pids)
f.close()
del pids
collect()
with h5py.File(fname, "r+") as f:
f.create_dataset("particles", data=parts) f.create_dataset("particles", data=parts)
f.close()
del parts del parts
collect() collect()
print(f"{datetime.now()}: Rank {rank} finished dumping of {nsim}.",
flush=True)
# If we are dumping only particle positions, then we are done.
if args.pos_only:
continue
print(f"{datetime.now()}: Rank {rank} mapping particles from {nsim}.", print(f"{datetime.now()}: rank {rank} creating clump mapping for {nsim}.",
flush=True) flush=True)
# If not, then load the clump IDs and prepare the memory mapping. We find # Load clump IDs back to memory
# which array positions correspond to which clump IDs and save it. With with h5py.File(fname, "r") as f:
# this we can then lazily load into memory the particles for each clump. part_cids = f["clump_ids"][:]
part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose) # We loop over the unique clump IDs.
cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False, unique_clump_ids = numpy.unique(part_cids)
rawdata=True) clump_map = numpy.full((unique_clump_ids.size, 3), numpy.nan,
clumpinds = cat["index"] dtype=numpy.int32)
# Some of the clumps have no particles, so we do not loop over them start_loop = 0
clumpinds = clumpinds[numpy.isin(clumpinds, part_cids)] niters = unique_clump_ids.size
for i in trange(niters) if verbose else range(niters):
out = {} clid = unique_clump_ids[i]
for i, cid in enumerate(tqdm(clumpinds) if verbose else clumpinds): k0, kf = minmax_clump(clid, part_cids, start_loop=start_loop)
out.update({str(cid): numpy.where(part_cids == cid)[0]}) clump_map[i, 0] = clid
clump_map[i, 1] = k0
clump_map[i, 2] = kf
start_loop = kf
# We save the mapping to a HDF5 file # We save the mapping to a HDF5 file
with h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), "w") as f: with h5py.File(paths.particles_path(nsim), "r+") as f:
for cid, indxs in out.items(): f.create_dataset("clumpmap", data=clump_map)
f.create_dataset(cid, data=indxs) f.close()
del part_cids, cat, clumpinds, out del part_cids
collect() collect()

View file

@ -1,199 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass, Lagrangian patch size in the
initial snapshot and the particle mapping.
"""
from argparse import ArgumentParser
from os.path import join
from datetime import datetime
from gc import collect
import joblib
from os import remove
import h5py
import numpy
from mpi4py import MPI
from tqdm import trange
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
ftemp = lambda kind, nsim, rank: join(paths.temp_dumpdir, f"{kind}_{nsim}_{rank}.p") # noqa
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=True)
else:
ics = args.ics
# We loop over simulations. Each simulation is then procesed with MPI, rank 0
# loads the data and broadcasts it to other ranks.
for nsim in ics:
nsnap = max(paths.get_snapshots(nsim))
if rank == 0:
print(f"{datetime.now()}: reading simulation {nsim}.", flush=True)
# We first load particles in the initial and final snapshots and sort
# them by their particle IDs so that we can match them by array
# position. `clump_ids` are the clump IDs of particles.
part0 = partreader.read_particle(1, nsim, ["x", "y", "z", "M", "ID"],
verbose=True,
return_structured=False)
part0 = part0[numpy.argsort(part0[:, -1])]
part0 = part0[:, :-1] # Now we no longer need the particle IDs
pid = partreader.read_particle(nsnap, nsim, ["ID"], verbose=True,
return_structured=False).reshape(-1, )
clump_ids = partreader.read_clumpid(nsnap, nsim, verbose=True)
clump_ids = clump_ids[numpy.argsort(pid)]
# Release the particle IDs, we will not need them anymore now that both
# particle arrays are matched in ordering.
del pid
collect()
# Particles whose clump ID is 0 are unassigned to a clump, so we can
# get rid of them to speed up subsequent operations. We will not need
# these. Again we release the mask.
mask = clump_ids > 0
clump_ids = clump_ids[mask]
part0 = part0[mask, :]
del mask
collect()
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
f.create_dataset("particles", data=part0)
print(f"{datetime.now()}: broadcasting simulation {nsim}.", flush=True)
# Stop all ranks and figure out array shapes from the 0th rank
comm.Barrier()
if rank == 0:
shape = numpy.array([*part0.shape], dtype=numpy.int32)
else:
shape = numpy.empty(2, dtype=numpy.int32)
comm.Bcast(shape, root=0)
# Now broadcast the particle arrays to all ranks
if rank > 0:
part0 = numpy.empty(shape, dtype=numpy.float32)
clump_ids = numpy.empty(shape[0], dtype=numpy.int32)
comm.Bcast(part0, root=0)
comm.Bcast(clump_ids, root=0)
if rank == 0:
print(f"{datetime.now()}: simulation {nsim} broadcasted.", flush=True)
# Calculate the centre of mass of each parent halo, the Lagrangian patch
# size and optionally the initial snapshot particles belonging to this
# parent halo. Dumping the particles will take majority of time.
if rank == 0:
print(f"{datetime.now()}: calculating simulation {nsim}.", flush=True)
# We load up the clump catalogue which contains information about the
# ultimate parent halos of each clump. We will loop only over the clump
# IDs of ultimate parent halos and add their substructure particles and at
# the end save these.
cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
rawdata=True)
parent_ids = cat["index"][cat.ismain]
parent_ids = parent_ids
hid2arrpos = {indx: j for j, indx in enumerate(parent_ids)}
# And we pre-allocate the output array for this simulation.
dtype = {"names": ["index", "x", "y", "z", "lagpatch"],
"formats": [numpy.int32] + [numpy.float32] * 4}
# We MPI loop over the individual halos
jobs = csiborgtools.fits.split_jobs(parent_ids.size, nproc)[rank]
_out_fits = numpy.full(len(jobs), numpy.nan, dtype=dtype)
_out_map = {}
for i in trange(len(jobs)) if verbose else range(len(jobs)):
clid = parent_ids[jobs[i]]
_out_fits["index"][i] = clid
mmain_indxs = cat["index"][cat["parent"] == clid]
mmain_mask = numpy.isin(clump_ids, mmain_indxs, assume_unique=True)
mmain_particles = part0[mmain_mask, :]
# If the number of particles is too small, we skip this halo.
if mmain_particles.size < 100:
continue
raddist, cmpos = csiborgtools.match.dist_centmass(mmain_particles)
patchsize = csiborgtools.match.dist_percentile(raddist, [99],
distmax=0.075)
# Write the temporary results
_out_fits["x"][i], _out_fits["y"][i], _out_fits["z"][i] = cmpos
_out_fits["lagpatch"][i] = patchsize
_out_map.update({str(clid): numpy.where(mmain_mask)[0]})
# Dump the results of this rank to a temporary file.
joblib.dump(_out_fits, ftemp("fits", nsim, rank))
joblib.dump(_out_map, ftemp("map", nsim, rank))
del part0, clump_ids,
collect()
# Now we wait for all ranks, then collect the results and save it.
comm.Barrier()
if rank == 0:
print(f"{datetime.now()}: collecting results for {nsim}.", flush=True)
out_fits = numpy.full(parent_ids.size, numpy.nan, dtype=dtype)
out_map = {}
for i in range(nproc):
# Merge the map dictionaries
out_map = out_map | joblib.load(ftemp("map", nsim, i))
# Now merge the structured arrays
_out_fits = joblib.load(ftemp("fits", nsim, i))
for j in range(_out_fits.size):
k = hid2arrpos[_out_fits["index"][j]]
for par in dtype["names"]:
out_fits[par][k] = _out_fits[par][j]
remove(ftemp("fits", nsim, i))
remove(ftemp("map", nsim, i))
# Now save it
fout_fit = paths.initmatch_path(nsim, "fit")
print(f"{datetime.now()}: dumping fits to .. `{fout_fit}`.",
flush=True)
with open(fout_fit, "wb") as f:
numpy.save(f, out_fits)
fout_map = paths.initmatch_path(nsim, "halomap")
print(f"{datetime.now()}: dumping mapping to .. `{fout_map}`.",
flush=True)
with h5py.File(fout_map, "w") as f:
for hid, indxs in out_map.items():
f.create_dataset(hid, data=indxs)
# We force clean up the memory before continuing.
del out_map, out_fits
collect()

82
scripts/pre_sortinit.py Normal file
View file

@ -0,0 +1,82 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to sort the initial snapshot particles according to their final
snapshot ordering, which is sorted by the clump IDs.
"""
from argparse import ArgumentParser
from datetime import datetime
import h5py
from gc import collect
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
# NOTE: ID has to be the last column.
pars_extract = ["x", "y", "z", "M", "ID"]
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=True)
else:
ics = args.ics
# We loop over simulations. Each simulation is then procesed with MPI, rank 0
# loads the data and broadcasts it to other ranks.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
nsim = ics[i]
nsnap = max(paths.get_snapshots(nsim))
print(f"{datetime.now()}: reading and processing simulation {nsim}.",
flush=True)
# We first load the particle IDs in the final snapshot.
pidf = csiborgtools.read.read_h5(paths.particles_path(nsim))
pidf = pidf["particle_ids"]
# Then we load the particles in the initil snapshot and make sure that
# their particle IDs are sorted as in the final snapshot.
# Again, because of precision this must be read as structured.
part0, pid0 = partreader.read_particle(
1, nsim, pars_extract, return_structured=False, verbose=verbose)
# First enforce them to already be sorted and then apply reverse
# sorting from the final snapshot.
part0 = part0[numpy.argsort(pid0)]
del pid0
collect()
part0 = part0[numpy.argsort(numpy.argsort(pidf))]
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
f.create_dataset("particles", data=part0)