Overlapper improvements (#53)

* Store indices as f32

* Fix init sorting

* Organise imports

* Rename pathing

* Add particle loading

* Improve particle reading

* Add h5py reader

* Edit particle path

* Update particles loading

* Update particles loading

* Fix particle dumping

* Add init fitting

* Fix bug due to insufficient precision

* Add comment

* Add comment

* Add clumps catalogue to halo cat

* Add comment

* Make sure PIDs are never forced to float32

* Fix PID reading

* Fix PID reading

* Update matching to work with new arrays

* Stop using cubical sub boxes, turn off nshift if no smoothing

* Improve caching

* Move function definitions

* Simplify calculation

* Add import

* Small updates to the halo

* Simplify calculation

* Simplify looping calculation

* Fix tonew

* Add initial data

* Add skip condition

* Add unit conversion

* Add loading background in batches

* Rename mmain index

* Switch overlaps to h5

* Add finite lagpatch check

* Fix column name

* Add verbosity flags

* Save halo IDs instead

* Switch back to npz

* Delete nbs

* Reduce size of the box

* Load correct bckg of halos being matched

* Remove verbosity

* Verbosity edits

* Change lower thresholds
Richard Stiskalek 2023-05-06 16:52:48 +01:00 committed by GitHub
parent 1c9dacfde5
commit 56e39a8b1d
20 changed files with 864 additions and 3816 deletions


@ -15,13 +15,9 @@
from warnings import warn
from csiborgtools.clustering.knn import kNN_CDF # noqa
from csiborgtools.clustering.utils import ( # noqa
BaseRVS,
RVSinbox,
RVSinsphere,
RVSonsphere,
normalised_marks,
)
from csiborgtools.clustering.utils import (BaseRVS, RVSinbox, # noqa
RVSinsphere, RVSonsphere,
normalised_marks)
try:
import Corrfunc # noqa


@ -12,6 +12,6 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .halo import Clump, Halo # noqa
from .halo import Clump, Halo, dist_centmass # noqa
from .haloprofile import NFWPosterior, NFWProfile # noqa
from .utils import split_jobs # noqa


@ -15,6 +15,7 @@
"""A clump object."""
from abc import ABC
from numba import jit
import numpy
@ -101,16 +102,21 @@ class BaseStructure(ABC):
"""
return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
@property
def r(self):
"""
Calculate the radial separation of the particles from the centre of the
object.
Radial separation of particles from the centre of the object.
Returns
-------
r : 1-dimensional array of shape `(n_particles, )`.
"""
return numpy.linalg.norm(self.pos, axis=1)
return self._get_r(self.pos)
@staticmethod
@jit(nopython=True)
def _get_r(pos):
return (pos[:, 0]**2 + pos[:, 1]**2 + pos[:, 2]**2)**0.5
def cmass(self, rmax, rmin):
"""
@ -130,7 +136,7 @@ class BaseStructure(ABC):
-------
cm : 1-dimensional array of shape `(3, )`
"""
r = self.r()
r = self.r
mask = (r >= rmin) & (r <= rmax)
return numpy.average(self.pos[mask], axis=0, weights=self["M"][mask])
@ -149,7 +155,7 @@ class BaseStructure(ABC):
-------
J : 1-dimensional array or shape `(3, )`
"""
r = self.r()
r = self.r
mask = (r >= rmin) & (r <= rmax)
pos = self.pos[mask] - self.cmass(rmax, rmin)
# Velocitities in the object CM frame
@ -172,17 +178,17 @@ class BaseStructure(ABC):
-------
enclosed_mass : float
"""
r = self.r()
r = self.r
return numpy.sum(self["M"][(r >= rmin) & (r <= rmax)])
def lambda_bullock(self, radius, npart_min=10):
def lambda_bullock(self, radmax, npart_min=10):
r"""
Bullock spin, see Eq. 5 in [1], computed within the radius `radmax`, which
should correspond to some overdensity radius.
Parameters
----------
radius : float
radmax : float
Radius in which to calculate the spin.
npart_min : int
Minimum number of enclosed particles for a radius to be
@ -198,14 +204,13 @@ class BaseStructure(ABC):
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R.
"""
mask = self.r() <= radius
mask = self.r <= radmax
if numpy.sum(mask) < npart_min:
return numpy.nan
mass = self.enclosed_mass(radius)
V = numpy.sqrt(self.box.box_G * mass / radius)
out = numpy.linalg.norm(self.angular_momentum(radius))
out /= numpy.sqrt(2) * mass * V * radius
return out
mass = self.enclosed_mass(radmax)
circvel = numpy.sqrt(self.box.box_G * mass / radmax)
angmom_norm = numpy.linalg.norm(self.angular_momentum(radmax))
return angmom_norm / (numpy.sqrt(2) * mass * circvel * radmax)
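For reference, the quantity computed above is the Bullock spin
:math:`\lambda' = |J| / (\sqrt{2} M V R)` evaluated at :math:`R =` `radmax`,
with circular velocity :math:`V = \sqrt{G M / R}`, matching the
`angmom_norm / (sqrt(2) * mass * circvel * radmax)` expression.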
def spherical_overdensity_mass(self, delta_mult, npart_min=10,
kind="crit"):
@ -236,18 +241,18 @@ class BaseStructure(ABC):
assert kind in ["crit", "matter"]
# We first sort the particles in an increasing separation
rs = self.r()
rs = self.r
order = numpy.argsort(rs)
rs = rs[order]
cmass = numpy.cumsum(self["M"][order]) # Cumulative mass
# We calculate the enclosed volume and indices where it is above target
vol = 4 * numpy.pi / 3 * (rs**3 - rs[0] ** 3)
vol = 4 * numpy.pi / 3 * rs**3
target_density = delta_mult * self.box.box_rhoc
if kind == "matter":
target_density *= self.box.cosmo.Om0
with numpy.errstate(divide="ignore"):
ks = numpy.where(cmass / vol > target_density)[0]
ks = numpy.where(cmass > target_density * vol)[0]
if ks.size == 0: # Never above the threshold?
return numpy.nan, numpy.nan
k = numpy.max(ks)
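The threshold search above can be read as the following standalone sketch (a
hypothetical helper; `rs` are radial separations in box units and
`target_density` is `delta_mult * box_rhoc`, additionally multiplied by `Om0`
for the matter definition):

import numpy

def so_mass_radius(rs, masses, target_density):
    # Sort particles by radial separation and accumulate mass outwards.
    order = numpy.argsort(rs)
    rs = rs[order]
    cmass = numpy.cumsum(masses[order])
    vol = 4 * numpy.pi / 3 * rs**3  # Enclosed volume at each particle.
    # Indices where the enclosed density exceeds the target density.
    ks = numpy.where(cmass > target_density * vol)[0]
    if ks.size == 0:  # Never above the threshold.
        return numpy.nan, numpy.nan
    k = numpy.max(ks)  # Outermost particle above the overdensity.
    return rs[k], cmass[k]  # SO radius and mass.

Multiplying through by `vol` instead of dividing by it also removes the need
for the division-by-zero guard that the old `cmass / vol` comparison required.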
@ -257,7 +262,7 @@ class BaseStructure(ABC):
def __getitem__(self, key):
keys = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
if key not in self.keys:
if key not in keys:
raise RuntimeError(f"Invalid key `{key}`!")
return self.particles[:, keys.index(key)]
@ -304,3 +309,31 @@ class Halo(BaseStructure):
self.particles = particles
self.info = info
self.box = box
###############################################################################
# Other, supplementary functions #
###############################################################################
@jit(nopython=True)
def dist_centmass(clump):
"""
Calculate the clump (or halo) particles' distance from the centre of mass.
Parameters
----------
clump : 2-dimensional array of shape (n_particles, 7)
Particle array. The first four columns must be `x`, `y`, `z` and `M`.
Returns
-------
dist : 1-dimensional array of shape `(n_particles, )`
Particle distance from the centre of mass.
cm : len-3 list
Center of mass coordinates.
"""
mass = clump[:, 3]
x, y, z = clump[:, 0], clump[:, 1], clump[:, 2]
cmx, cmy, cmz = [numpy.average(xi, weights=mass) for xi in (x, y, z)]
dist = ((x - cmx)**2 + (y - cmy)**2 + (z - cmz)**2)**0.5
return dist, [cmx, cmy, cmz]
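A minimal usage sketch of the relocated `dist_centmass` (the particle array
here is synthetic; the import path follows the updated `fits` package
exports):

import numpy
from csiborgtools.fits import dist_centmass

# Synthetic (n_particles, 7) array; only columns x, y, z, M are used.
clump = numpy.random.rand(1000, 7).astype(numpy.float32)
dist, cm = dist_centmass(clump)
patchsize = numpy.percentile(dist, 99)  # e.g. a Lagrangian patch estimate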


@ -348,7 +348,7 @@ class NFWPosterior(NFWProfile):
Best fit NFW central density.
"""
assert isinstance(clump, Clump)
r = clump.r()
r = clump.r
rmin = numpy.min(r[r > 0]) # First particle that is not at r = 0
rmax, mtot = clump.spherical_overdensity_mass(200)
mask = (rmin <= r) & (r <= rmax)


@ -12,14 +12,8 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import ( # noqa
ParticleOverlap,
RealisationsMatcher,
calculate_overlap,
calculate_overlap_indxs,
cosine_similarity,
dist_centmass,
dist_percentile,
)
from .match import (ParticleOverlap, RealisationsMatcher, # noqa
calculate_overlap, calculate_overlap_indxs,
cosine_similarity)
from .num_density import binned_counts, number_density # noqa
from .utils import concatenate_parts # noqa


@ -16,12 +16,19 @@
Support for matching halos between CSiBORG IC realisations.
"""
from datetime import datetime
from functools import lru_cache
from math import ceil
import numpy
from numba import jit
from scipy.ndimage import gaussian_filter
from tqdm import tqdm, trange
from ..read import load_parent_particles
BCKG_HALFSIZE = 475
BOX_SIZE = 2048
###############################################################################
# Realisations matcher for calculating overlaps #
###############################################################################
@ -105,8 +112,8 @@ class RealisationsMatcher:
"""
return self._overlapper
def cross(self, cat0, catx, halos0_archive, halosx_archive, delta_bckg,
verbose=True):
def cross(self, cat0, catx, particles0, particlesx, clump_map0, clump_mapx,
delta_bckg, cache_size=10000, verbose=True):
r"""
Find all neighbours whose CM separation is less than `nmult` times the
sum of their initial Lagrangian patch sizes and calculate their
@ -119,19 +126,23 @@ class RealisationsMatcher:
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogue of the cross simulation.
halos0_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
halosx_archive : `NpzFile` object
Archive of halos' particles of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
particles0 : 2-dimensional array
Array of particles in box units in the reference simulation.
The columns must be `x`, `y`, `z` and `M`.
particlesx : 2-dimensional array
Array of particles in box units in the cross simulation.
The columns must be `x`, `y`, `z` and `M`.
clump_map0 : 2-dimensional array
Clump map of the reference simulation.
clump_mapx : 2-dimensional array
Clump map of the cross simulation.
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
@ -149,12 +160,12 @@ class RealisationsMatcher:
# in the reference simulation from the cross simulation in the initial
# snapshot.
if verbose:
now = datetime.now()
print(f"{now}: querying the KNN.", flush=True)
print(f"{datetime.now()}: querying the KNN.", flush=True)
match_indxs = radius_neighbours(
catx.knn(select_initial=True), cat0.positions(in_initial=True),
catx.knn(in_initial=True), cat0.position(in_initial=True),
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
nmult=self.nmult, enforce_int32=True, verbose=verbose)
# We next remove neighbours whose mass is too large/small.
if self.dlogmass is not None:
for i, indx in enumerate(match_indxs):
@ -163,12 +174,18 @@ class RealisationsMatcher:
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
match_indxs[i] = match_indxs[i][aratio < self.dlogmass]
# We will make a dictionary to keep in memory the halos' particles from
# the cross simulations so that they are not loaded in several times
# and we only convert their positions to cells once. Possibly make an
# option to not do this to lower memory requirements?
cross_halos = {}
cross_lims = {}
clid2map0 = {clid: i for i, clid in enumerate(clump_map0[:, 0])}
clid2mapx = {clid: i for i, clid in enumerate(clump_mapx[:, 0])}
# We will cache the halos from the cross simulation to speed up the I/O
@lru_cache(maxsize=cache_size)
def load_cached_halox(hid):
return load_processed_halo(hid, particlesx, clump_mapx, clid2mapx,
catx.clumps_cat, nshift=0,
ncells=BOX_SIZE)
if verbose:
print(f"{datetime.now()}: calculating overlaps.", flush=True)
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
indxs = cat0["index"]
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
@ -178,36 +195,18 @@ class RealisationsMatcher:
continue
# Next, we find this halo's particles, total mass, minimum and
# maximum cells and convert positions to cells.
halo0 = halos0_archive[str(k0)]
mass0 = numpy.sum(halo0["M"])
mins0, maxs0 = get_halolims(halo0,
ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
for p in ("x", "y", "z"):
halo0[p] = self.overlapper.pos2cell(halo0[p])
pos0, mass0, totmass0, mins0, maxs0 = load_processed_halo(
k0, particles0, clump_map0, clid2map0, cat0.clumps_cat,
nshift=0, ncells=BOX_SIZE)
# We now loop over matches of this halo and calculate their
# overlap, storing them in `_cross`.
_cross = numpy.full(matches.size, numpy.nan, dtype=numpy.float32)
for j, kf in enumerate(catx["index"][matches]):
# Attempt to load this cross halo from memory, if it fails get
# it from from the halo archive (and similarly for the limits)
# and convert the particle positions to cells.
try:
halox = cross_halos[kf]
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox, ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
for p in ("x", "y", "z"):
halox[p] = self.overlapper.pos2cell(halox[p])
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
massx = numpy.sum(halox["M"])
_cross[j] = self.overlapper(halo0, halox, delta_bckg, mins0,
maxs0, minsx, maxsx, mass1=mass0,
mass2=massx)
for j, kx in enumerate(catx["index"][matches]):
posx, massx, totmassx, minsx, maxsx = load_cached_halox(kx)
_cross[j] = self.overlapper(
pos0, posx, mass0, massx, delta_bckg, mins0, maxs0,
minsx, maxsx, totmass1=totmass0, totmass2=totmassx)
cross[i] = _cross
# We remove all matches that have zero overlap to save space.
@ -222,8 +221,9 @@ class RealisationsMatcher:
cross = numpy.asanyarray(cross, dtype=object)
return match_indxs, cross
def smoothed_cross(self, cat0, catx, halos0_archive, halosx_archive,
delta_bckg, match_indxs, smooth_kwargs, verbose=True):
def smoothed_cross(self, cat0, catx, particles0, particlesx, clump_map0,
clump_mapx, delta_bckg, match_indxs, smooth_kwargs,
cache_size=10000, verbose=True):
r"""
Calculate the smoothed overlaps for pairs previously identified via
`self.cross(...)` to have a non-zero overlap.
@ -234,27 +234,27 @@ class RealisationsMatcher:
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the cross simulation.
halos0_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
halosx_archive : `NpzFile` object
Archive of halos' particles of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
particles0 : 2-dimensional array
Array of particles in box units in the reference simulation.
The columns must be `x`, `y`, `z` and `M`.
particlesx : 2-dimensional array
Array of particles in box units in the cross simulation.
The columns must be `x`, `y`, `z` and `M`.
clump_map0 : 2-dimensional array
Clump map of the reference simulation.
clump_mapx : 2-dimensional array
Clump map of the cross simulation.
delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
ref_indxs : 1-dimensional array
Halo IDs in the reference catalogue.
cross_indxs : 1-dimensional array
Halo IDs in the cross catalogue.
match_indxs : 1-dimensional array of arrays
Indices of halo counterparts in the cross catalogue.
smooth_kwargs : kwargs
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
@ -262,37 +262,33 @@ class RealisationsMatcher:
-------
overlaps : 1-dimensional array of arrays
"""
nshift = read_nshift(smooth_kwargs)
clid2map0 = {clid: i for i, clid in enumerate(clump_map0[:, 0])}
clid2mapx = {clid: i for i, clid in enumerate(clump_mapx[:, 0])}
cross_halos = {}
cross_lims = {}
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
@lru_cache(maxsize=cache_size)
def load_cached_halox(hid):
return load_processed_halo(hid, particlesx, clump_mapx, clid2mapx,
catx.clumps_cat, nshift=nshift,
ncells=BOX_SIZE)
if verbose:
print(f"{datetime.now()}: calculating smoothed overlaps.",
flush=True)
indxs = cat0["index"]
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
halo0 = halos0_archive[str(k0)]
mins0, maxs0 = get_halolims(halo0,
ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
pos0, mass0, __, mins0, maxs0 = load_processed_halo(
k0, particles0, clump_map0, clid2map0, cat0.clumps_cat,
nshift=nshift, ncells=BOX_SIZE)
# Now loop over the matches and calculate the smoothed overlap.
_cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32)
for j, kf in enumerate(catx["index"][match_indxs[i]]):
# Attempt to load this cross halo from memory, if it fails get
# it from from the halo archive (and similarly for the limits).
try:
halox = cross_halos[kf]
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox, ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
_cross[j] = self.overlapper(halo0, halox, delta_bckg, mins0,
maxs0, minsx, maxsx,
smooth_kwargs=smooth_kwargs)
for j, kx in enumerate(catx["index"][match_indxs[i]]):
posx, massx, __, minsx, maxsx = load_cached_halox(kx)
_cross[j] = self.overlapper(pos0, posx, mass0, massx,
delta_bckg, mins0, maxs0, minsx,
maxsx, smooth_kwargs=smooth_kwargs)
cross[i] = _cross
return numpy.asanyarray(cross, dtype=object)
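Taken together, the new array-based API is driven roughly as follows (a
sketch; the catalogues, particle arrays, clump maps and background fields are
assumed to have been loaded from the `.h5` files, and the `sigma` value is
illustrative):

matcher = csiborgtools.match.RealisationsMatcher()
match_indxs, overlap = matcher.cross(cat0, catx, particles0, particlesx,
                                     clump_map0, clump_mapx, delta_bckg)
# `delta_bckg_smoothed` is the Gaussian-smoothed background field.
overlap_smoothed = matcher.smoothed_cross(cat0, catx, particles0, particlesx,
                                          clump_map0, clump_mapx,
                                          delta_bckg_smoothed, match_indxs,
                                          smooth_kwargs={"sigma": 1.0})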
@ -341,57 +337,37 @@ class ParticleOverlap:
Gaussian smoothing.
"""
def __init__(self):
# Inverse cell length in box units. By default :math:`2^11`, which
# matches the initial RAMSES grid resolution.
self.inv_clength = 2**11
self.nshift = 5 # Hardcode this too to force consistency
self._clength = 1 / self.inv_clength
def pos2cell(self, pos):
def make_bckg_delta(self, particles, clump_map, clid2map, halo_cat,
delta=None, verbose=False):
"""
Convert position to cell number. If `pos` is in
`numpy.typecodes["AllInteger"]` assumes it to already be the cell
number.
Calculate a NGP density field of particles belonging to halos of a
halo catalogue `halo_cat`. Particles are only counted within the
high-resolution region of the simulation. Smoothing must be applied
separately.
Parameters
----------
pos : 1-dimensional array
Array of positions along an axis in the box.
Returns
-------
cells : 1-dimensional array
"""
# Check whether this is already the cell
if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos
return numpy.floor(pos * self.inv_clength).astype(numpy.int32)
def make_bckg_delta(self, halo_archive, delta=None, verbose=False):
"""
Calculate a NGP density field of particles belonging to halos within
the central :math:`1/2^3` high-resolution region of the simulation.
Smoothing must be applied separately.
Parameters
----------
halo_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`.
particles : 2-dimensional array
Array of particles.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
halo_cat: :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogue.
delta : 3-dimensional array, optional
Array to store the density field in. If `None` a new array is
created.
verbose : bool, optional
Verbosity flag for loading the files.
Verbosity flag for loading the halos' particles.
Returns
-------
delta : 3-dimensional array
"""
# We obtain the minimum/maximum cell IDs and number of cells
cellmin = self.inv_clength // 4 # The minimum cell ID
cellmax = 3 * self.inv_clength // 4 # The maximum cell ID
cellmin = BOX_SIZE // 2 - BCKG_HALFSIZE
cellmax = BOX_SIZE // 2 + BCKG_HALFSIZE
ncells = cellmax - cellmin
# We then pre-allocate the density field/check it is of the right shape
if delta is None:
@ -399,28 +375,25 @@ class ParticleOverlap:
else:
assert ((delta.shape == (ncells,) * 3)
& (delta.dtype == numpy.float32))
from tqdm import tqdm
# We now loop one-by-one over the halos fill the density field.
files = halo_archive.files
for file in tqdm(files) if verbose else files:
parts = halo_archive[file]
cells = [self.pos2cell(parts[p]) for p in ("x", "y", "z")]
mass = parts["M"]
clumps_cat = halo_cat.clumps_cat
for hid in tqdm(halo_cat["index"]) if verbose else halo_cat["index"]:
pos = load_parent_particles(hid, particles, clump_map, clid2map,
clumps_cat)
if pos is None:
continue
pos, mass = pos[:, :3], pos[:, 3]
pos = pos2cell(pos, BOX_SIZE)
# We mask out particles outside the cubical high-resolution region
mask = ((cellmin <= cells[0])
& (cells[0] < cellmax)
& (cellmin <= cells[1])
& (cells[1] < cellmax)
& (cellmin <= cells[2])
& (cells[2] < cellmax))
cells = [c[mask] for c in cells]
mass = mass[mask]
fill_delta(delta, *cells, *(cellmin,) * 3, mass)
mask = numpy.all((cellmin <= pos) & (pos < cellmax), axis=1)
pos = pos[mask]
fill_delta(delta, pos[:, 0], pos[:, 1], pos[:, 2],
*(cellmin,) * 3, mass[mask])
return delta
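Since the method accumulates into `delta` when one is passed, the summed
background field of the reference and cross simulations can be built by
calling it twice with the same buffer (sketch):

overlapper = ParticleOverlap()
delta_bckg = overlapper.make_bckg_delta(particles0, clump_map0, clid2map0,
                                        cat0, verbose=True)
delta_bckg = overlapper.make_bckg_delta(particlesx, clump_mapx, clid2mapx,
                                        catx, delta=delta_bckg, verbose=True)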
def make_delta(self, clump, mins=None, maxs=None, subbox=False,
def make_delta(self, pos, mass, mins=None, maxs=None, subbox=False,
smooth_kwargs=None):
"""
Calculate a NGP density field of a halo on a cubic grid. Optionally can
@ -428,8 +401,10 @@ class ParticleOverlap:
Parameters
----------
clump : structurered arrays
Clump structured array, keys must include `x`, `y`, `z` and `M`.
pos : 2-dimensional array
Halo particle position array.
mass : 1-dimensional array
Halo particle mass array.
mins, maxs : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension.
subbox : bool, optional
@ -443,50 +418,45 @@ class ParticleOverlap:
-------
delta : 3-dimensional array
"""
cells = [self.pos2cell(clump[p]) for p in ("x", "y", "z")]
nshift = read_nshift(smooth_kwargs)
cells = self.pos2cell(pos)
# Check that minima and maxima are integers
if not (mins is None and maxs is None):
assert mins.dtype.char in numpy.typecodes["AllInteger"]
assert maxs.dtype.char in numpy.typecodes["AllInteger"]
if subbox:
# Minimum xcell, ycell and zcell of this clump
if mins is None or maxs is None:
mins = numpy.asanyarray(
[max(numpy.min(cell) - self.nshift, 0) for cell in cells]
)
maxs = numpy.asanyarray(
[
min(numpy.max(cell) + self.nshift, self.inv_clength)
for cell in cells
]
)
mins, maxs = get_halolims(cells, BOX_SIZE, nshift)
ncells = numpy.max(maxs - mins) + 1 # To get the number of cells
ncells = maxs - mins + 1 # To get the number of cells
else:
mins = [0, 0, 0]
ncells = self.inv_clength
ncells = BOX_SIZE
# Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
fill_delta(delta, *cells, *mins, clump["M"])
fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
if smooth_kwargs is not None:
gaussian_filter(delta, output=delta, **smooth_kwargs)
return delta
def make_deltas(self, clump1, clump2, mins1=None, maxs1=None, mins2=None,
maxs2=None, smooth_kwargs=None):
def make_deltas(self, pos1, pos2, mass1, mass2, mins1=None, maxs1=None,
mins2=None, maxs2=None, smooth_kwargs=None):
"""
Calculate a NGP density fields of two halos on a grid that encloses
them both. Optionally can be smoothed with a Gaussian kernel.
Parameters
----------
clump1, clump2 : structurered arrays
Particle structured array of the two clumps. Keys must include `x`,
`y`, `z` and `M`.
pos1 : 2-dimensional array
Particle positions of the first halo.
pos2 : 2-dimensional array
Particle positions of the second halo.
mass1 : 1-dimensional array
Particle masses of the first halo.
mass2 : 1-dimensional array
Particle masses of the second halo.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `clump1`.
Optional.
@ -507,51 +477,51 @@ class ParticleOverlap:
Indices where the lower mass clump has a non-zero density.
Calculated only if no smoothing is applied, otherwise `None`.
"""
xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ("x", "y", "z"))
xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ("x", "y", "z"))
nshift = read_nshift(smooth_kwargs)
pos1 = pos2cell(pos1, BOX_SIZE)
pos2 = pos2cell(pos2, BOX_SIZE)
xc1, yc1, zc1 = [pos1[:, i] for i in range(3)]
xc2, yc2, zc2 = [pos2[:, i] for i in range(3)]
if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)):
# Minimum cell number of the two halos along each dimension
xmin = min(numpy.min(xc1), numpy.min(xc2)) - self.nshift
ymin = min(numpy.min(yc1), numpy.min(yc2)) - self.nshift
zmin = min(numpy.min(zc1), numpy.min(zc2)) - self.nshift
xmin = min(numpy.min(xc1), numpy.min(xc2)) - nshift
ymin = min(numpy.min(yc1), numpy.min(yc2)) - nshift
zmin = min(numpy.min(zc1), numpy.min(zc2)) - nshift
# Make sure shifting does not go beyond boundaries
xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)]
# Maximum cell number of the two halos along each dimension
xmax = max(numpy.max(xc1), numpy.max(xc2)) + self.nshift
ymax = max(numpy.max(yc1), numpy.max(yc2)) + self.nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + self.nshift
xmax = max(numpy.max(xc1), numpy.max(xc2)) + nshift
ymax = max(numpy.max(yc1), numpy.max(yc2)) + nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + nshift
# Make sure shifting does not go beyond boundaries
xmax, ymax, zmax = [
min(px, self.inv_clength - 1) for px in (xmax, ymax, zmax)
]
xmax, ymax, zmax = [min(px, BOX_SIZE - 1)
for px in (xmax, ymax, zmax)]
else:
xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
cellmins = (xmin, ymin, zmin) # Cell minima
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1 # Num cells
ncells = xmax - xmin + 1, ymax - ymin + 1, zmax - zmin + 1 # Num cells
# Preallocate and fill the arrays
delta1 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
delta2 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
delta1 = numpy.zeros(ncells, dtype=numpy.float32)
delta2 = numpy.zeros(ncells, dtype=numpy.float32)
# If no smoothing figure out the nonzero indices of the smaller clump
if smooth_kwargs is None:
if clump1.size > clump2.size:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
nonzero = fill_delta_indxs(
delta2, xc2, yc2, zc2, *cellmins, clump2["M"]
)
if pos1.shape[0] > pos2.shape[0]:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
nonzero = fill_delta_indxs(delta2, xc2, yc2, zc2, *cellmins,
mass2)
else:
nonzero = fill_delta_indxs(
delta1, xc1, yc1, zc1, *cellmins, clump1["M"]
)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
nonzero = fill_delta_indxs(delta1, xc1, yc1, zc1, *cellmins,
mass1)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
else:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
nonzero = None
if smooth_kwargs is not None:
@ -559,9 +529,9 @@ class ParticleOverlap:
gaussian_filter(delta2, output=delta2, **smooth_kwargs)
return delta1, delta2, cellmins, nonzero
def __call__(self, clump1, clump2, delta_bckg, mins1=None, maxs1=None,
mins2=None, maxs2=None, mass1=None, mass2=None,
smooth_kwargs=None):
def __call__(self, pos1, pos2, mass1, mass2, delta_bckg,
mins1=None, maxs1=None, mins2=None, maxs2=None,
totmass1=None, totmass2=None, smooth_kwargs=None):
"""
Calculate overlap between `clump1` and `clump2`. See
`calculate_overlap(...)` for further information. Be careful so that
@ -572,9 +542,14 @@ class ParticleOverlap:
Parameters
----------
clump1, clump2 : structurered arrays
Structured arrays containing the particles of a given clump. Keys
must include `x`, `y`, `z` and `M`.
pos1 : 2-dimensional array
Particle positions of the first halo.
pos2 : 2-dimensional array
Particle positions of the second halo.
mass1 : 1-dimensional array
Particle masses of the first halo.
mass2 : 1-dimensional array
Particle masses of the second halo.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta_bckg : 3-dimensional array
@ -588,7 +563,7 @@ class ParticleOverlap:
mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `clump2`,
optional.
mass1, mass2 : floats, optional
totmass1, totmass2 : floats, optional
Total mass of `clump1` and `clump2`, respectively. Must be provided
if `loop_nonzero` is `True`.
smooth_kwargs : kwargs, optional
@ -600,16 +575,16 @@ class ParticleOverlap:
overlap : float
"""
delta1, delta2, cellmins, nonzero = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2,
pos1, pos2, mass1, mass2, mins1, maxs1, mins2, maxs2,
smooth_kwargs=smooth_kwargs)
if smooth_kwargs is not None:
return calculate_overlap(delta1, delta2, cellmins, delta_bckg)
# Calculate masses not given
mass1 = numpy.sum(clump1["M"]) if mass1 is None else mass1
mass2 = numpy.sum(clump2["M"]) if mass2 is None else mass2
totmass1 = numpy.sum(mass1) if totmass1 is None else totmass1
totmass2 = numpy.sum(mass2) if totmass2 is None else totmass2
return calculate_overlap_indxs(
delta1, delta2, cellmins, delta_bckg, nonzero, mass1, mass2)
delta1, delta2, cellmins, delta_bckg, nonzero, totmass1, totmass2)
###############################################################################
@ -617,6 +592,49 @@ class ParticleOverlap:
###############################################################################
def pos2cell(pos, ncells):
"""
Convert position to cell number. If `pos` is of an integer dtype (i.e. in
`numpy.typecodes["AllInteger"]`), it is assumed to already be the cell
number.
Parameters
----------
pos : 1-dimensional array
Array of positions along an axis in the box.
ncells : int
Number of cells along the axis.
Returns
-------
cells : 1-dimensional array
"""
if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos
return numpy.floor(pos * ncells).astype(numpy.int32)
def read_nshift(smooth_kwargs):
"""
Read off the number of cells to pad the density field if smoothing is
applied. Defaults to the ceiling of twice the smoothing scale.
Parameters
----------
smooth_kwargs : kwargs, optional
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
If `None` no smoothing is applied.
Returns
-------
nshift : int
"""
if smooth_kwargs is None:
return 0
else:
return ceil(2 * smooth_kwargs["sigma"])
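Quick checks of the two new module-level helpers (the input values are
illustrative):

import numpy

cells = pos2cell(numpy.array([0., 0.25, 0.5], dtype=numpy.float32), 2048)
# -> array([   0,  512, 1024], dtype=int32); integer input is returned as-is.
read_nshift(None)             # -> 0, no smoothing means no padding
read_nshift({"sigma": 1.0})   # -> 2, i.e. ceil(2 * sigma)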
@jit(nopython=True)
def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
"""
@ -679,15 +697,14 @@ def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
return cells[:count_nonzero, :] # Cutoff unassigned places
def get_halolims(halo, ncells, nshift=None):
def get_halolims(pos, ncells, nshift=None):
"""
Get the lower and upper limit of a halo's positions or cell numbers.
Parameters
----------
halo : structured array
Structured array containing the particles of a given halo. Keys must
`x`, `y`, `z`.
pos : 2-dimensional array
Halo particle array. Columns must be `x`, `y`, `z`.
ncells : int
Number of grid cells of the box along a single dimension.
nshift : int, optional
@ -699,17 +716,16 @@ def get_halolims(halo, ncells, nshift=None):
Minimum and maximum along each axis.
"""
# Check that in case of `nshift` we have integer positions.
dtype = halo["x"].dtype
dtype = pos.dtype
if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
raise TypeError("`nshift` is supported only when the positions are cell numbers.")
nshift = 0 if nshift is None else nshift # To simplify code below
mins = numpy.full(3, numpy.nan, dtype=dtype)
maxs = numpy.full(3, numpy.nan, dtype=dtype)
for i, p in enumerate(["x", "y", "z"]):
mins[i] = max(numpy.min(halo[p]) - nshift, 0)
maxs[i] = min(numpy.max(halo[p]) + nshift, ncells - 1)
for i in range(3):
mins[i] = max(numpy.min(pos[:, i]) - nshift, 0)
maxs[i] = min(numpy.max(pos[:, i]) + nshift, ncells - 1)
return mins, maxs
@ -741,8 +757,8 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
totmass = 0.0 # Total mass of clump 1 and clump 2
intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack things
bckg_offset = 512 # Offset of the background density field
bckg_size = 1024
bckg_size = 2 * BCKG_HALFSIZE
bckg_offset = BOX_SIZE // 2 - BCKG_HALFSIZE
imax, jmax, kmax = delta1.shape
for i in range(imax):
@ -798,8 +814,8 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
"""
intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack cell minimas
bckg_offset = 512 # Offset of the background density field
bckg_size = 1024 # Size of the background density field array
bckg_size = 2 * BCKG_HALFSIZE
bckg_offset = BOX_SIZE // 2 - BCKG_HALFSIZE
for n in range(nonzero.shape[0]):
i, j, k = nonzero[n, :]
@ -821,47 +837,51 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
return intersect / (mass1 + mass2 - intersect)
def dist_centmass(clump):
def load_processed_halo(hid, particles, clump_map, clid2map, clumps_cat,
ncells, nshift):
"""
Calculate the clump (or halo) particles' distance from the centre of mass.
Load a processed halo from the `.h5` file. This is intended to be wrapped
by a cacher (e.g. `functools.lru_cache`).
Parameters
----------
clump : 2-dimensional array of shape (n_particles, 7)
Particle array. The first four columns must be `x`, `y`, `z` and `M`.
hid : int
Halo ID.
particles : 2-dimensional array
Array of particles in box units. The columns must be `x`, `y`, `z`
and `M`.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
Clumps catalogue.
ncells : int
Number of cells in the original density field. Typically 2048.
nshift : int
Number of cells to pad the density field.
Returns
-------
dist : 1-dimensional array of shape `(n_particles, )`
Particle distance from the centre of mass.
cm : 1-dimensional array of shape `(3,)`
Center of mass coordinates.
pos : 2-dimensional array
Array of cell particle positions.
mass : 1-dimensional array
Array of particle masses.
totmass : float
Total mass of the halo.
mins : len-3 tuple
Minimum cell indices of the halo.
maxs : len-3 tuple
Maximum cell indices of the halo.
"""
# CM along each dimension
cm = numpy.average(clump[:, :3], weights=clump[:, 3], axis=0)
return numpy.linalg.norm(clump[:, :3] - cm, axis=1), cm
def dist_percentile(dist, qs, distmax=0.075):
"""
Calculate q-th percentiles of `dist`, with an upper limit of `distmax`.
Parameters
----------
dist : 1-dimensional array
Array of distances.
qs : 1-dimensional array
Percentiles to compute.
distmax : float, optional
The maximum distance. By default 0.075.
Returns
-------
x : 1-dimensional array
"""
x = numpy.percentile(dist, qs)
x[x > distmax] = distmax # Enforce the upper limit
return x
pos = load_parent_particles(hid, particles, clump_map, clid2map,
clumps_cat)
pos, mass = pos[:, :3], pos[:, 3]
pos = pos2cell(pos, ncells)
totmass = numpy.sum(mass)
mins, maxs = get_halolims(pos, ncells=ncells, nshift=nshift)
return pos, mass, totmass, mins, maxs
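A sketch of a direct call (arrays as in `cross`; `2048` matches `BOX_SIZE`):

pos, mass, totmass, mins, maxs = load_processed_halo(
    hid, particles, clump_map, clid2map, clumps_cat, ncells=2048, nshift=0)

In `cross`/`smoothed_cross` this is wrapped in an `lru_cache`d closure keyed
on `hid` alone, so a cross-simulation halo matched against several reference
haloes is sliced and converted to cells only once.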
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,


@ -15,16 +15,14 @@
from .box_units import BoxUnits # noqa
from .halo_cat import ClumpsCatalogue, HaloCatalogue # noqa
from .knn_summary import kNNCDFReader # noqa
from .obs import ( # noqa
SDSS,
MCXCClusters,
PlanckClusters,
TwoMPPGalaxies,
TwoMPPGroups,
)
from .overlap_summary import NPairsOverlap, PairOverlap, binned_resample_mean # noqa
from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups)
from .overlap_summary import (NPairsOverlap, PairOverlap, # noqa
binned_resample_mean)
from .paths import CSiBORGPaths # noqa
from .pk_summary import PKReader # noqa
from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm # noqa
from .readsim import (MmainReader, ParticleReader, halfwidth_select, # noqa
load_clump_particles, load_parent_particles, read_initcm)
from .tpcf_summary import TPCFReader # noqa
from .utils import cartesian_to_radec, cols_to_structured, radec_to_cartesian # noqa
from .utils import (cartesian_to_radec, cols_to_structured, # noqa
radec_to_cartesian, read_h5)


@ -12,7 +12,7 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""CSiBORG halo catalogue."""
"""CSiBORG halo and clumps catalogues."""
from abc import ABC
import numpy
@ -177,7 +177,7 @@ class BaseCatalogue(ABC):
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
"""
knn = NearestNeighbors()
return knn.fit(self.positions(in_initial))
return knn.fit(self.position(in_initial))
def radius_neigbours(self, X, radius, in_initial):
r"""
@ -368,6 +368,8 @@ class HaloCatalogue(BaseCatalogue):
minmass : len-2 tuple
Minimum mass. The first element is the catalogue key and the second is
the value.
with_lagpatch : bool, optional
Whether to only load halos with a resolved Lagrangian patch.
load_fitted : bool, optional
Whether to load fitted quantities.
load_initial : bool, optional
@ -378,22 +380,39 @@ class HaloCatalogue(BaseCatalogue):
"""
def __init__(self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12),
load_fitted=True, load_initial=False, rawdata=False):
with_lagpatch=True, load_fitted=True, load_initial=True,
rawdata=False):
self.nsim = nsim
self.paths = paths
# Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
self._data = mmain["mmain"]
# We will also need the clumps catalogue
self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
if load_fitted:
fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos"))
cols = [col for col in fits.dtype.names if col != "index"]
X = [fits[col] for col in cols]
self._data = add_columns(self._data, X, cols)
# TODO: load initial positions
if load_initial:
fits = numpy.load(paths.initmatch_path(nsim, "fit"))
X, cols = [], []
for col in fits.dtype.names:
if col == "index":
continue
if col in ['x', 'y', 'z']:
cols.append(col + "0")
else:
cols.append(col)
X.append(fits[col])
self._data = add_columns(self._data, X, cols)
if not rawdata:
if with_lagpatch:
self._data = self._data[numpy.isfinite(self['lagpatch'])]
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(self._data, "x", "z")
for p in ("x", "y", "z"):
@ -402,9 +421,24 @@ class HaloCatalogue(BaseCatalogue):
"r500c", "m200c", "m500c", "r200m", "m200m"]
self._data = self.box.convert_from_boxunits(self._data, names)
if load_initial:
names = ["x0", "y0", "z0", "lagpatch"]
self._data = self.box.convert_from_boxunits(self._data, names)
if maxdist is not None:
dist = numpy.sqrt(self._data["x"]**2 + self._data["y"]**2
+ self._data["z"]**2)
self._data = self._data[dist < maxdist]
if minmass is not None:
self._data = self._data[self._data[minmass[0]] > minmass[1]]
@property
def clumps_cat(self):
"""
The raw clumps catalogue.
Returns
-------
clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
"""
return self._clumps_cat
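With the new keyword arguments, a typical catalogue load now looks like the
following sketch (`7444` is an assumed IC realisation index):

paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
cat = csiborgtools.read.HaloCatalogue(7444, paths, load_initial=True,
                                      with_lagpatch=True)
x0, lagpatch = cat["x0"], cat["lagpatch"]  # initial CM coordinate, patch size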


@ -260,7 +260,7 @@ class CSiBORGPaths:
fname = f"{kind}_out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
return join(fdir, fname)
def overlap_path(self, nsim0, nsimx):
def overlap_path(self, nsim0, nsimx, smoothed):
"""
Path to the overlap files between two simulations.
@ -270,6 +270,8 @@ class CSiBORGPaths:
IC realisation index of the first simulation.
nsimx : int
IC realisation index of the second simulation.
smoothed : bool
Whether the overlap is smoothed or not.
Returns
-------
@ -280,6 +282,8 @@ class CSiBORGPaths:
mkdir(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
fname = f"overlap_{str(nsim0).zfill(5)}_{str(nsimx).zfill(5)}.npz"
if smoothed:
fname = fname.replace("overlap", "overlap_smoothed")
return join(fdir, fname)
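Usage sketch of the extended path helper:

fout = paths.overlap_path(nsim0, nsimx, smoothed=True)
# -> .../overlap_smoothed_<nsim0>_<nsimx>.npz; `smoothed=False` keeps the
#    original overlap_<nsim0>_<nsimx>.npz name.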
def radpos_path(self, nsnap, nsim):
@ -305,37 +309,24 @@ class CSiBORGPaths:
fname = f"radpos_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz"
return join(fdir, fname)
def particle_h5py_path(self, nsim, kind=None, dtype="float32"):
def particles_path(self, nsim):
"""
Path to the file containing all particles in a `.h5` file.
Path to the files containing all particles.
Parameters
----------
nsim : int
IC realisation index.
kind : str
Type of output. Must be one of `[None, 'pos', 'clumpmap']`.
dtype : str
Data type. Must be one of `['float32', 'float64']`.
Returns
-------
path : str
"""
assert kind in [None, "pos", "clumpmap"]
assert dtype in ["float32", "float64"]
fdir = join(self.postdir, "particles")
if not isdir(fdir):
makedirs(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
if kind is None:
fname = f"parts_{str(nsim).zfill(5)}.h5"
else:
fname = f"parts_{kind}_{str(nsim).zfill(5)}.h5"
if dtype == "float64":
fname = fname.replace(".h5", "_f64.h5")
fname = f"parts_{str(nsim).zfill(5)}.h5"
return join(fdir, fname)
def density_field_path(self, mas, nsim):


@ -215,7 +215,10 @@ class ParticleReader:
Returns
-------
out : array
out : structured array or 2-dimensional array
Particle information.
pids : 1-dimensional array
Particle IDs.
"""
# Open the particle files
nparts, partfiles = self.open_particle(nsnap, nsim, verbose=verbose)
@ -233,6 +236,8 @@ class ParticleReader:
# Check there are no strange parameters
if isinstance(pars_extract, str):
pars_extract = [pars_extract]
if "ID" in pars_extract:
pars_extract.remove("ID")
for p in pars_extract:
if p not in fnames:
raise ValueError(f"Undefined parameter `{p}`.")
@ -250,6 +255,7 @@ class ParticleReader:
par2arrpos = {par: i for i, par in enumerate(pars_extract)}
out = numpy.full((npart_tot, len(pars_extract)), numpy.nan,
dtype=numpy.float32)
pids = numpy.full(npart_tot, numpy.nan, dtype=numpy.int32)
start_ind = self.nparts_to_start_ind(nparts)
iters = tqdm(range(ncpu)) if verbose else range(ncpu)
@ -257,19 +263,21 @@ class ParticleReader:
i = start_ind[cpu]
j = nparts[cpu]
for (fname, fdtype) in zip(fnames, fdtypes):
if fname in pars_extract:
single_part = self.read_sp(fdtype, partfiles[cpu])
single_part = self.read_sp(fdtype, partfiles[cpu])
if fname == "ID":
pids[i:i + j] = single_part
elif fname in pars_extract:
if return_structured:
out[fname][i:i + j] = single_part
else:
out[i:i + j, par2arrpos[fname]] = single_part
else:
dum[i:i + j] = self.read_sp(fdtype, partfiles[cpu])
dum[i:i + j] = single_part
# Close the fortran files
for partfile in partfiles:
partfile.close()
return out
return out, pids
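Callers now unpack two return values; a sketch of the updated call (the
method name `read_particle` and the parameter list are assumed from context):

partreader = csiborgtools.read.ParticleReader(paths)
parts, pids = partreader.read_particle(nsnap, nsim,
                                       pars_extract=["x", "y", "z", "M"],
                                       verbose=True)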
def open_unbinding(self, nsnap, nsim, cpu):
"""
@ -389,11 +397,16 @@ class ParticleReader:
class MmainReader:
"""
Object to generate the summed substructure catalogue.
Parameters
----------
paths : :py:class:`csiborgtools.read.CSiBORGPaths`
Paths objects.
"""
_paths = None
def __init__(self, paths):
assert isinstance(paths, CSiBORGPaths) # REMOVE
assert isinstance(paths, CSiBORGPaths)
self._paths = paths
@property
@ -444,7 +457,7 @@ class MmainReader:
def make_mmain(self, nsim, verbose=False):
"""
Make the summed substructure catalogue for a final snapshot. Includes
the position of the paren, the summed mass and the fraction of mass in
the position of the parent, the summed mass and the fraction of mass in
substructure.
Parameters
@ -472,10 +485,10 @@ class MmainReader:
nmain = numpy.sum(mask_main)
# Preallocate already the output array
out = cols_to_structured(
nmain, [("ID", numpy.int32), ("x", numpy.float32),
nmain, [("index", numpy.int32), ("x", numpy.float32),
("y", numpy.float32), ("z", numpy.float32),
("M", numpy.float32), ("subfrac", numpy.float32)])
out["ID"] = clumparr["index"][mask_main]
out["index"] = clumparr["index"][mask_main]
# Because for these index == parent
for p in ('x', 'y', 'z'):
out[p] = clumparr[p][mask_main]
@ -483,7 +496,7 @@ class MmainReader:
for i in range(nmain):
# Should include the main halo itself, i.e. its own ultimate parent
out["M"][i] = numpy.sum(
clumparr["mass_cl"][ultimate_parent == out["ID"][i]])
clumparr["mass_cl"][ultimate_parent == out["index"][i]])
out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"]
return out, ultimate_parent
@ -549,3 +562,69 @@ def halfwidth_select(hw, particles):
for p in ('x', 'y', 'z'):
particles[p] = (particles[p] - 0.5 + hw) / (2 * hw)
return particles
def load_clump_particles(clid, particles, clump_map, clid2map):
"""
Load a clump's particles from a particle array. If the clump has no
associated particles, return `None`.
Parameters
----------
clid : int
Clump ID.
particles : 2-dimensional array
Array of particles.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
Returns
-------
clump_particles : 2-dimensional array
Particle array of this clump.
"""
try:
k0, kf = clump_map[clid2map[clid], 1:]
return particles[k0:kf + 1, :]
except KeyError:
return None
def load_parent_particles(hid, particles, clump_map, clid2map, clumps_cat):
"""
Load a parent halo's particles from a particle array. If no particles are
found, return `None`.
Parameters
----------
hid : int
Halo ID.
particles : 2-dimensional array
Array of particles.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
Clumps catalogue.
Returns
-------
halo : 2-dimensional array
Particle array of this halo.
"""
clids = clumps_cat["index"][clumps_cat["parent"] == hid]
# We first load the particles of each clump belonging to this parent
# and then concatenate them for further analysis.
clumps = []
for clid in clids:
parts = load_clump_particles(clid, particles, clump_map, clid2map)
if parts is not None:
clumps.append(parts)
if len(clumps) == 0:
return None
return numpy.concatenate(clumps)
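The `clump_map` layout these loaders assume is one row per clump,
`[clid, start, end]`, with an inclusive end index (cf. `particles[k0:kf + 1]`).
A self-contained sketch with synthetic data:

import numpy

# Two clumps: clump 1 owns rows 0..4, clump 2 rows 5..9 of `particles`.
particles = numpy.random.rand(10, 4).astype(numpy.float32)
clump_map = numpy.array([[1, 0, 4], [2, 5, 9]])
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
parts = load_clump_particles(2, particles, clump_map, clid2map)  # rows 5..9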


@ -15,7 +15,10 @@
"""
Various coordinate transformations.
"""
from os.path import isfile
import numpy
from h5py import File
###############################################################################
# Coordinate transforms #
@ -291,14 +294,35 @@ def extract_from_structured(arr, cols):
cols = [cols] if isinstance(cols, str) else cols
for col in cols:
if col not in arr.dtype.names:
raise ValueError("Invalid column `{}`!".format(col))
raise ValueError(f"Invalid column `{col}`!")
# Preallocate an array and populate it
out = numpy.zeros((arr.size, len(cols)), dtype=arr[cols[0]].dtype)
for i, col in enumerate(cols):
out[:, i] = arr[col]
# Optionally flatten
if len(cols) == 1:
return out.reshape(
-1,
)
return out.reshape(-1, )
return out
###############################################################################
# h5py functions #
###############################################################################
def read_h5(path):
"""
Open and return an `h5py.File` object.
Parameters
----------
path : str
Path to the file.
Returns
-------
file : `h5py.File`
"""
if not isfile(path):
raise IOError(f"File `{path}` does not exist!")
return File(path, "r")
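Typical consumption, as in the updated scripts:

f = csiborgtools.read.read_h5(paths.particles_path(nsim))
particles = f["particles"]  # columns x, y, z, M (box units)
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}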

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@ -18,9 +18,7 @@ realisation must have been split in advance by `runsplit_halos`.
"""
from argparse import ArgumentParser
from datetime import datetime
from os.path import join
import h5py
import numpy
from mpi4py import MPI
from tqdm import tqdm
@ -33,20 +31,26 @@ except ModuleNotFoundError:
sys.path.append("../")
import csiborgtools
parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
args = parser.parse_args()
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
nfwpost = csiborgtools.fits.NFWPosterior()
ftemp = join(paths.temp_dumpdir, "fit_clump_{}_{}_{}.npy")
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=False)
else:
ics = args.ics
cols_collect = [
("index", numpy.int32),
("npart", numpy.int32),
@ -63,7 +67,7 @@ cols_collect = [
("lambda200c", numpy.float32),
("r200m", numpy.float32),
("m200m", numpy.float32),
]
]
def fit_clump(particles, clump_info, box):
@ -95,46 +99,19 @@ def fit_clump(particles, clump_info, box):
return out
def load_clump_particles(clumpid, particles, clump_map):
"""
Load a clump's particles. If it is not there, i.e clump has no associated
particles, return `None`.
"""
try:
return particles[clump_map[clumpid], :]
except KeyError:
return None
def load_parent_particles(clumpid, particles, clump_map, clumps_cat):
"""
Load a parent halo's particles.
"""
indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
# We first load the particles of each clump belonging to this parent
# and then concatenate them for further analysis.
clumps = []
for ind in indxs:
parts = load_clump_particles(ind, particles, clump_map)
if parts is not None:
clumps.append(parts)
if len(clumps) == 0:
return None
return numpy.concatenate(clumps)
# We now start looping over all simulations
for i, nsim in enumerate(paths.get_ics(tonew=False)):
if rank == 0:
print(f"{datetime.now()}: calculating {i}th simulation `{nsim}`.",
flush=True)
# We MPI loop over all simulations.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for nsim in [ics[i] for i in jobs]:
print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
flush=True)
nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
# Particle archive
particles = h5py.File(paths.particle_h5py_path(nsim), 'r')["particles"]
clump_map = h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), 'r')
f = csiborgtools.read.read_h5(paths.particles_path(nsim))
particles = f["particles"]
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
# We check whether we fit halos or clumps, will be indexing over different
@ -143,66 +120,39 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
ismain = clumps_cat.ismain
else:
ismain = numpy.ones(len(clumps_cat), dtype=bool)
ntasks = len(clumps_cat)
# We split the clumps among the processes. Each CPU calculates a fraction
# of them and dumps the results in a structured array. Even if we are
# calculating parent halo this index runs over all clumps.
jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]
out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect)
for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs):
clumpid = clumps_cat["index"][j]
out["index"][i] = clumpid
# Even if we are calculating parent halo this index runs over all clumps.
out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
indxs = clumps_cat["index"]
for i, clid in enumerate(tqdm(indxs)) if verbose else enumerate(indxs):
clid = clumps_cat["index"][i]
out["index"][i] = clid
# If we are fitting halos and this clump is not a main, then continue.
if args.kind == "halos" and not ismain[j]:
if args.kind == "halos" and not ismain[i]:
continue
if args.kind == "halos":
part = load_parent_particles(clumpid, particles, clump_map,
clumps_cat)
part = csiborgtools.read.load_parent_particles(
clid, particles, clump_map, clid2map, clumps_cat)
else:
part = load_clump_particles(clumpid, particles, clump_map)
part = csiborgtools.read.load_clump_particles(clid, particles,
clump_map, clid2map)
# We fit the particles if there are any. If not we assign the index,
# otherwise it would be NaN converted to integers (-2147483648) and
# yield an error further down.
if part is not None:
_out = fit_clump(part, clumps_cat[j], box)
for key in _out.keys():
out[key][i] = _out[key]
if part is None:
continue
fout = ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), rank)
if nproc == 0:
print(f"{datetime.now()}: rank {rank} saving to `{fout}`.", flush=True)
_out = fit_clump(part, clumps_cat[i], box)
for key in _out.keys():
out[key][i] = _out[key]
# Finally, we save the results. If we were analysing main halos, then
# remove array indices that do not correspond to parent halos.
if args.kind == "halos":
out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, args.kind)
print(f"Saving to `{fout}`.", flush=True)
numpy.save(fout, out)
# We saved this CPU's results in a temporary file. Wait now for the other
# CPUs and then collect results from the 0th rank and save them.
comm.Barrier()
if rank == 0:
print(f"{datetime.now()}: collecting results for simulation `{nsim}`.",
flush=True)
# We write to the output array. Load data from each CPU and append to
# the output array.
out = csiborgtools.read.cols_to_structured(ntasks, cols_collect)
clumpid2outpos = {indx: i
for i, indx in enumerate(clumps_cat["index"])}
for i in range(nproc):
inp = numpy.load(ftemp.format(str(nsim).zfill(5),
str(nsnap).zfill(5), i))
for j, clumpid in enumerate(inp["index"]):
k = clumpid2outpos[clumpid]
for key in inp.dtype.names:
out[key][k] = inp[key][j]
# If we were analysing main halos, then remove array indices that do
# not correspond to parent halos.
if args.kind == "halos":
out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, args.kind)
print(f"Saving to `{fout}`.", flush=True)
numpy.save(fout, out)
# We now wait before moving on to another simulation.
comm.Barrier()

scripts/fit_init.py (new file, +104 lines)

@ -0,0 +1,104 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass and Lagrangian patch size in
the initial snapshot. The initial snapshot particles are read from the sorted
files.
"""
from argparse import ArgumentParser
from datetime import datetime
import numpy
from mpi4py import MPI
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=True)
else:
ics = args.ics
cols_collect = [("index", numpy.int32),
("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("lagpatch", numpy.float32),]
# MPI loop over simulations
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for nsim in [ics[i] for i in jobs]:
nsnap = max(paths.get_snapshots(nsim))
print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
flush=True)
parts = csiborgtools.read.read_h5(paths.initmatch_path(nsim, "particles"))
parts = parts['particles']
clump_map = csiborgtools.read.read_h5(paths.particles_path(nsim))
clump_map = clump_map["clumpmap"]
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
ismain = clumps_cat.ismain
out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
indxs = clumps_cat["index"]
for i, hid in enumerate(tqdm(indxs) if verbose else indxs):
out["index"][i] = hid
if not ismain[i]:
continue
part = csiborgtools.read.load_parent_particles(hid, parts, clump_map,
clid2map, clumps_cat)
# Skip if the halo is too small.
if part is None or part.size < 100:
continue
dist, cm = csiborgtools.fits.dist_centmass(part)
# We enforce a maximum patchsize of 0.075 in box coordinates.
patchsize = min(numpy.percentile(dist, 99), 0.075)
out["x"][i], out["y"][i], out["z"][i] = cm
out["lagpatch"][i] = patchsize
out = out[ismain]
# Now save it
fout = paths.initmatch_path(nsim, "fit")
print(f"{datetime.now()}: dumping fits to .. `{fout}`.",
flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)

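For reference, a minimal sketch of what `csiborgtools.fits.dist_centmass` is assumed to compute here, consistent with its use above: the distance of each particle from the mass-weighted centre of mass, plus that centre. The `(n, 4)` column layout `x, y, z, M` is an assumption based on how the sorted particles are dumped.

import numpy

def dist_centmass(part):
    # Assumed layout: `part` is an (n, 4) array with columns x, y, z, M.
    cm = numpy.average(part[:, :3], axis=0, weights=part[:, 3])
    dist = numpy.linalg.norm(part[:, :3] - cm, axis=1)
    return dist, cm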
View File

@ -54,35 +54,6 @@ else:
nsims = args.ics
def load_clump_particles(clumpid, particles, clump_map):
"""
Load a clump's particles. If the clump has no associated particles,
return `None`.
"""
try:
return particles[clump_map[clumpid], :]
except KeyError:
return None
def load_parent_particles(clumpid, particles, clump_map, clumps_cat):
"""
Load a parent halo's particles.
"""
indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
# We first load the particles of each clump belonging to this parent
# and then concatenate them for further analysis.
clumps = []
for ind in indxs:
parts = load_clump_particles(ind, particles, clump_map)
if parts is not None:
clumps.append(parts)
if len(clumps) == 0:
return None
return numpy.concatenate(clumps)
# We loop over simulations. Here later optionally add MPI.
for i, nsim in enumerate(nsims):
if rank == 0:
@ -91,10 +62,11 @@ for i, nsim in enumerate(nsims):
nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
particles = h5py.File(paths.particle_h5py_path(nsim), 'r')["particles"]
clump_map = h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), 'r')
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, maxdist=None,
minmass=None, rawdata=True,
f = csiborgtools.read.read_h5(paths.particles_path(nsim))
particles = f["particles"]
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
ismain = clumps_cat.ismain
ntasks = len(clumps_cat)
@ -108,8 +80,8 @@ for i, nsim in enumerate(nsims):
continue
clumpid = clumps_cat["index"][j]
parts = load_parent_particles(clumpid, particles, clump_map,
clumps_cat)
parts = csiborgtools.read.load_parent_particles(
clumpid, particles, clump_map, clid2map, clumps_cat)
# If we have no particles, then do not save anything.
if parts is None:
continue
@ -124,8 +96,7 @@ for i, nsim in enumerate(nsims):
_out["r"] = r[mask]
_out["M"] = obj["M"][mask]
out[str(clumps_cat["index"][j])] = _out
out[str(clumpid)] = _out
# Finished, so we save everything.
fout = paths.radpos_path(nsnap, nsim)

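The helpers removed above are superseded by `csiborgtools.read.load_parent_particles`, which takes the extra `clid2map` lookup. A hedged sketch of the clump-level slicing it presumably builds on, given that `clump_map` rows store `(clump_id, first_index, last_index)` into the clump-sorted particle array (see the dumping script below):

def load_clump_particles(clumpid, particles, clump_map, clid2map):
    # `clid2map` maps a clump ID to its row in `clump_map`; a missing key
    # means the clump has no associated particles.
    row = clid2map.get(clumpid, None)
    if row is None:
        return None
    k0, kf = clump_map[row, 1], clump_map[row, 2]
    return particles[k0:kf + 1]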
View File

@ -13,6 +13,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate overlap between two CSiBORG realisations."""
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from distutils.util import strtobool
@ -26,13 +27,16 @@ except ModuleNotFoundError:
sys.path.append("../")
import csiborgtools
from csiborgtools.read import HaloCatalogue, read_h5
# Argument parser
parser = ArgumentParser()
parser.add_argument("--nsim0", type=int)
parser.add_argument("--nsimx", type=int)
parser.add_argument("--nmult", type=float)
parser.add_argument("--sigma", type=float)
parser.add_argument("--sigma", type=float, default=None)
parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
default=None)
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
args = parser.parse_args()
@ -43,27 +47,52 @@ matcher = csiborgtools.match.RealisationsMatcher()
# Load the raw catalogues (i.e. no selection) including the initial CM
# positions and the particle archives.
cat0 = csiborgtools.read.HaloCatalogue(args.nsim0, paths, load_initial=True,
rawdata=True)
catx = csiborgtools.read.HaloCatalogue(args.nsimx, paths, load_initial=True,
rawdata=True)
halos0_archive = paths.initmatch_path(args.nsim0, "particles")
halosx_archive = paths.initmatch_path(args.nsimx, "particles")
cat0 = HaloCatalogue(args.nsim0, paths, load_initial=True,
minmass=("totpartmass", 1e12), with_lagpatch=True)
catx = HaloCatalogue(args.nsimx, paths, load_initial=True,
minmass=("totpartmass", 1e12), with_lagpatch=True)
clumpmap0 = read_h5(paths.particles_path(args.nsim0))["clumpmap"]
parts0 = read_h5(paths.initmatch_path(args.nsim0, "particles"))["particles"]
clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}
clumpmapx = read_h5(paths.particles_path(args.nsimx))["clumpmap"]
partsx = read_h5(paths.initmatch_path(args.nsimx, "particles"))["particles"]
clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}
# We generate the background density fields. The halos' particles are loaded
# one by one, concatenated and deposited onto the NGP density field.
if args.verbose:
print(f"{datetime.now()}: generating the background density fields.",
flush=True)
delta_bckg = overlapper.make_bckg_delta(halos0_archive, verbose=args.verbose)
delta_bckg = overlapper.make_bckg_delta(halosx_archive, delta=delta_bckg,
delta_bckg = overlapper.make_bckg_delta(parts0, clumpmap0, clid2map0, cat0,
verbose=args.verbose)
delta_bckg = overlapper.make_bckg_delta(partsx, clumpmapx, clid2mapx, catx,
delta=delta_bckg, verbose=args.verbose)
# We calculate the overlap between the NGP fields.
if args.verbose:
print(f"{datetime.now()}: crossing the simulations.", flush=True)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, halos0_archive,
halosx_archive, delta_bckg)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx, clumpmap0,
clumpmapx, delta_bckg,
verbose=args.verbose)
# We wish to store the halo IDs of the matches, not their array positions in
# the catalogues.
match_hids = deepcopy(match_indxs)
for i, matches in enumerate(match_indxs):
for j, match in enumerate(matches):
match_hids[i][j] = catx["index"][match]
fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=False)
numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
ngp_overlap=ngp_overlap)
if args.verbose:
print(f"{datetime.now()}: calculated NGP overlap, saved to {fout}.",
flush=True)
if not args.smoothen:
quit()
# We now smooth the background density field for the smoothed overlap
# calculation.
@ -72,16 +101,12 @@ if args.verbose:
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
if args.verbose:
print(f"{datetime.now()}: calculating smoothed overlaps.", flush=True)
smoothed_overlap = matcher.smoothed_cross(cat0, catx, halos0_archive,
halosx_archive, delta_bckg,
smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
clumpmap0, clumpmapx, delta_bckg,
match_indxs, smooth_kwargs)
# We save the results at long last.
fout = paths.overlap_path(args.nsim0, args.nsimx)
fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=True)
numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=args.sigma)
if args.verbose:
print(f"{datetime.now()}: saving results to `{fout}`.", flush=True)
numpy.savez(fout, match_indxs=match_indxs, ngp_overlap=ngp_overlap,
smoothed_overlap=smoothed_overlap, sigma=args.sigma)
print(f"{datetime.now()}: all finished.", flush=True)
print(f"{datetime.now()}: calculated smoothed overlap, saved to {fout}.",
flush=True)

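`make_bckg_delta` deposits the halos' particles onto a nearest-grid-point (NGP) density field that both overlap calculations build on. A minimal sketch of NGP deposition, assuming positions in box units [0, 1); the function name and signature are illustrative, not the library API:

import numpy

def ngp_deposit(pos, mass, ncells):
    # Assign each particle's mass to the single grid cell containing it.
    delta = numpy.zeros((ncells, ncells, ncells), dtype=numpy.float32)
    cells = numpy.clip((pos * ncells).astype(numpy.int64), 0, ncells - 1)
    for (i, j, k), m in zip(cells, mass):
        delta[i, j, k] += m
    return delta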
View File

@ -12,18 +12,20 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to load in the simulation particles and dump them to a HDF5 file.
Creates a mapping to access directly particles of a single clump.
Script to load in the simulation particles, sort them by their clump ID and
dump them into an HDF5 file. Stores the first and last index of each clump in
the particle array. This can be used for fast slicing of the array to access
particles of a single clump.
"""
from datetime import datetime
from distutils.util import strtobool
from gc import collect
import h5py
import numba
import numpy
from mpi4py import MPI
from tqdm import tqdm
from tqdm import trange
try:
import csiborgtools
@ -44,75 +46,109 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
parser.add_argument("--pos_only", type=lambda x: bool(strtobool(x)),
help="Do we only dump positions?")
parser.add_argument("--dtype", type=str, choices=["float32", "float64"],
default="float32",)
args = parser.parse_args()
verbose = nproc == 1
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
if args.pos_only:
pars_extract = ['x', 'y', 'z', 'M']
else:
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
# Keep "ID" as the last column!
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=False)
else:
ics = args.ics
@numba.jit(nopython=True)
def minmax_clump(clid, clump_ids, start_loop=0):
"""
Find the start and end index of a clump in a sorted array of clump IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, clump_ids.size):
n = clump_ids[i]
if n == clid:
if start is None:
start = i
end = i
elif n > clid:
break
return start, end
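# For illustration (hypothetical values): with a sorted array of clump IDs
# such as clump_ids = numpy.array([7, 7, 42, 42, 42, 99]), calling
# minmax_clump(42, clump_ids) returns (2, 4), so clump_ids[2:5] spans the
# clump. Passing `start_loop` lets consecutive queries resume the scan.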
# MPI loop over individual simulations. We read in the particles from RAMSES
# files and dump them to a HDF5 file.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
nsim = ics[i]
nsnap = max(paths.get_snapshots(nsim))
print(f"{datetime.now()}: Rank {rank} loading particles {nsim}.",
fname = paths.particles_path(nsim)
# We first read in the clump IDs of the particles and infer the sorting.
# Right away we dump the clump IDs to a HDF5 file and clear up memory.
print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
flush=True)
part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
sort_indxs = numpy.argsort(part_cids).astype(numpy.int32)
part_cids = part_cids[sort_indxs]
with h5py.File(fname, "w") as f:
f.create_dataset("clump_ids", data=part_cids)
del part_cids
collect()
parts = partreader.read_particle(nsnap, nsim, pars_extract,
return_structured=False, verbose=verbose)
if args.dtype == "float64":
parts = parts.astype(numpy.float64)
kind = "pos" if args.pos_only else None
print(f"{datetime.now()}: Rank {rank} dumping particles from {nsim}.",
# Next we read in the particles and sort them by their clump ID.
# We cannot directly read this as an unstructured array because the float32
# precision is insufficient to capture the particle IDs.
parts, pids = partreader.read_particle(
nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
# Now we save the particle IDs and then the particles, in two steps.
print(f"{datetime.now()}: rank {rank} dumping particles from {nsim}.",
flush=True)
parts = parts[sort_indxs]
pids = pids[sort_indxs]
del sort_indxs
collect()
with h5py.File(paths.particle_h5py_path(nsim, kind, args.dtype), "w") as f:
with h5py.File(fname, "r+") as f:
f.create_dataset("particle_ids", data=pids)
del pids
collect()
with h5py.File(fname, "r+") as f:
f.create_dataset("particles", data=parts)
del parts
collect()
print(f"{datetime.now()}: Rank {rank} finished dumping of {nsim}.",
flush=True)
# If we are dumping only particle positions, then we are done.
if args.pos_only:
continue
print(f"{datetime.now()}: Rank {rank} mapping particles from {nsim}.",
print(f"{datetime.now()}: rank {rank} creating clump mapping for {nsim}.",
flush=True)
# If not, then load the clump IDs and prepare the memory mapping. We find
# which array positions correspond to which clump IDs and save it. With
# this we can then lazily load into memory the particles for each clump.
part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
rawdata=True)
clumpinds = cat["index"]
# Some of the clumps have no particles, so we do not loop over them
clumpinds = clumpinds[numpy.isin(clumpinds, part_cids)]
out = {}
for i, cid in enumerate(tqdm(clumpinds) if verbose else clumpinds):
out.update({str(cid): numpy.where(part_cids == cid)[0]})
# Load clump IDs back to memory
with h5py.File(fname, "r") as f:
part_cids = f["clump_ids"][:]
# We loop over the unique clump IDs.
unique_clump_ids = numpy.unique(part_cids)
# Use -1 as the fill value; NaN cannot be represented in an integer array.
clump_map = numpy.full((unique_clump_ids.size, 3), -1, dtype=numpy.int32)
start_loop = 0
niters = unique_clump_ids.size
for i in trange(niters) if verbose else range(niters):
clid = unique_clump_ids[i]
k0, kf = minmax_clump(clid, part_cids, start_loop=start_loop)
clump_map[i, 0] = clid
clump_map[i, 1] = k0
clump_map[i, 2] = kf
start_loop = kf
# We save the mapping to a HDF5 file
with h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), "w") as f:
for cid, indxs in out.items():
f.create_dataset(cid, data=indxs)
with h5py.File(paths.particles_path(nsim), "r+") as f:
f.create_dataset("clumpmap", data=clump_map)
del part_cids, cat, clumpinds, out
del part_cids
collect()

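A short sketch of the fast slicing promised above: because `numpy.unique` returns sorted IDs, a clump's row in `clumpmap` can be located with `numpy.searchsorted` and its particles read without loading the whole array. Here `fname` and `cid` are hypothetical stand-ins for a particle file path and a clump ID.

import h5py
import numpy

with h5py.File(fname, "r") as f:
    clump_map = f["clumpmap"][:]
    row = numpy.searchsorted(clump_map[:, 0], cid)
    k0, kf = clump_map[row, 1], clump_map[row, 2]
    parts = f["particles"][k0:kf + 1]  # particles of clump `cid` only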
View File

@ -1,199 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass, Lagrangian patch size in the
initial snapshot and the particle mapping.
"""
from argparse import ArgumentParser
from os.path import join
from datetime import datetime
from gc import collect
import joblib
from os import remove
import h5py
import numpy
from mpi4py import MPI
from tqdm import trange
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
ftemp = lambda kind, nsim, rank: join(paths.temp_dumpdir, f"{kind}_{nsim}_{rank}.p") # noqa
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=True)
else:
ics = args.ics
# We loop over simulations. Each simulation is then processed with MPI; rank 0
# loads the data and broadcasts it to other ranks.
for nsim in ics:
nsnap = max(paths.get_snapshots(nsim))
if rank == 0:
print(f"{datetime.now()}: reading simulation {nsim}.", flush=True)
# We first load particles in the initial and final snapshots and sort
# them by their particle IDs so that we can match them by array
# position. `clump_ids` are the clump IDs of particles.
part0 = partreader.read_particle(1, nsim, ["x", "y", "z", "M", "ID"],
verbose=True,
return_structured=False)
part0 = part0[numpy.argsort(part0[:, -1])]
part0 = part0[:, :-1] # Now we no longer need the particle IDs
pid = partreader.read_particle(nsnap, nsim, ["ID"], verbose=True,
return_structured=False).reshape(-1, )
clump_ids = partreader.read_clumpid(nsnap, nsim, verbose=True)
clump_ids = clump_ids[numpy.argsort(pid)]
# Release the particle IDs, we will not need them anymore now that both
# particle arrays are matched in ordering.
del pid
collect()
# Particles whose clump ID is 0 are unassigned to a clump, so we can
# get rid of them to speed up subsequent operations. We will not need
# these. Again we release the mask.
mask = clump_ids > 0
clump_ids = clump_ids[mask]
part0 = part0[mask, :]
del mask
collect()
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
f.create_dataset("particles", data=part0)
print(f"{datetime.now()}: broadcasting simulation {nsim}.", flush=True)
# Stop all ranks and figure out array shapes from the 0th rank
comm.Barrier()
if rank == 0:
shape = numpy.array([*part0.shape], dtype=numpy.int32)
else:
shape = numpy.empty(2, dtype=numpy.int32)
comm.Bcast(shape, root=0)
# Now broadcast the particle arrays to all ranks
if rank > 0:
part0 = numpy.empty(shape, dtype=numpy.float32)
clump_ids = numpy.empty(shape[0], dtype=numpy.int32)
comm.Bcast(part0, root=0)
comm.Bcast(clump_ids, root=0)
if rank == 0:
print(f"{datetime.now()}: simulation {nsim} broadcasted.", flush=True)
# Calculate the centre of mass of each parent halo, the Lagrangian patch
# size and optionally the initial snapshot particles belonging to this
# parent halo. Dumping the particles will take majority of time.
if rank == 0:
print(f"{datetime.now()}: calculating simulation {nsim}.", flush=True)
# We load up the clump catalogue which contains information about the
# ultimate parent halos of each clump. We will loop only over the clump
# IDs of ultimate parent halos and add their substructure particles and at
# the end save these.
cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
rawdata=True)
parent_ids = cat["index"][cat.ismain]
parent_ids = parent_ids
hid2arrpos = {indx: j for j, indx in enumerate(parent_ids)}
# And we pre-allocate the output array for this simulation.
dtype = {"names": ["index", "x", "y", "z", "lagpatch"],
"formats": [numpy.int32] + [numpy.float32] * 4}
# We MPI loop over the individual halos
jobs = csiborgtools.fits.split_jobs(parent_ids.size, nproc)[rank]
_out_fits = numpy.full(len(jobs), numpy.nan, dtype=dtype)
_out_map = {}
for i in trange(len(jobs)) if verbose else range(len(jobs)):
clid = parent_ids[jobs[i]]
_out_fits["index"][i] = clid
mmain_indxs = cat["index"][cat["parent"] == clid]
mmain_mask = numpy.isin(clump_ids, mmain_indxs, assume_unique=True)
mmain_particles = part0[mmain_mask, :]
# If the number of particles is too small, we skip this halo.
if mmain_particles.size < 100:
continue
raddist, cmpos = csiborgtools.match.dist_centmass(mmain_particles)
patchsize = csiborgtools.match.dist_percentile(raddist, [99],
distmax=0.075)
# Write the temporary results
_out_fits["x"][i], _out_fits["y"][i], _out_fits["z"][i] = cmpos
_out_fits["lagpatch"][i] = patchsize
_out_map.update({str(clid): numpy.where(mmain_mask)[0]})
# Dump the results of this rank to a temporary file.
joblib.dump(_out_fits, ftemp("fits", nsim, rank))
joblib.dump(_out_map, ftemp("map", nsim, rank))
del part0, clump_ids,
collect()
# Now we wait for all ranks, then collect the results and save it.
comm.Barrier()
if rank == 0:
print(f"{datetime.now()}: collecting results for {nsim}.", flush=True)
out_fits = numpy.full(parent_ids.size, numpy.nan, dtype=dtype)
out_map = {}
for i in range(nproc):
# Merge the map dictionaries
out_map = out_map | joblib.load(ftemp("map", nsim, i))
# Now merge the structured arrays
_out_fits = joblib.load(ftemp("fits", nsim, i))
for j in range(_out_fits.size):
k = hid2arrpos[_out_fits["index"][j]]
for par in dtype["names"]:
out_fits[par][k] = _out_fits[par][j]
remove(ftemp("fits", nsim, i))
remove(ftemp("map", nsim, i))
# Now save it
fout_fit = paths.initmatch_path(nsim, "fit")
print(f"{datetime.now()}: dumping fits to .. `{fout_fit}`.",
flush=True)
with open(fout_fit, "wb") as f:
numpy.save(f, out_fits)
fout_map = paths.initmatch_path(nsim, "halomap")
print(f"{datetime.now()}: dumping mapping to .. `{fout_map}`.",
flush=True)
with h5py.File(fout_map, "w") as f:
for hid, indxs in out_map.items():
f.create_dataset(hid, data=indxs)
# We force clean up the memory before continuing.
del out_map, out_fits
collect()

82
scripts/pre_sortinit.py Normal file
View File

@ -0,0 +1,82 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to sort the initial snapshot particles according to their final
snapshot ordering, which is sorted by the clump IDs.
"""
from argparse import ArgumentParser
from datetime import datetime
import h5py
from gc import collect
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
# NOTE: ID has to be the last column.
pars_extract = ["x", "y", "z", "M", "ID"]
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=True)
else:
ics = args.ics
# MPI loop over individual simulations; each rank processes the simulations
# assigned to it.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
nsim = ics[i]
nsnap = max(paths.get_snapshots(nsim))
print(f"{datetime.now()}: reading and processing simulation {nsim}.",
flush=True)
# We first load the particle IDs in the final snapshot.
pidf = csiborgtools.read.read_h5(paths.particles_path(nsim))
pidf = pidf["particle_ids"]
# Then we load the particles in the initial snapshot and reorder them so
# that their particle IDs match the ordering of the final snapshot.
# Because float32 precision is insufficient for the particle IDs, they are
# again returned separately from the particle data.
part0, pid0 = partreader.read_particle(
1, nsim, pars_extract, return_structured=False, verbose=verbose)
# First sort the particles by their own IDs, then invert the sorting of
# the final snapshot IDs to recover its ordering.
part0 = part0[numpy.argsort(pid0)]
del pid0
collect()
part0 = part0[numpy.argsort(numpy.argsort(pidf))]
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
f.create_dataset("particles", data=part0)