Switch initial radius definition (#26)

* Add search for neighbours at z = 0

* Add initial snapshot KNN

* Add initial search at either z = 0 or z = 70

* Add import

* Add clumps_pos2cell

* Add function argument

* Add import

* Add spherical overlap and speed up make_delta

* Add clump limits calculation

* Sped up make_delta

* Add patch size conversion

* Add patch sizes

* Add patch size calculation

* Force catalogues to be in float32

* Optimised script

* Option to remove points with no overlap

* Add spherical approximate overlap

* Fix subboxing bug

* Remove print diagnostics

* Add Lagrangian patch size

* Add patch size documentation

* Move when clumpsx converted to int

* Edit docs

* Remove spherical overlap

* New Lagrangian patch calculation
Richard Stiskalek 2023-01-31 15:09:35 +00:00 committed by GitHub
parent 912a38acfb
commit beb811e84c
6 changed files with 426 additions and 142 deletions

View file

@ -13,6 +13,7 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import (brute_spatial_separation, RealisationsMatcher, cosine_similarity, ParticleOverlap) # noqa
from .match import (brute_spatial_separation, RealisationsMatcher, cosine_similarity, # noqa
ParticleOverlap, get_clumplims, lagpatch_size) # noqa
from .num_density import (binned_counts, number_density) # noqa
# from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa

View file

@ -14,11 +14,13 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import numpy
from scipy.interpolate import interp1d
from scipy.ndimage import gaussian_filter
from tqdm import (tqdm, trange)
from astropy.coordinates import SkyCoord
from numba import jit
from ..read import (CombinedHaloCatalogue, concatenate_clumps)
from gc import collect
from ..read import (CombinedHaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa
def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
@ -154,13 +156,14 @@ class RealisationsMatcher:
return mapping
def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=None,
mass_kind="totpartmass", init_dist=False,
overlap=False, overlapper_kwargs={},
verbose=True):
mass_kind="totpartmass", overlap=False,
overlapper_kwargs={}, select_initial=True,
remove_nooverlap=True, verbose=True):
r"""
Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
the `nsim`th simulation. Also enforces that the neighbours'
:math:`\log M / M_\odot` be within `dlogmass` dex.
Find all neighbours within a multiple of either :math:`R_{\rm init}`
(distance at :math:`z = 70`) or :math:`R_{200c}` (distance at
:math:`z = 0`) of halos in the `nsim`th simulation. Enforces that the
neighbours are similar in mass up to `dlogmass` dex.
Parameters
----------
@ -168,45 +171,49 @@ class RealisationsMatcher:
Index of an IC realisation in `self.cats` whose halos' neighbours
in the remaining simulations to search for.
nmult : float or int, optional
Multiple of :math:`R_{200c}` within which to return neighbours. By
default 5.
Multiple of :math:`R_{\rm init}` or :math:`R_{200c}` within which
to return neighbours. By default 5.
dlogmass : float, optional
Tolerance on the logarithmic mass difference. By default `None`.
mass_kind : str, optional
The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
init_dist : bool, optional
Whether to calculate separation of the initial CMs. By default
`False`.
overlap : bool, optional
Whether to calculate overlap between clumps in the initial
snapshot. By default `False`. Note that this operation is
substantially slower.
snapshot. By default `False`. This operation is slow.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlap`.
select_initial : bool, optional
Whether to select nearest neighbour at the initial or final
snapshot. By default `True`, i.e. at the initial snapshot.
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default
`True`.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
Returns
-------
matches : composite array
Array, indices are `(n_sims - 1, 4, n_halos, n_matches)`. The
Array, indices are `(n_sims - 1, 5, n_halos, n_matches)`. The
2nd axis is `index` of the neighbouring halo in its catalogue,
`dist` is the 3D distance to the halo whose neighbours are
searched, `dist0` is the separation of the initial CMs and
`overlap` is the overlap over the initial clumps, all respectively.
The latter two are calculated only if `init_dist` or `overlap` is
`True`.
searched, `dist0` is the separation of the initial CMs, and
`overlap` is the overlap over the initial clumps, respectively.
"""
self._check_masskind(mass_kind)
# Radius, mass and positions of halos in `n_sim` IC realisation
# Halo properties of this simulation
logmass = numpy.log10(self.cats[n_sim][mass_kind])
R = self.cats[n_sim]["r200"]
pos = self.cats[n_sim].positions
if init_dist:
pos0 = self.cats[n_sim].positions0 # These are CM positions
pos = self.cats[n_sim].positions # Grav potential minimum
pos0 = self.cats[n_sim].positions0 # CM positions
if select_initial:
R = self.cats[n_sim]["patch_size"] # Initial Lagrangian patch size
else:
R = self.cats[n_sim]["r200"] # R200c at z = 0
if overlap:
overlapper = ParticleOverlap(**overlapper_kwargs)
if verbose:
print("Loading initial clump particles for `n_sim = {}`."
.format(n_sim), flush=True)
@ -214,7 +221,11 @@ class RealisationsMatcher:
paths = self.cats[0].paths
with open(paths.clump0_path(self.cats.n_sims[n_sim]), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True)
overlapper = ParticleOverlap(**overlapper_kwargs)
clumps_pos2cell(clumps0, overlapper)
# Precalculate min and max cell along each axis
mins0, maxs0 = get_clumplims(clumps0,
ncells=overlapper.inv_clength,
nshift=overlapper.nshift)
cat2clumps0 = self._cat2clump_mapping(self.cats[n_sim]["index"],
clumps0["ID"])
@ -225,28 +236,42 @@ class RealisationsMatcher:
else:
iters = enumerate(self.search_sim_indices(n_sim))
iters = enumerate(self.search_sim_indices(n_sim))
# Search for neighbours in the other simulations
# Search for neighbours in the other simulations at z = 70
for count, i in iters:
dist, indxs = self.cats[i].radius_neigbours(pos, R * nmult)
if select_initial:
dist0, indxs = self.cats[i].radius_initial_neigbours(
pos0, R * nmult)
else:
# Will switch dist0 <-> dist at the end
dist0, indxs = self.cats[i].radius_neigbours(
pos, R * nmult)
# Enforce int32 and float32
for n in range(dist0.size):
dist0[n] = dist0[n].astype(numpy.float32)
indxs[n] = indxs[n].astype(numpy.int32)
# Get rid of neighbours whose mass is too different
if dlogmass is not None:
for j, indx in enumerate(indxs):
match_logmass = numpy.log10(self.cats[i][mass_kind][indx])
mask = numpy.abs(match_logmass - logmass[j]) < dlogmass
dist[j] = dist[j][mask]
dist0[j] = dist0[j][mask]
indxs[j] = indx[mask]
# Find distance to the between the initial CM
dist0 = [numpy.asanyarray([], dtype=numpy.float64)] * dist.size
if init_dist:
with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0]
# Fill the pre-allocated array on positions with neighbours
for k in with_neigbours:
dist0[k] = numpy.linalg.norm(
# Find the distance at z = 0 (or z = 70, depending on `select_initial`)
dist = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size
with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0]
# Fill the pre-allocated array on positions with neighbours
for k in with_neigbours:
if select_initial:
dist[k] = numpy.linalg.norm(
pos[k] - self.cats[i].positions[indxs[k]], axis=1)
else:
dist[k] = numpy.linalg.norm(
pos0[k] - self.cats[i].positions0[indxs[k]], axis=1)
# Calculate the initial snapshot overlap
cross = [numpy.asanyarray([], dtype=numpy.float64)] * dist.size
cross = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size
if overlap:
if verbose:
print("Loading initial clump particles for `n_sim = {}` "
@ -254,6 +279,7 @@ class RealisationsMatcher:
flush=True)
with open(paths.clump0_path(self.cats.n_sims[i]), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True)
clumps_pos2cell(clumpsx, overlapper)
# Calculate the particle field
if verbose:
@ -261,38 +287,61 @@ class RealisationsMatcher:
flush=True)
particles = concatenate_clumps(clumpsx)
delta = overlapper.make_delta(particles, to_smooth=False)
del particles; collect() # noqa - no longer needed
delta = overlapper.smooth_highres(delta)
if verbose:
print("Smoothed up the field.", flush=True)
# Precalculate min and max cell along each axis
minsx, maxsx = get_clumplims(clumpsx,
ncells=overlapper.inv_clength,
nshift=overlapper.nshift)
cat2clumpsx = self._cat2clump_mapping(self.cats[i]["index"],
clumpsx["ID"])
# Loop only over halos that have neighbours
with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0]
for k in tqdm(with_neigbours) if verbose else with_neigbours:
# Find which clump matches index of this halo from cat
match0 = cat2clumps0[k]
# Get the clump and pre-calculate its cell assignment
# Unpack this clump and its mins and maxs
cl0 = clumps0["clump"][match0]
dint = numpy.full(indxs[k].size, numpy.nan, numpy.float64)
mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Preallocate this array.
crosses = numpy.full(indxs[k].size, numpy.nan,
numpy.float32)
# Loop over the ones we cross-correlate with
for ii, ind in enumerate(indxs[k]):
# Again which cross clump to this index
matchx = cat2clumpsx[ind]
dint[ii] = overlapper(cl0, clumpsx["clump"][matchx],
delta)
crosses[ii] = overlapper(
cl0, clumpsx["clump"][matchx], delta,
mins0_current, maxs0_current,
minsx[matchx], maxsx[matchx])
cross[k] = dint
cross[k] = crosses
# Optionally remove points whose overlap is exactly zero
if remove_nooverlap:
mask = cross[k] > 0
indxs[k] = indxs[k][mask]
dist[k] = dist[k][mask]
dist0[k] = dist0[k][mask]
cross[k] = cross[k][mask]
# Append as a composite array
matches[count] = numpy.asarray(
[indxs, dist, dist0, cross], dtype=object)
# Append as a composite array. Flip the dist order if not `select_initial`
if select_initial:
matches[count] = numpy.asarray(
[indxs, dist, dist0, cross], dtype=object)
else:
matches[count] = numpy.asarray(
[indxs, dist0, dist, cross], dtype=object)
return numpy.asarray(matches, dtype=object)
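The composite return value packs, per cross-matched realisation, the neighbour indices, the final-snapshot separation, the initial-CM separation and the overlap. A minimal consumption sketch, assuming `matcher` is an already-constructed `RealisationsMatcher` (hypothetical instance name):
matches = matcher.cross_knn_position_single(
    n_sim=0, nmult=5, dlogmass=2, overlap=True,
    select_initial=True, remove_nooverlap=True)
indxs, dist, dist0, cross = matches[0]   # first cross-matched realisation
print(indxs[10], cross[10])              # neighbours and overlaps of halo 10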
def cross_knn_position_all(self, nmult=5, dlogmass=None,
mass_kind="totpartmass", init_dist=False,
overlap=False, overlapper_kwargs={},
select_initial=True, remove_nooverlap=True,
verbose=True):
r"""
Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
@ -319,6 +368,12 @@ class RealisationsMatcher:
substantially slower.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlap`.
select_initial : bool, optional
Whether to select nearest neighbour at the initial or final
snapshot. By default `True`, i.e. at the initial snapshot.
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default
`True`.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
@ -333,9 +388,11 @@ class RealisationsMatcher:
# Loop over each catalogue
for i in trange(N) if verbose else range(N):
matches[i] = self.cross_knn_position_single(
i, nmult, dlogmass, mass_kind=mass_kind, init_dist=init_dist,
overlap=overlap, overlapper_kwargs=overlapper_kwargs,
verbose=verbose)
i, nmult, dlogmass, mass_kind=mass_kind,
init_dist=init_dist, overlap=overlap,
overlapper_kwargs=overlapper_kwargs,
select_initial=select_initial,
remove_nooverlap=remove_nooverlap, verbose=verbose)
return matches
@ -446,7 +503,9 @@ class ParticleOverlap:
def pos2cell(self, pos):
"""
Convert position to cell number.
Convert position to cell number. If the dtype of `pos` is in
`numpy.typecodes["AllInteger"]`, `pos` is assumed to already be the cell
number.
Parameters
----------
@ -456,6 +515,9 @@ class ParticleOverlap:
-------
cells : 1-dimensional array
"""
# Check whether this is already the cell
if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos
return numpy.floor(pos * self.inv_clength).astype(int)
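The conversion is a floor of the position times the inverse cell length, with integer input passed through unchanged. A toy illustration, assuming a grid of 2**11 cells per side (the actual value is whatever `ParticleOverlap` sets for `inv_clength`):
import numpy

inv_clength = 2**11  # assumed resolution, for illustration only
pos = numpy.array([0.1, 0.5, 0.999], dtype=numpy.float32)   # box units
cells = numpy.floor(pos * inv_clength).astype(int)           # [204, 1024, 2045]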
def smooth_highres(self, delta):
@ -486,7 +548,8 @@ class ParticleOverlap:
delta[start:end, start:end, start:end] = highres
return delta
def make_delta(self, clump, subbox=False, to_smooth=True):
def make_delta(self, clump, mins=None, maxs=None, subbox=False,
to_smooth=True):
"""
Calculate a NGP density field of a halo on a cubic grid.
@ -494,6 +557,8 @@ class ParticleOverlap:
----------
clump : structured array
Clump structured array, keys must include `x`, `y`, `z` and `M`.
mins, maxs : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension.
subbox : bool, optional
Whether to calculate the density field on a grid strictly enclosing
the clump.
@ -504,30 +569,37 @@ class ParticleOverlap:
-------
delta : 3-dimensional array
"""
coords = ('x', 'y', 'z')
xcell, ycell, zcell = (self.pos2cell(clump[p]) for p in coords)
if subbox:
# Shift the box so that each non-zero grid cell is 0th
xcell -= max(numpy.min(xcell) - self.nshift, 0)
ycell -= max(numpy.min(ycell) - self.nshift, 0)
zcell -= max(numpy.min(zcell) - self.nshift, 0)
cells = [self.pos2cell(clump[p]) for p in ('x', 'y', 'z')]
ncells = max(*(numpy.max(p) + self.nshift
for p in (xcell, ycell, zcell)))
ncells += 1 # Bump up by one to get NUMBER of cells
ncells = min(ncells, self.inv_clength)
# Check that minima and maxima are integers
if not (mins is None and maxs is None):
assert mins.dtype.char in numpy.typecodes["AllInteger"]
assert maxs.dtype.char in numpy.typecodes["AllInteger"]
if subbox:
# Minimum xcell, ycell and zcell of this clump
if mins is None or maxs is None:
mins = numpy.asanyarray(
[max(numpy.min(cell) - self.nshift, 0) for cell in cells])
maxs = numpy.asanyarray(
[min(numpy.max(cell) + self.nshift, self.inv_clength)
for cell in cells])
ncells = numpy.max(maxs - mins) + 1 # To get the number of cells
else:
mins = (0, 0, 0,)
ncells = self.inv_clength
# Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
fill_delta(delta, xcell, ycell, zcell, clump['M'])
fill_delta(delta, *cells, *mins, clump['M'])
if to_smooth and self.smooth_scale is not None:
gaussian_filter(delta, self.smooth_scale, output=delta)
return delta
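A hedged sketch of how the new `mins`/`maxs` arguments combine with `clumps_pos2cell` and `get_clumplims` (both added in this commit) to avoid re-scanning particle positions; `overlapper` and the loaded `clumps` array are assumed to already exist:
clumps_pos2cell(clumps, overlapper)     # float positions -> int32 cell IDs
mins, maxs = get_clumplims(clumps, ncells=overlapper.inv_clength,
                           nshift=overlapper.nshift)
delta = overlapper.make_delta(clumps["clump"][0], mins=mins[0], maxs=maxs[0],
                              subbox=True, to_smooth=False)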
def make_deltas(self, clump1, clump2):
def make_deltas(self, clump1, clump2, mins1=None, maxs1=None,
mins2=None, maxs2=None):
"""
Calculate a NGP density fields of two halos on a grid that encloses
them both.
@ -537,6 +609,12 @@ class ParticleOverlap:
clump1, clump2 : structured arrays
Particle structured array of the two clumps. Keys must include `x`,
`y`, `z` and `M`.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `clump1`.
Optional.
mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `clump2`.
Optional.
Returns
-------
@ -545,39 +623,36 @@ class ParticleOverlap:
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
"""
coords = ('x', 'y', 'z')
xcell1, ycell1, zcell1 = (self.pos2cell(clump1[p]) for p in coords)
xcell2, ycell2, zcell2 = (self.pos2cell(clump2[p]) for p in coords)
xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ('x', 'y', 'z'))
xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ('x', 'y', 'z'))
# Minimum cell number of the two halos along each dimension
xmin = min(numpy.min(xcell1), numpy.min(xcell2)) - self.nshift
ymin = min(numpy.min(ycell1), numpy.min(ycell2)) - self.nshift
zmin = min(numpy.min(zcell1), numpy.min(zcell2)) - self.nshift
xmin, ymin, zmin = max(xmin, 0), max(ymin, 0), max(zmin, 0)
cellmins = (xmin, ymin, zmin)
# Maximum cell number of the two halos along each dimension
xmax = max(numpy.max(xcell1), numpy.max(xcell2))
ymax = max(numpy.max(ycell1), numpy.max(ycell2))
zmax = max(numpy.max(zcell1), numpy.max(zcell2))
if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)):
# Minimum cell number of the two halos along each dimension
xmin = min(numpy.min(xc1), numpy.min(xc2)) - self.nshift
ymin = min(numpy.min(yc1), numpy.min(yc2)) - self.nshift
zmin = min(numpy.min(zc1), numpy.min(zc2)) - self.nshift
# Make sure shifting does not go beyond boundaries
xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)]
# Number of cells is the maximum + 1
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + self.nshift
ncells += 1
ncells = min(ncells, self.inv_clength)
# Maximum cell number of the two halos along each dimension
xmax = max(numpy.max(xc1), numpy.max(xc2)) + self.nshift
ymax = max(numpy.max(yc1), numpy.max(yc2)) + self.nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + self.nshift
# Make sure shifting does not go beyond boundaries
xmax, ymax, zmax = [min(px, self.inv_clength - 1)
for px in (xmax, ymax, zmax)]
else:
xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
# Shift the box so that the first non-zero grid cell is 0th
xcell1 -= xmin
xcell2 -= xmin
ycell1 -= ymin
ycell2 -= ymin
zcell1 -= zmin
zcell2 -= zmin
cellmins = (xmin, ymin, zmin, ) # Cell minima
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1 # Num cells
# Preallocate and fill the array
delta1 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
fill_delta(delta1, xcell1, ycell1, zcell1, clump1['M'])
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
delta2 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
fill_delta(delta2, xcell2, ycell2, zcell2, clump2['M'])
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M'])
if self.smooth_scale is not None:
gaussian_filter(delta1, self.smooth_scale, output=delta1)
@ -606,7 +681,8 @@ class ParticleOverlap:
"""
return _calculate_overlap(delta1, delta2, cellmins, delta2_full)
def __call__(self, clump1, clump2, delta2_full):
def __call__(self, clump1, clump2, delta2_full, mins1=None, maxs1=None,
mins2=None, maxs2=None):
"""
Calculate overlap between `clump1` and `clump2`. See
`self.overlap(...)` and `self.make_deltas(...)` for further
@ -622,17 +698,24 @@ class ParticleOverlap:
delta2_full : 3-dimensional array
Density field of the whole box calculated with particles assigned
to halos at zero redshift.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `clump1`.
Optional.
mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `clump2`.
Optional.
Returns
-------
overlap : float
"""
delta1, delta2, cellmins = self.make_deltas(clump1, clump2)
delta1, delta2, cellmins = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2)
return _calculate_overlap(delta1, delta2, cellmins, delta2_full)
@jit(nopython=True)
def fill_delta(delta, xcell, ycell, zcell, weights):
def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
"""
Fill array delta at the specified indices with their weights. This is a JIT
implementation.
@ -643,6 +726,8 @@ def fill_delta(delta, xcell, ycell, zcell, weights):
Grid to be filled with weights.
xcell, ycell, zcell : 1-dimensional arrays
Indices where to assign `weights`.
xmin, ymin, zmin : ints
Minimum cell IDs of particles.
weights : 1-dimensional arrays
Particle mass.
@ -651,7 +736,43 @@ def fill_delta(delta, xcell, ycell, zcell, weights):
None
"""
for i in range(xcell.size):
delta[xcell[i], ycell[i], zcell[i]] += weights[i]
delta[xcell[i] - xmin, ycell[i] - ymin, zcell[i] - zmin] += weights[i]
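A toy check of the shifted indexing (illustrative only): a particle in cell `(5, 5, 5)` with minima `(4, 4, 4)` lands in `delta[1, 1, 1]`.
import numpy

delta = numpy.zeros((3, 3, 3), dtype=numpy.float32)
cell = numpy.array([5])
weights = numpy.array([1.0], dtype=numpy.float32)
fill_delta(delta, cell, cell, cell, 4, 4, 4, weights)
assert delta[1, 1, 1] == 1.0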
def get_clumplims(clumps, ncells, nshift=None):
"""
Get the lower and upper limit of clumps' positions or cell numbers.
Parameters
----------
clumps : array of arrays
Array of clump structured arrays.
ncells : int
Number of grid cells of the box along a single dimension.
nshift : int, optional
Lower and upper shift of the clump limits.
Returns
-------
mins, maxs : 2-dimensional arrays of shape `(n_samples, 3)`
The minimum and maximum along each axis.
"""
dtype = clumps[0][0]['x'].dtype # dtype of the first clump's 'x'
# `nshift` can only be applied once positions have been converted to cells
if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
raise TypeError("`nshift` is supported only when positions are cells.")
nshift = 0 if nshift is None else nshift # To simplify code below
nclumps = clumps.size
mins = numpy.full((nclumps, 3), numpy.nan, dtype=dtype)
maxs = numpy.full((nclumps, 3), numpy.nan, dtype=dtype)
for i, clump in enumerate(clumps):
for j, p in enumerate(['x', 'y', 'z']):
mins[i, j] = max(numpy.min(clump[0][p]) - nshift, 0)
maxs[i, j] = min(numpy.max(clump[0][p]) + nshift, ncells - 1)
return mins, maxs
@jit(nopython=True)
@ -682,18 +803,20 @@ def _calculate_overlap(delta1, delta2, cellmins, delta2_full):
weight = 0. # Weight to account for other halos
count = 0 # Total number of pixels that are both non-zero
i0, j0, k0 = cellmins # Unpack things
for i in range(imax):
ii = cellmins[0] + i
ii = i0 + i
for j in range(jmax):
jj = cellmins[1] + j
jj = j0 + j
for k in range(kmax):
kk = cellmins[2] + k
kk = k0 + k
cell1, cell2 = delta1[i, j, k], delta2[i, j, k]
totmass += cell1 + cell2
cell = cell1 + cell2
totmass += cell
# Only count cells where both fields are non-zero
if cell1 > 0 and cell2 > 0:
intersect += cell1 + cell2
if cell1 * cell2 > 0:
intersect += cell
weight += cell2 / delta2_full[ii, jj, kk]
count += 1
@ -701,3 +824,64 @@ def _calculate_overlap(delta1, delta2, cellmins, delta2_full):
intersect *= 0.5
weight = weight / count if count > 0 else 0.
return weight * intersect / (totmass - intersect)
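Reading the loop off, the kernel returns :math:`w I / (M_1 + M_2 - I)`, where :math:`I` is half the sum of `delta1 + delta2` over cells in which both fields are non-zero, :math:`M_1 + M_2` is the same sum over all cells of the subbox, and :math:`w` is the mean of `delta2 / delta2_full` over the jointly non-zero cells, which down-weights mass assigned to other haloes at :math:`z = 0`.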
def lagpatch_size(x, y, z, M, dr=0.0025, dqperc=1, minperc=75, defperc=95,
rmax=0.075):
"""
Calculate an approximate Lagrangian patch size in the initial conditions.
Returned as the centre of the first separation bin in which the q-th percentile
increases by less than `dqperc` while already being above `minperc`. Note that
all distances must be in box units.
Parameters
----------
x, y, z : 1-dimensional arrays
Particle coordinates.
M : 1-dimensional array
Particle masses.
dr : float, optional
Separation spacing to evaluate q-th percentile change. Optional, by
default 0.0025
dqperc : int or float, optional
Change of q-th percentile in a bin to find a threshold separation.
Optional, by default 1.
minperc : int or float, optional
Minimum q-th percentile of separation to be considered a patch size.
Optional, by default 75.
defperc : int or float, optional
Default q-th percentile returned if the threshold condition is not
satisfied in any bin. Optional, by default 95.
rmax : float, optional
The maximum allowed patch size. Optional, by default 0.075.
Returns
-------
size : float
"""
# CM along each dimension
cmx, cmy, cmz = [numpy.average(p, weights=M) for p in (x, y, z)]
# Particle distance from the CM
sep = numpy.sqrt(numpy.square(x - cmx)
+ numpy.square(y - cmy)
+ numpy.square(z - cmz))
qs = numpy.linspace(0, 100, 100) # Percentile: where to evaluate
per = numpy.percentile(sep, qs) # Percentile: evaluated
sep2qs = interp1d(per, qs) # Separation to q-th percentile
# Evaluate in q-th percentile in separation bins
sep_bin = numpy.arange(per[0], per[-1], dr)
q_bin = sep2qs(sep_bin) # Evaluate for everything
dq_bin = (q_bin[1:] - q_bin[:-1]) # Take the difference
# Indices when q-th percentile changes below tolerance and is above limit
k = numpy.where((dq_bin < dqperc) & (q_bin[1:] > minperc))[0]
if k.size == 0:
return per[defperc] # Nothing found, so default percentile
else:
k = k[0] # Take the first one that satisfies the cut.
size = 0.5 * (sep_bin[k + 1] + sep_bin[k]) # Bin centre
size = rmax if size > rmax else size # Enforce maximum size
return size
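A minimal usage sketch, mirroring the call added to the initial-matching script below; `x0` is assumed to be one clump's particle structured array with box-unit positions:
size = lagpatch_size(x0['x'], x0['y'], x0['z'], x0['M'])  # box units, capped at rmax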

View file

@ -14,7 +14,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa
from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps) # noqa
from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa
from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups, SDSS) # noqa
from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa

View file

@ -43,6 +43,7 @@ class HaloCatalogue:
_paths = None
_data = None
_knn = None
_knn0 = None
_positions = None
_positions0 = None
@ -52,11 +53,15 @@ class HaloCatalogue:
max_dist = numpy.infty if max_dist is None else max_dist
self._paths = paths
self._set_data(min_m500, max_dist)
# Initialise the KNN
# Initialise the KNN at z = 0 and at z = 70
knn = NearestNeighbors()
knn.fit(self.positions)
self._knn = knn
knn0 = NearestNeighbors()
knn0.fit(self.positions0)
self._knn0 = knn0
@property
def data(self):
"""
@ -180,11 +185,25 @@ class HaloCatalogue:
# Pre-allocate the positions arrays
self._positions = numpy.vstack(
[data["peak_{}".format(p)] for p in ("x", "y", "z")]).T
self._positions = self._positions.astype(numpy.float32)
# And do the unit transform
if initcm is not None:
data = self.box.convert_from_boxunits(data, ["x0", "y0", "z0"])
data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "patch_size"])
self._positions0 = numpy.vstack(
[data["{}0".format(p)] for p in ("x", "y", "z")]).T
self._positions0 = self._positions0.astype(numpy.float32)
# Convert all that is not an integer to float32
names = list(data.dtype.names)
formats = []
for name in names:
if data[name].dtype.char in numpy.typecodes["AllInteger"]:
formats.append(numpy.int32)
else:
formats.append(numpy.float32)
dtype = numpy.dtype({"names": names, "formats": formats})
data = data.astype(dtype)
self._data = data
@ -238,10 +257,10 @@ class HaloCatalogue:
raise ValueError(
"Ordering of `initcat` and `clumps` is inconsistent.")
X = numpy.full((clumps.size, 3), numpy.nan)
for i, p in enumerate(['x', 'y', 'z']):
X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z', "patch_size"]):
X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0"])
return add_columns(clumps, X, ["x0", "y0", "z0", "patch_size"])
@property
def positions(self):
@ -317,7 +336,8 @@ class HaloCatalogue:
def radius_neigbours(self, X, radius):
"""
Return sorted nearest neigbours within `radius` or `X`.
Return sorted nearest neighbours within `radius` of `X` in the final
snapshot.
Parameters
----------
@ -341,6 +361,33 @@ class HaloCatalogue:
# Query the KNN
return self._knn.radius_neighbors(X, radius, sort_results=True)
def radius_initial_neigbours(self, X, radius):
r"""
Return sorted nearest neighbours within `radius` of `X` in the initial
snapshot.
Parameters
----------
X : 2-dimensional array
Array of shape `(n_queries, 3)`, where the latter axis represents
`x`, `y` and `z`.
radius : float
Limiting distance of neighbours.
Returns
-------
dist : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of distances
to the nearest neighbours.
knns : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of indices of
nearest neighbours in this catalogue.
"""
if not (X.ndim == 2 and X.shape[1] == 3):
raise TypeError("`X` must be an array of shape `(n_samples, 3)`.")
# Query the KNN
return self._knn0.radius_neighbors(X, radius, sort_results=True)
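A short query sketch, assuming `cat` is a `HaloCatalogue` with initial positions available; the radius below is illustrative and in the catalogue's converted length units:
X = cat.positions0[:10]                          # shape (n_queries, 3)
dist0, indxs0 = cat.radius_initial_neigbours(X, 10.0)
dist, indxs = cat.radius_neigbours(cat.positions[:10], 10.0)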
@property
def keys(self):
"""Catalogue keys."""
@ -462,8 +509,15 @@ def concatenate_clumps(clumps):
N = 0
for clump, __ in clumps:
N += clump.size
# Infer dtype of positions
if clumps[0][0]['x'].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32
else:
posdtype = numpy.float32
# Pre-allocate array
dtype = {"names": ['x', 'y', 'z', "M"], "formats": [numpy.float32] * 4}
dtype = {"names": ['x', 'y', 'z', 'M'],
"formats": [posdtype] * 3 + [numpy.float32]}
particles = numpy.full(N, numpy.nan, dtype)
# Fill it one clump by another
@ -475,3 +529,41 @@ def concatenate_clumps(clumps):
start = end
return particles
def clumps_pos2cell(clumps, overlapper):
"""
Convert clump positions directly to cell IDs. Useful to speed up subsequent
calculations. Overwrites the passed in arrays.
Parameters
----------
clumps : array of arrays
Array of clump structured arrays whose `x`, `y`, `z` keys will be
converted.
overlapper : py:class:`csiborgtools.match.ParticleOverlap`
`ParticleOverlapper` handling the cell assignment.
Returns
-------
None
"""
# Check if clumps are probably already in cells
if any(clumps[0][0].dtype[p].char in numpy.typecodes["AllInteger"]
for p in ('x', 'y', 'z')):
raise ValueError("Positions appear to already be converted cells.")
# Get the new dtype that replaces float for int for positions
names = clumps[0][0].dtype.names # Take the first one, doesn't matter
formats = [descr[1] for descr in clumps[0][0].dtype.descr]
for i in range(len(names)):
if names[i] in ('x', 'y', 'z'):
formats[i] = numpy.int32
dtype = numpy.dtype({"names": names, "formats": formats})
# Loop over clumps, switch positions for cell IDs and change the dtype
for n in range(clumps.size):
for p in ('x', 'y', 'z'):
clumps[n][0][p] = overlapper.pos2cell(clumps[n][0][p])
clumps[n][0] = clumps[n][0].astype(dtype)
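The intended order of operations, as used in `RealisationsMatcher` above, is roughly the following sketch (`paths`, `n_sim` and the default `ParticleOverlap()` construction are assumed):
with open(paths.clump0_path(n_sim), "rb") as f:
    clumps = numpy.load(f, allow_pickle=True)
overlapper = ParticleOverlap()
clumps_pos2cell(clumps, overlapper)          # in-place float -> cell IDs
particles = concatenate_clumps(clumps)       # inherits integer positions
delta = overlapper.smooth_highres(
    overlapper.make_delta(particles, to_smooth=False))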

View file

@ -26,7 +26,7 @@ from ..read import ParticleReader
# Map of unit conversions
CONV_NAME = {
"length": ["peak_x", "peak_y", "peak_z", "Rs", "rmin", "rmax", "r200",
"r500", "x0", "y0", "z0"],
"r500", "x0", "y0", "z0", "patch_size"],
"mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain"],
"density": ["rho0"]
}

View file

@ -19,14 +19,11 @@ are grouped in a clump at present redshift.
Optionally also dumps the clumps information, however watch out as this will
eat up a lot of memory.
"""
from argparse import ArgumentParser
import numpy
from datetime import datetime
from mpi4py import MPI
from distutils.util import strtobool
from os.path import join
from os import remove
from sys import stdout
from gc import collect
try:
import csiborgtools
@ -35,11 +32,6 @@ except ModuleNotFoundError:
sys.path.append("../")
import csiborgtools
parser = ArgumentParser()
parser.add_argument("--dump_clumps", default=False,
type=lambda x: bool(strtobool(x)))
args = parser.parse_args()
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
@ -57,8 +49,8 @@ fpermpart = join(dumpdir, "initmatch", "clump_{}_particles.npy")
for nsim in nsims:
if rank == 0:
print("{}: reading simulation {}.".format(datetime.now(), nsim))
stdout.flush()
print("{}: reading simulation {}.".format(datetime.now(), nsim),
flush=True)
# Set the snapshot numbers
init_paths.set_info(nsim, init_paths.get_minimum_snapshot(nsim))
@ -88,8 +80,8 @@ for nsim in nsims:
collect()
if rank == 0:
print("{}: dumping clumps for simulation.".format(datetime.now()))
stdout.flush()
print("{}: dumping clumps for simulation.".format(datetime.now()),
flush=True)
# Grab unique clump IDs and loop over them
unique_clumpids = numpy.unique(clump_ids)
@ -100,44 +92,58 @@ for nsim in nsims:
n = unique_clumpids[i]
x0 = part0[clump_ids == n]
# Center of mass
cm = numpy.asanyarray(
[numpy.average(x0[p], weights=x0["M"]) for p in ('x', 'y', 'z')])
# Center of mass and Lagrangian patch size
pos = numpy.vstack([x0[p] for p in ('x', 'y', 'z')]).T
cm = numpy.average(pos, axis=0, weights=x0['M'])
patch_size = csiborgtools.match.lagpatch_size(
*(x0[p] for p in ('x', 'y', 'z', 'M')))
# Dump the center of mass
with open(ftemp.format(nsim, n, "cm"), 'wb') as f:
numpy.save(f, cm)
# Optionally dump the entire clump
if args.dump_clumps:
with open(ftemp.format(nsim, n, "clump"), "wb") as f:
numpy.save(f, x0)
# Dump the Lagrangian patch size
with open(ftemp.format(nsim, n, "patch_size"), 'wb') as f:
numpy.save(f, patch_size)
# Dump the entire clump
with open(ftemp.format(nsim, n, "clump"), "wb") as f:
numpy.save(f, x0)
del part0, clump_ids
collect()
comm.Barrier()
if rank == 0:
print("Collecting CM files...")
stdout.flush()
# Collect the centre of masses and dump them
dtype = {"names": ['x', 'y', 'z', "ID"],
"formats": [numpy.float32] * 3 + [numpy.int32]}
print("Collecting CM files...", flush=True)
# Collect the centre of masses, patch size, etc. and dump them
dtype = {"names": ['x', 'y', 'z', "patch_size", "ID"],
"formats": [numpy.float32] * 4 + [numpy.int32]}
out = numpy.full(njobs, numpy.nan, dtype=dtype)
for i, n in enumerate(unique_clumpids):
# Load in CM vector
fpath = ftemp.format(nsim, n, "cm")
with open(fpath, 'rb') as f:
with open(fpath, "rb") as f:
fin = numpy.load(f)
out['x'][i] = fin[0]
out['y'][i] = fin[1]
out['z'][i] = fin[2]
out["ID"][i] = n
out['x'][i] = fin[0]
out['y'][i] = fin[1]
out['z'][i] = fin[2]
remove(fpath)
print("Dumping CM files to .. `{}`.".format(fpermcm.format(nsim)))
# Load in the patch size
fpath = ftemp.format(nsim, n, "patch_size")
with open(fpath, "rb") as f:
out["patch_size"][i] = numpy.load(f)
remove(fpath)
# Store the halo ID
out["ID"][i] = n
print("Dumping CM files to .. `{}`.".format(fpermcm.format(nsim)),
flush=True)
with open(fpermcm.format(nsim), 'wb') as f:
numpy.save(f, out)
print("Collecting clump files...")
stdout.flush()
print("Collecting clump files...", flush=True)
out = [None] * unique_clumpids.size
dtype = {"names": ["clump", "ID"], "formats": [object, numpy.int32]}
out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype)
@ -148,7 +154,8 @@ for nsim in nsims:
out["clump"][i] = fin
out["ID"][i] = n
remove(fpath)
print("Dumping clump files to .. `{}`.".format(fpermpart.format(nsim)))
print("Dumping clump files to .. `{}`.".format(fpermpart.format(nsim)),
flush=True)
with open(fpermpart.format(nsim), "wb") as f:
numpy.save(f, out)
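For completeness, a sketch of reading the collected centre-of-mass catalogue back (field names follow the dtype defined above):
with open(fpermcm.format(nsim), "rb") as f:
    cat0 = numpy.load(f)
print(cat0["ID"][:5], cat0["patch_size"][:5])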