Switch initial radius definition (#26)

* Add search for neighbours at z = 0

* Add initial snapshot KNN

* Add initial search at either z = 0 or z = 70

* Add import

* Add clumps_pos2cell

* Add function argument

* Add import

* Add spherical overlap and speed up make_delta

* Add clump limits calculation

* Sped up make_delta

* Add patch size conversion

* Add patch sizes

* Add patch size calculation

* Force catalogues to be in float32

* Optimised script

* Option to remove points with no overlap

* Add approximate spherical overlap

* Fix subboxing bug

* Remove print diagnostics

* Add Lagrangian patch size

* Add patch size documentation

* Move when clumpsx converted to int

* Edit docs

* Remove spherical overlap

* New Lagrangian patch calculation
Author: Richard Stiskalek, 2023-01-31 15:09:35 +00:00 (committed by GitHub)
Parent: 912a38acfb
Commit: beb811e84c
6 changed files with 426 additions and 142 deletions


@@ -13,6 +13,7 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-from .match import (brute_spatial_separation, RealisationsMatcher, cosine_similarity, ParticleOverlap) # noqa
+from .match import (brute_spatial_separation, RealisationsMatcher, cosine_similarity, # noqa
+                    ParticleOverlap, get_clumplims, lagpatch_size) # noqa
 from .num_density import (binned_counts, number_density) # noqa
 # from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa


@ -14,11 +14,13 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import numpy import numpy
from scipy.interpolate import interp1d
from scipy.ndimage import gaussian_filter from scipy.ndimage import gaussian_filter
from tqdm import (tqdm, trange) from tqdm import (tqdm, trange)
from astropy.coordinates import SkyCoord from astropy.coordinates import SkyCoord
from numba import jit from numba import jit
from ..read import (CombinedHaloCatalogue, concatenate_clumps) from gc import collect
from ..read import (CombinedHaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa
def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False): def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
@ -154,13 +156,14 @@ class RealisationsMatcher:
return mapping return mapping
def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=None, def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=None,
mass_kind="totpartmass", init_dist=False, mass_kind="totpartmass", overlap=False,
overlap=False, overlapper_kwargs={}, overlapper_kwargs={}, select_initial=True,
verbose=True): remove_nooverlap=True, verbose=True):
r""" r"""
Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in Find all neighbours within a multiple of either :math:`R_{\rm init}`
the `nsim`th simulation. Also enforces that the neighbours' (distance at :math:`z = 70`) or :math:`R_{200c}` (distance at
:math:`\log M / M_\odot` be within `dlogmass` dex. :math:`z = 0`) of halos in the `nsim`th simulation. Enforces that the
neighbours' are similar in mass up to `dlogmass` dex.
Parameters Parameters
---------- ----------
@ -168,45 +171,49 @@ class RealisationsMatcher:
Index of an IC realisation in `self.cats` whose halos' neighbours Index of an IC realisation in `self.cats` whose halos' neighbours
in the remaining simulations to search for. in the remaining simulations to search for.
nmult : float or int, optional nmult : float or int, optional
Multiple of :math:`R_{200c}` within which to return neighbours. By Multiple of :math:`R_{\rm init}` or :math:`R_{200c}` within which
default 5. to return neighbours. By default 5.
dlogmass : float, optional dlogmass : float, optional
Tolerance on mass logarithmic mass difference. By default `None`. Tolerance on mass logarithmic mass difference. By default `None`.
mass_kind : str, optional mass_kind : str, optional
The mass kind whose similarity is to be checked. Must be a valid The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo. mass associated with a halo.
init_dist : bool, optional
Whether to calculate separation of the initial CMs. By default
`False`.
overlap : bool, optional overlap : bool, optional
Whether to calculate overlap between clumps in the initial Whether to calculate overlap between clumps in the initial
snapshot. By default `False`. Note that this operation is snapshot. By default `False`. This operation is slow.
substantially slower.
overlapper_kwargs : dict, optional overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`. Keyword arguments passed to `ParticleOverlapper`.
select_initial : bool, optional
Whether to select nearest neighbour at the initial or final
snapshot. By default `True`, i.e. at the initial snapshot.
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default
`True`.
verbose : bool, optional verbose : bool, optional
Iterator verbosity flag. By default `True`. Iterator verbosity flag. By default `True`.
Returns Returns
------- -------
matches : composite array matches : composite array
Array, indices are `(n_sims - 1, 4, n_halos, n_matches)`. The Array, indices are `(n_sims - 1, 5, n_halos, n_matches)`. The
2nd axis is `index` of the neighbouring halo in its catalogue, 2nd axis is `index` of the neighbouring halo in its catalogue,
`dist` is the 3D distance to the halo whose neighbours are `dist` is the 3D distance to the halo whose neighbours are
searched, `dist0` is the separation of the initial CMs and searched, `dist0` is the separation of the initial CMs, and
`overlap` is the overlap over the initial clumps, all respectively. `overlap` is the overlap over the initial clumps, respectively.
The latter two are calculated only if `init_dist` or `overlap` is
`True`.
""" """
self._check_masskind(mass_kind) self._check_masskind(mass_kind)
# Radius, mass and positions of halos in `n_sim` IC realisation # Halo properties of this simulation
logmass = numpy.log10(self.cats[n_sim][mass_kind]) logmass = numpy.log10(self.cats[n_sim][mass_kind])
R = self.cats[n_sim]["r200"] pos = self.cats[n_sim].positions # Grav potential minimum
pos = self.cats[n_sim].positions pos0 = self.cats[n_sim].positions0 # CM positions
if init_dist: if select_initial:
pos0 = self.cats[n_sim].positions0 # These are CM positions R = self.cats[n_sim]["patch_size"] # Initial Lagrangian patch size
else:
R = self.cats[n_sim]["r200"] # R200c at z = 0
if overlap: if overlap:
overlapper = ParticleOverlap(**overlapper_kwargs)
if verbose: if verbose:
print("Loading initial clump particles for `n_sim = {}`." print("Loading initial clump particles for `n_sim = {}`."
.format(n_sim), flush=True) .format(n_sim), flush=True)
@ -214,7 +221,11 @@ class RealisationsMatcher:
paths = self.cats[0].paths paths = self.cats[0].paths
with open(paths.clump0_path(self.cats.n_sims[n_sim]), "rb") as f: with open(paths.clump0_path(self.cats.n_sims[n_sim]), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True) clumps0 = numpy.load(f, allow_pickle=True)
overlapper = ParticleOverlap(**overlapper_kwargs) clumps_pos2cell(clumps0, overlapper)
# Precalculate min and max cell along each axis
mins0, maxs0 = get_clumplims(clumps0,
ncells=overlapper.inv_clength,
nshift=overlapper.nshift)
cat2clumps0 = self._cat2clump_mapping(self.cats[n_sim]["index"], cat2clumps0 = self._cat2clump_mapping(self.cats[n_sim]["index"],
clumps0["ID"]) clumps0["ID"])
@ -225,28 +236,42 @@ class RealisationsMatcher:
else: else:
iters = enumerate(self.search_sim_indices(n_sim)) iters = enumerate(self.search_sim_indices(n_sim))
iters = enumerate(self.search_sim_indices(n_sim)) iters = enumerate(self.search_sim_indices(n_sim))
# Search for neighbours in the other simulations # Search for neighbours in the other simulations at z = 70
for count, i in iters: for count, i in iters:
dist, indxs = self.cats[i].radius_neigbours(pos, R * nmult) if select_initial:
dist0, indxs = self.cats[i].radius_initial_neigbours(
pos0, R * nmult)
else:
# Will switch dist0 <-> dist at the end
dist0, indxs = self.cats[i].radius_neigbours(
pos, R * nmult)
# Enforce int32 and float32
for n in range(dist0.size):
dist0[n] = dist0[n].astype(numpy.float32)
indxs[n] = indxs[n].astype(numpy.int32)
# Get rid of neighbors whose mass is too off # Get rid of neighbors whose mass is too off
if dlogmass is not None: if dlogmass is not None:
for j, indx in enumerate(indxs): for j, indx in enumerate(indxs):
match_logmass = numpy.log10(self.cats[i][mass_kind][indx]) match_logmass = numpy.log10(self.cats[i][mass_kind][indx])
mask = numpy.abs(match_logmass - logmass[j]) < dlogmass mask = numpy.abs(match_logmass - logmass[j]) < dlogmass
dist[j] = dist[j][mask] dist0[j] = dist0[j][mask]
indxs[j] = indx[mask] indxs[j] = indx[mask]
# Find distance to the between the initial CM # Find the distance at z = 0 (or z = 70 dep. on `search_initial``)
dist0 = [numpy.asanyarray([], dtype=numpy.float64)] * dist.size dist = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size
if init_dist:
with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0] with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0]
# Fill the pre-allocated array on positions with neighbours # Fill the pre-allocated array on positions with neighbours
for k in with_neigbours: for k in with_neigbours:
dist0[k] = numpy.linalg.norm( if select_initial:
dist[k] = numpy.linalg.norm(
pos[k] - self.cats[i].positions[indxs[k]], axis=1)
else:
dist[k] = numpy.linalg.norm(
pos0[k] - self.cats[i].positions0[indxs[k]], axis=1) pos0[k] - self.cats[i].positions0[indxs[k]], axis=1)
# Calculate the initial snapshot overlap # Calculate the initial snapshot overlap
cross = [numpy.asanyarray([], dtype=numpy.float64)] * dist.size cross = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size
if overlap: if overlap:
if verbose: if verbose:
print("Loading initial clump particles for `n_sim = {}` " print("Loading initial clump particles for `n_sim = {}` "
@ -254,6 +279,7 @@ class RealisationsMatcher:
flush=True) flush=True)
with open(paths.clump0_path(self.cats.n_sims[i]), 'rb') as f: with open(paths.clump0_path(self.cats.n_sims[i]), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True) clumpsx = numpy.load(f, allow_pickle=True)
clumps_pos2cell(clumpsx, overlapper)
# Calculate the particle field # Calculate the particle field
if verbose: if verbose:
@ -261,38 +287,61 @@ class RealisationsMatcher:
flush=True) flush=True)
particles = concatenate_clumps(clumpsx) particles = concatenate_clumps(clumpsx)
delta = overlapper.make_delta(particles, to_smooth=False) delta = overlapper.make_delta(particles, to_smooth=False)
del particles; collect() # noqa - no longer needed
delta = overlapper.smooth_highres(delta) delta = overlapper.smooth_highres(delta)
if verbose:
print("Smoothed up the field.", flush=True)
# Precalculate min and max cell along each axis
minsx, maxsx = get_clumplims(clumpsx,
ncells=overlapper.inv_clength,
nshift=overlapper.nshift)
cat2clumpsx = self._cat2clump_mapping(self.cats[i]["index"], cat2clumpsx = self._cat2clump_mapping(self.cats[i]["index"],
clumpsx["ID"]) clumpsx["ID"])
# Loop only over halos that have neighbours # Loop only over halos that have neighbours
with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0]
for k in tqdm(with_neigbours) if verbose else with_neigbours: for k in tqdm(with_neigbours) if verbose else with_neigbours:
# Find which clump matches index of this halo from cat # Find which clump matches index of this halo from cat
match0 = cat2clumps0[k] match0 = cat2clumps0[k]
# Get the clump and pre-calculate its cell assignment # Unpack this clum and its mins and maxs
cl0 = clumps0["clump"][match0] cl0 = clumps0["clump"][match0]
dint = numpy.full(indxs[k].size, numpy.nan, numpy.float64) mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Preallocate this array.
crosses = numpy.full(indxs[k].size, numpy.nan,
numpy.float32)
# Loop over the ones we cross-correlate with # Loop over the ones we cross-correlate with
for ii, ind in enumerate(indxs[k]): for ii, ind in enumerate(indxs[k]):
# Again which cross clump to this index # Again which cross clump to this index
matchx = cat2clumpsx[ind] matchx = cat2clumpsx[ind]
dint[ii] = overlapper(cl0, clumpsx["clump"][matchx], crosses[ii] = overlapper(
delta) cl0, clumpsx["clump"][matchx], delta,
mins0_current, maxs0_current,
minsx[matchx], maxsx[matchx])
cross[k] = dint cross[k] = crosses
# Optionally remove points whose overlap is exaclt zero
if remove_nooverlap:
mask = cross[k] > 0
indxs[k] = indxs[k][mask]
dist[k] = dist[k][mask]
dist0[k] = dist0[k][mask]
cross[k] = cross[k][mask]
# Append as a composite array # Append as a composite array. Flip dist order if not select_init
if select_initial:
matches[count] = numpy.asarray( matches[count] = numpy.asarray(
[indxs, dist, dist0, cross], dtype=object) [indxs, dist, dist0, cross], dtype=object)
else:
matches[count] = numpy.asarray(
[indxs, dist0, dist, cross], dtype=object)
return numpy.asarray(matches, dtype=object) return numpy.asarray(matches, dtype=object)
def cross_knn_position_all(self, nmult=5, dlogmass=None, def cross_knn_position_all(self, nmult=5, dlogmass=None,
mass_kind="totpartmass", init_dist=False, mass_kind="totpartmass", init_dist=False,
overlap=False, overlapper_kwargs={}, overlap=False, overlapper_kwargs={},
select_initial=True, remove_nooverlap=True,
verbose=True): verbose=True):
r""" r"""
Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
@ -319,6 +368,12 @@ class RealisationsMatcher:
substantially slower. substantially slower.
overlapper_kwargs : dict, optional overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`. Keyword arguments passed to `ParticleOverlapper`.
select_initial : bool, optional
Whether to select nearest neighbour at the initial or final
snapshot. By default `True`, i.e. at the initial snapshot.
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default
`True`.
verbose : bool, optional verbose : bool, optional
Iterator verbosity flag. By default `True`. Iterator verbosity flag. By default `True`.
@ -333,9 +388,11 @@ class RealisationsMatcher:
# Loop over each catalogue # Loop over each catalogue
for i in trange(N) if verbose else range(N): for i in trange(N) if verbose else range(N):
matches[i] = self.cross_knn_position_single( matches[i] = self.cross_knn_position_single(
i, nmult, dlogmass, mass_kind=mass_kind, init_dist=init_dist, i, nmult, dlogmass, mass_kind=mass_kind,
overlap=overlap, overlapper_kwargs=overlapper_kwargs, init_dist=init_dist, overlap=overlap,
verbose=verbose) overlapper_kwargs=overlapper_kwargs,
select_initial=select_initial,
remove_nooverlap=remove_nooverlap, verbose=verbose)
return matches return matches
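
As an editorial aside, the neighbour selection and the `dlogmass` cut used above can be mirrored with plain numpy and scikit-learn; the toy catalogues and the fixed search radius below are illustrative only and stand in for the catalogue wrappers.

import numpy
from sklearn.neighbors import NearestNeighbors

gen = numpy.random.default_rng(0)
pos0 = gen.uniform(0, 1, (500, 3)).astype(numpy.float32)   # "reference" halos
posx = gen.uniform(0, 1, (500, 3)).astype(numpy.float32)   # "cross" halos
logmass0 = gen.uniform(12, 15, 500)
logmassx = gen.uniform(12, 15, 500)

knn = NearestNeighbors()
knn.fit(posx)
# Neighbours within a fixed radius (the matcher scales this per halo)
dist0, indxs = knn.radius_neighbors(pos0, 0.05, sort_results=True)

dlogmass = 0.5
for j, indx in enumerate(indxs):
    mask = numpy.abs(logmassx[indx] - logmass0[j]) < dlogmass
    dist0[j] = dist0[j][mask].astype(numpy.float32)
    indxs[j] = indx[mask].astype(numpy.int32)
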
@ -446,7 +503,9 @@ class ParticleOverlap:
def pos2cell(self, pos): def pos2cell(self, pos):
""" """
Convert position to cell number. Convert position to cell number. If `pos` is in
`numpy.typecodes["AllInteger"]` assumes it to already be the cell
number.
Parameters Parameters
---------- ----------
@ -456,6 +515,9 @@ class ParticleOverlap:
------- -------
cells : 1-dimensional array cells : 1-dimensional array
""" """
# Check whether this is already the cell
if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos
return numpy.floor(pos * self.inv_clength).astype(int) return numpy.floor(pos * self.inv_clength).astype(int)
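
An illustrative sketch of the updated `pos2cell` convention (not part of the diff): float positions in box units are floored onto the grid, while integer input is assumed to already hold cell IDs and passes through unchanged.

import numpy

inv_clength = 2 ** 11                     # cells per dimension; illustrative value
pos = numpy.array([0.1, 0.5, 0.999], dtype=numpy.float32)
cells = numpy.floor(pos * inv_clength).astype(int)
print(cells)                              # [ 204 1024 2045]
# Integer input is treated as already being cell IDs
assert cells.dtype.char in numpy.typecodes["AllInteger"]
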
def smooth_highres(self, delta): def smooth_highres(self, delta):
@ -486,7 +548,8 @@ class ParticleOverlap:
delta[start:end, start:end, start:end] = highres delta[start:end, start:end, start:end] = highres
return delta return delta
def make_delta(self, clump, subbox=False, to_smooth=True): def make_delta(self, clump, mins=None, maxs=None, subbox=False,
to_smooth=True):
""" """
Calculate a NGP density field of a halo on a cubic grid. Calculate a NGP density field of a halo on a cubic grid.
@ -494,6 +557,8 @@ class ParticleOverlap:
---------- ----------
clump: structurered arrays clump: structurered arrays
Clump structured array, keys must include `x`, `y`, `z` and `M`. Clump structured array, keys must include `x`, `y`, `z` and `M`.
mins, maxs : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension.
subbox : bool, optional subbox : bool, optional
Whether to calculate the density field on a grid strictly enclosing Whether to calculate the density field on a grid strictly enclosing
the clump. the clump.
@ -504,30 +569,37 @@ class ParticleOverlap:
------- -------
delta : 3-dimensional array delta : 3-dimensional array
""" """
coords = ('x', 'y', 'z') cells = [self.pos2cell(clump[p]) for p in ('x', 'y', 'z')]
xcell, ycell, zcell = (self.pos2cell(clump[p]) for p in coords)
if subbox:
# Shift the box so that each non-zero grid cell is 0th
xcell -= max(numpy.min(xcell) - self.nshift, 0)
ycell -= max(numpy.min(ycell) - self.nshift, 0)
zcell -= max(numpy.min(zcell) - self.nshift, 0)
ncells = max(*(numpy.max(p) + self.nshift # Check that minima and maxima are integers
for p in (xcell, ycell, zcell))) if not (mins is None and maxs is None):
ncells += 1 # Bump up by one to get NUMBER of cells assert mins.dtype.char in numpy.typecodes["AllInteger"]
ncells = min(ncells, self.inv_clength) assert maxs.dtype.char in numpy.typecodes["AllInteger"]
if subbox:
# Minimum xcell, ycell and zcell of this clump
if mins is None or maxs is None:
mins = numpy.asanyarray(
[max(numpy.min(cell) - self.nshift, 0) for cell in cells])
maxs = numpy.asanyarray(
[min(numpy.max(cell) + self.nshift, self.inv_clength)
for cell in cells])
ncells = numpy.max(maxs - mins) + 1 # To get the number of cells
else: else:
mins = (0, 0, 0,)
ncells = self.inv_clength ncells = self.inv_clength
# Preallocate and fill the array # Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32) delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
fill_delta(delta, xcell, ycell, zcell, clump['M']) fill_delta(delta, *cells, *mins, clump['M'])
if to_smooth and self.smooth_scale is not None: if to_smooth and self.smooth_scale is not None:
gaussian_filter(delta, self.smooth_scale, output=delta) gaussian_filter(delta, self.smooth_scale, output=delta)
return delta return delta
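
The subbox path of `make_delta` boils down to padded, clipped bounds plus a shifted NGP assignment; a standalone numpy sketch with made-up grid parameters and particles:

import numpy

ncells_box, nshift = 256, 5                     # illustrative grid size and padding
gen = numpy.random.default_rng(1)
cells = gen.integers(100, 120, (3, 1000))       # x, y, z cell IDs of one clump
masses = gen.uniform(0.5, 1.5, 1000).astype(numpy.float32)

# Padded, clipped bounds of the clump -> size of the enclosing subbox
mins = numpy.array([max(c.min() - nshift, 0) for c in cells])
maxs = numpy.array([min(c.max() + nshift, ncells_box) for c in cells])
ncells = (maxs - mins).max() + 1

# NGP assignment onto the subbox, shifted so that `mins` maps to cell 0
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
numpy.add.at(delta, tuple(cells - mins[:, None]), masses)
print(delta.sum(), masses.sum())                # equal up to float32 rounding
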
def make_deltas(self, clump1, clump2): def make_deltas(self, clump1, clump2, mins1=None, maxs1=None,
mins2=None, maxs2=None):
""" """
Calculate a NGP density fields of two halos on a grid that encloses Calculate a NGP density fields of two halos on a grid that encloses
them both. them both.
@ -537,6 +609,12 @@ class ParticleOverlap:
clump1, clump2 : structurered arrays clump1, clump2 : structurered arrays
Particle structured array of the two clumps. Keys must include `x`, Particle structured array of the two clumps. Keys must include `x`,
`y`, `z` and `M`. `y`, `z` and `M`.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump1`.
Optional.
mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump2`.
Optional.
Returns Returns
------- -------
@ -545,39 +623,36 @@ class ParticleOverlap:
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
""" """
coords = ('x', 'y', 'z') xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ('x', 'y', 'z'))
xcell1, ycell1, zcell1 = (self.pos2cell(clump1[p]) for p in coords) xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ('x', 'y', 'z'))
xcell2, ycell2, zcell2 = (self.pos2cell(clump2[p]) for p in coords)
if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)):
# Minimum cell number of the two halos along each dimension # Minimum cell number of the two halos along each dimension
xmin = min(numpy.min(xcell1), numpy.min(xcell2)) - self.nshift xmin = min(numpy.min(xc1), numpy.min(xc2)) - self.nshift
ymin = min(numpy.min(ycell1), numpy.min(ycell2)) - self.nshift ymin = min(numpy.min(yc1), numpy.min(yc2)) - self.nshift
zmin = min(numpy.min(zcell1), numpy.min(zcell2)) - self.nshift zmin = min(numpy.min(zc1), numpy.min(zc2)) - self.nshift
xmin, ymin, zmin = max(xmin, 0), max(ymin, 0), max(zmin, 0) # Make sure shifting does not go beyond boundaries
cellmins = (xmin, ymin, zmin) xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)]
# Maximum cell number of the two halos along each dimension # Maximum cell number of the two halos along each dimension
xmax = max(numpy.max(xcell1), numpy.max(xcell2)) xmax = max(numpy.max(xc1), numpy.max(xc2)) + self.nshift
ymax = max(numpy.max(ycell1), numpy.max(ycell2)) ymax = max(numpy.max(yc1), numpy.max(yc2)) + self.nshift
zmax = max(numpy.max(zcell1), numpy.max(zcell2)) zmax = max(numpy.max(zc1), numpy.max(zc2)) + self.nshift
# Make sure shifting does not go beyond boundaries
xmax, ymax, zmax = [min(px, self.inv_clength - 1)
for px in (xmax, ymax, zmax)]
else:
xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
# Number of cells is the maximum + 1 cellmins = (xmin, ymin, zmin, ) # Cell minima
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + self.nshift ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1 # Num cells
ncells += 1
ncells = min(ncells, self.inv_clength)
# Shift the box so that the first non-zero grid cell is 0th
xcell1 -= xmin
xcell2 -= xmin
ycell1 -= ymin
ycell2 -= ymin
zcell1 -= zmin
zcell2 -= zmin
# Preallocate and fill the array # Preallocate and fill the array
delta1 = numpy.zeros((ncells,)*3, dtype=numpy.float32) delta1 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
fill_delta(delta1, xcell1, ycell1, zcell1, clump1['M']) fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
delta2 = numpy.zeros((ncells,)*3, dtype=numpy.float32) delta2 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
fill_delta(delta2, xcell2, ycell2, zcell2, clump2['M']) fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M'])
if self.smooth_scale is not None: if self.smooth_scale is not None:
gaussian_filter(delta1, self.smooth_scale, output=delta1) gaussian_filter(delta1, self.smooth_scale, output=delta1)
@ -606,7 +681,8 @@ class ParticleOverlap:
""" """
return _calculate_overlap(delta1, delta2, cellmins, delta2_full) return _calculate_overlap(delta1, delta2, cellmins, delta2_full)
def __call__(self, clump1, clump2, delta2_full): def __call__(self, clump1, clump2, delta2_full, mins1=None, maxs1=None,
mins2=None, maxs2=None):
""" """
Calculate overlap between `clump1` and `clump2`. See Calculate overlap between `clump1` and `clump2`. See
`self.overlap(...)` and `self.make_deltas(...)` for further `self.overlap(...)` and `self.make_deltas(...)` for further
@ -622,17 +698,24 @@ class ParticleOverlap:
delta2_full : 3-dimensional array delta2_full : 3-dimensional array
Density field of the whole box calculated with particles assigned Density field of the whole box calculated with particles assigned
to halos at zero redshift. to halos at zero redshift.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump1`.
Optional.
mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump2`.
Optional.
Returns Returns
------- -------
overlap : float overlap : float
""" """
delta1, delta2, cellmins = self.make_deltas(clump1, clump2) delta1, delta2, cellmins = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2)
return _calculate_overlap(delta1, delta2, cellmins, delta2_full) return _calculate_overlap(delta1, delta2, cellmins, delta2_full)
@jit(nopython=True) @jit(nopython=True)
def fill_delta(delta, xcell, ycell, zcell, weights): def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
""" """
Fill array delta at the specified indices with their weights. This is a JIT Fill array delta at the specified indices with their weights. This is a JIT
implementation. implementation.
@ -643,6 +726,8 @@ def fill_delta(delta, xcell, ycell, zcell, weights):
Grid to be filled with weights. Grid to be filled with weights.
xcell, ycell, zcell : 1-dimensional arrays xcell, ycell, zcell : 1-dimensional arrays
Indices where to assign `weights`. Indices where to assign `weights`.
xmin, ymin, zmin : ints
Minimum cell IDs of particles.
weights : 1-dimensional arrays weights : 1-dimensional arrays
Particle mass. Particle mass.
@ -651,7 +736,43 @@ def fill_delta(delta, xcell, ycell, zcell, weights):
None None
""" """
for i in range(xcell.size): for i in range(xcell.size):
delta[xcell[i], ycell[i], zcell[i]] += weights[i] delta[xcell[i] - xmin, ycell[i] - ymin, zcell[i] - zmin] += weights[i]
def get_clumplims(clumps, ncells, nshift=None):
"""
Get the lower and upper limit of clumps' positions or cell numbers.
Parameters
----------
clumps : array of arrays
Array of clump structured arrays.
ncells : int
Number of grid cells of the box along a single dimension.
nshift : int, optional
Lower and upper shift of the clump limits.
Returns
-------
mins, maxs : 2-dimensional arrays of shape `(n_samples, 3)`
The minimum and maximum along each axis.
"""
dtype = clumps[0][0]['x'].dtype # dtype of the first clump's 'x'
# Check that for real positions we cannot apply nshift
if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
raise TypeError("`nshift` supported only positions are cells.")
nshift = 0 if nshift is None else nshift # To simplify code below
nclumps = clumps.size
mins = numpy.full((nclumps, 3), numpy.nan, dtype=dtype)
maxs = numpy.full((nclumps, 3), numpy.nan, dtype=dtype)
for i, clump in enumerate(clumps):
for j, p in enumerate(['x', 'y', 'z']):
mins[i, j] = max(numpy.min(clump[0][p]) - nshift, 0)
maxs[i, j] = min(numpy.max(clump[0][p]) + nshift, ncells - 1)
return mins, maxs
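
The bounds returned by `get_clumplims` are simply padded, clipped per-axis minima and maxima; a sketch with a simplified clump layout (a plain list of structured arrays instead of the packaged object array):

import numpy

ncells, nshift = 2048, 5                      # illustrative values
gen = numpy.random.default_rng(2)
dtype = {"names": ['x', 'y', 'z'], "formats": [numpy.int32] * 3}
clumps = []
for seed in range(3):
    cl = numpy.zeros(50, dtype=dtype)
    for p in ('x', 'y', 'z'):
        cl[p] = gen.integers(500, 600, 50)
    clumps.append(cl)

mins = numpy.zeros((len(clumps), 3), dtype=numpy.int32)
maxs = numpy.zeros((len(clumps), 3), dtype=numpy.int32)
for i, cl in enumerate(clumps):
    for j, p in enumerate(('x', 'y', 'z')):
        mins[i, j] = max(cl[p].min() - nshift, 0)
        maxs[i, j] = min(cl[p].max() + nshift, ncells - 1)
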
@jit(nopython=True) @jit(nopython=True)
@ -682,18 +803,20 @@ def _calculate_overlap(delta1, delta2, cellmins, delta2_full):
weight = 0. # Weight to account for other halos weight = 0. # Weight to account for other halos
count = 0 # Total number of pixels that are both non-zero count = 0 # Total number of pixels that are both non-zero
i0, j0, k0 = cellmins # Unpack things
for i in range(imax): for i in range(imax):
ii = cellmins[0] + i ii = i0 + i
for j in range(jmax): for j in range(jmax):
jj = cellmins[1] + j jj = j0 + j
for k in range(kmax): for k in range(kmax):
kk = cellmins[2] + k kk = k0 + k
cell1, cell2 = delta1[i, j, k], delta2[i, j, k] cell1, cell2 = delta1[i, j, k], delta2[i, j, k]
totmass += cell1 + cell2 cell = cell1 + cell2
totmass += cell
# If both are zero then skip # If both are zero then skip
if cell1 > 0 and cell2 > 0: if cell1 * cell2 > 0:
intersect += cell1 + cell2 intersect += cell
weight += cell2 / delta2_full[ii, jj, kk] weight += cell2 / delta2_full[ii, jj, kk]
count += 1 count += 1
@ -701,3 +824,64 @@ def _calculate_overlap(delta1, delta2, cellmins, delta2_full):
intersect *= 0.5 intersect *= 0.5
weight = weight / count if count > 0 else 0. weight = weight / count if count > 0 else 0.
return weight * intersect / (totmass - intersect) return weight * intersect / (totmass - intersect)
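
For reference, the statistic computed by the jitted loop above has a compact vectorised equivalent (toy fields; the loop in the diff remains the production version):

import numpy

gen = numpy.random.default_rng(3)
delta1 = gen.uniform(0, 1, (8, 8, 8)).astype(numpy.float32)
delta2 = gen.uniform(0, 1, (8, 8, 8)).astype(numpy.float32)
delta1[delta1 < 0.5] = 0                       # make the fields sparse
delta2[delta2 < 0.5] = 0
cellmins = (10, 10, 10)                        # subbox origin in the full box
delta2_full = gen.uniform(1, 2, (32, 32, 32)).astype(numpy.float32)

i0, j0, k0 = cellmins
sub = delta2_full[i0:i0 + 8, j0:j0 + 8, k0:k0 + 8]
both = (delta1 > 0) & (delta2 > 0)
totmass = (delta1 + delta2).sum()
intersect = 0.5 * (delta1 + delta2)[both].sum()
weight = (delta2[both] / sub[both]).mean() if both.any() else 0.
overlap = weight * intersect / (totmass - intersect)
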
def lagpatch_size(x, y, z, M, dr=0.0025, dqperc=1, minperc=75, defperc=95,
rmax=0.075):
"""
Calculate an approximate Lagrangian patch size in the initial conditions.
Returned as the first bin whose percentile drops by less than `dqperc` and
is above `minperc`. Note that all distances must be in box units.
Parameters
----------
x, y, z : 1-dimensional arrays
Particle coordinates.
M : 1-dimensional array
Particle masses.
dr : float, optional
Separation spacing to evaluate q-th percentile change. Optional, by
default 0.0025
dqperc : int or float, optional
Change of q-th percentile in a bin to find a threshold separation.
Optional, by default 1.
minperc : int or float, optional
Minimum q-th percentile of separation to be considered a patch size.
Optional, by default 75.
defperc : int or float, optional
Default q-th percentile if reduction by `minperc` is not satisfied in
any bin. Optional. By default 95.
rmax : float, optional
The maximum allowed patch size. Optional, by default 0.075.
Returns
-------
size : float
"""
# CM along each dimension
cmx, cmy, cmz = [numpy.average(p, weights=M) for p in (x, y, z)]
# Particle distance from the CM
sep = numpy.sqrt(numpy.square(x - cmx)
+ numpy.square(y - cmy)
+ numpy.square(z - cmz))
qs = numpy.linspace(0, 100, 100) # Percentile: where to evaluate
per = numpy.percentile(sep, qs) # Percentile: evaluated
sep2qs = interp1d(per, qs) # Separation to q-th percentile
# Evaluate in q-th percentile in separation bins
sep_bin = numpy.arange(per[0], per[-1], dr)
q_bin = sep2qs(sep_bin) # Evaluate for everyhing
dq_bin = (q_bin[1:] - q_bin[:-1]) # Take the difference
# Indices when q-th percentile changes below tolerance and is above limit
k = numpy.where((dq_bin < dqperc) & (q_bin[1:] > minperc))[0]
if k.size == 0:
return per[defperc] # Nothing found, so default percentile
else:
k = k[0] # Take the first one that satisfies the cut.
size = 0.5 * (sep_bin[k + 1] + sep_bin[k]) # Bin centre
size = rmax if size > rmax else size # Enforce maximum size
return size
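
A hedged usage sketch of the new estimator, assuming the package is importable as `csiborgtools`; the mock particle blob and its width are invented for illustration:

import numpy
import csiborgtools

gen = numpy.random.default_rng(4)
npart = 5000
# Mock Lagrangian patch: a Gaussian blob of width ~0.01 box units around 0.5
x, y, z = (0.5 + 0.01 * gen.normal(size=npart) for _ in range(3))
M = numpy.ones(npart, dtype=numpy.float32)

size = csiborgtools.match.lagpatch_size(x, y, z, M)
print(size)   # roughly the separation enclosing ~75-95 per cent of the mass, capped at 0.075
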


@@ -14,7 +14,7 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa
-from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps) # noqa
+from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa
 from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
                       TwoMPPGroups, SDSS) # noqa
 from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa


@ -43,6 +43,7 @@ class HaloCatalogue:
_paths = None _paths = None
_data = None _data = None
_knn = None _knn = None
_knn0 = None
_positions = None _positions = None
_positions0 = None _positions0 = None
@ -52,11 +53,15 @@ class HaloCatalogue:
max_dist = numpy.infty if max_dist is None else max_dist max_dist = numpy.infty if max_dist is None else max_dist
self._paths = paths self._paths = paths
self._set_data(min_m500, max_dist) self._set_data(min_m500, max_dist)
# Initialise the KNN # Initialise the KNN at z = 0 and at z = 70
knn = NearestNeighbors() knn = NearestNeighbors()
knn.fit(self.positions) knn.fit(self.positions)
self._knn = knn self._knn = knn
knn0 = NearestNeighbors()
knn0.fit(self.positions0)
self._knn0 = knn0
@property @property
def data(self): def data(self):
""" """
@ -180,11 +185,25 @@ class HaloCatalogue:
# Pre-allocate the positions arrays # Pre-allocate the positions arrays
self._positions = numpy.vstack( self._positions = numpy.vstack(
[data["peak_{}".format(p)] for p in ("x", "y", "z")]).T [data["peak_{}".format(p)] for p in ("x", "y", "z")]).T
self._positions = self._positions.astype(numpy.float32)
# And do the unit transform # And do the unit transform
if initcm is not None: if initcm is not None:
data = self.box.convert_from_boxunits(data, ["x0", "y0", "z0"]) data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "patch_size"])
self._positions0 = numpy.vstack( self._positions0 = numpy.vstack(
[data["{}0".format(p)] for p in ("x", "y", "z")]).T [data["{}0".format(p)] for p in ("x", "y", "z")]).T
self._positions0 = self._positions0.astype(numpy.float32)
# Convert all that is not an integer to float32
names = list(data.dtype.names)
formats = []
for name in names:
if data[name].dtype.char in numpy.typecodes["AllInteger"]:
formats.append(numpy.int32)
else:
formats.append(numpy.float32)
dtype = numpy.dtype({"names": names, "formats": formats})
data = data.astype(dtype)
self._data = data self._data = data
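
The int32/float32 coercion above can be exercised in isolation; a minimal sketch on a synthetic record array with arbitrary field names:

import numpy

data = numpy.zeros(4, dtype=[("index", numpy.int64), ("m200", numpy.float64)])
names = list(data.dtype.names)
formats = [numpy.int32 if data[name].dtype.char in numpy.typecodes["AllInteger"]
           else numpy.float32 for name in names]
data = data.astype(numpy.dtype({"names": names, "formats": formats}))
print(data.dtype)   # [('index', '<i4'), ('m200', '<f4')]
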
@ -238,10 +257,10 @@ class HaloCatalogue:
raise ValueError( raise ValueError(
"Ordering of `initcat` and `clumps` is inconsistent.") "Ordering of `initcat` and `clumps` is inconsistent.")
X = numpy.full((clumps.size, 3), numpy.nan) X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z']): for i, p in enumerate(['x', 'y', 'z', "patch_size"]):
X[:, i] = initcat[p] X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0"]) return add_columns(clumps, X, ["x0", "y0", "z0", "patch_size"])
@property @property
def positions(self): def positions(self):
@ -317,7 +336,8 @@ class HaloCatalogue:
def radius_neigbours(self, X, radius): def radius_neigbours(self, X, radius):
""" """
Return sorted nearest neigbours within `radius` or `X`. Return sorted nearest neigbours within `radius` of `X` in the final
snapshot.
Parameters Parameters
---------- ----------
@ -341,6 +361,33 @@ class HaloCatalogue:
# Query the KNN # Query the KNN
return self._knn.radius_neighbors(X, radius, sort_results=True) return self._knn.radius_neighbors(X, radius, sort_results=True)
def radius_initial_neigbours(self, X, radius):
r"""
Return sorted nearest neigbours within `radius` or `X` in the initial
snapshot.
Parameters
----------
X : 2-dimensional array
Array of shape `(n_queries, 3)`, where the latter axis represents
`x`, `y` and `z`.
radius : float
Limiting distance of neighbours.
Returns
-------
dist : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of distances
to the nearest neighbours.
knns : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of indices of
nearest neighbours in this catalogue.
"""
if not (X.ndim == 2 and X.shape[1] == 3):
raise TypeError("`X` must be an array of shape `(n_samples, 3)`.")
# Query the KNN
return self._knn0.radius_neighbors(X, radius, sort_results=True)
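
The two neighbour searches (z = 0 positions versus initial CM positions) reduce to two independent scikit-learn estimators; a compact sketch on random catalogues in box units:

import numpy
from sklearn.neighbors import NearestNeighbors

gen = numpy.random.default_rng(5)
positions = gen.uniform(0, 1, (1000, 3))     # z = 0 halo positions (illustrative)
positions0 = gen.uniform(0, 1, (1000, 3))    # initial CM positions

knn, knn0 = NearestNeighbors(), NearestNeighbors()
knn.fit(positions)
knn0.fit(positions0)

# Neighbours in the *initial* snapshot around the first five haloes
dist0, indxs = knn0.radius_neighbors(positions0[:5], 0.02, sort_results=True)
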
@property @property
def keys(self): def keys(self):
"""Catalogue keys.""" """Catalogue keys."""
@ -462,8 +509,15 @@ def concatenate_clumps(clumps):
N = 0 N = 0
for clump, __ in clumps: for clump, __ in clumps:
N += clump.size N += clump.size
# Infer dtype of positions
if clumps[0][0]['x'].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32
else:
posdtype = numpy.float32
# Pre-allocate array # Pre-allocate array
dtype = {"names": ['x', 'y', 'z', "M"], "formats": [numpy.float32] * 4} dtype = {"names": ['x', 'y', 'z', 'M'],
"formats": [posdtype] * 3 + [numpy.float32]}
particles = numpy.full(N, numpy.nan, dtype) particles = numpy.full(N, numpy.nan, dtype)
# Fill it one clump by another # Fill it one clump by another
@ -475,3 +529,41 @@ def concatenate_clumps(clumps):
start = end start = end
return particles return particles
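
The preallocate-then-copy pattern of `concatenate_clumps` can be tried on synthetic data; the two mock clumps and the `(particles, ID)` pairing below are simplified stand-ins for the real clump arrays:

import numpy

dtype = {"names": ['x', 'y', 'z', 'M'], "formats": [numpy.float32] * 4}

def mock_clump(n, seed):
    gen = numpy.random.default_rng(seed)
    cl = numpy.zeros(n, dtype=dtype)
    for p in ('x', 'y', 'z', 'M'):
        cl[p] = gen.uniform(0, 1, n)
    return cl

clumps = [(mock_clump(10, 0), 1), (mock_clump(25, 1), 2)]   # (particles, clump ID)

N = sum(clump.size for clump, _ in clumps)
particles = numpy.full(N, numpy.nan, dtype)
start = 0
for clump, _ in clumps:
    end = start + clump.size
    for p in ('x', 'y', 'z', 'M'):
        particles[p][start:end] = clump[p]
    start = end
assert particles.size == 35
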
def clumps_pos2cell(clumps, overlapper):
"""
Convert clump positions directly to cell IDs. Useful to speed up subsequent
calculations. Overwrites the passed in arrays.
Parameters
----------
clumps : array of arrays
Array of clump structured arrays whose `x`, `y`, `z` keys will be
converted.
overlapper : py:class:`csiborgtools.match.ParticleOverlapper`
`ParticleOverlapper` handling the cell assignment.
Returns
-------
None
"""
# Check if clumps are probably already in cells
if any(clumps[0][0].dtype[p].char in numpy.typecodes["AllInteger"]
for p in ('x', 'y', 'z')):
raise ValueError("Positions appear to already be converted cells.")
# Get the new dtype that replaces float for int for positions
names = clumps[0][0].dtype.names # Take the first one, doesn't matter
formats = [descr[1] for descr in clumps[0][0].dtype.descr]
for i in range(len(names)):
if names[i] in ('x', 'y', 'z'):
formats[i] = numpy.int32
dtype = numpy.dtype({"names": names, "formats": formats})
# Loop switch positions for cells IDs and change dtype
for n in range(clumps.size):
for p in ('x', 'y', 'z'):
clumps[n][0][p] = overlapper.pos2cell(clumps[n][0][p])
clumps[n][0] = clumps[n][0].astype(dtype)


@@ -26,7 +26,7 @@ from ..read import ParticleReader
 # Map of unit conversions
 CONV_NAME = {
     "length": ["peak_x", "peak_y", "peak_z", "Rs", "rmin", "rmax", "r200",
-               "r500", "x0", "y0", "z0"],
+               "r500", "x0", "y0", "z0", "patch_size"],
     "mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain"],
     "density": ["rho0"]
 }


@ -19,14 +19,11 @@ are grouped in a clump at present redshift.
Optionally also dumps the clumps information, however watch out as this will Optionally also dumps the clumps information, however watch out as this will
eat up a lot of memory. eat up a lot of memory.
""" """
from argparse import ArgumentParser
import numpy import numpy
from datetime import datetime from datetime import datetime
from mpi4py import MPI from mpi4py import MPI
from distutils.util import strtobool
from os.path import join from os.path import join
from os import remove from os import remove
from sys import stdout
from gc import collect from gc import collect
try: try:
import csiborgtools import csiborgtools
@ -35,11 +32,6 @@ except ModuleNotFoundError:
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
parser = ArgumentParser()
parser.add_argument("--dump_clumps", default=False,
type=lambda x: bool(strtobool(x)))
args = parser.parse_args()
# Get MPI things # Get MPI things
comm = MPI.COMM_WORLD comm = MPI.COMM_WORLD
rank = comm.Get_rank() rank = comm.Get_rank()
@ -57,8 +49,8 @@ fpermpart = join(dumpdir, "initmatch", "clump_{}_particles.npy")
for nsim in nsims: for nsim in nsims:
if rank == 0: if rank == 0:
print("{}: reading simulation {}.".format(datetime.now(), nsim)) print("{}: reading simulation {}.".format(datetime.now(), nsim),
stdout.flush() flush=True)
# Set the snapshot numbers # Set the snapshot numbers
init_paths.set_info(nsim, init_paths.get_minimum_snapshot(nsim)) init_paths.set_info(nsim, init_paths.get_minimum_snapshot(nsim))
@ -88,8 +80,8 @@ for nsim in nsims:
collect() collect()
if rank == 0: if rank == 0:
print("{}: dumping clumps for simulation.".format(datetime.now())) print("{}: dumping clumps for simulation.".format(datetime.now()),
stdout.flush() flush=True)
# Grab unique clump IDs and loop over them # Grab unique clump IDs and loop over them
unique_clumpids = numpy.unique(clump_ids) unique_clumpids = numpy.unique(clump_ids)
@ -100,14 +92,19 @@ for nsim in nsims:
n = unique_clumpids[i] n = unique_clumpids[i]
x0 = part0[clump_ids == n] x0 = part0[clump_ids == n]
# Center of mass # Center of mass and Lagrangian patch size
cm = numpy.asanyarray( pos = numpy.vstack([x0[p] for p in ('x', 'y', 'z')]).T
[numpy.average(x0[p], weights=x0["M"]) for p in ('x', 'y', 'z')]) cm = numpy.average(pos, axis=0, weights=x0['M'])
patch_size = csiborgtools.match.lagpatch_size(
*(x0[p] for p in ('x', 'y', 'z', 'M')))
# Dump the center of mass # Dump the center of mass
with open(ftemp.format(nsim, n, "cm"), 'wb') as f: with open(ftemp.format(nsim, n, "cm"), 'wb') as f:
numpy.save(f, cm) numpy.save(f, cm)
# Optionally dump the entire clump # Dump the Lagrangian patch size
if args.dump_clumps: with open(ftemp.format(nsim, n, "patch_size"), 'wb') as f:
numpy.save(f, patch_size)
# Dump the entire clump
with open(ftemp.format(nsim, n, "clump"), "wb") as f: with open(ftemp.format(nsim, n, "clump"), "wb") as f:
numpy.save(f, x0) numpy.save(f, x0)
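
The per-clump reduction in this loop is a mass-weighted mean followed by the patch-size call; a standalone sketch of the centre-of-mass part on synthetic particles:

import numpy

gen = numpy.random.default_rng(6)
dtype = {"names": ['x', 'y', 'z', 'M'], "formats": [numpy.float32] * 4}
x0 = numpy.zeros(200, dtype=dtype)
for p in ('x', 'y', 'z'):
    x0[p] = 0.3 + 0.005 * gen.normal(size=200)
x0['M'] = gen.uniform(0.5, 1.5, 200)

pos = numpy.vstack([x0[p] for p in ('x', 'y', 'z')]).T
cm = numpy.average(pos, axis=0, weights=x0['M'])   # mass-weighted centre of mass
print(cm)                                          # close to (0.3, 0.3, 0.3)
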
@ -116,28 +113,37 @@ for nsim in nsims:
comm.Barrier() comm.Barrier()
if rank == 0: if rank == 0:
print("Collecting CM files...") print("Collecting CM files...", flush=True)
stdout.flush() # Collect the centre of masses, patch size, etc. and dump them
# Collect the centre of masses and dump them dtype = {"names": ['x', 'y', 'z', "patch_size", "ID"],
dtype = {"names": ['x', 'y', 'z', "ID"], "formats": [numpy.float32] * 4 + [numpy.int32]}
"formats": [numpy.float32] * 3 + [numpy.int32]}
out = numpy.full(njobs, numpy.nan, dtype=dtype) out = numpy.full(njobs, numpy.nan, dtype=dtype)
for i, n in enumerate(unique_clumpids): for i, n in enumerate(unique_clumpids):
# Load in CM vector
fpath = ftemp.format(nsim, n, "cm") fpath = ftemp.format(nsim, n, "cm")
with open(fpath, 'rb') as f: with open(fpath, "rb") as f:
fin = numpy.load(f) fin = numpy.load(f)
out['x'][i] = fin[0] out['x'][i] = fin[0]
out['y'][i] = fin[1] out['y'][i] = fin[1]
out['z'][i] = fin[2] out['z'][i] = fin[2]
out["ID"][i] = n
remove(fpath) remove(fpath)
print("Dumping CM files to .. `{}`.".format(fpermcm.format(nsim)))
# Load in the patch size
fpath = ftemp.format(nsim, n, "patch_size")
with open(fpath, "rb") as f:
out["patch_size"][i] = numpy.load(f)
remove(fpath)
# Store the halo ID
out["ID"][i] = n
print("Dumping CM files to .. `{}`.".format(fpermcm.format(nsim)),
flush=True)
with open(fpermcm.format(nsim), 'wb') as f: with open(fpermcm.format(nsim), 'wb') as f:
numpy.save(f, out) numpy.save(f, out)
print("Collecting clump files...") print("Collecting clump files...", flush=True)
stdout.flush()
out = [None] * unique_clumpids.size out = [None] * unique_clumpids.size
dtype = {"names": ["clump", "ID"], "formats": [object, numpy.int32]} dtype = {"names": ["clump", "ID"], "formats": [object, numpy.int32]}
out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype) out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype)
@ -148,7 +154,8 @@ for nsim in nsims:
out["clump"][i] = fin out["clump"][i] = fin
out["ID"][i] = n out["ID"][i] = n
remove(fpath) remove(fpath)
print("Dumping clump files to .. `{}`.".format(fpermpart.format(nsim))) print("Dumping clump files to .. `{}`.".format(fpermpart.format(nsim)),
flush=True)
with open(fpermpart.format(nsim), "wb") as f: with open(fpermpart.format(nsim), "wb") as f:
numpy.save(f, out) numpy.save(f, out)