Speed up overlap (#27)

* Edit improt

* Simplify patch size calculation

* Add patch size percentiles

* Add various percentiles

* Remove  comment

* Update TODO

* Change to 95th percentile

* Add import

* Add KNN properties

* Add new matching initial condition

* Add import

* Remove import

* Add fast neighbours option

* Further edits to fast neighbours

* add imports

* add new overlap calculation and non-zero things

* Remove print

* Clean up code

* Fix small bug

* Remove comment

* Add run single cross match

* change values

* Edit hyperparams

* Add comment

* Add the argument parser

* Add new lagpatch calc

* New lagpatch calc

* Delete old patch definitions

* Make clump dumping once again optional

* Add lagpatch to the catalogue

* Edit print statement

* Fix small bug

* Remove init radius

* Change to lagpatch key

* Fix a small bug

* Fix little bug
This commit is contained in:
Richard Stiskalek 2023-02-05 11:46:19 +00:00 committed by GitHub
parent beb811e84c
commit 8dea3da4de
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 418 additions and 193 deletions

View file

@ -3,10 +3,7 @@
## CSiBORG Matching ## CSiBORG Matching
### TODO ### TODO
- [x] Implement CIC binning or an alternative scheme for nearby objects. - [ ] Modify the call to tN
- [x] Consistently locate region spanned by a single halo.
- [x] Write a script to perform the matching on a node.
- [x] Make a coarser grid for halos outside of the well resolved region.
### Questions ### Questions
- What scaling of the search region? No reason for it to be a multiple of $R_{200c}$. - What scaling of the search region? No reason for it to be a multiple of $R_{200c}$.

View file

@ -14,6 +14,8 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import (brute_spatial_separation, RealisationsMatcher, cosine_similarity, # noqa from .match import (brute_spatial_separation, RealisationsMatcher, cosine_similarity, # noqa
ParticleOverlap, get_clumplims, lagpatch_size) # noqa ParticleOverlap, get_clumplims, fill_delta, fill_delta_indxs, # noqa
calculate_overlap, calculate_overlap_indxs, # noqa
dist_centmass, dist_percentile) # noqa
from .num_density import (binned_counts, number_density) # noqa from .num_density import (binned_counts, number_density) # noqa
# from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa # from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa

View file

@ -14,7 +14,6 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import numpy import numpy
from scipy.interpolate import interp1d
from scipy.ndimage import gaussian_filter from scipy.ndimage import gaussian_filter
from tqdm import (tqdm, trange) from tqdm import (tqdm, trange)
from astropy.coordinates import SkyCoord from astropy.coordinates import SkyCoord
@ -155,15 +154,16 @@ class RealisationsMatcher:
mapping[ind2] = ind1 mapping[ind2] = ind1
return mapping return mapping
def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=None, def cross_knn_position_single(self, n_sim, nmult=1, dlogmass=None,
mass_kind="totpartmass", overlap=False, mass_kind="totpartmass", overlap=False,
overlapper_kwargs={}, select_initial=True, overlapper_kwargs={}, select_initial=True,
remove_nooverlap=True, verbose=True): remove_nooverlap=True, fast_neighbours=False,
verbose=True):
r""" r"""
Find all neighbours within a multiple of either :math:`R_{\rm init}` Find all neighbours within a multiple of the sum of either the initial
(distance at :math:`z = 70`) or :math:`R_{200c}` (distance at Lagrangian patch sizes (distance at :math:`z = 70`) or :math:`R_{200c}`
:math:`z = 0`) of halos in the `nsim`th simulation. Enforces that the (distance at :math:`z = 0`). Enforces that the neighbours' are similar
neighbours' are similar in mass up to `dlogmass` dex. in mass up to `dlogmass` dex and optionally calculates their overlap.
Parameters Parameters
---------- ----------
@ -171,8 +171,8 @@ class RealisationsMatcher:
Index of an IC realisation in `self.cats` whose halos' neighbours Index of an IC realisation in `self.cats` whose halos' neighbours
in the remaining simulations to search for. in the remaining simulations to search for.
nmult : float or int, optional nmult : float or int, optional
Multiple of :math:`R_{\rm init}` or :math:`R_{200c}` within which Multiple of the sum of pair Lagrangian patch sizes or
to return neighbours. By default 5. :math:`R_{200c}` within which to return neighbours. By default 1.
dlogmass : float, optional dlogmass : float, optional
Tolerance on mass logarithmic mass difference. By default `None`. Tolerance on mass logarithmic mass difference. By default `None`.
mass_kind : str, optional mass_kind : str, optional
@ -190,6 +190,11 @@ class RealisationsMatcher:
remove_nooverlap : bool, optional remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default Whether to remove pairs with exactly zero overlap. By default
`True`. `True`.
fast_neighbours : bool, optional
Whether to calculate neighbours within a fixed radius of each
clump. Note that this will result in missing some matches. If
`True` then `nmult` is a multiple of either the initial patch size
of :math:`R_{200c}`.
verbose : bool, optional verbose : bool, optional
Iterator verbosity flag. By default `True`. Iterator verbosity flag. By default `True`.
@ -208,7 +213,7 @@ class RealisationsMatcher:
pos = self.cats[n_sim].positions # Grav potential minimum pos = self.cats[n_sim].positions # Grav potential minimum
pos0 = self.cats[n_sim].positions0 # CM positions pos0 = self.cats[n_sim].positions0 # CM positions
if select_initial: if select_initial:
R = self.cats[n_sim]["patch_size"] # Initial Lagrangian patch size R = self.cats[n_sim]["lagpatch"] # Initial Lagrangian patch size
else: else:
R = self.cats[n_sim]["r200"] # R200c at z = 0 R = self.cats[n_sim]["r200"] # R200c at z = 0
@ -238,13 +243,29 @@ class RealisationsMatcher:
iters = enumerate(self.search_sim_indices(n_sim)) iters = enumerate(self.search_sim_indices(n_sim))
# Search for neighbours in the other simulations at z = 70 # Search for neighbours in the other simulations at z = 70
for count, i in iters: for count, i in iters:
if verbose:
print("Querying the KNN for `n_sim = {}`.".format(n_sim),
flush=True)
# Query the KNN either fast (miss some) or slow (get all)
if select_initial: if select_initial:
dist0, indxs = self.cats[i].radius_initial_neigbours( if fast_neighbours:
pos0, R * nmult) dist0, indxs = self.cats[i].radius_neigbours(
pos0, R * nmult, select_initial=True)
else:
dist0, indxs = radius_neighbours(
self.cats[i].knn0, pos0, radiusX=R,
radiusKNN=self.cats[i]["lagpatch"], nmult=nmult,
verbose=verbose)
else: else:
# Will switch dist0 <-> dist at the end # Will switch dist0 <-> dist at the end
if fast_neighbours:
dist0, indxs = self.cats[i].radius_neigbours( dist0, indxs = self.cats[i].radius_neigbours(
pos, R * nmult) pos, R * nmult, select_initial=False)
else:
dist0, indxs = radius_neighbours(
self.cats[i].knn, pos, radiusX=R,
radiusKNN=self.cats[i]["r200"], nmult=nmult,
verbose=verbose)
# Enforce int32 and float32 # Enforce int32 and float32
for n in range(dist0.size): for n in range(dist0.size):
dist0[n] = dist0[n].astype(numpy.float32) dist0[n] = dist0[n].astype(numpy.float32)
@ -303,8 +324,9 @@ class RealisationsMatcher:
# Find which clump matches index of this halo from cat # Find which clump matches index of this halo from cat
match0 = cat2clumps0[k] match0 = cat2clumps0[k]
# Unpack this clum and its mins and maxs # Unpack this clum, its mamss and mins and maxs
cl0 = clumps0["clump"][match0] cl0 = clumps0["clump"][match0]
mass0 = numpy.sum(cl0['M'])
mins0_current, maxs0_current = mins0[match0], maxs0[match0] mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Preallocate this array. # Preallocate this array.
crosses = numpy.full(indxs[k].size, numpy.nan, crosses = numpy.full(indxs[k].size, numpy.nan,
@ -314,10 +336,11 @@ class RealisationsMatcher:
for ii, ind in enumerate(indxs[k]): for ii, ind in enumerate(indxs[k]):
# Again which cross clump to this index # Again which cross clump to this index
matchx = cat2clumpsx[ind] matchx = cat2clumpsx[ind]
clx = clumpsx["clump"][matchx]
crosses[ii] = overlapper( crosses[ii] = overlapper(
cl0, clumpsx["clump"][matchx], delta, cl0, clx, delta, mins0_current, maxs0_current,
mins0_current, maxs0_current, minsx[matchx], maxsx[matchx],
minsx[matchx], maxsx[matchx]) mass1=mass0, mass2=numpy.sum(clx['M']))
cross[k] = crosses cross[k] = crosses
# Optionally remove points whose overlap is exaclt zero # Optionally remove points whose overlap is exaclt zero
@ -338,30 +361,26 @@ class RealisationsMatcher:
return numpy.asarray(matches, dtype=object) return numpy.asarray(matches, dtype=object)
def cross_knn_position_all(self, nmult=5, dlogmass=None, def cross_knn_position_all(self, nmult=1, dlogmass=None,
mass_kind="totpartmass", init_dist=False, mass_kind="totpartmass", overlap=False,
overlap=False, overlapper_kwargs={}, overlapper_kwargs={},
select_initial=True, remove_nooverlap=True, select_initial=True, remove_nooverlap=True,
verbose=True): verbose=True):
r""" r"""
Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in Find all counterparts of halos in all simulations listed in
all simulations listed in `self.cats`. Also enforces that the `self.cats`. See `self.cross_knn_position_single` for more details.
neighbours' :math:`\log M_{200c}` be within `dlogmass` dex.
Parameters Parameters
---------- ----------
nmult : float or int, optional nmult : float or int, optional
Multiple of :math:`R_{200c}` within which to return neighbours. By Multiple of the sum of pair Lagrangian patch sizes or
default 5. :math:`R_{200c}` within which to return neighbours. By default 1.
dlogmass : float, optional dlogmass : float, optional
Tolerance on mass logarithmic mass difference. By default `None`. Tolerance on mass logarithmic mass difference. By default `None`.
mass_kind : str, optional mass_kind : str, optional
The mass kind whose similarity is to be checked. Must be a valid The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo. mass associated with a halo.
init_dist : bool, optional
Whether to calculate separation of the initial CMs. By default
`False`.
overlap : bool, optional overlap : bool, optional
Whether to calculate overlap between clumps in the initial Whether to calculate overlap between clumps in the initial
snapshot. By default `False`. Note that this operation is snapshot. By default `False`. Note that this operation is
@ -388,8 +407,7 @@ class RealisationsMatcher:
# Loop over each catalogue # Loop over each catalogue
for i in trange(N) if verbose else range(N): for i in trange(N) if verbose else range(N):
matches[i] = self.cross_knn_position_single( matches[i] = self.cross_knn_position_single(
i, nmult, dlogmass, mass_kind=mass_kind, i, nmult, dlogmass, mass_kind=mass_kind, overlap=overlap,
init_dist=init_dist, overlap=overlap,
overlapper_kwargs=overlapper_kwargs, overlapper_kwargs=overlapper_kwargs,
select_initial=select_initial, select_initial=select_initial,
remove_nooverlap=remove_nooverlap, verbose=verbose) remove_nooverlap=remove_nooverlap, verbose=verbose)
@ -599,7 +617,7 @@ class ParticleOverlap:
return delta return delta
def make_deltas(self, clump1, clump2, mins1=None, maxs1=None, def make_deltas(self, clump1, clump2, mins1=None, maxs1=None,
mins2=None, maxs2=None): mins2=None, maxs2=None, return_nonzero1=False):
""" """
Calculate a NGP density fields of two halos on a grid that encloses Calculate a NGP density fields of two halos on a grid that encloses
them both. them both.
@ -622,6 +640,9 @@ class ParticleOverlap:
Density arrays of `clump1` and `clump2`, respectively. Density arrays of `clump1` and `clump2`, respectively.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
nonzero1 : 2-dimensional array
Indices where `delta1` has a non-zero density. If `return_nonzero1`
is `False` return `None` instead.
""" """
xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ('x', 'y', 'z')) xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ('x', 'y', 'z'))
xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ('x', 'y', 'z')) xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ('x', 'y', 'z'))
@ -648,16 +669,22 @@ class ParticleOverlap:
cellmins = (xmin, ymin, zmin, ) # Cell minima cellmins = (xmin, ymin, zmin, ) # Cell minima
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1 # Num cells ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1 # Num cells
# Preallocate and fill the array # Preallocate and fill the arrays
delta1 = numpy.zeros((ncells,)*3, dtype=numpy.float32) delta1 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
delta2 = numpy.zeros((ncells,)*3, dtype=numpy.float32) delta2 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
if return_nonzero1:
nonzero1 = fill_delta_indxs(
delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
else:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
nonzero1 = None
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M']) fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M'])
if self.smooth_scale is not None: if self.smooth_scale is not None:
gaussian_filter(delta1, self.smooth_scale, output=delta1) gaussian_filter(delta1, self.smooth_scale, output=delta1)
gaussian_filter(delta2, self.smooth_scale, output=delta2) gaussian_filter(delta2, self.smooth_scale, output=delta2)
return delta1, delta2, cellmins
return delta1, delta2, cellmins, nonzero1
@staticmethod @staticmethod
def overlap(delta1, delta2, cellmins, delta2_full): def overlap(delta1, delta2, cellmins, delta2_full):
@ -679,10 +706,11 @@ class ParticleOverlap:
------- -------
overlap : float overlap : float
""" """
return _calculate_overlap(delta1, delta2, cellmins, delta2_full) return calculate_overlap(delta1, delta2, cellmins, delta2_full)
def __call__(self, clump1, clump2, delta2_full, mins1=None, maxs1=None, def __call__(self, clump1, clump2, delta2_full, mins1=None, maxs1=None,
mins2=None, maxs2=None): mins2=None, maxs2=None, mass1=None, mass2=None,
loop_nonzero=True):
""" """
Calculate overlap between `clump1` and `clump2`. See Calculate overlap between `clump1` and `clump2`. See
`self.overlap(...)` and `self.make_deltas(...)` for further `self.overlap(...)` and `self.make_deltas(...)` for further
@ -704,21 +732,33 @@ class ParticleOverlap:
mins2, maxs2 : 1-dimensional arrays of shape `(3,)` mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump2`. Minimun and maximum cell numbers along each dimension of `clump2`.
Optional. Optional.
mass1, mass2 : floats, optional
Total mass of `clump1` and `clump2`, respectively. Must be provided
if `loop_nonzero` is `True`.
loop_nonzer : bool, optional
Whether to only loop over cells where `clump1` has non-zero
density. By default `True`.
Returns Returns
------- -------
overlap : float overlap : float
""" """
delta1, delta2, cellmins = self.make_deltas( delta1, delta2, cellmins, nonzero1 = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2) clump1, clump2, mins1, maxs1, mins2, maxs2,
return _calculate_overlap(delta1, delta2, cellmins, delta2_full) return_nonzero1=loop_nonzero)
if not loop_nonzero:
return calculate_overlap(delta1, delta2, cellmins, delta2_full)
return calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full,
nonzero1, mass1, mass2)
@jit(nopython=True) @jit(nopython=True)
def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights): def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
""" """
Fill array delta at the specified indices with their weights. This is a JIT Fill array `delta` at the specified indices with their weights. This is a
implementation. JIT implementation.
Parameters Parameters
---------- ----------
@ -735,8 +775,45 @@ def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
------- -------
None None
""" """
for i in range(xcell.size): for n in range(xcell.size):
delta[xcell[i] - xmin, ycell[i] - ymin, zcell[i] - zmin] += weights[i] delta[xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin] += weights[n]
@jit(nopython=True)
def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
"""
Fill array `delta` at the specified indices with their weights and return
indices where `delta` was assigned a value. This is a JIT implementation.
Parameters
----------
delta : 3-dimensional array
Grid to be filled with weights.
xcell, ycell, zcell : 1-dimensional arrays
Indices where to assign `weights`.
xmin, ymin, zmin : ints
Minimum cell IDs of particles.
weights : 1-dimensional arrays
Particle mass.
Returns
-------
cells : 1-dimensional array
Indices where `delta` was assigned a value.
"""
# Array to count non-zero cells
cells = numpy.full((xcell.size, 3), numpy.nan, numpy.int32)
count_nonzero = 0
for n in range(xcell.size):
i, j, k = xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin
# If a cell is zero add it
if delta[i, j, k] == 0:
cells[count_nonzero, :] = i, j, k
count_nonzero += 1
delta[i, j, k] += weights[n]
return cells[:count_nonzero, :] # Cutoff unassigned places
def get_clumplims(clumps, ncells, nshift=None): def get_clumplims(clumps, ncells, nshift=None):
@ -776,7 +853,7 @@ def get_clumplims(clumps, ncells, nshift=None):
@jit(nopython=True) @jit(nopython=True)
def _calculate_overlap(delta1, delta2, cellmins, delta2_full): def calculate_overlap(delta1, delta2, cellmins, delta2_full):
r""" r"""
Overlap between two clumps whose density fields are evaluated on the Overlap between two clumps whose density fields are evaluated on the
same grid. This is a JIT implementation, hence it is outside of the main same grid. This is a JIT implementation, hence it is outside of the main
@ -796,14 +873,13 @@ def _calculate_overlap(delta1, delta2, cellmins, delta2_full):
------- -------
overlap : float overlap : float
""" """
imax, jmax, kmax = delta1.shape
totmass = 0. # Total mass of clump 1 and clump 2 totmass = 0. # Total mass of clump 1 and clump 2
intersect = 0. # Mass of pixels that are non-zero in both clumps intersect = 0. # Mass of pixels that are non-zero in both clumps
weight = 0. # Weight to account for other halos weight = 0. # Weight to account for other halos
count = 0 # Total number of pixels that are both non-zero count = 0 # Total number of pixels that are both non-zero
i0, j0, k0 = cellmins # Unpack things i0, j0, k0 = cellmins # Unpack things
imax, jmax, kmax = delta1.shape
for i in range(imax): for i in range(imax):
ii = i0 + i ii = i0 + i
for j in range(jmax): for j in range(jmax):
@ -826,62 +902,153 @@ def _calculate_overlap(delta1, delta2, cellmins, delta2_full):
return weight * intersect / (totmass - intersect) return weight * intersect / (totmass - intersect)
def lagpatch_size(x, y, z, M, dr=0.0025, dqperc=1, minperc=75, defperc=95, @jit(nopython=True)
rmax=0.075): def calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full, nonzero1,
""" mass1, mass2):
Calculate an approximate Lagrangian patch size in the initial conditions. r"""
Returned as the first bin whose percentile drops by less than `dqperc` and Overlap between two clumps whose density fields are evaluated on the
is above `minperc`. Note that all distances must be in box units. same grid and `nonzero1` enumerates the non-zero cells of `delta1. This is
a JIT implementation, hence it is outside of the main class.
Parameters Parameters
---------- ----------
x, y, z : 1-dimensional arrays delta1, delta2 : 3-dimensional arrays
Particle coordinates. Clumps density fields.
M : 1-dimensional array cellmins : len-3 tuple
Particle masses. Tuple of left-most cell ID in the full box.
dr : float, optional delta2_full : 3-dimensional array
Separation spacing to evaluate q-th percentile change. Optional, by Density field of the whole box calculated with particles assigned
default 0.0025 to halos at zero redshift.
dqperc : int or float, optional nonzero1 : 2-dimensional array of shape `(n_cells, 3)`
Change of q-th percentile in a bin to find a threshold separation. Indices of cells that are non-zero in `delta1`. Expected to be
Optional, by default 1. precomputed from `fill_delta_indxs`.
minperc : int or float, optional mass1, mass2 : floats, optional
Minimum q-th percentile of separation to be considered a patch size. Total masses of the two clumps, respectively. Optional. If not provided
Optional, by default 75. calculcated directly from the density field.
defperc : int or float, optional
Default q-th percentile if reduction by `minperc` is not satisfied in
any bin. Optional. By default 95.
rmax : float, optional
The maximum allowed patch size. Optional, by default 0.075.
Returns Returns
------- -------
size : float overlap : float
"""
totmass = mass1 + mass2 # Total mass of clump 1 and clump 2
intersect = 0. # Mass of pixels that are non-zero in both clumps
weight = 0. # Weight to account for other halos
count = 0 # Total number of pixels that are both non-zero
i0, j0, k0 = cellmins # Unpack cell minimas
ncells = nonzero1.shape[0]
for n in range(ncells):
i, j, k = nonzero1[n, :]
cell1, cell2 = delta1[i, j, k], delta2[i, j, k]
if cell2 > 0: # We already know that cell1 is non-zero
intersect += cell1 + cell2
weight += cell2 / delta2_full[i0 + i, j0 + j, k0 + k]
count += 1
# Normalise the intersect and weights
intersect *= 0.5
weight = weight / count if count > 0 else 0.
return weight * intersect / (totmass - intersect)
def dist_centmass(clump):
"""
Calculate the clump particles' distance from the centre of mass.
Parameters
----------
clump : structurered arrays
Clump structured array. Keyes must include `x`, `y`, `z` and `M`.
Returns
-------
dist : 1-dimensional array of shape `(n_particles, )`
Particle distance from the centre of mass.
cm : 1-dimensional array of shape `(3,)`
Center of mass coordinates.
""" """
# CM along each dimension # CM along each dimension
cmx, cmy, cmz = [numpy.average(p, weights=M) for p in (x, y, z)] cmx, cmy, cmz = [numpy.average(clump[p], weights=clump['M'])
for p in ('x', 'y', 'z')]
# Particle distance from the CM # Particle distance from the CM
sep = numpy.sqrt(numpy.square(x - cmx) dist = numpy.sqrt(numpy.square(clump['x'] - cmx)
+ numpy.square(y - cmy) + numpy.square(clump['y'] - cmy)
+ numpy.square(z - cmz)) + numpy.square(clump['z'] - cmz))
qs = numpy.linspace(0, 100, 100) # Percentile: where to evaluate return dist, numpy.asarray([cmx, cmy, cmz])
per = numpy.percentile(sep, qs) # Percentile: evaluated
sep2qs = interp1d(per, qs) # Separation to q-th percentile
# Evaluate in q-th percentile in separation bins
sep_bin = numpy.arange(per[0], per[-1], dr)
q_bin = sep2qs(sep_bin) # Evaluate for everyhing
dq_bin = (q_bin[1:] - q_bin[:-1]) # Take the difference
# Indices when q-th percentile changes below tolerance and is above limit
k = numpy.where((dq_bin < dqperc) & (q_bin[1:] > minperc))[0]
if k.size == 0: def dist_percentile(dist, qs, distmax=0.075):
return per[defperc] # Nothing found, so default percentile """
else: Calculate q-th percentiles of `dist`, with an upper limit of `distmax`.
k = k[0] # Take the first one that satisfies the cut.
size = 0.5 * (sep_bin[k + 1] + sep_bin[k]) # Bin centre Parameters
size = rmax if size > rmax else size # Enforce maximum size ----------
dist : 1-dimensional array
Array of distances.
qs : 1-dimensional array
Percentiles to compute.
distmax : float, optional
The maximum distance. By default 0.075.
return size Returns
-------
x : 1-dimensional array
"""
x = numpy.percentile(dist, qs)
x[x > distmax] = distmax # Enforce the upper limit
return x
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True):
"""
Find all neigbours of a trained KNN model whose center of mass separation
is less than `nmult` times the sum of their respective radii.
Parameters
----------
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
Fitted nearest neighbour search.
X : 2-dimensional array
Array of shape `(n_samples, 3)`, where the latter axis represents
`x`, `y` and `z`.
radiusX: 1-dimensional array of shape `(n_samples, )`
Patch radii corresponding to clumps in `X`.
radiusKNN : 1-dimensional array
Patch radii corresponding to clumps used to train `knn`.
nmult : float, optional
Multiple of the sum of two radii below which to consider a match.
verbose : bool, optional
Verbosity flag.
Returns
-------
dists : 1-dimensional array `(n_samples,)` of arrays
Distance from `X` to matches from `knn`.
indxs : 1-dimensional array `(n_samples,)` of arrays
Matches to `X` from `knn`.
"""
assert X.ndim == 2 and X.shape[1] == 3 # shape of X ok?
assert X.shape[0] == radiusX.size # patchX matches X?
assert radiusKNN.size == knn.n_samples_fit_ # patchknn matches the knn?
nsamples = X.shape[0]
dists = [None] * nsamples # Initiate lists
indxs = [None] * nsamples
patchknn_max = numpy.max(radiusKNN) # Maximum for completeness
for i in trange(nsamples) if verbose else range(nsamples):
dist, indx = knn.radius_neighbors(X[i, :].reshape(-1, 3),
radiusX[i] + patchknn_max,
sort_results=True)
# Note that `dist` and `indx` are wrapped in 1-element arrays
# so we take the first item where appropriate
mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
dists[i] = dist[0][mask]
indxs[i] = indx[0][mask]
dists = numpy.asarray(dists, dtype=object) # Turn into array of arrays
indxs = numpy.asarray(indxs, dtype=object)
return dists, indxs

View file

@ -14,7 +14,8 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa
from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps, # noqa
clumps_pos2cell) # noqa
from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups, SDSS) # noqa TwoMPPGroups, SDSS) # noqa
from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa

View file

@ -130,6 +130,28 @@ class HaloCatalogue:
""" """
return self.paths.n_sim return self.paths.n_sim
@property
def knn(self):
"""
The final snapshot k-nearest neighbour object.
Returns
-------
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
"""
return self._knn
@property
def knn0(self):
"""
The initial snapshot k-nearest neighbour object.
Returns
-------
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
"""
return self._knn0
def _set_data(self, min_m500, max_dist): def _set_data(self, min_m500, max_dist):
""" """
Loads the data, merges with mmain, does various coordinate transforms. Loads the data, merges with mmain, does various coordinate transforms.
@ -189,7 +211,7 @@ class HaloCatalogue:
# And do the unit transform # And do the unit transform
if initcm is not None: if initcm is not None:
data = self.box.convert_from_boxunits( data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "patch_size"]) data, ["x0", "y0", "z0", "lagpatch"])
self._positions0 = numpy.vstack( self._positions0 = numpy.vstack(
[data["{}0".format(p)] for p in ("x", "y", "z")]).T [data["{}0".format(p)] for p in ("x", "y", "z")]).T
self._positions0 = self._positions0.astype(numpy.float32) self._positions0 = self._positions0.astype(numpy.float32)
@ -258,9 +280,9 @@ class HaloCatalogue:
"Ordering of `initcat` and `clumps` is inconsistent.") "Ordering of `initcat` and `clumps` is inconsistent.")
X = numpy.full((clumps.size, 4), numpy.nan) X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z', "patch_size"]): for i, p in enumerate(['x', 'y', 'z', "lagpatch"]):
X[:, i] = initcat[p] X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0", "patch_size"]) return add_columns(clumps, X, ["x0", "y0", "z0", "lagpatch"])
@property @property
def positions(self): def positions(self):
@ -314,30 +336,10 @@ class HaloCatalogue:
""" """
return numpy.vstack([self["L{}".format(p)] for p in ("x", "y", "z")]).T return numpy.vstack([self["L{}".format(p)] for p in ("x", "y", "z")]).T
@property def radius_neigbours(self, X, radius, select_initial=True):
def init_radius(self):
r""" r"""
A fiducial initial radius of particles that are identified as a single Return sorted nearest neigbours within `radius` of `X` in the initial
halo in the final snapshot. Estimated to be or final snapshot.
..math:
R = (3 N / 4 \pi)^{1 / 3} * \Delta
where :math:`N` is the number of particles and `Delta` is the initial
inter-particular distance :math:`Delta = 1 / 2^{11}` in box units. The
output fiducial radius is in comoving units of Mpc.
Returns
-------
R : float
"""
delta = self.box.box2mpc(1 / 2**11)
return (3 * self["npart"] / (4 * numpy.pi))**(1/3) * delta
def radius_neigbours(self, X, radius):
"""
Return sorted nearest neigbours within `radius` of `X` in the final
snapshot.
Parameters Parameters
---------- ----------
@ -346,6 +348,9 @@ class HaloCatalogue:
`x`, `y` and `z`. `x`, `y` and `z`.
radius : float radius : float
Limiting distance of neighbours. Limiting distance of neighbours.
select_initial : bool, optional
Whether to search for neighbours in the initial or final snapshot.
By default `True`, i.e. the final snapshot.
Returns Returns
------- -------
@ -358,35 +363,8 @@ class HaloCatalogue:
""" """
if not (X.ndim == 2 and X.shape[1] == 3): if not (X.ndim == 2 and X.shape[1] == 3):
raise TypeError("`X` must be an array of shape `(n_samples, 3)`.") raise TypeError("`X` must be an array of shape `(n_samples, 3)`.")
# Query the KNN knn = self.knn0 if select_initial else self.knn # Pick the right KNN
return self._knn.radius_neighbors(X, radius, sort_results=True) return knn.radius_neighbors(X, radius, sort_results=True)
def radius_initial_neigbours(self, X, radius):
r"""
Return sorted nearest neigbours within `radius` or `X` in the initial
snapshot.
Parameters
----------
X : 2-dimensional array
Array of shape `(n_queries, 3)`, where the latter axis represents
`x`, `y` and `z`.
radius : float
Limiting distance of neighbours.
Returns
-------
dist : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of distances
to the nearest neighbours.
knns : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of indices of
nearest neighbours in this catalogue.
"""
if not (X.ndim == 2 and X.shape[1] == 3):
raise TypeError("`X` must be an array of shape `(n_samples, 3)`.")
# Query the KNN
return self._knn0.radius_neighbors(X, radius, sort_results=True)
@property @property
def keys(self): def keys(self):

View file

@ -26,7 +26,7 @@ from ..read import ParticleReader
# Map of unit conversions # Map of unit conversions
CONV_NAME = { CONV_NAME = {
"length": ["peak_x", "peak_y", "peak_z", "Rs", "rmin", "rmax", "r200", "length": ["peak_x", "peak_y", "peak_z", "Rs", "rmin", "rmax", "r200",
"r500", "x0", "y0", "z0", "patch_size"], "r500", "x0", "y0", "z0", "lagpatch"],
"mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain"], "mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain"],
"density": ["rho0"] "density": ["rho0"]
} }

View file

@ -14,6 +14,10 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
""" """
MPI script to run the CSiBORG realisations matcher. MPI script to run the CSiBORG realisations matcher.
TODO
----
- [ ] Update this script
""" """
import numpy import numpy
from datetime import datetime from datetime import datetime

View file

@ -20,6 +20,8 @@ Optionally also dumps the clumps information, however watch out as this will
eat up a lot of memory. eat up a lot of memory.
""" """
import numpy import numpy
from argparse import ArgumentParser
from distutils.util import strtobool
from datetime import datetime from datetime import datetime
from mpi4py import MPI from mpi4py import MPI
from os.path import join from os.path import join
@ -37,6 +39,11 @@ comm = MPI.COMM_WORLD
rank = comm.Get_rank() rank = comm.Get_rank()
nproc = comm.Get_size() nproc = comm.Get_size()
# Argument parser
parser = ArgumentParser()
parser.add_argument("--dump_clumps", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()
init_paths = csiborgtools.read.CSiBORGPaths(to_new=True) init_paths = csiborgtools.read.CSiBORGPaths(to_new=True)
fin_paths = csiborgtools.read.CSiBORGPaths(to_new=False) fin_paths = csiborgtools.read.CSiBORGPaths(to_new=False)
nsims = init_paths.ic_ids nsims = init_paths.ic_ids
@ -80,7 +87,7 @@ for nsim in nsims:
collect() collect()
if rank == 0: if rank == 0:
print("{}: dumping clumps for simulation.".format(datetime.now()), print("{}: dumping intermediate files.".format(datetime.now()),
flush=True) flush=True)
# Grab unique clump IDs and loop over them # Grab unique clump IDs and loop over them
@ -93,18 +100,17 @@ for nsim in nsims:
x0 = part0[clump_ids == n] x0 = part0[clump_ids == n]
# Center of mass and Lagrangian patch size # Center of mass and Lagrangian patch size
pos = numpy.vstack([x0[p] for p in ('x', 'y', 'z')]).T dist, cm = csiborgtools.match.dist_centmass(x0)
cm = numpy.average(pos, axis=0, weights=x0['M']) patch = csiborgtools.match.dist_percentile(dist, [99], distmax=0.075)
patch_size = csiborgtools.match.lagpatch_size(
*(x0[p] for p in ('x', 'y', 'z', 'M')))
# Dump the center of mass # Dump the center of mass
with open(ftemp.format(nsim, n, "cm"), 'wb') as f: with open(ftemp.format(nsim, n, "cm"), 'wb') as f:
numpy.save(f, cm) numpy.save(f, cm)
# Dump the Lagrangian patch size # Dump the Lagrangian patch size
with open(ftemp.format(nsim, n, "patch_size"), 'wb') as f: with open(ftemp.format(nsim, n, "lagpatch"), 'wb') as f:
numpy.save(f, patch_size) numpy.save(f, patch)
# Dump the entire clump # Dump the entire clump
if args.dump_clumps:
with open(ftemp.format(nsim, n, "clump"), "wb") as f: with open(ftemp.format(nsim, n, "clump"), "wb") as f:
numpy.save(f, x0) numpy.save(f, x0)
@ -113,9 +119,10 @@ for nsim in nsims:
comm.Barrier() comm.Barrier()
if rank == 0: if rank == 0:
print("Collecting CM files...", flush=True) print("{}: collecting summary files...".format(datetime.now()),
flush=True)
# Collect the centre of masses, patch size, etc. and dump them # Collect the centre of masses, patch size, etc. and dump them
dtype = {"names": ['x', 'y', 'z', "patch_size", "ID"], dtype = {"names": ['x', 'y', 'z', "lagpatch", "ID"],
"formats": [numpy.float32] * 4 + [numpy.int32]} "formats": [numpy.float32] * 4 + [numpy.int32]}
out = numpy.full(njobs, numpy.nan, dtype=dtype) out = numpy.full(njobs, numpy.nan, dtype=dtype)
@ -130,22 +137,25 @@ for nsim in nsims:
remove(fpath) remove(fpath)
# Load in the patch size # Load in the patch size
fpath = ftemp.format(nsim, n, "patch_size") fpath = ftemp.format(nsim, n, "lagpatch")
with open(fpath, "rb") as f: with open(fpath, "rb") as f:
out["patch_size"][i] = numpy.load(f) out["lagpatch"][i] = numpy.load(f)
remove(fpath) remove(fpath)
# Store the halo ID # Store the halo ID
out["ID"][i] = n out["ID"][i] = n
print("Dumping CM files to .. `{}`.".format(fpermcm.format(nsim)), print("{}: dumping to .. `{}`.".format(
flush=True) datetime.now(), fpermcm.format(nsim)), flush=True)
with open(fpermcm.format(nsim), 'wb') as f: with open(fpermcm.format(nsim), 'wb') as f:
numpy.save(f, out) numpy.save(f, out)
print("Collecting clump files...", flush=True) if args.dump_clumps:
print("{}: collecting particle files...".format(datetime.now()),
flush=True)
out = [None] * unique_clumpids.size out = [None] * unique_clumpids.size
dtype = {"names": ["clump", "ID"], "formats": [object, numpy.int32]} dtype = {"names": ["clump", "ID"],
"formats": [object, numpy.int32]}
out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype) out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype)
for i, n in enumerate(unique_clumpids): for i, n in enumerate(unique_clumpids):
fpath = ftemp.format(nsim, n, "clump") fpath = ftemp.format(nsim, n, "clump")
@ -154,8 +164,8 @@ for nsim in nsims:
out["clump"][i] = fin out["clump"][i] = fin
out["ID"][i] = n out["ID"][i] = n
remove(fpath) remove(fpath)
print("Dumping clump files to .. `{}`.".format(fpermpart.format(nsim)), print("{}: dumping to .. `{}`.".format(
flush=True) datetime.now(), fpermpart.format(nsim)), flush=True)
with open(fpermpart.format(nsim), "wb") as f: with open(fpermpart.format(nsim), "wb") as f:
numpy.save(f, out) numpy.save(f, out)

View file

@ -0,0 +1,66 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to test running the CSiBORG realisations matcher.
"""
import numpy
from argparse import ArgumentParser
from distutils.util import strtobool
from datetime import datetime
from os.path import join
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Argument parser
parser = ArgumentParser()
parser.add_argument("--nmult", type=float)
parser.add_argument("--overlap", type=lambda x: bool(strtobool(x)))
parser.add_argument("--select_initial", type=lambda x: bool(strtobool(x)))
parser.add_argument("--fast_neighbours", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()
# File paths
ic = 7468
fperm = join(utils.dumpdir, "overlap", "cross_{}.npy")
paths = csiborgtools.read.CSiBORGPaths(to_new=False)
paths.set_info(ic, paths.get_maximum_snapshot(ic))
print("{}: loading catalogues.".format(datetime.now()), flush=True)
cat = csiborgtools.read.CombinedHaloCatalogue(paths)
matcher = csiborgtools.match.RealisationsMatcher(cat)
nsim0 = cat.n_sims[0]
nsimx = cat.n_sims[1]
print("{}: crossing the simulations.".format(datetime.now()), flush=True)
out = matcher.cross_knn_position_single(
0, nmult=args.nmult, dlogmass=2., overlap=args.overlap,
select_initial=args.select_initial, fast_neighbours=args.fast_neighbours)
# Dump the result
fout = fperm.format(nsim0)
print("Saving results to `{}`.".format(fout), flush=True)
with open(fout, "wb") as f:
numpy.save(fout, out)
print("All finished.", flush=True)