Mirror of https://github.com/Richard-Sti/csiborgtools.git
Speed up overlap (#27)
* Edit import
* Simplify patch size calculation
* Add patch size percentiles
* Add various percentiles
* Remove comment
* Update TODO
* Change to 95th percentile
* Add import
* Add KNN properties
* Add new matching initial condition
* Add import
* Remove import
* Add fast neighbours option
* Further edits to fast neighbours
* Add imports
* Add new overlap calculation and non-zero things
* Remove print
* Clean up code
* Fix small bug
* Remove comment
* Add run single cross match
* Change values
* Edit hyperparams
* Add comment
* Add the argument parser
* Add new lagpatch calc
* New lagpatch calc
* Delete old patch definitions
* Make clump dumping once again optional
* Add lagpatch to the catalogue
* Edit print statement
* Fix small bug
* Remove init radius
* Change to lagpatch key
* Fix a small bug
* Fix little bug
This commit is contained in:
parent beb811e84c
commit 8dea3da4de

9 changed files with 418 additions and 193 deletions
@@ -3,10 +3,7 @@

## CSiBORG Matching

### TODO
- [x] Implement CIC binning or an alternative scheme for nearby objects.
- [x] Consistently locate region spanned by a single halo.
- [x] Write a script to perform the matching on a node.
- [x] Make a coarser grid for halos outside of the well resolved region.
- [ ] Modify the call to tN

### Questions
- What scaling of the search region? No reason for it to be a multiple of $R_{200c}$.
@@ -14,6 +14,8 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

from .match import (brute_spatial_separation, RealisationsMatcher, cosine_similarity,  # noqa
ParticleOverlap, get_clumplims, lagpatch_size)  # noqa
ParticleOverlap, get_clumplims, fill_delta, fill_delta_indxs,  # noqa
calculate_overlap, calculate_overlap_indxs,  # noqa
dist_centmass, dist_percentile)  # noqa
from .num_density import (binned_counts, number_density)  # noqa
# from .correlation import (get_randoms_sphere, sphere_angular_tpcf)  # noqa


@@ -14,7 +14,6 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

import numpy
from scipy.interpolate import interp1d
from scipy.ndimage import gaussian_filter
from tqdm import (tqdm, trange)
from astropy.coordinates import SkyCoord
@@ -155,15 +154,16 @@ class RealisationsMatcher:
mapping[ind2] = ind1
return mapping

def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=None,
def cross_knn_position_single(self, n_sim, nmult=1, dlogmass=None,
mass_kind="totpartmass", overlap=False,
overlapper_kwargs={}, select_initial=True,
remove_nooverlap=True, verbose=True):
remove_nooverlap=True, fast_neighbours=False,
verbose=True):
r"""
Find all neighbours within a multiple of either :math:`R_{\rm init}`
(distance at :math:`z = 70`) or :math:`R_{200c}` (distance at
:math:`z = 0`) of halos in the `nsim`th simulation. Enforces that the
neighbours are similar in mass up to `dlogmass` dex.
Find all neighbours within a multiple of the sum of either the initial
Lagrangian patch sizes (distance at :math:`z = 70`) or :math:`R_{200c}`
(distance at :math:`z = 0`). Enforces that the neighbours are similar
in mass up to `dlogmass` dex and optionally calculates their overlap.

Parameters
----------

@@ -171,8 +171,8 @@ class RealisationsMatcher:
Index of an IC realisation in `self.cats` whose halos' neighbours
in the remaining simulations to search for.
nmult : float or int, optional
Multiple of :math:`R_{\rm init}` or :math:`R_{200c}` within which
to return neighbours. By default 5.
Multiple of the sum of pair Lagrangian patch sizes or
:math:`R_{200c}` within which to return neighbours. By default 1.
dlogmass : float, optional
Tolerance on the logarithmic mass difference. By default `None`.
mass_kind : str, optional

@@ -190,6 +190,11 @@ class RealisationsMatcher:
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default
`True`.
fast_neighbours : bool, optional
Whether to calculate neighbours within a fixed radius of each
clump. Note that this will result in missing some matches. If
`True` then `nmult` is a multiple of either the initial patch size
or :math:`R_{200c}`.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
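For orientation, here is a minimal sketch of how the updated signature might be invoked. It mirrors the new `scripts/run_singlematch.py` further down in this diff; the IC index and argument values are purely illustrative.

```python
import csiborgtools

# Setup mirroring `scripts/run_singlematch.py`; the IC index 7468 is the one
# used in that script and is only an example here.
paths = csiborgtools.read.CSiBORGPaths(to_new=False)
paths.set_info(7468, paths.get_maximum_snapshot(7468))
cat = csiborgtools.read.CombinedHaloCatalogue(paths)
matcher = csiborgtools.match.RealisationsMatcher(cat)

# Match haloes of the 0th catalogue against the remaining ones, using the
# Lagrangian patch sizes and the exhaustive (non-fast) neighbour search.
matches = matcher.cross_knn_position_single(
    0, nmult=1, dlogmass=2., overlap=True, select_initial=True,
    fast_neighbours=False)
```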
@@ -205,12 +210,12 @@
self._check_masskind(mass_kind)
# Halo properties of this simulation
logmass = numpy.log10(self.cats[n_sim][mass_kind])
pos = self.cats[n_sim].positions  # Grav potential minimum
pos0 = self.cats[n_sim].positions0  # CM positions
pos = self.cats[n_sim].positions  # Grav potential minimum
pos0 = self.cats[n_sim].positions0  # CM positions
if select_initial:
R = self.cats[n_sim]["patch_size"]  # Initial Lagrangian patch size
R = self.cats[n_sim]["lagpatch"]  # Initial Lagrangian patch size
else:
R = self.cats[n_sim]["r200"]  # R200c at z = 0
R = self.cats[n_sim]["r200"]  # R200c at z = 0

if overlap:
overlapper = ParticleOverlap(**overlapper_kwargs)

@@ -238,13 +243,29 @@
iters = enumerate(self.search_sim_indices(n_sim))
# Search for neighbours in the other simulations at z = 70
for count, i in iters:
if verbose:
print("Querying the KNN for `n_sim = {}`.".format(n_sim),
flush=True)
# Query the KNN either fast (miss some) or slow (get all)
if select_initial:
dist0, indxs = self.cats[i].radius_initial_neigbours(
pos0, R * nmult)
if fast_neighbours:
dist0, indxs = self.cats[i].radius_neigbours(
pos0, R * nmult, select_initial=True)
else:
dist0, indxs = radius_neighbours(
self.cats[i].knn0, pos0, radiusX=R,
radiusKNN=self.cats[i]["lagpatch"], nmult=nmult,
verbose=verbose)
else:
# Will switch dist0 <-> dist at the end
dist0, indxs = self.cats[i].radius_neigbours(
pos, R * nmult)
if fast_neighbours:
dist0, indxs = self.cats[i].radius_neigbours(
pos, R * nmult, select_initial=False)
else:
dist0, indxs = radius_neighbours(
self.cats[i].knn, pos, radiusX=R,
radiusKNN=self.cats[i]["r200"], nmult=nmult,
verbose=verbose)
# Enforce int32 and float32
for n in range(dist0.size):
dist0[n] = dist0[n].astype(numpy.float32)
@@ -303,8 +324,9 @@
# Find which clump matches index of this halo from cat
match0 = cat2clumps0[k]

# Unpack this clump and its mins and maxs
# Unpack this clump, its mass and mins and maxs
cl0 = clumps0["clump"][match0]
mass0 = numpy.sum(cl0['M'])
mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Preallocate this array.
crosses = numpy.full(indxs[k].size, numpy.nan,

@@ -314,10 +336,11 @@
for ii, ind in enumerate(indxs[k]):
# Again which cross clump to this index
matchx = cat2clumpsx[ind]
clx = clumpsx["clump"][matchx]
crosses[ii] = overlapper(
cl0, clumpsx["clump"][matchx], delta,
mins0_current, maxs0_current,
minsx[matchx], maxsx[matchx])
cl0, clx, delta, mins0_current, maxs0_current,
minsx[matchx], maxsx[matchx],
mass1=mass0, mass2=numpy.sum(clx['M']))

cross[k] = crosses
# Optionally remove points whose overlap is exactly zero
@ -338,30 +361,26 @@ class RealisationsMatcher:
|
|||
|
||||
return numpy.asarray(matches, dtype=object)
|
||||
|
||||
def cross_knn_position_all(self, nmult=5, dlogmass=None,
|
||||
mass_kind="totpartmass", init_dist=False,
|
||||
overlap=False, overlapper_kwargs={},
|
||||
def cross_knn_position_all(self, nmult=1, dlogmass=None,
|
||||
mass_kind="totpartmass", overlap=False,
|
||||
overlapper_kwargs={},
|
||||
select_initial=True, remove_nooverlap=True,
|
||||
verbose=True):
|
||||
r"""
|
||||
Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
|
||||
all simulations listed in `self.cats`. Also enforces that the
|
||||
neighbours' :math:`\log M_{200c}` be within `dlogmass` dex.
|
||||
Find all counterparts of halos in all simulations listed in
|
||||
`self.cats`. See `self.cross_knn_position_single` for more details.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
nmult : float or int, optional
|
||||
Multiple of :math:`R_{200c}` within which to return neighbours. By
|
||||
default 5.
|
||||
Multiple of the sum of pair Lagrangian patch sizes or
|
||||
:math:`R_{200c}` within which to return neighbours. By default 1.
|
||||
dlogmass : float, optional
|
||||
Tolerance on mass logarithmic mass difference. By default `None`.
|
||||
mass_kind : str, optional
|
||||
The mass kind whose similarity is to be checked. Must be a valid
|
||||
catalogue key. By default `totpartmass`, i.e. the total particle
|
||||
mass associated with a halo.
|
||||
init_dist : bool, optional
|
||||
Whether to calculate separation of the initial CMs. By default
|
||||
`False`.
|
||||
overlap : bool, optional
|
||||
Whether to calculate overlap between clumps in the initial
|
||||
snapshot. By default `False`. Note that this operation is
|
||||
|
@ -388,8 +407,7 @@ class RealisationsMatcher:
|
|||
# Loop over each catalogue
|
||||
for i in trange(N) if verbose else range(N):
|
||||
matches[i] = self.cross_knn_position_single(
|
||||
i, nmult, dlogmass, mass_kind=mass_kind,
|
||||
init_dist=init_dist, overlap=overlap,
|
||||
i, nmult, dlogmass, mass_kind=mass_kind, overlap=overlap,
|
||||
overlapper_kwargs=overlapper_kwargs,
|
||||
select_initial=select_initial,
|
||||
remove_nooverlap=remove_nooverlap, verbose=verbose)
|
||||
|
@@ -599,7 +617,7 @@ class ParticleOverlap:
return delta

def make_deltas(self, clump1, clump2, mins1=None, maxs1=None,
mins2=None, maxs2=None):
mins2=None, maxs2=None, return_nonzero1=False):
"""
Calculate the NGP density fields of two halos on a grid that encloses
them both.

@@ -622,6 +640,9 @@
Density arrays of `clump1` and `clump2`, respectively.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
nonzero1 : 2-dimensional array
Indices where `delta1` has a non-zero density. If `return_nonzero1`
is `False`, `None` is returned instead.
"""
xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ('x', 'y', 'z'))
xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ('x', 'y', 'z'))
@@ -648,16 +669,22 @@
cellmins = (xmin, ymin, zmin, )  # Cell minima
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1  # Num cells

# Preallocate and fill the array
# Preallocate and fill the arrays
delta1 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
delta2 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
if return_nonzero1:
nonzero1 = fill_delta_indxs(
delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
else:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
nonzero1 = None
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M'])

if self.smooth_scale is not None:
gaussian_filter(delta1, self.smooth_scale, output=delta1)
gaussian_filter(delta2, self.smooth_scale, output=delta2)
return delta1, delta2, cellmins

return delta1, delta2, cellmins, nonzero1

@staticmethod
def overlap(delta1, delta2, cellmins, delta2_full):
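To make the shared-grid bookkeeping above concrete, here is a small self-contained sketch of the same idea with toy cell indices; all the names are illustrative and not part of the library.

```python
import numpy

# Toy NGP grids for two clumps on a common cube that just encloses both,
# mirroring the cellmins/ncells logic of `make_deltas`.
xc1, yc1, zc1 = numpy.array([10, 12, 11]), numpy.array([20, 21, 20]), numpy.array([5, 6, 5])
xc2, yc2, zc2 = numpy.array([13, 14]), numpy.array([19, 22]), numpy.array([7, 8])

xmin, xmax = min(xc1.min(), xc2.min()), max(xc1.max(), xc2.max())
ymin, ymax = min(yc1.min(), yc2.min()), max(yc1.max(), yc2.max())
zmin, zmax = min(zc1.min(), zc2.min()), max(zc1.max(), zc2.max())

cellmins = (xmin, ymin, zmin)                            # left-most cell IDs
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1  # cubic grid side
delta1 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
delta2 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
# Particle masses are then deposited cell-by-cell with fill_delta or
# fill_delta_indxs, using the indices shifted by cellmins.
```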
@@ -679,10 +706,11 @@
-------
overlap : float
"""
return _calculate_overlap(delta1, delta2, cellmins, delta2_full)
return calculate_overlap(delta1, delta2, cellmins, delta2_full)

def __call__(self, clump1, clump2, delta2_full, mins1=None, maxs1=None,
mins2=None, maxs2=None):
mins2=None, maxs2=None, mass1=None, mass2=None,
loop_nonzero=True):
"""
Calculate overlap between `clump1` and `clump2`. See
`self.overlap(...)` and `self.make_deltas(...)` for further

@@ -704,21 +732,33 @@
mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `clump2`.
Optional.
mass1, mass2 : floats, optional
Total mass of `clump1` and `clump2`, respectively. Must be provided
if `loop_nonzero` is `True`.
loop_nonzero : bool, optional
Whether to only loop over cells where `clump1` has non-zero
density. By default `True`.

Returns
-------
overlap : float
"""
delta1, delta2, cellmins = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2)
return _calculate_overlap(delta1, delta2, cellmins, delta2_full)
delta1, delta2, cellmins, nonzero1 = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2,
return_nonzero1=loop_nonzero)

if not loop_nonzero:
return calculate_overlap(delta1, delta2, cellmins, delta2_full)

return calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full,
nonzero1, mass1, mass2)


@jit(nopython=True)
def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
"""
Fill array delta at the specified indices with their weights. This is a JIT
implementation.
Fill array `delta` at the specified indices with their weights. This is a
JIT implementation.

Parameters
----------
@@ -735,8 +775,45 @@ def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
-------
None
"""
for i in range(xcell.size):
delta[xcell[i] - xmin, ycell[i] - ymin, zcell[i] - zmin] += weights[i]
for n in range(xcell.size):
delta[xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin] += weights[n]


@jit(nopython=True)
def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
"""
Fill array `delta` at the specified indices with their weights and return
indices where `delta` was assigned a value. This is a JIT implementation.

Parameters
----------
delta : 3-dimensional array
Grid to be filled with weights.
xcell, ycell, zcell : 1-dimensional arrays
Indices where to assign `weights`.
xmin, ymin, zmin : ints
Minimum cell IDs of particles.
weights : 1-dimensional arrays
Particle mass.

Returns
-------
cells : 2-dimensional array of shape `(n_nonzero, 3)`
Indices where `delta` was assigned a value.
"""
# Array to count non-zero cells
cells = numpy.full((xcell.size, 3), numpy.nan, numpy.int32)
count_nonzero = 0
for n in range(xcell.size):
i, j, k = xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin
# If a cell is zero add it
if delta[i, j, k] == 0:
cells[count_nonzero, :] = i, j, k
count_nonzero += 1

delta[i, j, k] += weights[n]

return cells[:count_nonzero, :]  # Cut off unassigned places


def get_clumplims(clumps, ncells, nshift=None):
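A tiny pure-NumPy sketch of the semantics of `fill_delta_indxs` with toy indices and masses; it is not the actual Numba implementation, just the same bookkeeping spelled out.

```python
import numpy

# Cell indices of five particles (already shifted by the cell minima) and
# their masses; two particles share the first cell, two share another.
cells_xyz = numpy.array([[0, 0, 0], [0, 0, 0], [1, 2, 3], [1, 2, 3], [4, 4, 4]])
weights = numpy.array([1.0, 2.0, 0.5, 0.5, 3.0], dtype=numpy.float32)

delta = numpy.zeros((5, 5, 5), dtype=numpy.float32)
nonzero = []
for (i, j, k), w in zip(cells_xyz, weights):
    if delta[i, j, k] == 0:      # first particle to land in this cell
        nonzero.append((i, j, k))
    delta[i, j, k] += w

print(delta[0, 0, 0], delta[1, 2, 3], delta[4, 4, 4])  # 3.0 1.0 3.0
print(nonzero)  # [(0, 0, 0), (1, 2, 3), (4, 4, 4)] -- unique non-zero cells
```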
@@ -776,7 +853,7 @@ def get_clumplims(clumps, ncells, nshift=None):


@jit(nopython=True)
def _calculate_overlap(delta1, delta2, cellmins, delta2_full):
def calculate_overlap(delta1, delta2, cellmins, delta2_full):
r"""
Overlap between two clumps whose density fields are evaluated on the
same grid. This is a JIT implementation, hence it is outside of the main

@@ -796,14 +873,13 @@
-------
overlap : float
"""
totmass = 0.  # Total mass of clump 1 and clump 2
intersect = 0.  # Mass of pixels that are non-zero in both clumps
weight = 0.  # Weight to account for other halos
count = 0  # Total number of pixels that are both non-zero
i0, j0, k0 = cellmins  # Unpack things
imax, jmax, kmax = delta1.shape

totmass = 0.  # Total mass of clump 1 and clump 2
intersect = 0.  # Mass of pixels that are non-zero in both clumps
weight = 0.  # Weight to account for other halos
count = 0  # Total number of pixels that are both non-zero

i0, j0, k0 = cellmins  # Unpack things
for i in range(imax):
ii = i0 + i
for j in range(jmax):
@@ -826,62 +902,153 @@ def _calculate_overlap(delta1, delta2, cellmins, delta2_full):
return weight * intersect / (totmass - intersect)


def lagpatch_size(x, y, z, M, dr=0.0025, dqperc=1, minperc=75, defperc=95,
rmax=0.075):
"""
Calculate an approximate Lagrangian patch size in the initial conditions.
Returned as the first bin whose percentile drops by less than `dqperc` and
is above `minperc`. Note that all distances must be in box units.
@jit(nopython=True)
def calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full, nonzero1,
mass1, mass2):
r"""
Overlap between two clumps whose density fields are evaluated on the
same grid and `nonzero1` enumerates the non-zero cells of `delta1`. This is
a JIT implementation, hence it is outside of the main class.

Parameters
----------
x, y, z : 1-dimensional arrays
Particle coordinates.
M : 1-dimensional array
Particle masses.
dr : float, optional
Separation spacing to evaluate q-th percentile change. Optional, by
default 0.0025.
dqperc : int or float, optional
Change of q-th percentile in a bin to find a threshold separation.
Optional, by default 1.
minperc : int or float, optional
Minimum q-th percentile of separation to be considered a patch size.
Optional, by default 75.
defperc : int or float, optional
Default q-th percentile if reduction by `minperc` is not satisfied in
any bin. Optional. By default 95.
rmax : float, optional
The maximum allowed patch size. Optional, by default 0.075.
delta1, delta2 : 3-dimensional arrays
Clumps density fields.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta2_full : 3-dimensional array
Density field of the whole box calculated with particles assigned
to halos at zero redshift.
nonzero1 : 2-dimensional array of shape `(n_cells, 3)`
Indices of cells that are non-zero in `delta1`. Expected to be
precomputed from `fill_delta_indxs`.
mass1, mass2 : floats, optional
Total masses of the two clumps, respectively. Optional. If not provided
calculated directly from the density field.

Returns
-------
size : float
overlap : float
"""
totmass = mass1 + mass2  # Total mass of clump 1 and clump 2
intersect = 0.  # Mass of pixels that are non-zero in both clumps
weight = 0.  # Weight to account for other halos
count = 0  # Total number of pixels that are both non-zero
i0, j0, k0 = cellmins  # Unpack cell minima

ncells = nonzero1.shape[0]

for n in range(ncells):
i, j, k = nonzero1[n, :]
cell1, cell2 = delta1[i, j, k], delta2[i, j, k]

if cell2 > 0:  # We already know that cell1 is non-zero
intersect += cell1 + cell2
weight += cell2 / delta2_full[i0 + i, j0 + j, k0 + k]
count += 1

# Normalise the intersect and weights
intersect *= 0.5
weight = weight / count if count > 0 else 0.
return weight * intersect / (totmass - intersect)
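Reading the new loop off directly, the returned quantity can be written as follows; the symbols are ours, chosen only to label the variables in the code above.

```latex
\mathcal{O}_{12}
  = \bar{w}\,\frac{\mathcal{I}_{12}}{M_1 + M_2 - \mathcal{I}_{12}},
\qquad
\mathcal{I}_{12}
  = \frac{1}{2}\sum_{\substack{\mathrm{cells}\\ \delta_1 > 0,\ \delta_2 > 0}}
    \left(\delta_1 + \delta_2\right),
\qquad
\bar{w}
  = \frac{1}{N_{\rm both}}\sum_{\substack{\mathrm{cells}\\ \delta_1 > 0,\ \delta_2 > 0}}
    \frac{\delta_2}{\delta_2^{\rm full}}
```

Here $M_1 + M_2$ is `totmass`, $N_{\rm both}$ is `count` (the number of cells in which both fields are non-zero), and $\delta_2^{\rm full}$ is the full-box field used to down-weight cells shared with other haloes.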

def dist_centmass(clump):
"""
Calculate the clump particles' distance from the centre of mass.

Parameters
----------
clump : structured array
Clump structured array. Keys must include `x`, `y`, `z` and `M`.

Returns
-------
dist : 1-dimensional array of shape `(n_particles, )`
Particle distance from the centre of mass.
cm : 1-dimensional array of shape `(3,)`
Centre of mass coordinates.
"""
# CM along each dimension
cmx, cmy, cmz = [numpy.average(p, weights=M) for p in (x, y, z)]
cmx, cmy, cmz = [numpy.average(clump[p], weights=clump['M'])
for p in ('x', 'y', 'z')]
# Particle distance from the CM
sep = numpy.sqrt(numpy.square(x - cmx)
+ numpy.square(y - cmy)
+ numpy.square(z - cmz))
dist = numpy.sqrt(numpy.square(clump['x'] - cmx)
+ numpy.square(clump['y'] - cmy)
+ numpy.square(clump['z'] - cmz))

qs = numpy.linspace(0, 100, 100)  # Percentile: where to evaluate
per = numpy.percentile(sep, qs)  # Percentile: evaluated
sep2qs = interp1d(per, qs)  # Separation to q-th percentile
return dist, numpy.asarray([cmx, cmy, cmz])

# Evaluate in q-th percentile in separation bins
sep_bin = numpy.arange(per[0], per[-1], dr)
q_bin = sep2qs(sep_bin)  # Evaluate for everything
dq_bin = (q_bin[1:] - q_bin[:-1])  # Take the difference
# Indices when q-th percentile changes below tolerance and is above limit
k = numpy.where((dq_bin < dqperc) & (q_bin[1:] > minperc))[0]

if k.size == 0:
return per[defperc]  # Nothing found, so default percentile
else:
k = k[0]  # Take the first one that satisfies the cut.
def dist_percentile(dist, qs, distmax=0.075):
"""
Calculate q-th percentiles of `dist`, with an upper limit of `distmax`.

size = 0.5 * (sep_bin[k + 1] + sep_bin[k])  # Bin centre
size = rmax if size > rmax else size  # Enforce maximum size
Parameters
----------
dist : 1-dimensional array
Array of distances.
qs : 1-dimensional array
Percentiles to compute.
distmax : float, optional
The maximum distance. By default 0.075.

return size
Returns
-------
x : 1-dimensional array
"""
x = numpy.percentile(dist, qs)
x[x > distmax] = distmax  # Enforce the upper limit
return x
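Putting the two new helpers together, the Lagrangian patch size of a clump is now simply a capped high percentile of the particle distances from the centre of mass. A minimal sketch with a synthetic clump; the structured-array layout follows the docstring above and the random data are only illustrative.

```python
import numpy

# Synthetic clump in box units: positions clustered around (0.5, 0.5, 0.5).
rng = numpy.random.default_rng(0)
clump = numpy.zeros(1000, dtype=[('x', numpy.float32), ('y', numpy.float32),
                                 ('z', numpy.float32), ('M', numpy.float32)])
for p in ('x', 'y', 'z'):
    clump[p] = 0.5 + 0.01 * rng.normal(size=clump.size)
clump['M'] = rng.uniform(1.0, 2.0, size=clump.size)

# Same steps as `dist_centmass` followed by
# `dist_percentile(dist, [99], distmax=0.075)`.
cm = numpy.asarray([numpy.average(clump[p], weights=clump['M'])
                    for p in ('x', 'y', 'z')])
dist = numpy.sqrt((clump['x'] - cm[0])**2 + (clump['y'] - cm[1])**2
                  + (clump['z'] - cm[2])**2)
lagpatch = min(numpy.percentile(dist, 99), 0.075)  # upper limit enforced
```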

def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True):
"""
Find all neighbours of a trained KNN model whose center of mass separation
is less than `nmult` times the sum of their respective radii.

Parameters
----------
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
Fitted nearest neighbour search.
X : 2-dimensional array
Array of shape `(n_samples, 3)`, where the latter axis represents
`x`, `y` and `z`.
radiusX : 1-dimensional array of shape `(n_samples, )`
Patch radii corresponding to clumps in `X`.
radiusKNN : 1-dimensional array
Patch radii corresponding to clumps used to train `knn`.
nmult : float, optional
Multiple of the sum of two radii below which to consider a match.
verbose : bool, optional
Verbosity flag.

Returns
-------
dists : 1-dimensional array `(n_samples,)` of arrays
Distance from `X` to matches from `knn`.
indxs : 1-dimensional array `(n_samples,)` of arrays
Matches to `X` from `knn`.
"""
assert X.ndim == 2 and X.shape[1] == 3  # shape of X OK?
assert X.shape[0] == radiusX.size  # patchX matches X?
assert radiusKNN.size == knn.n_samples_fit_  # patchknn matches the knn?

nsamples = X.shape[0]
dists = [None] * nsamples  # Initiate lists
indxs = [None] * nsamples
patchknn_max = numpy.max(radiusKNN)  # Maximum for completeness

for i in trange(nsamples) if verbose else range(nsamples):
dist, indx = knn.radius_neighbors(X[i, :].reshape(-1, 3),
radiusX[i] + patchknn_max,
sort_results=True)
# Note that `dist` and `indx` are wrapped in 1-element arrays
# so we take the first item where appropriate
mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
dists[i] = dist[0][mask]
indxs[i] = indx[0][mask]

dists = numpy.asarray(dists, dtype=object)  # Turn into array of arrays
indxs = numpy.asarray(indxs, dtype=object)
return dists, indxs
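The query pattern above can be reproduced with plain scikit-learn. The following toy sketch (random positions and radii, `nmult = 1`) mirrors the completeness trick of querying out to the largest training-patch radius and then cutting on the pair-wise sum of radii; all data and names are illustrative.

```python
import numpy
from sklearn.neighbors import NearestNeighbors

rng = numpy.random.default_rng(42)
pos_train = rng.uniform(0, 1, size=(1000, 3))    # clump CMs in box units
rad_train = rng.uniform(0.005, 0.03, size=1000)  # their Lagrangian patch radii
pos_query = rng.uniform(0, 1, size=(100, 3))
rad_query = rng.uniform(0.005, 0.03, size=100)

knn = NearestNeighbors().fit(pos_train)
patchknn_max = rad_train.max()  # query radius that cannot miss a valid pair

dists, indxs = [], []
for i in range(pos_query.shape[0]):
    dist, indx = knn.radius_neighbors(pos_query[i].reshape(1, 3),
                                      rad_query[i] + patchknn_max,
                                      sort_results=True)
    keep = dist[0] < (rad_query[i] + rad_train[indx[0]])  # nmult = 1
    dists.append(dist[0][keep])
    indxs.append(indx[0][keep])
```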
@@ -14,7 +14,8 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select)  # noqa
from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps, clumps_pos2cell)  # noqa
from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps,  # noqa
clumps_pos2cell)  # noqa
from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies,  # noqa
TwoMPPGroups, SDSS)  # noqa
from .outsim import (dump_split, combine_splits, make_ascii_powmes)  # noqa
@@ -130,6 +130,28 @@ class HaloCatalogue:
"""
return self.paths.n_sim

@property
def knn(self):
"""
The final snapshot k-nearest neighbour object.

Returns
-------
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
"""
return self._knn

@property
def knn0(self):
"""
The initial snapshot k-nearest neighbour object.

Returns
-------
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
"""
return self._knn0

def _set_data(self, min_m500, max_dist):
"""
Loads the data, merges with mmain, does various coordinate transforms.
@@ -189,7 +211,7 @@ class HaloCatalogue:
# And do the unit transform
if initcm is not None:
data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "patch_size"])
data, ["x0", "y0", "z0", "lagpatch"])
self._positions0 = numpy.vstack(
[data["{}0".format(p)] for p in ("x", "y", "z")]).T
self._positions0 = self._positions0.astype(numpy.float32)

@@ -258,9 +280,9 @@ class HaloCatalogue:
"Ordering of `initcat` and `clumps` is inconsistent.")

X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z', "patch_size"]):
for i, p in enumerate(['x', 'y', 'z', "lagpatch"]):
X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0", "patch_size"])
return add_columns(clumps, X, ["x0", "y0", "z0", "lagpatch"])

@property
def positions(self):
@@ -314,30 +336,10 @@ class HaloCatalogue:
"""
return numpy.vstack([self["L{}".format(p)] for p in ("x", "y", "z")]).T

@property
def init_radius(self):
def radius_neigbours(self, X, radius, select_initial=True):
r"""
A fiducial initial radius of particles that are identified as a single
halo in the final snapshot. Estimated to be

.. math::
    R = \left(\frac{3 N}{4 \pi}\right)^{1/3} \Delta

where :math:`N` is the number of particles and :math:`\Delta` is the
initial inter-particle distance :math:`\Delta = 1 / 2^{11}` in box
units. The output fiducial radius is in comoving units of Mpc.

Returns
-------
R : float
"""
delta = self.box.box2mpc(1 / 2**11)
return (3 * self["npart"] / (4 * numpy.pi))**(1/3) * delta

def radius_neigbours(self, X, radius):
"""
Return sorted nearest neighbours within `radius` of `X` in the final
snapshot.
Return sorted nearest neighbours within `radius` of `X` in the initial
or final snapshot.

Parameters
----------

@@ -346,6 +348,9 @@ class HaloCatalogue:
`x`, `y` and `z`.
radius : float
Limiting distance of neighbours.
select_initial : bool, optional
Whether to search for neighbours in the initial or final snapshot.
By default `True`, i.e. the initial snapshot.

Returns
-------
@@ -358,35 +363,8 @@ class HaloCatalogue:
"""
if not (X.ndim == 2 and X.shape[1] == 3):
raise TypeError("`X` must be an array of shape `(n_samples, 3)`.")
# Query the KNN
return self._knn.radius_neighbors(X, radius, sort_results=True)

def radius_initial_neigbours(self, X, radius):
r"""
Return sorted nearest neighbours within `radius` of `X` in the initial
snapshot.

Parameters
----------
X : 2-dimensional array
Array of shape `(n_queries, 3)`, where the latter axis represents
`x`, `y` and `z`.
radius : float
Limiting distance of neighbours.

Returns
-------
dist : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of distances
to the nearest neighbours.
knns : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of indices of
nearest neighbours in this catalogue.
"""
if not (X.ndim == 2 and X.shape[1] == 3):
raise TypeError("`X` must be an array of shape `(n_samples, 3)`.")
# Query the KNN
return self._knn0.radius_neighbors(X, radius, sort_results=True)
knn = self.knn0 if select_initial else self.knn  # Pick the right KNN
return knn.radius_neighbors(X, radius, sort_results=True)

@property
def keys(self):
@@ -26,7 +26,7 @@ from ..read import ParticleReader
# Map of unit conversions
CONV_NAME = {
"length": ["peak_x", "peak_y", "peak_z", "Rs", "rmin", "rmax", "r200",
"r500", "x0", "y0", "z0", "patch_size"],
"r500", "x0", "y0", "z0", "lagpatch"],
"mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain"],
"density": ["rho0"]
}
@@ -14,6 +14,10 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
MPI script to run the CSiBORG realisations matcher.

TODO
----
- [ ] Update this script
"""
import numpy
from datetime import datetime


@@ -20,6 +20,8 @@ Optionally also dumps the clumps information, however watch out as this will
eat up a lot of memory.
"""
import numpy
from argparse import ArgumentParser
from distutils.util import strtobool
from datetime import datetime
from mpi4py import MPI
from os.path import join
@@ -37,6 +39,11 @@ comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

# Argument parser
parser = ArgumentParser()
parser.add_argument("--dump_clumps", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()

init_paths = csiborgtools.read.CSiBORGPaths(to_new=True)
fin_paths = csiborgtools.read.CSiBORGPaths(to_new=False)
nsims = init_paths.ic_ids
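The `type=lambda x: bool(strtobool(x))` idiom lets the flag accept explicit `true`/`false` (or `yes`/`no`, `1`/`0`) strings on the command line rather than acting as a store-true switch. A quick standalone check of the same pattern:

```python
from argparse import ArgumentParser
from distutils.util import strtobool

parser = ArgumentParser()
parser.add_argument("--dump_clumps", type=lambda x: bool(strtobool(x)))

# Parsing explicit strings, e.g. `--dump_clumps false` on the command line.
print(parser.parse_args(["--dump_clumps", "false"]).dump_clumps)  # False
print(parser.parse_args(["--dump_clumps", "yes"]).dump_clumps)    # True
```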
@@ -80,7 +87,7 @@ for nsim in nsims:
collect()

if rank == 0:
print("{}: dumping clumps for simulation.".format(datetime.now()),
print("{}: dumping intermediate files.".format(datetime.now()),
flush=True)

# Grab unique clump IDs and loop over them

@@ -93,29 +100,29 @@ for nsim in nsims:
x0 = part0[clump_ids == n]

# Center of mass and Lagrangian patch size
pos = numpy.vstack([x0[p] for p in ('x', 'y', 'z')]).T
cm = numpy.average(pos, axis=0, weights=x0['M'])
patch_size = csiborgtools.match.lagpatch_size(
*(x0[p] for p in ('x', 'y', 'z', 'M')))
dist, cm = csiborgtools.match.dist_centmass(x0)
patch = csiborgtools.match.dist_percentile(dist, [99], distmax=0.075)

# Dump the center of mass
with open(ftemp.format(nsim, n, "cm"), 'wb') as f:
numpy.save(f, cm)
# Dump the Lagrangian patch size
with open(ftemp.format(nsim, n, "patch_size"), 'wb') as f:
numpy.save(f, patch_size)
with open(ftemp.format(nsim, n, "lagpatch"), 'wb') as f:
numpy.save(f, patch)
# Dump the entire clump
with open(ftemp.format(nsim, n, "clump"), "wb") as f:
numpy.save(f, x0)
if args.dump_clumps:
with open(ftemp.format(nsim, n, "clump"), "wb") as f:
numpy.save(f, x0)

del part0, clump_ids
collect()

comm.Barrier()
if rank == 0:
print("Collecting CM files...", flush=True)
print("{}: collecting summary files...".format(datetime.now()),
flush=True)
# Collect the centre of masses, patch size, etc. and dump them
dtype = {"names": ['x', 'y', 'z', "patch_size", "ID"],
dtype = {"names": ['x', 'y', 'z', "lagpatch", "ID"],
"formats": [numpy.float32] * 4 + [numpy.int32]}
out = numpy.full(njobs, numpy.nan, dtype=dtype)
@@ -130,34 +137,37 @@ for nsim in nsims:
remove(fpath)

# Load in the patch size
fpath = ftemp.format(nsim, n, "patch_size")
fpath = ftemp.format(nsim, n, "lagpatch")
with open(fpath, "rb") as f:
out["patch_size"][i] = numpy.load(f)
out["lagpatch"][i] = numpy.load(f)
remove(fpath)

# Store the halo ID
out["ID"][i] = n

print("Dumping CM files to .. `{}`.".format(fpermcm.format(nsim)),
flush=True)
print("{}: dumping to .. `{}`.".format(
datetime.now(), fpermcm.format(nsim)), flush=True)
with open(fpermcm.format(nsim), 'wb') as f:
numpy.save(f, out)

print("Collecting clump files...", flush=True)
out = [None] * unique_clumpids.size
dtype = {"names": ["clump", "ID"], "formats": [object, numpy.int32]}
out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype)
for i, n in enumerate(unique_clumpids):
fpath = ftemp.format(nsim, n, "clump")
with open(fpath, 'rb') as f:
fin = numpy.load(f)
out["clump"][i] = fin
out["ID"][i] = n
remove(fpath)
print("Dumping clump files to .. `{}`.".format(fpermpart.format(nsim)),
flush=True)
with open(fpermpart.format(nsim), "wb") as f:
numpy.save(f, out)
if args.dump_clumps:
print("{}: collecting particle files...".format(datetime.now()),
flush=True)
out = [None] * unique_clumpids.size
dtype = {"names": ["clump", "ID"],
"formats": [object, numpy.int32]}
out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype)
for i, n in enumerate(unique_clumpids):
fpath = ftemp.format(nsim, n, "clump")
with open(fpath, 'rb') as f:
fin = numpy.load(f)
out["clump"][i] = fin
out["ID"][i] = n
remove(fpath)
print("{}: dumping to .. `{}`.".format(
datetime.now(), fpermpart.format(nsim)), flush=True)
with open(fpermpart.format(nsim), "wb") as f:
numpy.save(f, out)

del out
collect()
del out
collect()
scripts/run_singlematch.py (new file, 66 lines)
@@ -0,0 +1,66 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to test running the CSiBORG realisations matcher.
"""
import numpy
from argparse import ArgumentParser
from distutils.util import strtobool
from datetime import datetime
from os.path import join
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils

# Argument parser
parser = ArgumentParser()
parser.add_argument("--nmult", type=float)
parser.add_argument("--overlap", type=lambda x: bool(strtobool(x)))
parser.add_argument("--select_initial", type=lambda x: bool(strtobool(x)))
parser.add_argument("--fast_neighbours", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()

# File paths
ic = 7468
fperm = join(utils.dumpdir, "overlap", "cross_{}.npy")

paths = csiborgtools.read.CSiBORGPaths(to_new=False)
paths.set_info(ic, paths.get_maximum_snapshot(ic))

print("{}: loading catalogues.".format(datetime.now()), flush=True)
cat = csiborgtools.read.CombinedHaloCatalogue(paths)


matcher = csiborgtools.match.RealisationsMatcher(cat)
nsim0 = cat.n_sims[0]
nsimx = cat.n_sims[1]

print("{}: crossing the simulations.".format(datetime.now()), flush=True)

out = matcher.cross_knn_position_single(
0, nmult=args.nmult, dlogmass=2., overlap=args.overlap,
select_initial=args.select_initial, fast_neighbours=args.fast_neighbours)

# Dump the result
fout = fperm.format(nsim0)
print("Saving results to `{}`.".format(fout), flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)

print("All finished.", flush=True)
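Run from the `scripts` directory with, for instance, `python run_singlematch.py --nmult 1 --overlap true --select_initial true --fast_neighbours false`; the exact values are illustrative, and each boolean flag is parsed through `strtobool`, so explicit `true`/`false` strings are expected.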