Overlap calculation (#19)

* Rm comment

* Add new fixed particle overlap

* Fix overlap calculation

* Update docs

* Add new overlapper support in matcher

* add filter optin

* Add the real space filter.

* Change filtering to real space only

* Update TODO

* add high-resolution switch

* add cross script

* Remove comment

* Add smoothing option
This commit is contained in:
Richard Stiskalek 2022-12-27 18:32:12 +01:00 committed by GitHub
parent 1edb566792
commit 3dc559fec1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 268 additions and 77 deletions

View file

@ -3,9 +3,10 @@
## CSiBORG Matching ## CSiBORG Matching
### TODO ### TODO
- [ ] Implement CIC binning or an alternative scheme for nearby objects. - [x] Implement CIC binning or an alternative scheme for nearby objects.
- [x] Consistently locate region spanned by a single halo.
- [ ] Write a script to perform the matching on a node. - [ ] Write a script to perform the matching on a node.
- [ ] Consistently locate region spanned by a single halo. - [ ] Make a coarser grid for halos outside of the well resolved region.
### Questions ### Questions
- What scaling of the search region? No reason for it to be a multiple of $R_{200c}$. - What scaling of the search region? No reason for it to be a multiple of $R_{200c}$.

View file

@ -14,8 +14,11 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import numpy import numpy
from scipy.ndimage import gaussian_filter
from math import ceil
from tqdm import (tqdm, trange) from tqdm import (tqdm, trange)
from astropy.coordinates import SkyCoord from astropy.coordinates import SkyCoord
import MAS_library as MASL
from ..read import CombinedHaloCatalogue from ..read import CombinedHaloCatalogue
@ -79,9 +82,6 @@ class RealisationsMatcher:
---------- ----------
cats : :py:class`csiborgtools.read.CombinedHaloCatalogue` cats : :py:class`csiborgtools.read.CombinedHaloCatalogue`
Combined halo catalogue to search. Combined halo catalogue to search.
# NOTE add later
# dtype : dtype, optional
# Output precision. By default `numpy.float32`.
""" """
_cats = None _cats = None
@ -156,7 +156,8 @@ class RealisationsMatcher:
def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=None, def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=None,
mass_kind="totpartmass", init_dist=False, mass_kind="totpartmass", init_dist=False,
overlap=False, verbose=True): overlap=False, overlapper_kwargs={},
verbose=True):
r""" r"""
Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
the `nsim`th simulation. Also enforces that the neighbours' the `nsim`th simulation. Also enforces that the neighbours'
@ -183,6 +184,8 @@ class RealisationsMatcher:
Whether to calculate overlap between clumps in the initial Whether to calculate overlap between clumps in the initial
snapshot. By default `False`. Note that this operation is snapshot. By default `False`. Note that this operation is
substantially slower. substantially slower.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`.
verbose : bool, optional verbose : bool, optional
Iterator verbosity flag. By default `True`. Iterator verbosity flag. By default `True`.
@ -196,9 +199,6 @@ class RealisationsMatcher:
`overlap` is the overlap over the initial clumps, all respectively. `overlap` is the overlap over the initial clumps, all respectively.
The latter two are calculated only if `init_dist` or `overlap` is The latter two are calculated only if `init_dist` or `overlap` is
`True`. `True`.
TODO:
- [ ] Precalculate the mapping from halo index to clump array position
""" """
self._check_masskind(mass_kind) self._check_masskind(mass_kind)
# Radius, mass and positions of halos in `n_sim` IC realisation # Radius, mass and positions of halos in `n_sim` IC realisation
@ -215,7 +215,7 @@ class RealisationsMatcher:
paths = self.cats[0].paths paths = self.cats[0].paths
with open(paths.clump0_path(self.cats.n_sims[n_sim]), "rb") as f: with open(paths.clump0_path(self.cats.n_sims[n_sim]), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True) clumps0 = numpy.load(f, allow_pickle=True)
overlapper = ParticleOverlap() overlapper = ParticleOverlap(**overlapper_kwargs)
cat2clumps0 = self._cat2clump_mapping(self.cats[n_sim]["index"], cat2clumps0 = self._cat2clump_mapping(self.cats[n_sim]["index"],
clumps0["ID"]) clumps0["ID"])
@ -254,6 +254,11 @@ class RealisationsMatcher:
"to compare against `n_sim = {}`.".format(i, n_sim)) "to compare against `n_sim = {}`.".format(i, n_sim))
with open(paths.clump0_path(self.cats.n_sims[i]), 'rb') as f: with open(paths.clump0_path(self.cats.n_sims[i]), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True) clumpsx = numpy.load(f, allow_pickle=True)
# Switch overlapper resolution if halo outside well-def region
is_high = self.cats[n_sim]["dist"] < 155.5 / 0.705
overlapper.cellsize = 1 / 2**11 if is_high else 1 / 2**8
cat2clumpsx = self._cat2clump_mapping(self.cats[i]["index"], cat2clumpsx = self._cat2clump_mapping(self.cats[i]["index"],
clumpsx["ID"]) clumpsx["ID"])
@ -265,16 +270,13 @@ class RealisationsMatcher:
# Get the clump and pre-calculate its cell assignment # Get the clump and pre-calculate its cell assignment
cl0 = clumps0["clump"][match0] cl0 = clumps0["clump"][match0]
cl0_cells = overlapper.assign_to_cell(
*(cl0[p] for p in ('x', 'y', 'z')))
dint = numpy.full(indxs[k].size, numpy.nan, numpy.float64) dint = numpy.full(indxs[k].size, numpy.nan, numpy.float64)
# Loop over the ones we cross-correlate with # Loop over the ones we cross-correlate with
for ii, ind in enumerate(indxs[k]): for ii, ind in enumerate(indxs[k]):
# Again which cross clump to this index # Again which cross clump to this index
matchx = cat2clumpsx[ind] matchx = cat2clumpsx[ind]
dint[ii] = overlapper.mass_overlap( dint[ii] = overlapper(cl0, clumpsx["clump"][matchx])
cl0, clumpsx["clump"][matchx], cl0_cells)
cross[k] = dint cross[k] = dint
@ -286,7 +288,8 @@ class RealisationsMatcher:
def cross_knn_position_all(self, nmult=5, dlogmass=None, def cross_knn_position_all(self, nmult=5, dlogmass=None,
mass_kind="totpartmass", init_dist=False, mass_kind="totpartmass", init_dist=False,
overlap=False, verbose=True): overlap=False, overlapper_kwargs={},
verbose=True):
r""" r"""
Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
all simulations listed in `self.cats`. Also enforces that the all simulations listed in `self.cats`. Also enforces that the
@ -310,6 +313,8 @@ class RealisationsMatcher:
Whether to calculate overlap between clumps in the initial Whether to calculate overlap between clumps in the initial
snapshot. By default `False`. Note that this operation is snapshot. By default `False`. Note that this operation is
substantially slower. substantially slower.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`.
verbose : bool, optional verbose : bool, optional
Iterator verbosity flag. By default `True`. Iterator verbosity flag. By default `True`.
@ -325,7 +330,8 @@ class RealisationsMatcher:
for i in trange(N) if verbose else range(N): for i in trange(N) if verbose else range(N):
matches[i] = self.cross_knn_position_single( matches[i] = self.cross_knn_position_single(
i, nmult, dlogmass, mass_kind=mass_kind, init_dist=init_dist, i, nmult, dlogmass, mass_kind=mass_kind, init_dist=init_dist,
overlap=overlap, verbose=verbose) overlap=overlap, overlapper_kwargs=overlapper_kwargs,
verbose=verbose)
return matches return matches
@ -366,94 +372,200 @@ def cosine_similarity(x, y):
class ParticleOverlap: class ParticleOverlap:
""" r"""
TODO: Class to calculate overlap between two halos from different simulations.
- [ ] Class documentation
"""
_bins = None
def __init__(self, bins=None): Parameters
if bins is None: ----------
dx = 1 / 2**11 cellsize : float, optional
bins = numpy.arange(0, 1 + dx, dx) Cellsize in box units. By default :math:`1 / 2^11`, which matches the
self.bins = bins initial RAMSES grid resolution.
smooth_scale : float or integer, optional
Optional Gaussian smoothing scale to by applied to the fields. By
default no smoothing is applied. Otherwise the scale is to be
specified in the number of cells (i.e. in units of `self.cellsize`).
MAS : str, optional
The mass assignment scheme to a grid. By default `PCS`.
"""
_cellsize = None
_smooth_scale = None
_MAS = None
def __init__(self, cellsize=1/2**11, smooth_scale=None, MAS="PCS"):
self.cellsize = cellsize
self.smooth_scale = smooth_scale
self.MAS = MAS
@property @property
def bins(self): def cellsize(self):
""" """
The grid spacing. Assumed to be equal for all three dimensions. Units The grid cubical cell size.
ought to match the requested coordinates.
Returns Returns
------- -------
bins : 1-dimensional array cellsize: 1-dimensional array
""" """
return self._bins return self._cellsize
@bins.setter @cellsize.setter
def bins(self, bins): def cellsize(self, cellsize):
"""Sets `bins`.""" """Sets `cellsize`."""
bins = numpy.asarray(bins) if isinstance(bins, list) else bins assert cellsize > 0, "`cellsize` must be positive."
assert bins.ndim == 1, "`bins` must be a 1-dimensional array." self._cellsize = cellsize
self._bins = bins
def assign_to_cell(self, x, y, z): @property
def smooth_scale(self):
""" """
Assign particles specified by coordinates `x`, `y`, and `z` to grid The smoothing scale in units of `self.cellsize`. If not set `None`.
cells.
Returns
-------
smooth_scale : int or float
"""
return self._smooth_scale
@smooth_scale.setter
def smooth_scale(self, smooth_scale):
"""Sets `smooth_scale`."""
if smooth_scale is not None:
assert smooth_scale > 0
self._smooth_scale = smooth_scale
@property
def MAS(self):
"""
Mass assignment scheme.
Returns
-------
MAS : str
"""
return self._MAS
@MAS.setter
def MAS(self, MAS):
"""
Set `MAS`, checking it's a good value.
"""
assert MAS in ["NGP", "CIC", "TSC", "PCS"]
self._MAS = MAS
@staticmethod
def _minmax(X1, X2):
"""
Calculate the minimum and maximum coordinates from both arrays.
Parameters Parameters
---------- ----------
x, y, z : 1-dimensional arrays X1, X2 : 2-dimensional arrays of shape (n_samples, 3)
Positions of particles in the box. Cartesian coordinates of samples.
Returns Returns
------- -------
cells : 1-dimensional array mins, maxs : 1-dimensional arrays
Cell ID of each particle. Arrays of minima and maxima.
""" """
assert x.ndim == 1 and x.size == y.size == z.size # Calculate minimas for X1, X2
xbin = numpy.digitize(x, self.bins) mins1 = numpy.min(X1, axis=0)
ybin = numpy.digitize(y, self.bins) mins2 = numpy.min(X2, axis=0)
zbin = numpy.digitize(z, self.bins) # Where X2 less than X1 replace the minima, we want min of both arrs
N = self.bins.size # and will return mins1!
m = mins2 < mins1
mins1[m] = mins2[m]
return xbin + ybin * N + zbin * N**2 # Repeat for maximas
maxs1 = numpy.max(X1, axis=0)
maxs2 = numpy.max(X2, axis=0)
# Where X2 less than X1 replace the minima, we want min of both arrs
m = maxs2 > maxs1
maxs1[m] = maxs2[m]
def mass_overlap(self, clump1, clump2, cells1=None): return mins1, maxs1
def make_deltas(self, clump1, clump2):
"""
Calculate density fields of two halos on a grid that encloses them.
Parameters
----------
clump1, clump2 : structurered arrays
Structured arrays containing the particles of a given clump. Keys
must include `x`, `y`, `z` and `M`.
Returns
-------
delta1, delta2 : 3-dimensional arrays
Density arrays of `clump1` and `clump2`, respectively.
"""
# Turn structured arrays to 2-dim arrs
X1 = numpy.vstack([clump1[p] for p in ('x', 'y', 'z')]).T
X2 = numpy.vstack([clump2[p] for p in ('x', 'y', 'z')]).T
# Calculate where to place box boundaries
mins, maxs = self._minmax(X1, X2)
# Rescale X1 and X2
X1 -= mins
X1 /= maxs - mins
X2 -= mins
X2 /= maxs - mins
# How many cells in a subcube along each direction
width = numpy.max(maxs - mins)
ncells = ceil(width / self.cellsize)
# Assign particles to the grid now
delta1 = numpy.zeros((ncells, ncells, ncells), dtype=numpy.float32)
delta2 = numpy.zeros_like(delta1)
# Now do MAS
MASL.MA(X1, delta1, 1., self.MAS, verbose=False, W=clump1["M"])
MASL.MA(X2, delta2, 1., self.MAS, verbose=False, W=clump2["M"])
if self.smooth_scale is not None:
delta1 = gaussian_filter(delta1, self.smooth_scale, output=delta1)
delta2 = gaussian_filter(delta2, self.smooth_scale, output=delta2)
return delta1, delta2
@staticmethod
def overlap(delta1, delta2):
r""" r"""
Calculate the particle, mass-weighted overlap between two halos. Overlap between two density grids.
Defined as
..math::
(M_{u,1} + M_{u,2}) / (M_1 + M_2),
where :math:`M_{u, 1}` is the mass of particles of the first halo in
cells that are also present in the second halo and :math:`M_1` is the
total particle mass of the first halo.
Parameters Parameters
---------- ----------
clump1, clump2 : structured arrays delta1, delta2 : 3-dimensional arrays
Structured arrays corresponding to the two clumps. Should contain Density arrays.
keys `x`, `y`, `z` and `M`.
cells1 : 1-dimensional array, optional
Optionlaly precomputed cells of `clump1`. Be careful when using
this to ensure it matches `clump1`.
Returns Returns
------- -------
overlap : float overlap : float
""" """
# 1-dimensional cell ID of each particle in clump1 and clump2 mass1 = numpy.sum(delta1)
if cells1 is None: mass2 = numpy.sum(delta2)
cells1 = self.assign_to_cell(*[clump1[p] for p in ('x', 'y', 'z')]) # Cells where both fields are > 0
cells2 = self.assign_to_cell(*[clump2[p] for p in ('x', 'y', 'z')]) mask = (delta1 > 0) & (delta2 > 0)
# Elementwise cells1 in cells2 and vice versa # Note the factor of 0.5 to avoid double counting
m1 = numpy.isin(cells1, cells2) intersect = 0.5 * numpy.sum(delta1[mask] + delta2[mask])
m2 = numpy.isin(cells2, cells1) return intersect / (mass1 + mass2 - intersect)
# Summed shared mass and the total
interp = numpy.sum(clump1["M"][m1]) + numpy.sum(clump2["M"][m2])
mtot = numpy.sum(clump1["M"]) + numpy.sum(clump2["M"])
return interp / mtot def __call__(self, clump1, clump2):
"""
Calculate overlap between `clump1` and `clump2`. See
`self.overlap(...)` and `self.make_deltas(...)` for further
information.
Parameters
----------
clump1, clump2 : structurered arrays
Structured arrays containing the particles of a given clump. Keys
must include `x`, `y`, `z` and `M`.
Returns
-------
overlap : float
"""
delta1, delta2 = self.make_deltas(clump1, clump2)
return self.overlap(delta1, delta2)

78
scripts/run_crossmatch.py Normal file
View file

@ -0,0 +1,78 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
MPI script to run the CSiBORG realisations matcher.
"""
import numpy
from datetime import datetime
from mpi4py import MPI
from os.path import join
from os import remove
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
# File paths
ftemp = join(utils.dumpdir, "temp_match", "match_{}.npy")
fperm = join(utils.dumpdir, "match", "cross_matches.npy")
# Set up the catalogue
paths = csiborgtools.read.CSiBORGPaths(to_new=False)
print("{}: started reading in the combined catalogue.".format(datetime.now()),
flush=True)
cat = csiborgtools.read.CombinedHaloCatalogue(
paths, min_m500=None, max_dist=None, verbose=False)
print("{}: finished reading in the combined catalogue with `{}`."
.format(datetime.now(), cat.n_sims), flush=True)
matcher = csiborgtools.match.RealisationsMatcher(cat)
for i in csiborgtools.fits.split_jobs(len(cat.n_sims), nproc)[rank]:
n = cat.n_sims[i]
print("{}: rank {} working on simulation `{}`."
.format(datetime.now(), rank, n), flush=True)
out = matcher.cross_knn_position_single(
i, nmult=15, dlogmass=2, init_dist=True, overlap=True, verbose=False,
overlapper_kwargs={"smooth_scale": 0.5})
# Dump the result
with open(ftemp.format(n), "wb") as f:
numpy.save(f, out)
comm.Barrier()
if rank == 0:
print("Collecting files...", flush=True)
dtype = {"names": ["match", "nsim"], "formats": [object, numpy.int32]}
matches = numpy.full(len(cat.n_sims), numpy.nan, dtype=dtype)
for i, n in enumerate(cat.n_sims):
with open(ftemp.format(n), "rb") as f:
matches["match"][i] = numpy.load(f, allow_pickle=True)
matches["nsim"][i] = n
remove(ftemp.format(n))
print("Saving results to `{}`.".format(fperm))
with open(fperm, "wb") as f:
numpy.save(f, matches)