Gaussian smoothing of density fields (#33)

* Simplify smoothing support and looping over nonzero

* Simplify comments

* add now()

* add cat length

* add smoothed calculation

* add smoothing

* Add sorting

* Edit what is ignored

* Move notebooks

* Add nonsymmetric smoothed overlap

* Update NB

* Add support for reading in the smoothed overlap

* Switch to the true overlap definition

* Reader of the true overlap

* rem occups

* Import moved to a class

* Move definition

* Edit submission script

* Update to account for the new definition

* backup nb

* Switch back to properly initialising arrays

* Fix addition bug

* Update NB

* Fix little bug

* Update nb
This commit is contained in:
Richard Stiskalek 2023-03-27 09:22:03 +01:00 committed by GitHub
parent 9b524db617
commit 5dd8c668fa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 10587 additions and 441 deletions

2
.gitignore vendored
View file

@ -14,7 +14,5 @@ scripts/*.out
build/* build/*
.eggs/* .eggs/*
csiborgtools.egg-info/* csiborgtools.egg-info/*
scripts/playground_*
scripts/*.ipynb
Pylians3/* Pylians3/*
scripts/plot_correlation.ipynb scripts/plot_correlation.ipynb

View file

@ -16,11 +16,11 @@
import numpy import numpy
from scipy.ndimage import gaussian_filter from scipy.ndimage import gaussian_filter
from tqdm import (tqdm, trange) from tqdm import (tqdm, trange)
from datetime import datetime
from astropy.coordinates import SkyCoord from astropy.coordinates import SkyCoord
from numba import jit from numba import jit
from gc import collect from gc import collect
from ..read import (concatenate_clumps, clumps_pos2cell) from ..read import concatenate_clumps
from ..utils import now
def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False): def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
@ -92,23 +92,19 @@ class RealisationsMatcher:
mass associated with a halo. mass associated with a halo.
overlapper_kwargs : dict, optional overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`. Keyword arguments passed to `ParticleOverlapper`.
remove_nooverlap : bool, optional
Whether to remove pairs with exactly zero overlap. By default `True`.
""" """
_nmult = None _nmult = None
_dlogmass = None _dlogmass = None
_mass_kind = None _mass_kind = None
_overlapper = None _overlapper = None
_remove_nooverlap = None
def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass", def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass",
overlapper_kwargs={}, remove_nooverlap=True): overlapper_kwargs={}):
self.nmult = nmult self.nmult = nmult
self.dlogmass = dlogmass self.dlogmass = dlogmass
self.mass_kind = mass_kind self.mass_kind = mass_kind
self._overlapper = ParticleOverlap(**overlapper_kwargs) self._overlapper = ParticleOverlap(**overlapper_kwargs)
self.remove_nooverlap = remove_nooverlap
@property @property
def nmult(self): def nmult(self):
@ -163,23 +159,6 @@ class RealisationsMatcher:
assert isinstance(mass_kind, str) assert isinstance(mass_kind, str)
self._mass_kind = mass_kind self._mass_kind = mass_kind
@property
def remove_nooverlap(self):
"""
Whether to remove pairs with exactly zero overlap.
Returns
-------
remove_nooverlap : bool
"""
return self._remove_nooverlap
@remove_nooverlap.setter
def remove_nooverlap(self, remove_nooverlap):
"""Set `remove_nooverlap`."""
assert isinstance(remove_nooverlap, bool)
self._remove_nooverlap = remove_nooverlap
@property @property
def overlapper(self): def overlapper(self):
""" """
@ -191,61 +170,48 @@ class RealisationsMatcher:
""" """
return self._overlapper return self._overlapper
@staticmethod def cross(self, cat0, catx, clumps0, clumpsx, delta_bckg, verbose=True):
def _cat2clump_mapping(cat_indxs, clump_indxs):
"""
Create a mapping from a catalogue array index to a clump array index.
Parameters
----------
cat_indxs : 1-dimensional array
Clump indices in the catalogue array.
clump_indxs : 1-dimensional array
Clump indices in the clump array.
Returns
-------
mapping : 1-dimensional array
Mapping. The array indices match catalogue array and values are
array positions in the clump array.
"""
mapping = numpy.full(cat_indxs.size, numpy.nan, dtype=int)
__, ind1, ind2 = numpy.intersect1d(clump_indxs, cat_indxs,
return_indices=True)
mapping[ind2] = ind1
return mapping
def cross(self, cat0, catx, overlap=False, verbose=True):
r""" r"""
Find all neighbours whose CM separation is less than `nmult` times the Find all neighbours whose CM separation is less than `nmult` times the
sum of their initial Lagrangian patch sizes. Enforces that the sum of their initial Lagrangian patch sizes and optionally calculate
neighbours' are similar in mass up to `dlogmass` dex. their overlap. Enforces that the neighbours' are similar in mass up to
`dlogmass` dex.
Parameters Parameters
---------- ----------
cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue` cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogues corresponding to the reference and cross Halo catalogue of the reference simulation.
simulations. catx : :py:class:`csiborgtools.read.HaloCatalogue`
overlap : bool, optional Halo catalogue of the cross simulation.
whether to calculate overlap between clumps in the initial clumps0 : list of structured arrays
snapshot. by default `false`. this operation is slow. List of clump structured arrays of the reference simulation, keys
must include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
clumpsx : list of structured arrays
List of clump structured arrays of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
delta_bcgk : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
verbose : bool, optional verbose : bool, optional
iterator verbosity flag. by default `true`. iterator verbosity flag. by default `true`.
Returns Returns
------- -------
ref_indxs : 1-dimensional array ref_indxs : 1-dimensional array
Indices of halos in the reference catalogue. Halo IDs in the reference catalogue.
cross_indxs : 1-dimensional array cross_indxs : 1-dimensional array
Indices of halos in the cross catalogue. Halo IDs in the cross catalogue.
match_indxs : 1-dimensional array of arrays match_indxs : 1-dimensional array of arrays
Indices of halo counterparts in the cross catalogue. Indices of halo counterparts in the cross catalogue.
overlaps : 1-dimensional array of arrays overlaps : 1-dimensional array of arrays
Overlaps with the cross catalogue. Overlaps with the cross catalogue.
""" """
# Query the KNN # Query the KNN
if verbose: verbose and print("{}: querying the KNN.".format(now()), flush=True)
print("{}: querying the KNN.".format(datetime.now()), flush=True)
match_indxs = radius_neighbours( match_indxs = radius_neighbours(
catx.knn(select_initial=True), cat0.positions0, catx.knn(select_initial=True), cat0.positions0,
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"], radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
@ -253,33 +219,11 @@ class RealisationsMatcher:
# Remove neighbours whose mass is too large/small # Remove neighbours whose mass is too large/small
if self.dlogmass is not None: if self.dlogmass is not None:
for j, indx in enumerate(match_indxs): for i, indx in enumerate(match_indxs):
# |log(M1 / M2)| # |log(M1 / M2)|
p = self.mass_kind p = self.mass_kind
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][j])) aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
match_indxs[j] = match_indxs[j][aratio < self.dlogmass] match_indxs[i] = match_indxs[i][aratio < self.dlogmass]
# Initialise the array outside in case `overlap` is `False`
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
if overlap:
if verbose:
print("Loading the clump particles", flush=True)
with open(cat0.paths.clump0_path(cat0.n_sim), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True)
with open(catx.paths.clump0_path(catx.n_sim), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True)
# Convert 3D positions to particle IDs
clumps_pos2cell(clumps0, self.overlapper)
clumps_pos2cell(clumpsx, self.overlapper)
# Calculate the particle field
if verbose:
print("Creating and smoothing the crossed field.", flush=True)
delta_bckg0 = self.overlapper.make_background_delta(
clumps0, to_smooth=False)
delta_bckgx = self.overlapper.make_background_delta(
clumpsx, to_smooth=False)
# Min and max cells along each axis for each halo # Min and max cells along each axis for each halo
limkwargs = {"ncells": self.overlapper.inv_clength, limkwargs = {"ncells": self.overlapper.inv_clength,
@ -287,40 +231,113 @@ class RealisationsMatcher:
mins0, maxs0 = get_clumplims(clumps0, **limkwargs) mins0, maxs0 = get_clumplims(clumps0, **limkwargs)
minsx, maxsx = get_clumplims(clumpsx, **limkwargs) minsx, maxsx = get_clumplims(clumpsx, **limkwargs)
# Mapping from a catalogue halo index to the list of clumps # Mapping from a halo index to the list of clumps
cat2clumps0 = self._cat2clump_mapping(cat0["index"], clumps0["ID"]) hid2clumps0 = {hid: n for n, hid in enumerate(clumps0["ID"])}
cat2clumpsx = self._cat2clump_mapping(catx["index"], clumpsx["ID"]) hid2clumpsx = {hid: n for n, hid in enumerate(clumpsx["ID"])}
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
# Loop only over halos that have neighbours # Loop only over halos that have neighbours
wneigbours = numpy.where([ii.size > 0 for ii in match_indxs])[0] iters = numpy.arange(len(cat0))[[x.size > 0 for x in match_indxs]]
for k in tqdm(wneigbours) if verbose else wneigbours: for i in tqdm(iters) if verbose else iters:
match0 = cat2clumps0[k] # Clump pos matching this halo match0 = hid2clumps0[cat0["index"][i]]
# The clump, its mass and mins & maxs # The clump, its mass and mins & maxs
cl0 = clumps0["clump"][match0] cl0 = clumps0["clump"][match0]
mass0 = numpy.sum(cl0['M']) mass0 = numpy.sum(cl0['M'])
mins0_current, maxs0_current = mins0[match0], maxs0[match0] mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Array to store overlaps of this halo # Preallocate arrays to store overlap information
crosses = numpy.full(match_indxs[k].size, numpy.nan, _cross = numpy.full(match_indxs[i].size, numpy.nan,
numpy.float32) dtype=numpy.float32)
# Loop over matches of this halo from the other simulation # Loop over matches of this halo from the other simulation
for ii, ind in enumerate(match_indxs[k]): for j, ind in enumerate(match_indxs[i]):
matchx = cat2clumpsx[ind] # Clump pos matching this halo matchx = hid2clumpsx[catx["index"][ind]]
clx = clumpsx["clump"][matchx] clx = clumpsx["clump"][matchx]
crosses[ii] = self.overlapper( _cross[j] = self.overlapper(
cl0, clx, delta_bckg0, delta_bckgx, mins0_current, cl0, clx, delta_bckg, mins0_current, maxs0_current,
maxs0_current, minsx[matchx], maxsx[matchx], minsx[matchx], maxsx[matchx], mass1=mass0,
mass1=mass0, mass2=numpy.sum(clx['M'])) mass2=numpy.sum(clx['M']))
cross[k] = crosses cross[i] = _cross
# Optionally remove matches with exactly 0 overlap # Remove matches with exactly 0 overlap
if self.remove_nooverlap: mask = cross[i] > 0
mask = cross[k] > 0 match_indxs[i] = match_indxs[i][mask]
match_indxs[k] = match_indxs[k][mask] cross[i] = cross[i][mask]
cross[k] = cross[k][mask]
# Sort the matches by overlap
ordering = numpy.argsort(cross[i])[::-1]
match_indxs[i] = match_indxs[i][ordering]
cross[i] = cross[i][ordering]
cross = numpy.asanyarray(cross, dtype=object)
return cat0["index"], catx["index"], match_indxs, cross return cat0["index"], catx["index"], match_indxs, cross
def smoothed_cross(self, clumps0, clumpsx, delta_bckg, ref_indxs,
cross_indxs, match_indxs, smooth_kwargs, verbose=True):
r"""
Calculate the smoothed overlaps for pair previously identified via
`self.cross(...)` to have a non-zero overlap.
Parameters
----------
clumps0 : list of structured arrays
List of clump structured arrays of the reference simulation, keys
must include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
clumpsx : list of structured arrays
List of clump structured arrays of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
delta_bcgk : 3-dimensional array
Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
ref_indxs : 1-dimensional array
Halo IDs in the reference catalogue.
cross_indxs : 1-dimensional array
Halo IDs in the cross catalogue.
match_indxs : 1-dimensional array of arrays
Indices of halo counterparts in the cross catalogue.
smooth_kwargs : kwargs
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
Returns
-------
overlaps : 1-dimensional array of arrays
"""
# Min and max cells along each axis for each halo
limkwargs = {"ncells": self.overlapper.inv_clength,
"nshift": self.overlapper.nshift}
mins0, maxs0 = get_clumplims(clumps0, **limkwargs)
minsx, maxsx = get_clumplims(clumpsx, **limkwargs)
hid2clumps0 = {hid: n for n, hid in enumerate(clumps0["ID"])}
hid2clumpsx = {hid: n for n, hid in enumerate(clumpsx["ID"])}
# Preallocate arrays to store the overlap information
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
for i, ref_ind in enumerate(tqdm(ref_indxs) if verbose else ref_indxs):
match0 = hid2clumps0[ref_ind]
# The reference clump, its mass and mins & maxs
cl0 = clumps0["clump"][match0]
mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Preallocate
nmatches = match_indxs[i].size
_cross = numpy.full(nmatches, numpy.nan, numpy.float32)
for j, match_ind in enumerate(match_indxs[i]):
matchx = hid2clumpsx[cross_indxs[match_ind]]
clx = clumpsx["clump"][matchx]
_cross[j] = self.overlapper(
cl0, clx, delta_bckg, mins0_current,
maxs0_current, minsx[matchx], maxsx[matchx],
smooth_kwargs=smooth_kwargs)
cross[i] = _cross
return numpy.asanyarray(cross, dtype=object)
############################################################################### ###############################################################################
# Matching statistics # # Matching statistics #
@ -360,72 +377,21 @@ def cosine_similarity(x, y):
class ParticleOverlap: class ParticleOverlap:
r""" r"""
Class to calculate overlap between two halos from different simulations. Class to calculate halo overlaps. The density field calculation is based on
The density field calculation is based on the nearest grid position the nearest grid position particle assignment scheme, with optional
particle assignment scheme, with optional additional Gaussian smoothing. Gaussian smoothing.
Parameters
----------
nshift : int, optional
Number of cells by which to shift the subbox from the outside-most
cell containing a particle. By default 5.
smooth_scale : float or integer, optional
Optional Gaussian smoothing scale to by applied to the fields. By
default no smoothing is applied. Otherwise the scale is to be
specified in the number of cells (i.e. in units of `self.cellsize`).
""" """
_inv_clength = None _inv_clength = None
_smooth_scale = None
_clength = None _clength = None
_nshift = None _nshift = None
def __init__(self, smooth_scale=None, nshift=5): def __init__(self):
# Inverse cell length in box units. By default :math:`2^11`, which # Inverse cell length in box units. By default :math:`2^11`, which
# matches the initial RAMSES grid resolution. # matches the initial RAMSES grid resolution.
self.inv_clength = 2**11 self.inv_clength = 2**11
self.smooth_scale = smooth_scale self.nshift = 5 # Hardcode this too to force consistency
self.nshift = nshift
@property
def inv_clength(self):
"""
Inverse cell length.
Returns
-------
inv_clength : float
"""
return self._inv_clength
@inv_clength.setter
def inv_clength(self, inv_clength):
"""Sets `inv_clength`."""
assert inv_clength > 0, "`inv_clength` must be positive."
assert isinstance(inv_clength, int), "`inv_clength` must be integer."
self._inv_clength = int(inv_clength)
# Also set the inverse and number of cells
self._clength = 1 / self.inv_clength self._clength = 1 / self.inv_clength
@property
def smooth_scale(self):
"""
The smoothing scale in units of `self.cellsize`. If not set `None`.
Returns
-------
smooth_scale : int or float
"""
return self._smooth_scale
@smooth_scale.setter
def smooth_scale(self, smooth_scale):
"""Sets `smooth_scale`."""
if smooth_scale is None:
self._smooth_scale = None
else:
assert smooth_scale > 0
self._smooth_scale = smooth_scale
def pos2cell(self, pos): def pos2cell(self, pos):
""" """
Convert position to cell number. If `pos` is in Convert position to cell number. If `pos` is in
@ -435,6 +401,7 @@ class ParticleOverlap:
Parameters Parameters
---------- ----------
pos : 1-dimensional array pos : 1-dimensional array
Array of positions along an axis in the box.
Returns Returns
------- -------
@ -445,18 +412,55 @@ class ParticleOverlap:
return pos return pos
return numpy.floor(pos * self.inv_clength).astype(int) return numpy.floor(pos * self.inv_clength).astype(int)
def make_background_delta(self, clumps, to_smooth=True): def clumps_pos2cell(self, clumps):
"""
Convert clump positions directly to cell IDs. Useful to speed up
subsequent calculations. Overwrites the passed in arrays.
Parameters
----------
clumps : array of arrays
Array of clump structured arrays whose `x`, `y`, `z` keys will be
converted.
Returns
-------
None
"""
# Check if clumps are probably already in cells
if any(clumps[0][0].dtype[p].char in numpy.typecodes["AllInteger"]
for p in ('x', 'y', 'z')):
raise ValueError("Positions appear to already be converted cells.")
# Get the new dtype that replaces float for int for positions
names = clumps[0][0].dtype.names # Take the first one, doesn't matter
formats = [descr[1] for descr in clumps[0][0].dtype.descr]
for i in range(len(names)):
if names[i] in ('x', 'y', 'z'):
formats[i] = numpy.int32
dtype = numpy.dtype({"names": names, "formats": formats})
# Loop switch positions for cells IDs and change dtype
for n in range(clumps.size):
for p in ('x', 'y', 'z'):
clumps[n][0][p] = self.pos2cell(clumps[n][0][p])
clumps[n][0] = clumps[n][0].astype(dtype)
def make_bckg_delta(self, clumps, delta=None):
""" """
Calculate a NGP density field of clumps within the central Calculate a NGP density field of clumps within the central
:math:`1/2^3` region of the simulation. :math:`1/2^3` region of the simulation. Smoothing must be applied
separately.
Parameters Parameters
---------- ----------
clumps : list of structured arrays clumps : list of structured arrays
List of clump structured array, keys must include `x`, `y`, `z` List of clump structured array, keys must include `x`, `y`, `z`
and `M`. and `M`.
to_smooth : bool, optional delta : 3-dimensional array, optional
Explicit control over whether to smooth. By default `True`. Array to store the density field in. If `None` a new array is
created.
Returns Returns
------- -------
@ -480,30 +484,34 @@ class ParticleOverlap:
cells = [c[mask] for c in cells] cells = [c[mask] for c in cells]
mass = mass[mask] mass = mass[mask]
# Preallocate and fill the array # Prepare the density field or check it is of the right shape
if delta is None:
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32) delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
else:
assert ((delta.shape == (ncells,) * 3)
& (delta.dtype == numpy.float32))
fill_delta(delta, *cells, *(cellmin,) * 3, mass) fill_delta(delta, *cells, *(cellmin,) * 3, mass)
if to_smooth and self.smooth_scale is not None:
gaussian_filter(delta, self.smooth_scale, output=delta)
return delta return delta
def make_delta(self, clump, mins=None, maxs=None, subbox=False, def make_delta(self, clump, mins=None, maxs=None, subbox=False,
to_smooth=True): smooth_kwargs=None):
""" """
Calculate a NGP density field of a halo on a cubic grid. Calculate a NGP density field of a halo on a cubic grid. Optionally can
be smoothed with a Gaussian kernel.
Parameters Parameters
---------- ----------
clump: structurered arrays clump : structurered arrays
Clump structured array, keys must include `x`, `y`, `z` and `M`. Clump structured array, keys must include `x`, `y`, `z` and `M`.
mins, maxs : 1-dimensional arrays of shape `(3,)` mins, maxs : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension. Minimun and maximum cell numbers along each dimension.
subbox : bool, optional subbox : bool, optional
Whether to calculate the density field on a grid strictly enclosing Whether to calculate the density field on a grid strictly enclosing
the clump. the clump.
to_smooth : bool, optional smooth_kwargs : kwargs, optional
Explicit control over whether to smooth. By default `True`. Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
If `None` no smoothing is applied.
Returns Returns
------- -------
@ -534,15 +542,15 @@ class ParticleOverlap:
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32) delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
fill_delta(delta, *cells, *mins, clump['M']) fill_delta(delta, *cells, *mins, clump['M'])
if to_smooth and self.smooth_scale is not None: if smooth_kwargs is not None:
gaussian_filter(delta, self.smooth_scale, output=delta) gaussian_filter(delta, output=delta, **smooth_kwargs)
return delta return delta
def make_deltas(self, clump1, clump2, mins1=None, maxs1=None, def make_deltas(self, clump1, clump2, mins1=None, maxs1=None,
mins2=None, maxs2=None, return_nonzero1=False): mins2=None, maxs2=None, smooth_kwargs=None):
""" """
Calculate a NGP density fields of two halos on a grid that encloses Calculate a NGP density fields of two halos on a grid that encloses
them both. them both. Optionally can be smoothed with a Gaussian kernel.
Parameters Parameters
---------- ----------
@ -555,9 +563,9 @@ class ParticleOverlap:
mins2, maxs2 : 1-dimensional arrays of shape `(3,)` mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump2`. Minimun and maximum cell numbers along each dimension of `clump2`.
Optional. Optional.
return_nonzero1 : bool, optional smooth_kwargs : kwargs, optional
Whether to return the indices where the contribution of `clump1` is Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
non-zero. If `None` no smoothing is applied.
Returns Returns
------- -------
@ -565,9 +573,9 @@ class ParticleOverlap:
Density arrays of `clump1` and `clump2`, respectively. Density arrays of `clump1` and `clump2`, respectively.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
nonzero1 : 2-dimensional array nonzero : 2-dimensional array
Indices where `delta1` has a non-zero density. If `return_nonzero1` Indices where the lower mass clump has a non-zero density.
is `False` return `None` instead. Calculated only if no smoothing is applied, otherwise `None`.
""" """
xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ('x', 'y', 'z')) xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ('x', 'y', 'z'))
xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ('x', 'y', 'z')) xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ('x', 'y', 'z'))
@ -597,26 +605,37 @@ class ParticleOverlap:
# Preallocate and fill the arrays # Preallocate and fill the arrays
delta1 = numpy.zeros((ncells,)*3, dtype=numpy.float32) delta1 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
delta2 = numpy.zeros((ncells,)*3, dtype=numpy.float32) delta2 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
if return_nonzero1:
nonzero1 = fill_delta_indxs( # If no smoothing figure out the nonzero indices of the smaller clump
delta1, xc1, yc1, zc1, *cellmins, clump1['M']) if smooth_kwargs is None:
if clump1.size > clump2.size:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
nonzero = fill_delta_indxs(delta2, xc2, yc2, zc2, *cellmins,
clump2['M'])
else:
nonzero = fill_delta_indxs(delta1, xc1, yc1, zc1, *cellmins,
clump1['M'])
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M'])
else: else:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M']) fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
nonzero1 = None
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M']) fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M'])
nonzero = None
if self.smooth_scale is not None: if smooth_kwargs is not None:
gaussian_filter(delta1, self.smooth_scale, output=delta1) gaussian_filter(delta1, output=delta1, **smooth_kwargs)
gaussian_filter(delta2, self.smooth_scale, output=delta2) gaussian_filter(delta2, output=delta2, **smooth_kwargs)
return delta1, delta2, cellmins, nonzero
return delta1, delta2, cellmins, nonzero1 def __call__(self, clump1, clump2, delta_bckg, mins1=None, maxs1=None,
mins2=None, maxs2=None, mass1=None, mass2=None,
def __call__(self, clump1, clump2, delta1_bckg, delta2_bckg, smooth_kwargs=None):
mins1=None, maxs1=None, mins2=None, maxs2=None,
mass1=None, mass2=None, loop_nonzero=True):
""" """
Calculate overlap between `clump1` and `clump2`. See Calculate overlap between `clump1` and `clump2`. See
`calculate_overlap(...)` for further information. `calculate_overlap(...)` for further information. Be careful so that
the background density field is calculated with the same
`smooth_kwargs`. If any smoothing is applied then loops over the full
density fields, otherwise only over the non-zero cells of the lower
mass clump.
Parameters Parameters
---------- ----------
@ -625,40 +644,39 @@ class ParticleOverlap:
must include `x`, `y`, `z` and `M`. must include `x`, `y`, `z` and `M`.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
delta1_bcgk, delta2_bckg : 3-dimensional arrays delta_bcgk : 3-dimensional array
Background density fields of the reference and cross boxes Summed background density field of the reference and cross
calculated with particles assigned to halos at the final snapshot. simulations calculated with particles assigned to halos at the
Assumed to only be sampled in cells :math:`[512, 1536)^3`. final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)` mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump1`. Minimun and maximum cell numbers along each dimension of `clump1`.
Optional. Optional.
mins2, maxs2 : 1-dimensional arrays of shape `(3,)` mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump2`. Minimum and maximum cell numbers along each dimension of `clump2`,
Optional. optional.
mass1, mass2 : floats, optional mass1, mass2 : floats, optional
Total mass of `clump1` and `clump2`, respectively. Must be provided Total mass of `clump1` and `clump2`, respectively. Must be provided
if `loop_nonzero` is `True`. if `loop_nonzero` is `True`.
loop_nonzer : bool, optional smooth_kwargs : kwargs, optional
Whether to only loop over cells where `clump1` has non-zero Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
density. By default `True`. If `None` no smoothing is applied.
Returns Returns
------- -------
overlap : float overlap : float
""" """
delta1, delta2, cellmins, nonzero1 = self.make_deltas( delta1, delta2, cellmins, nonzero = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2, clump1, clump2, mins1, maxs1, mins2, maxs2,
return_nonzero1=loop_nonzero) smooth_kwargs=smooth_kwargs)
if not loop_nonzero:
return calculate_overlap(delta1, delta2, cellmins,
delta1_bckg, delta2_bckg)
if smooth_kwargs is not None:
return calculate_overlap(delta1, delta2, cellmins, delta_bckg)
# Calculate masses not given # Calculate masses not given
mass1 = numpy.sum(clump1['M']) if mass1 is None else mass1 mass1 = numpy.sum(clump1['M']) if mass1 is None else mass1
mass2 = numpy.sum(clump2['M']) if mass2 is None else mass2 mass2 = numpy.sum(clump2['M']) if mass2 is None else mass2
return calculate_overlap_indxs(delta1, delta2, cellmins, delta1_bckg, return calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg,
delta2_bckg, nonzero1, mass1, mass2) nonzero, mass1, mass2)
@jit(nopython=True) @jit(nopython=True)
@ -760,29 +778,31 @@ def get_clumplims(clumps, ncells, nshift=None):
@jit(nopython=True) @jit(nopython=True)
def calculate_overlap(delta1, delta2, cellmins, delta1_bckg, delta2_bckg): def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
r""" r"""
Overlap between two clumps whose density fields are evaluated on the Overlap between two halos whose density fields are evaluated on the
same grid. This is a JIT implementation, hence it is outside of the main same grid. This is a JIT implementation, hence it is outside of the main
class. class.
Parameters Parameters
---------- ----------
delta1, delta2 : 3-dimensional arrays delta1: 3-dimensional array
Clumps density fields. Density field of the first halo.
delta2 : 3-dimensional array
Density field of the second halo.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
delta1_bcgk, delta2_bckg : 3-dimensional arrays delta_bcgk : 3-dimensional array
Background density fields of the reference and cross boxes calculated Summed background density field of the reference and cross simulations
with particles assigned to halos at the final snapshot. Assumed to only calculated with particles assigned to halos at the final snapshot.
be sampled in cells :math:`[512, 1536)^3`. Assumed to only be sampled in cells :math:`[512, 1536)^3`.
Returns Returns
------- -------
overlap : float overlap : float
""" """
totmass = 0. # Total mass of clump 1 and clump 2 totmass = 0. # Total mass of clump 1 and clump 2
intersect = 0. # Mass of pixels that are non-zero in both clumps intersect = 0. # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack things i0, j0, k0 = cellmins # Unpack things
bckg_offset = 512 # Offset of the background density field bckg_offset = 512 # Offset of the background density field
bckg_size = 1024 bckg_size = 1024
@ -790,34 +810,27 @@ def calculate_overlap(delta1, delta2, cellmins, delta1_bckg, delta2_bckg):
for i in range(imax): for i in range(imax):
ii = i0 + i - bckg_offset ii = i0 + i - bckg_offset
flag = 0 <= ii < bckg_size ishighres = 0 <= ii < bckg_size
for j in range(jmax): for j in range(jmax):
jj = j0 + j - bckg_offset jj = j0 + j - bckg_offset
flag &= 0 <= jj < bckg_size ishighres &= 0 <= jj < bckg_size
for k in range(kmax): for k in range(kmax):
kk = k0 + k - bckg_offset kk = k0 + k - bckg_offset
flag &= 0 <= kk < bckg_size ishighres &= 0 <= kk < bckg_size
cell1, cell2 = delta1[i, j, k], delta2[i, j, k] m1, m2 = delta1[i, j, k], delta2[i, j, k]
# If both are zero then skip totmass += m1 + m2
if cell1 * cell2 > 0: prod = 2 * m1 * m2
if flag: if prod > 0: # If both cells are non-zero
weight1 = cell1 / delta1_bckg[ii, jj, kk] bcgk = delta_bckg[ii, jj, kk] if ishighres else m1 + m2
weight2 = cell2 / delta2_bckg[ii, jj, kk] intersect += prod / bcgk if bcgk > 0 else prod / (m1 + m2)
else:
weight1 = 1.
weight2 = 1.
# Average weighted mass in the cell
intersect += 0.5 * (weight1 * cell1 + weight2 * cell2)
totmass += cell1 + cell2
return intersect / (totmass - intersect) return intersect / (totmass - intersect)
@jit(nopython=True) @jit(nopython=True)
def calculate_overlap_indxs(delta1, delta2, cellmins, delta1_bckg, delta2_bckg, def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
nonzero1, mass1, mass2): mass1, mass2):
r""" r"""
Overlap between two clumps whose density fields are evaluated on the Overlap between two clumps whose density fields are evaluated on the
same grid and `nonzero1` enumerates the non-zero cells of `delta1. This is same grid and `nonzero1` enumerates the non-zero cells of `delta1. This is
@ -825,17 +838,19 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta1_bckg, delta2_bckg,
Parameters Parameters
---------- ----------
delta1, delta2 : 3-dimensional arrays delta1: 3-dimensional array
Clumps density fields. Density field of the first halo.
delta2 : 3-dimensional array
Density field of the second halo.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
delta1_bcgk, delta2_bckg : 3-dimensional arrays delta_bcgk : 3-dimensional array
Background density fields of the reference and cross boxes calculated Summed background density field of the reference and cross simulations
with particles assigned to halos at the final snapshot. Assumed to only calculated with particles assigned to halos at the final snapshot.
be sampled in cells :math:`[512, 1536)^3`. Assumed to only be sampled in cells :math:`[512, 1536)^3`.
nonzero1 : 2-dimensional array of shape `(n_cells, 3)` nonzero : 2-dimensional array of shape `(n_cells, 3)`
Indices of cells that are non-zero in `delta1`. Expected to be Indices of cells that are non-zero of the lower mass clump. Expected to
precomputed from `fill_delta_indxs`. be precomputed from `fill_delta_indxs`.
mass1, mass2 : floats, optional mass1, mass2 : floats, optional
Total masses of the two clumps, respectively. Optional. If not provided Total masses of the two clumps, respectively. Optional. If not provided
calculcated directly from the density field. calculcated directly from the density field.
@ -844,34 +859,27 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta1_bckg, delta2_bckg,
------- -------
overlap : float overlap : float
""" """
intersect = 0. # Mass of pixels that are non-zero in both clumps intersect = 0. # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack cell minimas i0, j0, k0 = cellmins # Unpack cell minimas
bckg_offset = 512 # Offset of the background density field bckg_offset = 512 # Offset of the background density field
bckg_size = 1024 # Size of the background density field array bckg_size = 1024 # Size of the background density field array
for n in range(nonzero1.shape[0]): for n in range(nonzero.shape[0]):
i, j, k = nonzero1[n, :] i, j, k = nonzero[n, :]
cell2 = delta2[i, j, k] m1, m2 = delta1[i, j, k], delta2[i, j, k]
prod = 2 * m1 * m2
if cell2 > 0: # We already know that cell1 is non-zero if prod > 0:
cell1 = delta1[i, j, k] # Now unpack cell1 as well
ii = i0 + i - bckg_offset # Indices of this cell in the ii = i0 + i - bckg_offset # Indices of this cell in the
jj = j0 + j - bckg_offset # background density field. jj = j0 + j - bckg_offset # background density field.
kk = k0 + k - bckg_offset kk = k0 + k - bckg_offset
flag = 0 <= ii < bckg_size # Whether this cell is in the high ishighres = 0 <= ii < bckg_size # Is this cell is in the high
flag &= 0 <= jj < bckg_size # resolution region for which the ishighres &= 0 <= jj < bckg_size # resolution region for which the
flag &= 0 <= kk < bckg_size # background density is calculated. ishighres &= 0 <= kk < bckg_size # background field is calculated.
if flag: bckg = delta_bckg[ii, jj, kk] if ishighres else m1 + m2
weight1 = cell1 / delta1_bckg[ii, jj, kk] intersect += prod / bckg if bckg > 0 else prod / (m1 + m2)
weight2 = cell2 / delta2_bckg[ii, jj, kk]
else:
weight1 = 1.
weight2 = 1.
# Average weighted mass in the cell
intersect += 0.5 * (weight1 * cell1 + weight2 * cell2)
return intersect / (mass1 + mass2 - intersect) return intersect / (mass1 + mass2 - intersect)

View file

@ -14,7 +14,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa
from .make_cat import (HaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa from .make_cat import (HaloCatalogue, concatenate_clumps) # noqa
from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups, SDSS) # noqa TwoMPPGroups, SDSS) # noqa
from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa

View file

@ -340,6 +340,9 @@ class HaloCatalogue:
raise RuntimeError("Initial positions are not set!") raise RuntimeError("Initial positions are not set!")
return self._data[key] return self._data[key]
def __len__(self):
return self.data.size
def concatenate_clumps(clumps): def concatenate_clumps(clumps):
""" """
@ -377,41 +380,3 @@ def concatenate_clumps(clumps):
start = end start = end
return particles return particles
def clumps_pos2cell(clumps, overlapper):
"""
Convert clump positions directly to cell IDs. Useful to speed up subsequent
calculations. Overwrites the passed in arrays.
Parameters
----------
clumps : array of arrays
Array of clump structured arrays whose `x`, `y`, `z` keys will be
converted.
overlapper : py:class:`csiborgtools.match.ParticleOverlapper`
`ParticleOverlapper` handling the cell assignment.
Returns
-------
None
"""
# Check if clumps are probably already in cells
if any(clumps[0][0].dtype[p].char in numpy.typecodes["AllInteger"]
for p in ('x', 'y', 'z')):
raise ValueError("Positions appear to already be converted cells.")
# Get the new dtype that replaces float for int for positions
names = clumps[0][0].dtype.names # Take the first one, doesn't matter
formats = [descr[1] for descr in clumps[0][0].dtype.descr]
for i in range(len(names)):
if names[i] in ('x', 'y', 'z'):
formats[i] = numpy.int32
dtype = numpy.dtype({"names": names, "formats": formats})
# Loop switch positions for cells IDs and change dtype
for n in range(clumps.size):
for p in ('x', 'y', 'z'):
clumps[n][0][p] = overlapper.pos2cell(clumps[n][0][p])
clumps[n][0] = clumps[n][0].astype(dtype)

View file

@ -213,25 +213,29 @@ class PairOverlap:
.format(cat0.n_sim, catx.n_sim)) .format(cat0.n_sim, catx.n_sim))
# We can set catalogues already now even if inverted # We can set catalogues already now even if inverted
data = numpy.load(fpath, allow_pickle=True) d = numpy.load(fpath, allow_pickle=True)
ngp_overlap = d["ngp_overlap"]
smoothed_overlap = d["smoothed_overlap"]
match_indxs = d["match_indxs"]
if is_inverted: if is_inverted:
inv_match_indxs, inv_overlap = self._invert_match( indxs = d["cross_indxs"]
data["match_indxs"], data["overlap"], # Invert the matches
data["cross_indxs"].size,) match_indxs, ngp_overlap, smoothed_overlap = self._invert_match(
self._data = { match_indxs, ngp_overlap, smoothed_overlap, indxs.size,)
"index": data["cross_indxs"],
"match_indxs": inv_match_indxs,
"overlap": inv_overlap}
else: else:
indxs = d["ref_indxs"]
self._data = { self._data = {
"index": data["ref_indxs"], "index": indxs,
"match_indxs": data["match_indxs"], "match_indxs": match_indxs,
"overlap": data["overlap"]} "ngp_overlap": ngp_overlap,
"smoothed_overlap": smoothed_overlap,
}
self._make_refmask(min_mass, max_dist) self._make_refmask(min_mass, max_dist)
@staticmethod @staticmethod
def _invert_match(match_indxs, overlap, cross_size): def _invert_match(match_indxs, ngp_overlap, smoothed_overlap, cross_size):
""" """
Invert reference and cross matching, possible since the overlap Invert reference and cross matching, possible since the overlap
definition is symmetric. definition is symmetric.
@ -241,9 +245,12 @@ class PairOverlap:
match_indxs : array of 1-dimensional arrays match_indxs : array of 1-dimensional arrays
Indices of halos from the original cross catalogue matched to the Indices of halos from the original cross catalogue matched to the
reference catalogue. reference catalogue.
overlap : array of 1-dimensional arrays ngp_overlap : array of 1-dimensional arrays
Pair overlap of halos between the originla reference and cross NGP pair overlap of halos between the original reference and cross
simulations. simulations.
smoothed_overlap : array of 1-dimensional arrays
Smoothed pair overlap of halos between the original reference and
cross simulations.
cross_size : int cross_size : int
The size of the cross catalogue. The size of the cross catalogue.
@ -251,35 +258,46 @@ class PairOverlap:
------- -------
inv_match_indxs : array of 1-dimensional arrays inv_match_indxs : array of 1-dimensional arrays
The inverted match indices. The inverted match indices.
ind_overlap : array of 1-dimensional arrays ind_ngp_overlap : array of 1-dimensional arrays
The corresponding overlaps to `inv_match_indxs`. The corresponding NGP overlaps to `inv_match_indxs`.
ind_smoothed_overlap : array of 1-dimensional arrays
The corresponding smoothed overlaps to `inv_match_indxs`.
""" """
# 1. Invert the match. Each reference halo has a list of counterparts # 1. Invert the match. Each reference halo has a list of counterparts
# so loop over those to each counterpart assign a reference halo # so loop over those to each counterpart assign a reference halo
# and at the same time also add the overlaps # and at the same time also add the overlaps
inv_match_indxs = [[] for __ in range(cross_size)] inv_match_indxs = [[] for __ in range(cross_size)]
inv_overlap = [[] for __ in range(cross_size)] inv_ngp_overlap = [[] for __ in range(cross_size)]
inv_smoothed_overlap = [[] for __ in range(cross_size)]
for ref_id in range(match_indxs.size): for ref_id in range(match_indxs.size):
for cross_id, cross in zip(match_indxs[ref_id], overlap[ref_id]): for cross_id, ngp_cross, smoothed_cross in zip(match_indxs[ref_id],
ngp_overlap[ref_id],
smoothed_overlap[ref_id]): # noqa
inv_match_indxs[cross_id].append(ref_id) inv_match_indxs[cross_id].append(ref_id)
inv_overlap[cross_id].append(cross) inv_ngp_overlap[cross_id].append(ngp_cross)
inv_smoothed_overlap[cross_id].append(smoothed_cross)
# 2. Convert the cross matches and overlaps to proper numpy arrays # 2. Convert the cross matches and overlaps to proper numpy arrays
# and ensure that the overlaps are ordered. # and ensure that the overlaps are ordered.
for n in range(len(inv_match_indxs)): for n in range(len(inv_match_indxs)):
inv_match_indxs[n] = numpy.asanyarray(inv_match_indxs[n], inv_match_indxs[n] = numpy.asanyarray(inv_match_indxs[n],
dtype=numpy.int32) dtype=numpy.int32)
inv_overlap[n] = numpy.asanyarray(inv_overlap[n], inv_ngp_overlap[n] = numpy.asanyarray(inv_ngp_overlap[n],
dtype=numpy.float32)
inv_smoothed_overlap[n] = numpy.asanyarray(inv_smoothed_overlap[n],
dtype=numpy.float32) dtype=numpy.float32)
ordering = numpy.argsort(inv_overlap[n])[::-1] ordering = numpy.argsort(inv_ngp_overlap[n])[::-1]
inv_match_indxs[n] = inv_match_indxs[n][ordering] inv_match_indxs[n] = inv_match_indxs[n][ordering]
inv_overlap[n] = inv_overlap[n][ordering] inv_ngp_overlap[n] = inv_ngp_overlap[n][ordering]
inv_smoothed_overlap[n] = inv_smoothed_overlap[n][ordering]
inv_match_indxs = numpy.asarray(inv_match_indxs, dtype=object) inv_match_indxs = numpy.asarray(inv_match_indxs, dtype=object)
inv_overlap = numpy.asarray(inv_overlap, dtype=object) inv_ngp_overlap = numpy.asarray(inv_ngp_overlap, dtype=object)
inv_smoothed_overlap = numpy.asarray(inv_smoothed_overlap,
dtype=object)
return inv_match_indxs, inv_overlap return inv_match_indxs, inv_ngp_overlap, inv_smoothed_overlap
def _make_refmask(self, min_mass, max_dist): def _make_refmask(self, min_mass, max_dist):
r""" r"""
@ -304,34 +322,63 @@ class PairOverlap:
m = ((self.cat0()["totpartmass"] > min_mass) m = ((self.cat0()["totpartmass"] > min_mass)
& (self.cat0()["dist"] < max_dist)) & (self.cat0()["dist"] < max_dist))
# Now remove indices that are below this cut # Now remove indices that are below this cut
self._data["index"] = self._data["index"][m] for p in ("index", "match_indxs", "ngp_overlap", "smoothed_overlap"):
self._data["match_indxs"] = self._data["match_indxs"][m] self._data[p] = self._data[p][m]
self._data["overlap"] = self._data["overlap"][m]
self._data["refmask"] = m self._data["refmask"] = m
def summed_overlap(self): def overlap(self, from_smoothed):
"""
Pair overlap of matched halos between the reference and cross
simulations.
Parameters
----------
from_smoothed : bool
Whether to use the smoothed overlap.
Returns
-------
overlap : 1-dimensional array of arrays
"""
if from_smoothed:
return self["smoothed_overlap"]
return self["ngp_overlap"]
def summed_overlap(self, from_smoothed):
""" """
Summed overlap of each halo in the reference simulation with the cross Summed overlap of each halo in the reference simulation with the cross
simulation. simulation.
Parameters
----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
Returns Returns
------- -------
summed_overlap : 1-dimensional array of shape `(nhalos, )` summed_overlap : 1-dimensional array of shape `(nhalos, )`
""" """
return numpy.array([numpy.sum(cross) for cross in self["overlap"]]) overlap = self.overlap(from_smoothed)
return numpy.array([numpy.sum(cross)for cross in overlap])
def prob_nomatch(self): def prob_nomatch(self, from_smoothed):
""" """
Probability of no match for each halo in the reference simulation with Probability of no match for each halo in the reference simulation with
the cross simulation. Defined as a product of 1 - overlap with other the cross simulation. Defined as a product of 1 - overlap with other
halos. halos.
Parameters
----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
Returns Returns
------- -------
prob_nomatch : 1-dimensional array of shape `(nhalos, )` prob_nomatch : 1-dimensional array of shape `(nhalos, )`
""" """
return numpy.array( overlap = self.overlap(from_smoothed)
[numpy.product(1 - overlap) for overlap in self["overlap"]]) return numpy.array([numpy.product(1 - overlap) for overlap in overlap])
def dist(self, in_initial, norm_kind=None): def dist(self, in_initial, norm_kind=None):
""" """
@ -414,14 +461,16 @@ class PairOverlap:
ratio[i] = numpy.abs(ratio[i]) ratio[i] = numpy.abs(ratio[i])
return numpy.array(ratio, dtype=object) return numpy.array(ratio, dtype=object)
def counterpart_mass(self, overlap_threshold=0., in_log=False, def counterpart_mass(self, from_smoothed, overlap_threshold=0.,
mass_kind="totpartmass"): in_log=False, mass_kind="totpartmass"):
""" """
Calculate the expected counterpart mass of each halo in the reference Calculate the expected counterpart mass of each halo in the reference
simulation from the crossed simulation. simulation from the crossed simulation.
Parameters Parameters
----------- -----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
overlap_threshold : float, optional overlap_threshold : float, optional
Minimum overlap required for a halo to be considered a match. By Minimum overlap required for a halo to be considered a match. By
default 0.0, i.e. no threshold. default 0.0, i.e. no threshold.
@ -440,8 +489,8 @@ class PairOverlap:
mean = numpy.full(len(self), numpy.nan, dtype=numpy.float32) mean = numpy.full(len(self), numpy.nan, dtype=numpy.float32)
std = numpy.full(len(self), numpy.nan, dtype=numpy.float32) std = numpy.full(len(self), numpy.nan, dtype=numpy.float32)
massx = self.catx(mass_kind) # Create references to the arrays here massx = self.catx(mass_kind) # Create references to speed
overlap = self["overlap"] # to speed up the loop below. overlap = self.overlap(from_smoothed) # up the loop below
for i, match_ind in enumerate(self["match_indxs"]): for i, match_ind in enumerate(self["match_indxs"]):
# Skip if no match # Skip if no match
@ -538,9 +587,11 @@ class PairOverlap:
def __getitem__(self, key): def __getitem__(self, key):
""" """
Must be one of `index`, `match_indxs`, `overlap` or `refmask`. Must be one of `index`, `match_indxs`, `ngp_overlap`,
`smoothed_overlap` or `refmask`.
""" """
assert key in ("index", "match_indxs", "overlap", "refmask") assert key in ("index", "match_indxs", "ngp_overlap",
"smoothed_overlap", "refmask")
return self._data[key] return self._data[key]
def __len__(self): def __len__(self):
@ -574,14 +625,17 @@ class NPairsOverlap:
self._pairs = [PairOverlap(cat0, catx, fskel=fskel, min_mass=min_mass, self._pairs = [PairOverlap(cat0, catx, fskel=fskel, min_mass=min_mass,
max_dist=max_dist) for catx in catxs] max_dist=max_dist) for catx in catxs]
def summed_overlap(self, verbose=False): def summed_overlap(self, from_smoothed, verbose=False):
""" """
Summed overlap of each halo in the reference simulation with the cross Summed overlap of each halo in the reference simulation with the cross
simulations. simulations.
Parameters Parameters
---------- ----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
verbose : bool, optional verbose : bool, optional
Verbosity flag.
Returns Returns
------- -------
@ -589,17 +643,20 @@ class NPairsOverlap:
""" """
out = [None] * len(self) out = [None] * len(self)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs): for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
out[i] = pair.summed_overlap() out[i] = pair.summed_overlap(from_smoothed)
return numpy.vstack(out).T return numpy.vstack(out).T
def prob_nomatch(self, verbose=False): def prob_nomatch(self, from_smoothed, verbose=False):
""" """
Probability of no match for each halo in the reference simulation with Probability of no match for each halo in the reference simulation with
the cross simulation. the cross simulation.
Parameters Parameters
---------- ----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
verbose : bool, optional verbose : bool, optional
Verbosity flag.
Returns Returns
------- -------
@ -607,18 +664,20 @@ class NPairsOverlap:
""" """
out = [None] * len(self) out = [None] * len(self)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs): for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
out[i] = pair.prob_nomatch() out[i] = pair.prob_nomatch(from_smoothed)
return numpy.vstack(out).T return numpy.vstack(out).T
def counterpart_mass(self, overlap_threshold=0., in_log=False, def counterpart_mass(self, from_smoothed, overlap_threshold=0.,
mass_kind="totpartmass", return_full=True, in_log=False, mass_kind="totpartmass",
verbose=False): return_full=True, verbose=False):
""" """
Calculate the expected counterpart mass of each halo in the reference Calculate the expected counterpart mass of each halo in the reference
simulation from the crossed simulation. simulation from the crossed simulation.
Parameters Parameters
----------- -----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
overlap_threshold : float, optional overlap_threshold : float, optional
Minimum overlap required for a halo to be considered a match. By Minimum overlap required for a halo to be considered a match. By
default 0.0, i.e. no threshold. default 0.0, i.e. no threshold.
@ -647,11 +706,12 @@ class NPairsOverlap:
mus, stds = [None] * len(self), [None] * len(self) mus, stds = [None] * len(self), [None] * len(self)
for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs): for i, pair in enumerate(tqdm(self.pairs) if verbose else self.pairs):
mus[i], stds[i] = pair.counterpart_mass( mus[i], stds[i] = pair.counterpart_mass(
from_smoothed=from_smoothed,
overlap_threshold=overlap_threshold, in_log=in_log, overlap_threshold=overlap_threshold, in_log=in_log,
mass_kind=mass_kind) mass_kind=mass_kind)
mus, stds = numpy.vstack(mus).T, numpy.vstack(stds).T mus, stds = numpy.vstack(mus).T, numpy.vstack(stds).T
probmatch = 1 - self.prob_nomatch() # Prob of > 0 matches probmatch = 1 - self.prob_nomatch(from_smoothed) # Prob of > 0 matches
# Normalise it for weighted sums etc. # Normalise it for weighted sums etc.
norm_probmatch = numpy.apply_along_axis( norm_probmatch = numpy.apply_along_axis(
lambda x: x / numpy.sum(x), axis=1, arr=probmatch) lambda x: x / numpy.sum(x), axis=1, arr=probmatch)
@ -659,6 +719,7 @@ class NPairsOverlap:
# Mean and standard deviation of weighted stacked Gaussians # Mean and standard deviation of weighted stacked Gaussians
mu = numpy.sum(norm_probmatch * mus, axis=1) mu = numpy.sum(norm_probmatch * mus, axis=1)
std = numpy.sum(norm_probmatch * (mus**2 + stds**2), axis=1) - mu**2 std = numpy.sum(norm_probmatch * (mus**2 + stds**2), axis=1) - mu**2
std **= 0.5
if return_full: if return_full:
return mu, std, mus, stds return mu, std, mus, stds

View file

@ -13,6 +13,12 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from datetime import datetime
from .recarray_manip import (cols_to_structured, add_columns, rm_columns, # noqa from .recarray_manip import (cols_to_structured, add_columns, rm_columns, # noqa
list_to_ndarray, array_to_structured, # noqa list_to_ndarray, array_to_structured, # noqa
flip_cols, extract_from_structured) # noqa flip_cols, extract_from_structured) # noqa
def now(tz=None):
"""Shortcut to `datetime.datetime.now`."""
return datetime.now(tz=tz)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -12,14 +12,12 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
""" """A script to calculate overlap between two CSiBORG realisations."""
Script to test running the CSiBORG realisations matcher.
"""
import numpy
from argparse import ArgumentParser
from distutils.util import strtobool
from datetime import datetime
from os.path import join from os.path import join
from argparse import ArgumentParser
from datetime import datetime
import numpy
from scipy.ndimage import gaussian_filter
try: try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
@ -33,26 +31,59 @@ parser = ArgumentParser()
parser.add_argument("--nsim0", type=int) parser.add_argument("--nsim0", type=int)
parser.add_argument("--nsimx", type=int) parser.add_argument("--nsimx", type=int)
parser.add_argument("--nmult", type=float) parser.add_argument("--nmult", type=float)
parser.add_argument("--overlap", type=lambda x: bool(strtobool(x))) parser.add_argument("--sigma", type=float)
args = parser.parse_args() args = parser.parse_args()
# File paths # File paths
fout = join( fout = join(utils.dumpdir, "overlap",
utils.dumpdir, "overlap", "cross_{}_{}.npz".format(args.nsim0, args.nsimx)) "cross_{}_{}.npz".format(args.nsim0, args.nsimx))
smooth_kwargs = {"sigma": args.sigma, "mode": "constant", "cval": 0.0}
overlapper = csiborgtools.match.ParticleOverlap()
print("{}: loading catalogues.".format(datetime.now()), flush=True) # Load catalogues
print("{}: loading catalogues {} and {}."
.format(datetime.now(), args.nsim0, args.nsimx), flush=True)
cat0 = csiborgtools.read.HaloCatalogue(args.nsim0) cat0 = csiborgtools.read.HaloCatalogue(args.nsim0)
catx = csiborgtools.read.HaloCatalogue(args.nsimx) catx = csiborgtools.read.HaloCatalogue(args.nsimx)
matcher = csiborgtools.match.RealisationsMatcher()
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsim0), flush=True)
with open(cat0.paths.clump0_path(args.nsim0), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumps0)
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsimx), flush=True)
with open(catx.paths.clump0_path(args.nsimx), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumpsx)
print("{}: generating the background density fields.".format(datetime.now()),
flush=True)
delta_bckg = overlapper.make_bckg_delta(clumps0)
delta_bckg = overlapper.make_bckg_delta(clumpsx, delta=delta_bckg)
print("{}: crossing the simulations.".format(datetime.now()), flush=True) print("{}: crossing the simulations.".format(datetime.now()), flush=True)
ref_indxs, cross_indxs, match_indxs, overlap = matcher.cross( matcher = csiborgtools.match.RealisationsMatcher()
cat0, catx, overlap=args.overlap) ref_indxs, cross_indxs, match_indxs, ngp_overlap = matcher.cross(
cat0, catx, clumps0, clumpsx, delta_bckg)
print("{}: smoothing the background field.".format(datetime.now()), flush=True)
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
print("{}: calculating smoothed overlaps.".format(datetime.now()), flush=True)
smoothed_overlap = matcher.smoothed_cross(clumps0, clumpsx, delta_bckg,
ref_indxs, cross_indxs, match_indxs,
smooth_kwargs)
# Dump the result # Dump the result
print("Saving results to `{}`.".format(fout), flush=True) print("Saving results to `{}`.".format(fout), flush=True)
with open(fout, "wb") as f: with open(fout, "wb") as f:
numpy.savez(fout, ref_indxs=ref_indxs, cross_indxs=cross_indxs, numpy.savez(fout, ref_indxs=ref_indxs, cross_indxs=cross_indxs,
match_indxs=match_indxs, overlap=overlap) match_indxs=match_indxs, ngp_overlap=ngp_overlap,
smoothed_overlap=smoothed_overlap, sigma=args.sigma)
print("All finished.", flush=True) print("All finished.", flush=True)