Final overlap definition update.. (#29)

* Delete whitespace

* Update overlap definition

* Add documentation

* add overlap args

* Make the background array store only high density region

* Add nsims to args

* Remove conc as done inside the func

* Simplify the overlap calculation

* Rename variable

* Just some docs

* Docs

* Correct overlap definition

* Undo debug comment

* Remove old debug code

* Add prob of no match and exp couunterpart mass

* docs

* Update the overlap definition
This commit is contained in:
Richard Stiskalek 2023-03-09 14:55:38 +00:00 committed by GitHub
parent 1c859dbdac
commit f2cef73977
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 222 additions and 129 deletions

View file

@ -19,6 +19,7 @@ from tqdm import (tqdm, trange)
from datetime import datetime from datetime import datetime
from astropy.coordinates import SkyCoord from astropy.coordinates import SkyCoord
from numba import jit from numba import jit
from gc import collect
from ..read import (concatenate_clumps, clumps_pos2cell) from ..read import (concatenate_clumps, clumps_pos2cell)
@ -276,9 +277,10 @@ class RealisationsMatcher:
# Calculate the particle field # Calculate the particle field
if verbose: if verbose:
print("Creating and smoothing the crossed field.", flush=True) print("Creating and smoothing the crossed field.", flush=True)
delta = self.overlapper.make_delta(concatenate_clumps(clumpsx), delta_bckg0 = self.overlapper.make_background_delta(
to_smooth=False) clumps0, to_smooth=False)
delta = self.overlapper.smooth_highres(delta) delta_bckgx = self.overlapper.make_background_delta(
clumpsx, to_smooth=False)
# Min and max cells along each axis for each halo # Min and max cells along each axis for each halo
limkwargs = {"ncells": self.overlapper.inv_clength, limkwargs = {"ncells": self.overlapper.inv_clength,
@ -307,8 +309,8 @@ class RealisationsMatcher:
matchx = cat2clumpsx[ind] # Clump pos matching this halo matchx = cat2clumpsx[ind] # Clump pos matching this halo
clx = clumpsx["clump"][matchx] clx = clumpsx["clump"][matchx]
crosses[ii] = self.overlapper( crosses[ii] = self.overlapper(
cl0, clx, delta, mins0_current, maxs0_current, cl0, clx, delta_bckg0, delta_bckgx, mins0_current,
minsx[matchx], maxsx[matchx], maxs0_current, minsx[matchx], maxsx[matchx],
mass1=mass0, mass2=numpy.sum(clx['M'])) mass1=mass0, mass2=numpy.sum(clx['M']))
cross[k] = crosses cross[k] = crosses
@ -365,9 +367,6 @@ class ParticleOverlap:
Parameters Parameters
---------- ----------
inv_clength : float, optional
Inverse cell length in box units. By default :math:`2^11`, which
matches the initial RAMSES grid resolution.
nshift : int, optional nshift : int, optional
Number of cells by which to shift the subbox from the outside-most Number of cells by which to shift the subbox from the outside-most
cell containing a particle. By default 5. cell containing a particle. By default 5.
@ -381,8 +380,10 @@ class ParticleOverlap:
_clength = None _clength = None
_nshift = None _nshift = None
def __init__(self, inv_clength=2**11, smooth_scale=None, nshift=5): def __init__(self, smooth_scale=None, nshift=5):
self.inv_clength = inv_clength # Inverse cell length in box units. By default :math:`2^11`, which
# matches the initial RAMSES grid resolution.
self.inv_clength = 2**11
self.smooth_scale = smooth_scale self.smooth_scale = smooth_scale
self.nshift = nshift self.nshift = nshift
@ -445,32 +446,47 @@ class ParticleOverlap:
return pos return pos
return numpy.floor(pos * self.inv_clength).astype(int) return numpy.floor(pos * self.inv_clength).astype(int)
def smooth_highres(self, delta): def make_background_delta(self, clumps, to_smooth=True):
""" """
Smooth the central region of a full box density field. Note that if Calculate a NGP density field of clumps within the central
`self.smooth_scale` is `None` then quietly exits the function. :math:`1/2^3` region of the simulation.
Parameters Parameters
---------- ----------
delta : 3-dimensional array clumps : list of structured arrays
List of clump structured array, keys must include `x`, `y`, `z`
and `M`.
to_smooth : bool, optional
Explicit control over whether to smooth. By default `True`.
Returns Returns
------- -------
smooth_delta : 3-dimensional arrray delta : 3-dimensional array
""" """
if self.smooth_scale is None: conc_clumps = concatenate_clumps(clumps)
return delta cells = [self.pos2cell(conc_clumps[p]) for p in ('x', 'y', 'z')]
msg = "Shape of `delta` must match the entire box." mass = conc_clumps['M']
assert delta.shape == (self._inv_clength,)*3, msg
# Subselect only the high-resolution region del conc_clumps
start = self._inv_clength // 4 collect() # This is a large array so force memory clean
end = start * 3
highres = delta[start:end, start:end, start:end] cellmin = self.inv_clength // 4 # The minimum cell ID
# Smoothen it cellmax = 3 * self.inv_clength // 4 # The maximum cell ID
gaussian_filter(highres, self.smooth_scale, output=highres) ncells = cellmax - cellmin
# Put things back into the original array # Mask out particles outside the cubical high resolution region
delta[start:end, start:end, start:end] = highres mask = ((cellmin <= cells[0]) & (cells[0] < cellmax)
& (cellmin <= cells[1]) & (cells[1] < cellmax)
& (cellmin <= cells[2]) & (cells[2] < cellmax)
)
cells = [c[mask] for c in cells]
mass = mass[mask]
# Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
fill_delta(delta, *cells, *(cellmin,) * 3, mass)
if to_smooth and self.smooth_scale is not None:
gaussian_filter(delta, self.smooth_scale, output=delta)
return delta return delta
def make_delta(self, clump, mins=None, maxs=None, subbox=False, def make_delta(self, clump, mins=None, maxs=None, subbox=False,
@ -540,6 +556,9 @@ class ParticleOverlap:
mins2, maxs2 : 1-dimensional arrays of shape `(3,)` mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump2`. Minimun and maximum cell numbers along each dimension of `clump2`.
Optional. Optional.
return_nonzero1 : bool, optional
Whether to return the indices where the contribution of `clump1` is
non-zero.
Returns Returns
------- -------
@ -593,35 +612,12 @@ class ParticleOverlap:
return delta1, delta2, cellmins, nonzero1 return delta1, delta2, cellmins, nonzero1
@staticmethod def __call__(self, clump1, clump2, delta1_bckg, delta2_bckg,
def overlap(delta1, delta2, cellmins, delta2_full): mins1=None, maxs1=None, mins2=None, maxs2=None,
r""" mass1=None, mass2=None, loop_nonzero=True):
Overlap between two clumps whose density fields are evaluated on the
same grid.
Parameters
----------
delta1, delta2 : 3-dimensional arrays
Clumps density fields.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta2_full : 3-dimensional array
Density field of the whole box calculated with particles assigned
to halos at zero redshift.
Returns
-------
overlap : float
"""
return calculate_overlap(delta1, delta2, cellmins, delta2_full)
def __call__(self, clump1, clump2, delta2_full, mins1=None, maxs1=None,
mins2=None, maxs2=None, mass1=None, mass2=None,
loop_nonzero=True):
""" """
Calculate overlap between `clump1` and `clump2`. See Calculate overlap between `clump1` and `clump2`. See
`self.overlap(...)` and `self.make_deltas(...)` for further `calculate_overlap(...)` for further information.
information.
Parameters Parameters
---------- ----------
@ -630,9 +626,10 @@ class ParticleOverlap:
must include `x`, `y`, `z` and `M`. must include `x`, `y`, `z` and `M`.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
delta2_full : 3-dimensional array delta1_bcgk, delta2_bckg : 3-dimensional arrays
Density field of the whole box calculated with particles assigned Background density fields of the reference and cross boxes
to halos at zero redshift. calculated with particles assigned to halos at the final snapshot.
Assumed to only be sampled in cells :math:`[512, 1536)^3`.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)` mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump1`. Minimun and maximum cell numbers along each dimension of `clump1`.
Optional. Optional.
@ -655,10 +652,14 @@ class ParticleOverlap:
return_nonzero1=loop_nonzero) return_nonzero1=loop_nonzero)
if not loop_nonzero: if not loop_nonzero:
return calculate_overlap(delta1, delta2, cellmins, delta2_full) return calculate_overlap(delta1, delta2, cellmins,
delta1_bckg, delta2_bckg)
return calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full, # Calculate masses not given
nonzero1, mass1, mass2) mass1 = numpy.sum(clump1['M']) if mass1 is None else mass1
mass2 = numpy.sum(clump2['M']) if mass2 is None else mass2
return calculate_overlap_indxs(delta1, delta2, cellmins, delta1_bckg,
delta2_bckg, nonzero1, mass1, mass2)
@jit(nopython=True) @jit(nopython=True)
@ -760,7 +761,7 @@ def get_clumplims(clumps, ncells, nshift=None):
@jit(nopython=True) @jit(nopython=True)
def calculate_overlap(delta1, delta2, cellmins, delta2_full): def calculate_overlap(delta1, delta2, cellmins, delta1_bckg, delta2_bckg):
r""" r"""
Overlap between two clumps whose density fields are evaluated on the Overlap between two clumps whose density fields are evaluated on the
same grid. This is a JIT implementation, hence it is outside of the main same grid. This is a JIT implementation, hence it is outside of the main
@ -772,9 +773,10 @@ def calculate_overlap(delta1, delta2, cellmins, delta2_full):
Clumps density fields. Clumps density fields.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
delta2_full : 3-dimensional array delta1_bcgk, delta2_bckg : 3-dimensional arrays
Density field of the whole box calculated with particles assigned Background density fields of the reference and cross boxes calculated
to halos at zero redshift. with particles assigned to halos at the final snapshot. Assumed to only
be sampled in cells :math:`[512, 1536)^3`.
Returns Returns
------- -------
@ -782,36 +784,41 @@ def calculate_overlap(delta1, delta2, cellmins, delta2_full):
""" """
totmass = 0. # Total mass of clump 1 and clump 2 totmass = 0. # Total mass of clump 1 and clump 2
intersect = 0. # Mass of pixels that are non-zero in both clumps intersect = 0. # Mass of pixels that are non-zero in both clumps
weight = 0. # Weight to account for other halos
count = 0 # Total number of pixels that are both non-zero
i0, j0, k0 = cellmins # Unpack things i0, j0, k0 = cellmins # Unpack things
bckg_offset = 512 # Offset of the background density field
bckg_size = 1024
imax, jmax, kmax = delta1.shape imax, jmax, kmax = delta1.shape
for i in range(imax): for i in range(imax):
ii = i0 + i ii = i0 + i - bckg_offset
flag = 0 <= ii < bckg_size
for j in range(jmax): for j in range(jmax):
jj = j0 + j jj = j0 + j - bckg_offset
flag &= 0 <= jj < bckg_size
for k in range(kmax): for k in range(kmax):
kk = k0 + k kk = k0 + k - bckg_offset
flag &= 0 <= kk < bckg_size
cell1, cell2 = delta1[i, j, k], delta2[i, j, k] cell1, cell2 = delta1[i, j, k], delta2[i, j, k]
cell = cell1 + cell2
totmass += cell
# If both are zero then skip # If both are zero then skip
if cell1 * cell2 > 0: if cell1 * cell2 > 0:
intersect += cell if flag:
weight += cell2 / delta2_full[ii, jj, kk] weight1 = cell1 / delta1_bckg[ii, jj, kk]
count += 1 weight2 = cell2 / delta2_bckg[ii, jj, kk]
else:
weight1 = 1.
weight2 = 1.
# Average weighted mass in the cell
intersect += 0.5 * (weight1 * cell1 + weight2 * cell2)
# Normalise the intersect and weights totmass += cell1 + cell2
intersect *= 0.5
weight = weight / count if count > 0 else 0. return intersect / (totmass - intersect)
return weight * intersect / (totmass - intersect)
@jit(nopython=True) @jit(nopython=True)
def calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full, nonzero1, def calculate_overlap_indxs(delta1, delta2, cellmins, delta1_bckg, delta2_bckg,
mass1, mass2): nonzero1, mass1, mass2):
r""" r"""
Overlap between two clumps whose density fields are evaluated on the Overlap between two clumps whose density fields are evaluated on the
same grid and `nonzero1` enumerates the non-zero cells of `delta1. This is same grid and `nonzero1` enumerates the non-zero cells of `delta1. This is
@ -823,9 +830,10 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full, nonzero1,
Clumps density fields. Clumps density fields.
cellmins : len-3 tuple cellmins : len-3 tuple
Tuple of left-most cell ID in the full box. Tuple of left-most cell ID in the full box.
delta2_full : 3-dimensional array delta1_bcgk, delta2_bckg : 3-dimensional arrays
Density field of the whole box calculated with particles assigned Background density fields of the reference and cross boxes calculated
to halos at zero redshift. with particles assigned to halos at the final snapshot. Assumed to only
be sampled in cells :math:`[512, 1536)^3`.
nonzero1 : 2-dimensional array of shape `(n_cells, 3)` nonzero1 : 2-dimensional array of shape `(n_cells, 3)`
Indices of cells that are non-zero in `delta1`. Expected to be Indices of cells that are non-zero in `delta1`. Expected to be
precomputed from `fill_delta_indxs`. precomputed from `fill_delta_indxs`.
@ -837,27 +845,36 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full, nonzero1,
------- -------
overlap : float overlap : float
""" """
totmass = mass1 + mass2 # Total mass of clump 1 and clump 2 intersect = 0. # Mass of pixels that are non-zero in both clumps
intersect = 0. # Mass of pixels that are non-zero in both clumps i0, j0, k0 = cellmins # Unpack cell minimas
weight = 0. # Weight to account for other halos bckg_offset = 512 # Offset of the background density field
count = 0 # Total number of pixels that are both non-zero bckg_size = 1024 # Size of the background density field array
i0, j0, k0 = cellmins # Unpack cell minimas
ncells = nonzero1.shape[0] for n in range(nonzero1.shape[0]):
for n in range(ncells):
i, j, k = nonzero1[n, :] i, j, k = nonzero1[n, :]
cell1, cell2 = delta1[i, j, k], delta2[i, j, k] cell2 = delta2[i, j, k]
if cell2 > 0: # We already know that cell1 is non-zero if cell2 > 0: # We already know that cell1 is non-zero
intersect += cell1 + cell2 cell1 = delta1[i, j, k] # Now unpack cell1 as well
weight += cell2 / delta2_full[i0 + i, j0 + j, k0 + k] ii = i0 + i - bckg_offset # Indices of this cell in the
count += 1 jj = j0 + j - bckg_offset # background density field.
kk = k0 + k - bckg_offset
# Normalise the intersect and weights flag = 0 <= ii < bckg_size # Whether this cell is in the high
intersect *= 0.5 flag &= 0 <= jj < bckg_size # resolution region for which the
weight = weight / count if count > 0 else 0. flag &= 0 <= kk < bckg_size # background density is calculated.
return weight * intersect / (totmass - intersect)
if flag:
weight1 = cell1 / delta1_bckg[ii, jj, kk]
weight2 = cell2 / delta2_bckg[ii, jj, kk]
else:
weight1 = 1.
weight2 = 1.
# Average weighted mass in the cell
intersect += 0.5 * (weight1 * cell1 + weight2 * cell2)
return intersect / (mass1 + mass2 - intersect)
def dist_centmass(clump): def dist_centmass(clump):

View file

@ -18,6 +18,7 @@ Tools for summarising various results.
import numpy import numpy
import joblib import joblib
from tqdm import tqdm from tqdm import tqdm
from .make_cat import HaloCatalogue
class PKReader: class PKReader:
@ -170,16 +171,24 @@ class PKReader:
class OverlapReader: class OverlapReader:
""" """
TODO: docs A shortcut object for reading in the results of matching two simulations.
Parameters
----------
nsim0 : int
The reference simulation ID.
nsimx : int
The cross simulation ID.
fskel : str, optional
Path to the overlap. By default `None`, i.e.
`/mnt/extraspace/rstiskalek/csiborg/overlap/cross_{}_{}.npz`.
""" """
def __init__(self, nsim0, nsimx, fskel=None):
def __init__(self, nsim0, nsimx, cat0, catx, fskel=None):
if fskel is None: if fskel is None:
fskel = "/mnt/extraspace/rstiskalek/csiborg/overlap/" fskel = "/mnt/extraspace/rstiskalek/csiborg/overlap/"
fskel += "cross_{}_{}.npz" fskel += "cross_{}_{}.npz"
self._data = numpy.load(fskel.format(nsim0, nsimx), allow_pickle=True) self._data = numpy.load(fskel.format(nsim0, nsimx), allow_pickle=True)
self._set_cats(nsim0, nsimx, cat0, catx) self._set_cats(nsim0, nsimx)
@property @property
def nsim0(self): def nsim0(self):
@ -225,7 +234,7 @@ class OverlapReader:
""" """
return self._catx return self._catx
def _set_cats(self, nsim0, nsimx, cat0, catx): def _set_cats(self, nsim0, nsimx):
""" """
Set the simulation IDs and catalogues. Set the simulation IDs and catalogues.
@ -233,18 +242,15 @@ class OverlapReader:
---------- ----------
nsim0, nsimx : int nsim0, nsimx : int
The reference and cross simulation IDs. The reference and cross simulation IDs.
cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogues corresponding to `nsim0` and `nsimx`, respectively.
Returns Returns
------- -------
None None
""" """
assert (nsim0 == cat0.paths.n_sim) & (nsimx == catx.paths.n_sim)
self._nsim0 = nsim0 self._nsim0 = nsim0
self._nsimx = nsimx self._nsimx = nsimx
self._cat0 = cat0 self._cat0 = HaloCatalogue(nsim0)
self._catx = catx self._catx = HaloCatalogue(nsimx)
@property @property
def indxs(self): def indxs(self):
@ -351,15 +357,11 @@ class OverlapReader:
Summed overlap of each halo in the reference simulation with the cross Summed overlap of each halo in the reference simulation with the cross
simulation. simulation.
Parameters
----------
None
Returns Returns
------- -------
summed_overlap : 1-dimensional array of shape `(nhalos, )` summed_overlap : 1-dimensional array of shape `(nhalos, )`
""" """
return numpy.array([numpy.sum(cross) for cross in self._data["cross"]]) return numpy.array([numpy.sum(cross) for cross in self.overlap])
def copy_per_match(self, par): def copy_per_match(self, par):
""" """
@ -381,6 +383,80 @@ class OverlapReader:
out[n] = numpy.ones(ind.size) * self.cat0[par][n] out[n] = numpy.ones(ind.size) * self.cat0[par][n]
return numpy.array(out, dtype=object) return numpy.array(out, dtype=object)
def prob_nomatch(self):
"""
Probability of no match for each halo in the reference simulation with
the cross simulation. Defined as a product of 1 - overlap with other
halos.
Returns
-------
out : 1-dimensional array of shape `(nhalos, )`
"""
return numpy.array(
[numpy.product(1 - overlap) for overlap in self.overlap])
def expected_counterpart_mass(self, overlap_threshold=0., in_log=False,
mass_kind="totpartmass"):
"""
Calculate the expected counterpart mass of each halo in the reference
simulation from the crossed simulation.
Parameters
-----------
overlap_threshold : float, optional
Minimum overlap required for a halo to be considered a match. By
default 0.0, i.e. no threshold.
in_log : bool, optional
Whether to calculate the expectation value in log space. By default
`False`.
mass_kind : str, optional
The mass kind whose ratio is to be calculated. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
Returns
-------
mean, std : 1-dimensional arrays of shape `(nhalos, )`
"""
nhalos = self.indxs.size
mean = numpy.full(nhalos, numpy.nan) # Preallocate output arrays
std = numpy.full(nhalos, numpy.nan)
massx = self.catx[mass_kind] # Create references to the arrays here
overlap = self.overlap # to speed up the loop below.
# Is the iterator verbose?
for n, match_ind in enumerate((self.match_indxs)):
# Skip if no match
if match_ind.size == 0:
continue
massx_ = massx[match_ind] # Again just create references
overlap_ = overlap[n] # to the appropriate elements
# Optionally apply overlap threshold
if overlap_threshold > 0.:
mask = overlap_ > overlap_threshold
if numpy.sum(mask) == 0:
continue
massx_ = massx_[mask]
overlap_ = overlap_[mask]
massx_ = numpy.log10(massx_) if in_log else massx_
# Weighted average and *biased* standard deviation
mean_ = numpy.average(massx_, weights=overlap_)
std_ = numpy.average((massx_ - mean_)**2, weights=overlap_)**0.5
# If in log, convert back to linear
mean_ = 10**mean_ if in_log else mean_
std_ = mean_ * std_ * numpy.log(10) if in_log else std_
mean[n] = mean_
std[n] = std_
return mean, std
def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42): def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42):
""" """
@ -430,4 +506,3 @@ def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42):
bin_centres = (bins[1:] + bins[:-1]) / 2 bin_centres = (bins[1:] + bins[:-1]) / 2
return bin_centres, stat return bin_centres, stat

View file

@ -30,25 +30,26 @@ import utils
# Argument parser # Argument parser
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--nsim0", type=int)
parser.add_argument("--nsimx", type=int)
parser.add_argument("--nmult", type=float) parser.add_argument("--nmult", type=float)
parser.add_argument("--overlap", type=lambda x: bool(strtobool(x))) parser.add_argument("--overlap", type=lambda x: bool(strtobool(x)))
args = parser.parse_args() args = parser.parse_args()
# File paths # File paths
nsim0 = 7468 fout = join(
nsimx = 7588 utils.dumpdir, "overlap", "cross_{}_{}.npz".format(args.nsim0, args.nsimx))
fperm = join(utils.dumpdir, "overlap", "cross_{}_{}.npy")
print("{}: loading catalogues.".format(datetime.now()), flush=True) print("{}: loading catalogues.".format(datetime.now()), flush=True)
cat0 = csiborgtools.read.HaloCatalogue(nsim0) cat0 = csiborgtools.read.HaloCatalogue(args.nsim0)
catx = csiborgtools.read.HaloCatalogue(nsimx) catx = csiborgtools.read.HaloCatalogue(args.nsimx)
matcher = csiborgtools.match.RealisationsMatcher() matcher = csiborgtools.match.RealisationsMatcher()
print("{}: crossing the simulations.".format(datetime.now()), flush=True) print("{}: crossing the simulations.".format(datetime.now()), flush=True)
indxs, match_indxs, cross = matcher.cross( indxs, match_indxs, cross = matcher.cross(
nsim0, nsimx, cat0, catx, overlap=False) args.nsim0, args.nsimx, cat0, catx, overlap=args.overlap)
# Dump the result # Dump the result
fout = fperm.format(nsim0, nsimx)
print("Saving results to `{}`.".format(fout), flush=True) print("Saving results to `{}`.".format(fout), flush=True)
with open(fout, "wb") as f: with open(fout, "wb") as f:
numpy.savez(fout, indxs=indxs, match_indxs=match_indxs, cross=cross) numpy.savez(fout, indxs=indxs, match_indxs=match_indxs, cross=cross)