Final overlap definition update.. (#29)

* Delete whitespace

* Update overlap definition

* Add documentation

* add overlap args

* Make the background array store only high density region

* Add nsims to args

* Remove conc as done inside the func

* Simplify the overlap calculation

* Rename variable

* Just some docs

* Docs

* Correct overlap definition

* Undo debug comment

* Remove old debug code

* Add prob of no match and exp couunterpart mass

* docs

* Update the overlap definition
This commit is contained in:
Richard Stiskalek 2023-03-09 14:55:38 +00:00 committed by GitHub
parent 1c859dbdac
commit f2cef73977
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 222 additions and 129 deletions

View File

@ -19,6 +19,7 @@ from tqdm import (tqdm, trange)
from datetime import datetime
from astropy.coordinates import SkyCoord
from numba import jit
from gc import collect
from ..read import (concatenate_clumps, clumps_pos2cell)
@ -276,9 +277,10 @@ class RealisationsMatcher:
# Calculate the particle field
if verbose:
print("Creating and smoothing the crossed field.", flush=True)
delta = self.overlapper.make_delta(concatenate_clumps(clumpsx),
to_smooth=False)
delta = self.overlapper.smooth_highres(delta)
delta_bckg0 = self.overlapper.make_background_delta(
clumps0, to_smooth=False)
delta_bckgx = self.overlapper.make_background_delta(
clumpsx, to_smooth=False)
# Min and max cells along each axis for each halo
limkwargs = {"ncells": self.overlapper.inv_clength,
@ -307,8 +309,8 @@ class RealisationsMatcher:
matchx = cat2clumpsx[ind] # Clump pos matching this halo
clx = clumpsx["clump"][matchx]
crosses[ii] = self.overlapper(
cl0, clx, delta, mins0_current, maxs0_current,
minsx[matchx], maxsx[matchx],
cl0, clx, delta_bckg0, delta_bckgx, mins0_current,
maxs0_current, minsx[matchx], maxsx[matchx],
mass1=mass0, mass2=numpy.sum(clx['M']))
cross[k] = crosses
@ -365,9 +367,6 @@ class ParticleOverlap:
Parameters
----------
inv_clength : float, optional
Inverse cell length in box units. By default :math:`2^11`, which
matches the initial RAMSES grid resolution.
nshift : int, optional
Number of cells by which to shift the subbox from the outside-most
cell containing a particle. By default 5.
@ -381,8 +380,10 @@ class ParticleOverlap:
_clength = None
_nshift = None
def __init__(self, inv_clength=2**11, smooth_scale=None, nshift=5):
self.inv_clength = inv_clength
def __init__(self, smooth_scale=None, nshift=5):
# Inverse cell length in box units. By default :math:`2^11`, which
# matches the initial RAMSES grid resolution.
self.inv_clength = 2**11
self.smooth_scale = smooth_scale
self.nshift = nshift
@ -445,32 +446,47 @@ class ParticleOverlap:
return pos
return numpy.floor(pos * self.inv_clength).astype(int)
def smooth_highres(self, delta):
def make_background_delta(self, clumps, to_smooth=True):
"""
Smooth the central region of a full box density field. Note that if
`self.smooth_scale` is `None` then quietly exits the function.
Calculate a NGP density field of clumps within the central
:math:`1/2^3` region of the simulation.
Parameters
----------
delta : 3-dimensional array
clumps : list of structured arrays
List of clump structured array, keys must include `x`, `y`, `z`
and `M`.
to_smooth : bool, optional
Explicit control over whether to smooth. By default `True`.
Returns
-------
smooth_delta : 3-dimensional arrray
delta : 3-dimensional array
"""
if self.smooth_scale is None:
return delta
msg = "Shape of `delta` must match the entire box."
assert delta.shape == (self._inv_clength,)*3, msg
conc_clumps = concatenate_clumps(clumps)
cells = [self.pos2cell(conc_clumps[p]) for p in ('x', 'y', 'z')]
mass = conc_clumps['M']
# Subselect only the high-resolution region
start = self._inv_clength // 4
end = start * 3
highres = delta[start:end, start:end, start:end]
# Smoothen it
gaussian_filter(highres, self.smooth_scale, output=highres)
# Put things back into the original array
delta[start:end, start:end, start:end] = highres
del conc_clumps
collect() # This is a large array so force memory clean
cellmin = self.inv_clength // 4 # The minimum cell ID
cellmax = 3 * self.inv_clength // 4 # The maximum cell ID
ncells = cellmax - cellmin
# Mask out particles outside the cubical high resolution region
mask = ((cellmin <= cells[0]) & (cells[0] < cellmax)
& (cellmin <= cells[1]) & (cells[1] < cellmax)
& (cellmin <= cells[2]) & (cells[2] < cellmax)
)
cells = [c[mask] for c in cells]
mass = mass[mask]
# Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
fill_delta(delta, *cells, *(cellmin,) * 3, mass)
if to_smooth and self.smooth_scale is not None:
gaussian_filter(delta, self.smooth_scale, output=delta)
return delta
def make_delta(self, clump, mins=None, maxs=None, subbox=False,
@ -540,6 +556,9 @@ class ParticleOverlap:
mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump2`.
Optional.
return_nonzero1 : bool, optional
Whether to return the indices where the contribution of `clump1` is
non-zero.
Returns
-------
@ -593,35 +612,12 @@ class ParticleOverlap:
return delta1, delta2, cellmins, nonzero1
@staticmethod
def overlap(delta1, delta2, cellmins, delta2_full):
r"""
Overlap between two clumps whose density fields are evaluated on the
same grid.
Parameters
----------
delta1, delta2 : 3-dimensional arrays
Clumps density fields.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta2_full : 3-dimensional array
Density field of the whole box calculated with particles assigned
to halos at zero redshift.
Returns
-------
overlap : float
"""
return calculate_overlap(delta1, delta2, cellmins, delta2_full)
def __call__(self, clump1, clump2, delta2_full, mins1=None, maxs1=None,
mins2=None, maxs2=None, mass1=None, mass2=None,
loop_nonzero=True):
def __call__(self, clump1, clump2, delta1_bckg, delta2_bckg,
mins1=None, maxs1=None, mins2=None, maxs2=None,
mass1=None, mass2=None, loop_nonzero=True):
"""
Calculate overlap between `clump1` and `clump2`. See
`self.overlap(...)` and `self.make_deltas(...)` for further
information.
`calculate_overlap(...)` for further information.
Parameters
----------
@ -630,9 +626,10 @@ class ParticleOverlap:
must include `x`, `y`, `z` and `M`.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta2_full : 3-dimensional array
Density field of the whole box calculated with particles assigned
to halos at zero redshift.
delta1_bcgk, delta2_bckg : 3-dimensional arrays
Background density fields of the reference and cross boxes
calculated with particles assigned to halos at the final snapshot.
Assumed to only be sampled in cells :math:`[512, 1536)^3`.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension of `clump1`.
Optional.
@ -655,10 +652,14 @@ class ParticleOverlap:
return_nonzero1=loop_nonzero)
if not loop_nonzero:
return calculate_overlap(delta1, delta2, cellmins, delta2_full)
return calculate_overlap(delta1, delta2, cellmins,
delta1_bckg, delta2_bckg)
return calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full,
nonzero1, mass1, mass2)
# Calculate masses not given
mass1 = numpy.sum(clump1['M']) if mass1 is None else mass1
mass2 = numpy.sum(clump2['M']) if mass2 is None else mass2
return calculate_overlap_indxs(delta1, delta2, cellmins, delta1_bckg,
delta2_bckg, nonzero1, mass1, mass2)
@jit(nopython=True)
@ -760,7 +761,7 @@ def get_clumplims(clumps, ncells, nshift=None):
@jit(nopython=True)
def calculate_overlap(delta1, delta2, cellmins, delta2_full):
def calculate_overlap(delta1, delta2, cellmins, delta1_bckg, delta2_bckg):
r"""
Overlap between two clumps whose density fields are evaluated on the
same grid. This is a JIT implementation, hence it is outside of the main
@ -772,9 +773,10 @@ def calculate_overlap(delta1, delta2, cellmins, delta2_full):
Clumps density fields.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta2_full : 3-dimensional array
Density field of the whole box calculated with particles assigned
to halos at zero redshift.
delta1_bcgk, delta2_bckg : 3-dimensional arrays
Background density fields of the reference and cross boxes calculated
with particles assigned to halos at the final snapshot. Assumed to only
be sampled in cells :math:`[512, 1536)^3`.
Returns
-------
@ -782,36 +784,41 @@ def calculate_overlap(delta1, delta2, cellmins, delta2_full):
"""
totmass = 0. # Total mass of clump 1 and clump 2
intersect = 0. # Mass of pixels that are non-zero in both clumps
weight = 0. # Weight to account for other halos
count = 0 # Total number of pixels that are both non-zero
i0, j0, k0 = cellmins # Unpack things
bckg_offset = 512 # Offset of the background density field
bckg_size = 1024
imax, jmax, kmax = delta1.shape
for i in range(imax):
ii = i0 + i
ii = i0 + i - bckg_offset
flag = 0 <= ii < bckg_size
for j in range(jmax):
jj = j0 + j
jj = j0 + j - bckg_offset
flag &= 0 <= jj < bckg_size
for k in range(kmax):
kk = k0 + k
kk = k0 + k - bckg_offset
flag &= 0 <= kk < bckg_size
cell1, cell2 = delta1[i, j, k], delta2[i, j, k]
cell = cell1 + cell2
totmass += cell
# If both are zero then skip
if cell1 * cell2 > 0:
intersect += cell
weight += cell2 / delta2_full[ii, jj, kk]
count += 1
if flag:
weight1 = cell1 / delta1_bckg[ii, jj, kk]
weight2 = cell2 / delta2_bckg[ii, jj, kk]
else:
weight1 = 1.
weight2 = 1.
# Average weighted mass in the cell
intersect += 0.5 * (weight1 * cell1 + weight2 * cell2)
# Normalise the intersect and weights
intersect *= 0.5
weight = weight / count if count > 0 else 0.
return weight * intersect / (totmass - intersect)
totmass += cell1 + cell2
return intersect / (totmass - intersect)
@jit(nopython=True)
def calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full, nonzero1,
mass1, mass2):
def calculate_overlap_indxs(delta1, delta2, cellmins, delta1_bckg, delta2_bckg,
nonzero1, mass1, mass2):
r"""
Overlap between two clumps whose density fields are evaluated on the
same grid and `nonzero1` enumerates the non-zero cells of `delta1. This is
@ -823,9 +830,10 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full, nonzero1,
Clumps density fields.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta2_full : 3-dimensional array
Density field of the whole box calculated with particles assigned
to halos at zero redshift.
delta1_bcgk, delta2_bckg : 3-dimensional arrays
Background density fields of the reference and cross boxes calculated
with particles assigned to halos at the final snapshot. Assumed to only
be sampled in cells :math:`[512, 1536)^3`.
nonzero1 : 2-dimensional array of shape `(n_cells, 3)`
Indices of cells that are non-zero in `delta1`. Expected to be
precomputed from `fill_delta_indxs`.
@ -837,27 +845,36 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta2_full, nonzero1,
-------
overlap : float
"""
totmass = mass1 + mass2 # Total mass of clump 1 and clump 2
intersect = 0. # Mass of pixels that are non-zero in both clumps
weight = 0. # Weight to account for other halos
count = 0 # Total number of pixels that are both non-zero
i0, j0, k0 = cellmins # Unpack cell minimas
intersect = 0. # Mass of pixels that are non-zero in both clumps
i0, j0, k0 = cellmins # Unpack cell minimas
bckg_offset = 512 # Offset of the background density field
bckg_size = 1024 # Size of the background density field array
ncells = nonzero1.shape[0]
for n in range(ncells):
for n in range(nonzero1.shape[0]):
i, j, k = nonzero1[n, :]
cell1, cell2 = delta1[i, j, k], delta2[i, j, k]
cell2 = delta2[i, j, k]
if cell2 > 0: # We already know that cell1 is non-zero
intersect += cell1 + cell2
weight += cell2 / delta2_full[i0 + i, j0 + j, k0 + k]
count += 1
cell1 = delta1[i, j, k] # Now unpack cell1 as well
ii = i0 + i - bckg_offset # Indices of this cell in the
jj = j0 + j - bckg_offset # background density field.
kk = k0 + k - bckg_offset
# Normalise the intersect and weights
intersect *= 0.5
weight = weight / count if count > 0 else 0.
return weight * intersect / (totmass - intersect)
flag = 0 <= ii < bckg_size # Whether this cell is in the high
flag &= 0 <= jj < bckg_size # resolution region for which the
flag &= 0 <= kk < bckg_size # background density is calculated.
if flag:
weight1 = cell1 / delta1_bckg[ii, jj, kk]
weight2 = cell2 / delta2_bckg[ii, jj, kk]
else:
weight1 = 1.
weight2 = 1.
# Average weighted mass in the cell
intersect += 0.5 * (weight1 * cell1 + weight2 * cell2)
return intersect / (mass1 + mass2 - intersect)
def dist_centmass(clump):

View File

@ -18,6 +18,7 @@ Tools for summarising various results.
import numpy
import joblib
from tqdm import tqdm
from .make_cat import HaloCatalogue
class PKReader:
@ -170,16 +171,24 @@ class PKReader:
class OverlapReader:
"""
TODO: docs
A shortcut object for reading in the results of matching two simulations.
Parameters
----------
nsim0 : int
The reference simulation ID.
nsimx : int
The cross simulation ID.
fskel : str, optional
Path to the overlap. By default `None`, i.e.
`/mnt/extraspace/rstiskalek/csiborg/overlap/cross_{}_{}.npz`.
"""
def __init__(self, nsim0, nsimx, cat0, catx, fskel=None):
def __init__(self, nsim0, nsimx, fskel=None):
if fskel is None:
fskel = "/mnt/extraspace/rstiskalek/csiborg/overlap/"
fskel += "cross_{}_{}.npz"
self._data = numpy.load(fskel.format(nsim0, nsimx), allow_pickle=True)
self._set_cats(nsim0, nsimx, cat0, catx)
self._set_cats(nsim0, nsimx)
@property
def nsim0(self):
@ -225,7 +234,7 @@ class OverlapReader:
"""
return self._catx
def _set_cats(self, nsim0, nsimx, cat0, catx):
def _set_cats(self, nsim0, nsimx):
"""
Set the simulation IDs and catalogues.
@ -233,18 +242,15 @@ class OverlapReader:
----------
nsim0, nsimx : int
The reference and cross simulation IDs.
cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogues corresponding to `nsim0` and `nsimx`, respectively.
Returns
-------
None
"""
assert (nsim0 == cat0.paths.n_sim) & (nsimx == catx.paths.n_sim)
self._nsim0 = nsim0
self._nsimx = nsimx
self._cat0 = cat0
self._catx = catx
self._cat0 = HaloCatalogue(nsim0)
self._catx = HaloCatalogue(nsimx)
@property
def indxs(self):
@ -351,16 +357,12 @@ class OverlapReader:
Summed overlap of each halo in the reference simulation with the cross
simulation.
Parameters
----------
None
Returns
-------
summed_overlap : 1-dimensional array of shape `(nhalos, )`
"""
return numpy.array([numpy.sum(cross) for cross in self._data["cross"]])
return numpy.array([numpy.sum(cross) for cross in self.overlap])
def copy_per_match(self, par):
"""
Make an array like `self.match_indxs` where each of its element is an
@ -371,7 +373,7 @@ class OverlapReader:
----------
par : str
Property to be copied over.
Returns
-------
out : 1-dimensional array of shape `(nhalos, )`
@ -381,6 +383,80 @@ class OverlapReader:
out[n] = numpy.ones(ind.size) * self.cat0[par][n]
return numpy.array(out, dtype=object)
def prob_nomatch(self):
"""
Probability of no match for each halo in the reference simulation with
the cross simulation. Defined as a product of 1 - overlap with other
halos.
Returns
-------
out : 1-dimensional array of shape `(nhalos, )`
"""
return numpy.array(
[numpy.product(1 - overlap) for overlap in self.overlap])
def expected_counterpart_mass(self, overlap_threshold=0., in_log=False,
mass_kind="totpartmass"):
"""
Calculate the expected counterpart mass of each halo in the reference
simulation from the crossed simulation.
Parameters
-----------
overlap_threshold : float, optional
Minimum overlap required for a halo to be considered a match. By
default 0.0, i.e. no threshold.
in_log : bool, optional
Whether to calculate the expectation value in log space. By default
`False`.
mass_kind : str, optional
The mass kind whose ratio is to be calculated. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
Returns
-------
mean, std : 1-dimensional arrays of shape `(nhalos, )`
"""
nhalos = self.indxs.size
mean = numpy.full(nhalos, numpy.nan) # Preallocate output arrays
std = numpy.full(nhalos, numpy.nan)
massx = self.catx[mass_kind] # Create references to the arrays here
overlap = self.overlap # to speed up the loop below.
# Is the iterator verbose?
for n, match_ind in enumerate((self.match_indxs)):
# Skip if no match
if match_ind.size == 0:
continue
massx_ = massx[match_ind] # Again just create references
overlap_ = overlap[n] # to the appropriate elements
# Optionally apply overlap threshold
if overlap_threshold > 0.:
mask = overlap_ > overlap_threshold
if numpy.sum(mask) == 0:
continue
massx_ = massx_[mask]
overlap_ = overlap_[mask]
massx_ = numpy.log10(massx_) if in_log else massx_
# Weighted average and *biased* standard deviation
mean_ = numpy.average(massx_, weights=overlap_)
std_ = numpy.average((massx_ - mean_)**2, weights=overlap_)**0.5
# If in log, convert back to linear
mean_ = 10**mean_ if in_log else mean_
std_ = mean_ * std_ * numpy.log(10) if in_log else std_
mean[n] = mean_
std[n] = std_
return mean, std
def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42):
"""
@ -401,7 +477,7 @@ def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42):
Number of MC resamples. By default 50.
seed : int, optional
Random seed.
Returns
-------
bin_centres : 1-dimensional array
@ -413,8 +489,8 @@ def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42):
gen = numpy.random.RandomState(seed)
loop_stat = numpy.full(nresample, numpy.nan) # Preallocate loop arr
stat = numpy.full((bins.size - 1, 2), numpy.nan) # Preallocate output
loop_stat = numpy.full(nresample, numpy.nan) # Preallocate loop arr
stat = numpy.full((bins.size - 1, 2), numpy.nan) # Preallocate output
for i in range(bins.size - 1):
mask = (x > bins[i]) & (x <= bins[i + 1])
@ -423,11 +499,10 @@ def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42):
loop_stat[:] = numpy.nan # Clear it
for j in range(nresample):
loop_stat[j] = numpy.mean(y[mask][gen.rand(nsamples) < prob[mask]])
stat[i, 0] = numpy.mean(loop_stat)
stat[i, 1] = numpy.std(loop_stat)
bin_centres = (bins[1:] + bins[:-1]) / 2
return bin_centres, stat

View File

@ -30,25 +30,26 @@ import utils
# Argument parser
parser = ArgumentParser()
parser.add_argument("--nsim0", type=int)
parser.add_argument("--nsimx", type=int)
parser.add_argument("--nmult", type=float)
parser.add_argument("--overlap", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()
# File paths
nsim0 = 7468
nsimx = 7588
fperm = join(utils.dumpdir, "overlap", "cross_{}_{}.npy")
fout = join(
utils.dumpdir, "overlap", "cross_{}_{}.npz".format(args.nsim0, args.nsimx))
print("{}: loading catalogues.".format(datetime.now()), flush=True)
cat0 = csiborgtools.read.HaloCatalogue(nsim0)
catx = csiborgtools.read.HaloCatalogue(nsimx)
cat0 = csiborgtools.read.HaloCatalogue(args.nsim0)
catx = csiborgtools.read.HaloCatalogue(args.nsimx)
matcher = csiborgtools.match.RealisationsMatcher()
print("{}: crossing the simulations.".format(datetime.now()), flush=True)
indxs, match_indxs, cross = matcher.cross(
nsim0, nsimx, cat0, catx, overlap=False)
args.nsim0, args.nsimx, cat0, catx, overlap=args.overlap)
# Dump the result
fout = fperm.format(nsim0, nsimx)
print("Saving results to `{}`.".format(fout), flush=True)
with open(fout, "wb") as f:
numpy.savez(fout, indxs=indxs, match_indxs=match_indxs, cross=cross)