Update initial matching & overlaps (#47)

* pep8

* fix convention

* Update script

* enforce optimisation boundaries to be finite

* Update TODO

* Remove sky matching

* FIx a small bug

* fix bug

* Remove import

* Add halo fitted quantities

* Update nbs

* update README

* Add load_initial comments

* Rename nbs

* Delete nb

* Update imports

* Rename function

* Update matcher

* Add overlap paths

* Update the matching script

* Update verbosity

* Add verbosity flags

* Simplify make_bckg_delta

* bug fix

* fix bug
This commit is contained in:
Richard Stiskalek 2023-04-21 01:35:06 +02:00 committed by GitHub
parent 39b3498621
commit 04119a5314
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 527 additions and 2836 deletions

View file

@ -4,12 +4,13 @@
## Project Overlap ## Project Overlap
- [x] Clean up the kNN paths in the summary. - [x] Clean up the kNN paths in the summary.
- [x] Clean up the 2PCF paths in the summary. - [x] Clean up the 2PCF paths in the summary.
- [x] Update the clustering scripts to work with clumps instead.
- [x] Sort out the splitting of individual clumps.
- [x] Update the fitting scripts to work for clumps and parent halos.
- [ ] Sort out the splitting of individual clumps. - [ ] When calculating the overlap now check whether the halos have any particles, but that should already be the case otherwise its initial CM will not be defined.
- [ ] Update the fitting scripts to work for clumps and parent halos.
- [ ] Calculated fitted quantities for clumps and parent halos and add them to the catalogues. - [ ] Calculated fitted quantities for clumps and parent halos and add them to the catalogues.
- [ ] Update overlap scripts to work with summed parent halos. - [ ] Update overlap scripts to work with summed parent halos.
- [ ] Update the clustering scripts to work with clumps instead.
- [ ] Calculate the overlap between all 101 IC realisations on DiRAC. - [ ] Calculate the overlap between all 101 IC realisations on DiRAC.

View file

@ -349,24 +349,31 @@ class NFWPosterior(NFWProfile):
""" """
assert isinstance(clump, Clump) assert isinstance(clump, Clump)
r = clump.r() r = clump.r()
rmin = numpy.min(r) rmin = numpy.min(r[r > 0]) # First particle that is not at r = 0
rmax, mtot = clump.spherical_overdensity_mass(200) rmax, mtot = clump.spherical_overdensity_mass(200)
mask = (rmin <= r) & (r <= rmax) mask = (rmin <= r) & (r <= rmax)
npart = numpy.sum(mask) npart = numpy.sum(mask)
r = r[mask] r = r[mask]
# Loss function to optimize
def loss(logRs): def loss(logRs):
return -self(logRs, r, rmin, rmax, npart) return -self(logRs, r, rmin, rmax, npart)
# Define optimisation boundaries. Check whether they are finite and
# that rmax > rmin. If not, then return NaN.
bounds = (numpy.log10(rmin), numpy.log10(rmax))
if not (numpy.all(numpy.isfinite(bounds)) and bounds[0] < bounds[1]):
return numpy.nan, numpy.nan
res = minimize_scalar( res = minimize_scalar(
loss, bounds=(numpy.log10(rmin), numpy.log10(rmax)), method="bounded" loss, bounds=(numpy.log10(rmin), numpy.log10(rmax)), method="bounded"
) )
# Check whether the fit converged to radius sufficienly far from `rmax`
# and that its a success. Otherwise return NaNs.
if numpy.log10(rmax) - res.x < eps: if numpy.log10(rmax) - res.x < eps:
res.success = False res.success = False
if not res.success: if not res.success:
return numpy.nan, numpy.nan return numpy.nan, numpy.nan
# Additionally we also wish to calculate the central density from the
# mass (integral) constraint.
rho0 = self.rho0_from_Rs(10**res.x, rmin, rmax, mtot) rho0 = self.rho0_from_Rs(10**res.x, rmin, rmax, mtot)
return 10**res.x, rho0 return 10**res.x, rho0

View file

@ -20,9 +20,6 @@ from .match import ( # noqa
cosine_similarity, cosine_similarity,
dist_centmass, dist_centmass,
dist_percentile, dist_percentile,
fill_delta,
fill_delta_indxs,
get_clumplims,
) )
from .num_density import binned_counts, number_density # noqa from .num_density import binned_counts, number_density # noqa
from .utils import concatenate_clumps # noqa from .utils import concatenate_parts # noqa

View file

@ -16,15 +16,12 @@
Support for matching halos between CSiBORG IC realisations. Support for matching halos between CSiBORG IC realisations.
""" """
from datetime import datetime from datetime import datetime
from gc import collect
import numpy import numpy
from numba import jit from numba import jit
from scipy.ndimage import gaussian_filter from scipy.ndimage import gaussian_filter
from tqdm import tqdm, trange from tqdm import tqdm, trange
from .utils import concatenate_clumps
############################################################################### ###############################################################################
# Realisations matcher for calculating overlaps # # Realisations matcher for calculating overlaps #
############################################################################### ###############################################################################
@ -32,9 +29,7 @@ from .utils import concatenate_clumps
class RealisationsMatcher: class RealisationsMatcher:
""" """
A tool to match halos between IC realisations. Looks for halos 3D space A tool to match halos between IC realisations.
neighbours in all remaining IC realisations that are within some mass
range of it.
Parameters Parameters
---------- ----------
@ -49,12 +44,13 @@ class RealisationsMatcher:
catalogue key. By default `totpartmass`, i.e. the total particle catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo. mass associated with a halo.
""" """
_nmult = None _nmult = None
_dlogmass = None _dlogmass = None
_mass_kind = None _mass_kind = None
_overlapper = None _overlapper = None
def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass"): def __init__(self, nmult=1.0, dlogmass=2.0, mass_kind="totpartmass"):
assert nmult > 0 assert nmult > 0
assert dlogmass > 0 assert dlogmass > 0
assert isinstance(mass_kind, str) assert isinstance(mass_kind, str)
@ -109,12 +105,13 @@ class RealisationsMatcher:
""" """
return self._overlapper return self._overlapper
def cross(self, cat0, catx, clumps0, clumpsx, delta_bckg, verbose=True): def cross(
self, cat0, catx, halos0_archive, halosx_archive, delta_bckg, verbose=True
):
r""" r"""
Find all neighbours whose CM separation is less than `nmult` times the Find all neighbours whose CM separation is less than `nmult` times the
sum of their initial Lagrangian patch sizes and optionally calculate sum of their initial Lagrangian patch sizes and calculate their overlap.
their overlap. Enforces that the neighbours' are similar in mass up to Enforces that the neighbours' are similar in mass up to `dlogmass` dex.
`dlogmass` dex.
Parameters Parameters
---------- ----------
@ -122,14 +119,14 @@ class RealisationsMatcher:
Halo catalogue of the reference simulation. Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.ClumpsCatalogue` catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the cross simulation. Halo catalogue of the cross simulation.
clumps0 : list of structured arrays halos0_archive : `NpzFile` object
List of clump structured arrays of the reference simulation, keys Archive of halos' particles of the reference simulation, keys must
must include `x`, `y`, `z` and `M`. The positions must already be include `x`, `y`, `z` and `M`. The positions must already be converted
converted to cell numbers. to cell numbers.
clumpsx : list of structured arrays halosx_archive : `NpzFile` object
List of clump structured arrays of the cross simulation, keys must Archive of halos' particles of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be include `x`, `y`, `z` and `M`. The positions must already be converted
converted to cell numbers. to cell numbers.
delta_bckg : 3-dimensional array delta_bckg : 3-dimensional array
Summed background density field of the reference and cross Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the simulations calculated with particles assigned to halos at the
@ -140,24 +137,27 @@ class RealisationsMatcher:
Returns Returns
------- -------
ref_indxs : 1-dimensional array
Halo IDs in the reference catalogue.
cross_indxs : 1-dimensional array
Halo IDs in the cross catalogue.
match_indxs : 1-dimensional array of arrays match_indxs : 1-dimensional array of arrays
Indices of halo counterparts in the cross catalogue. The outer array corresponds to halos in the reference catalogue, the
inner array corresponds to the array positions of matches in the cross
catalogue.
overlaps : 1-dimensional array of arrays overlaps : 1-dimensional array of arrays
Overlaps with the cross catalogue. Overlaps with the cross catalogue. Follows similar pattern as `match_indxs`.
""" """
# Query the KNN # We begin by querying the kNN for the nearest neighbours of each halo
verbose and print("{}: querying the KNN." # in the reference simulation from the cross simulation in the initial
.format(datetime.now()), flush=True) # snapshot.
verbose and print("{}: querying the KNN.".format(datetime.now()), flush=True)
match_indxs = radius_neighbours( match_indxs = radius_neighbours(
catx.knn(select_initial=True), cat0.positions(in_initial=True), catx.knn(select_initial=True),
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"], cat0.positions(in_initial=True),
nmult=self.nmult, enforce_in32=True, verbose=verbose) radiusX=cat0["lagpatch"],
radiusKNN=catx["lagpatch"],
# Remove neighbours whose mass is too large/small nmult=self.nmult,
enforce_int32=True,
verbose=verbose,
)
# We next remove neighbours whose mass is too large/small.
if self.dlogmass is not None: if self.dlogmass is not None:
for i, indx in enumerate(match_indxs): for i, indx in enumerate(match_indxs):
# |log(M1 / M2)| # |log(M1 / M2)|
@ -165,68 +165,103 @@ class RealisationsMatcher:
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i])) aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
match_indxs[i] = match_indxs[i][aratio < self.dlogmass] match_indxs[i] = match_indxs[i][aratio < self.dlogmass]
# Min and max cells along each axis for each halo # We will make a dictionary to keep in memory the halos' particles from the
limkwargs = {"ncells": self.overlapper.inv_clength, # cross simulations so that they are not loaded in several times and we only
"nshift": self.overlapper.nshift} # convert their positions to cells once. Possibly make an option to not do
mins0, maxs0 = get_clumplims(clumps0, **limkwargs) # this to lower memory requirements?
minsx, maxsx = get_clumplims(clumpsx, **limkwargs) cross_halos = {}
cross_lims = {}
# Mapping from a halo index to the list of clumps
hid2clumps0 = {hid: n for n, hid in enumerate(clumps0["ID"])}
hid2clumpsx = {hid: n for n, hid in enumerate(clumpsx["ID"])}
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
# Loop only over halos that have neighbours for i, k0 in enumerate(tqdm(cat0["index"]) if verbose else cat0["index"]):
iters = numpy.arange(len(cat0))[[x.size > 0 for x in match_indxs]] # If we have no matches continue to the next halo.
for i in tqdm(iters) if verbose else iters: matches = match_indxs[i]
match0 = hid2clumps0[cat0["index"][i]] if matches.size == 0:
# The clump, its mass and mins & maxs continue
cl0 = clumps0["clump"][match0] # Next, we find this halo's particles, total mass and minimum/maximum cells
mass0 = numpy.sum(cl0['M']) # and convert positions to cells.
mins0_current, maxs0_current = mins0[match0], maxs0[match0] halo0 = halos0_archive[str(k0)]
mass0 = numpy.sum(halo0["M"])
# Preallocate arrays to store overlap information mins0, maxs0 = get_halolims(
_cross = numpy.full(match_indxs[i].size, numpy.nan, halo0, ncells=self.overlapper.inv_clength, nshift=self.overlapper.nshift
dtype=numpy.float32) )
# Loop over matches of this halo from the other simulation for p in ("x", "y", "z"):
for j, ind in enumerate(match_indxs[i]): halo0[p] = self.overlapper.pos2cell(halo0[p])
matchx = hid2clumpsx[catx["index"][ind]] # We now loop over matches of this halo and calculate their overlap,
clx = clumpsx["clump"][matchx] # storing them in `_cross`.
_cross = numpy.full(matches.size, numpy.nan, dtype=numpy.float32)
for j, kf in enumerate(catx["index"][matches]):
# Attempt to load this cross halo from memory, if it fails get it from
# from the halo archive (and similarly for the limits) and convert the
# particle positions to cells.
try:
halox = cross_halos[kf]
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox,
ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift,
)
for p in ("x", "y", "z"):
halox[p] = self.overlapper.pos2cell(halox[p])
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
massx = numpy.sum(halox["M"])
_cross[j] = self.overlapper( _cross[j] = self.overlapper(
cl0, clx, delta_bckg, mins0_current, maxs0_current, halo0,
minsx[matchx], maxsx[matchx], mass1=mass0, halox,
mass2=numpy.sum(clx['M'])) delta_bckg,
mins0,
maxs0,
minsx,
maxsx,
mass1=mass0,
mass2=massx,
)
cross[i] = _cross cross[i] = _cross
# Remove matches with exactly 0 overlap # We remove all matches that have zero overlap to save space.
mask = cross[i] > 0 mask = cross[i] > 0
match_indxs[i] = match_indxs[i][mask] match_indxs[i] = match_indxs[i][mask]
cross[i] = cross[i][mask] cross[i] = cross[i][mask]
# And finally we sort the matches by their overlap.
# Sort the matches by overlap
ordering = numpy.argsort(cross[i])[::-1] ordering = numpy.argsort(cross[i])[::-1]
match_indxs[i] = match_indxs[i][ordering] match_indxs[i] = match_indxs[i][ordering]
cross[i] = cross[i][ordering] cross[i] = cross[i][ordering]
cross = numpy.asanyarray(cross, dtype=object) cross = numpy.asanyarray(cross, dtype=object)
return cat0["index"], catx["index"], match_indxs, cross return match_indxs, cross
def smoothed_cross(self, clumps0, clumpsx, delta_bckg, ref_indxs, def smoothed_cross(
cross_indxs, match_indxs, smooth_kwargs, verbose=True): self,
cat0,
catx,
halos0_archive,
halosx_archive,
delta_bckg,
match_indxs,
smooth_kwargs,
verbose=True,
):
r""" r"""
Calculate the smoothed overlaps for pair previously identified via Calculate the smoothed overlaps for pair previously identified via
`self.cross(...)` to have a non-zero overlap. `self.cross(...)` to have a non-zero overlap.
Parameters Parameters
---------- ----------
clumps0 : list of structured arrays cat0 : :py:class:`csiborgtools.read.ClumpsCatalogue`
List of clump structured arrays of the reference simulation, keys Halo catalogue of the reference simulation.
must include `x`, `y`, `z` and `M`. The positions must already be catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
converted to cell numbers. Halo catalogue of the cross simulation.
clumpsx : list of structured arrays halos0_archive : `NpzFile` object
List of clump structured arrays of the cross simulation, keys must Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be include `x`, `y`, `z` and `M`. The positions must already be converted
converted to cell numbers. to cell numbers.
halosx_archive : `NpzFile` object
Archive of halos' particles of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be converted
to cell numbers.
delta_bckg : 3-dimensional array delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the simulations calculated with particles assigned to halos at the
@ -247,33 +282,45 @@ class RealisationsMatcher:
------- -------
overlaps : 1-dimensional array of arrays overlaps : 1-dimensional array of arrays
""" """
# Min and max cells along each axis for each halo
limkwargs = {"ncells": self.overlapper.inv_clength,
"nshift": self.overlapper.nshift}
mins0, maxs0 = get_clumplims(clumps0, **limkwargs)
minsx, maxsx = get_clumplims(clumpsx, **limkwargs)
hid2clumps0 = {hid: n for n, hid in enumerate(clumps0["ID"])} cross_halos = {}
hid2clumpsx = {hid: n for n, hid in enumerate(clumpsx["ID"])} cross_lims = {}
# Preallocate arrays to store the overlap information
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
for i, ref_ind in enumerate(tqdm(ref_indxs) if verbose else ref_indxs):
match0 = hid2clumps0[ref_ind]
# The reference clump, its mass and mins & maxs
cl0 = clumps0["clump"][match0]
mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Preallocate for i, k0 in enumerate(tqdm(cat0["index"]) if verbose else cat0["index"]):
nmatches = match_indxs[i].size halo0 = halos0_archive[str(k0)]
_cross = numpy.full(nmatches, numpy.nan, numpy.float32) mins0, maxs0 = get_halolims(
for j, match_ind in enumerate(match_indxs[i]): halo0, ncells=self.overlapper.inv_clength, nshift=self.overlapper.nshift
matchx = hid2clumpsx[cross_indxs[match_ind]] )
clx = clumpsx["clump"][matchx]
# Now loop over the matches and calculate the smoothed overlap.
_cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32)
for j, kf in enumerate(catx["index"][match_indxs[i]]):
# Attempt to load this cross halo from memory, if it fails get it from
# from the halo archive (and similarly for the limits).
try:
halox = cross_halos[kf]
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox,
ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift,
)
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
_cross[j] = self.overlapper( _cross[j] = self.overlapper(
cl0, clx, delta_bckg, mins0_current, halo0,
maxs0_current, minsx[matchx], maxsx[matchx], halox,
smooth_kwargs=smooth_kwargs) delta_bckg,
mins0,
maxs0,
minsx,
maxsx,
smooth_kwargs=smooth_kwargs,
)
cross[i] = _cross cross[i] = _cross
return numpy.asanyarray(cross, dtype=object) return numpy.asanyarray(cross, dtype=object)
@ -321,6 +368,7 @@ class ParticleOverlap:
the nearest grid position particle assignment scheme, with optional the nearest grid position particle assignment scheme, with optional
Gaussian smoothing. Gaussian smoothing.
""" """
def __init__(self): def __init__(self):
# Inverse cell length in box units. By default :math:`2^11`, which # Inverse cell length in box units. By default :math:`2^11`, which
# matches the initial RAMSES grid resolution. # matches the initial RAMSES grid resolution.
@ -346,92 +394,63 @@ class ParticleOverlap:
# Check whether this is already the cell # Check whether this is already the cell
if pos.dtype.char in numpy.typecodes["AllInteger"]: if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos return pos
return numpy.floor(pos * self.inv_clength).astype(int) return numpy.floor(pos * self.inv_clength).astype(numpy.int32)
def clumps_pos2cell(self, clumps): def make_bckg_delta(self, halo_archive, delta=None, verbose=False):
""" """
Convert clump positions directly to cell IDs. Useful to speed up Calculate a NGP density field of particles belonging to halos within the
subsequent calculations. Overwrites the passed in arrays. central :math:`1/2^3` high-resolution region of the simulation. Smoothing
must be applied separately.
Parameters Parameters
---------- ----------
clumps : array of arrays halo_archive : `NpzFile` object
Array of clump structured arrays whose `x`, `y`, `z` keys will be Archive of halos' particles of the reference simulation, keys must
converted. include `x`, `y`, `z` and `M`.
Returns
-------
None
"""
# Check if clumps are probably already in cells
if any(clumps[0][0].dtype[p].char in numpy.typecodes["AllInteger"]
for p in ('x', 'y', 'z')):
raise ValueError("Positions appear to already be converted cells.")
# Get the new dtype that replaces float for int for positions
names = clumps[0][0].dtype.names # Take the first one, doesn't matter
formats = [descr[1] for descr in clumps[0][0].dtype.descr]
for i in range(len(names)):
if names[i] in ('x', 'y', 'z'):
formats[i] = numpy.int32
dtype = numpy.dtype({"names": names, "formats": formats})
# Loop switch positions for cells IDs and change dtype
for n in range(clumps.size):
for p in ('x', 'y', 'z'):
clumps[n][0][p] = self.pos2cell(clumps[n][0][p])
clumps[n][0] = clumps[n][0].astype(dtype)
def make_bckg_delta(self, clumps, delta=None):
"""
Calculate a NGP density field of clumps within the central
:math:`1/2^3` region of the simulation. Smoothing must be applied
separately.
Parameters
----------
clumps : list of structured arrays
List of clump structured array, keys must include `x`, `y`, `z`
and `M`.
delta : 3-dimensional array, optional delta : 3-dimensional array, optional
Array to store the density field in. If `None` a new array is Array to store the density field in. If `None` a new array is
created. created.
verbose : bool, optional
Verbosity flag for loading the files.
Returns Returns
------- -------
delta : 3-dimensional array delta : 3-dimensional array
""" """
conc_clumps = concatenate_clumps(clumps) # We obtain the minimum/maximum cell IDs and number of cells along each dim.
cells = [self.pos2cell(conc_clumps[p]) for p in ('x', 'y', 'z')]
mass = conc_clumps['M']
del conc_clumps
collect() # This is a large array so force memory clean
cellmin = self.inv_clength // 4 # The minimum cell ID cellmin = self.inv_clength // 4 # The minimum cell ID
cellmax = 3 * self.inv_clength // 4 # The maximum cell ID cellmax = 3 * self.inv_clength // 4 # The maximum cell ID
ncells = cellmax - cellmin ncells = cellmax - cellmin
# Mask out particles outside the cubical high resolution region # We then pre-allocate the density field or check it is of the right shape
mask = ((cellmin <= cells[0]) & (cells[0] < cellmax) # if already given.
& (cellmin <= cells[1]) & (cells[1] < cellmax)
& (cellmin <= cells[2]) & (cells[2] < cellmax)
)
cells = [c[mask] for c in cells]
mass = mass[mask]
# Prepare the density field or check it is of the right shape
if delta is None: if delta is None:
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32) delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
else: else:
assert ((delta.shape == (ncells,) * 3) assert (delta.shape == (ncells,) * 3) & (delta.dtype == numpy.float32)
& (delta.dtype == numpy.float32))
# We now loop one-by-one over the halos fill the density field.
files = halo_archive.files
for file in tqdm(files) if verbose else files:
parts = halo_archive[file]
cells = [self.pos2cell(parts[p]) for p in ("x", "y", "z")]
mass = parts["M"]
# We mask out particles outside the cubical high-resolution region
mask = (
(cellmin <= cells[0])
& (cells[0] < cellmax)
& (cellmin <= cells[1])
& (cells[1] < cellmax)
& (cellmin <= cells[2])
& (cells[2] < cellmax)
)
cells = [c[mask] for c in cells]
mass = mass[mask]
fill_delta(delta, *cells, *(cellmin,) * 3, mass) fill_delta(delta, *cells, *(cellmin,) * 3, mass)
return delta return delta
def make_delta(self, clump, mins=None, maxs=None, subbox=False, def make_delta(self, clump, mins=None, maxs=None, subbox=False, smooth_kwargs=None):
smooth_kwargs=None):
""" """
Calculate a NGP density field of a halo on a cubic grid. Optionally can Calculate a NGP density field of a halo on a cubic grid. Optionally can
be smoothed with a Gaussian kernel. be smoothed with a Gaussian kernel.
@ -453,7 +472,7 @@ class ParticleOverlap:
------- -------
delta : 3-dimensional array delta : 3-dimensional array
""" """
cells = [self.pos2cell(clump[p]) for p in ('x', 'y', 'z')] cells = [self.pos2cell(clump[p]) for p in ("x", "y", "z")]
# Check that minima and maxima are integers # Check that minima and maxima are integers
if not (mins is None and maxs is None): if not (mins is None and maxs is None):
@ -464,26 +483,38 @@ class ParticleOverlap:
# Minimum xcell, ycell and zcell of this clump # Minimum xcell, ycell and zcell of this clump
if mins is None or maxs is None: if mins is None or maxs is None:
mins = numpy.asanyarray( mins = numpy.asanyarray(
[max(numpy.min(cell) - self.nshift, 0) for cell in cells]) [max(numpy.min(cell) - self.nshift, 0) for cell in cells]
)
maxs = numpy.asanyarray( maxs = numpy.asanyarray(
[min(numpy.max(cell) + self.nshift, self.inv_clength) [
for cell in cells]) min(numpy.max(cell) + self.nshift, self.inv_clength)
for cell in cells
]
)
ncells = numpy.max(maxs - mins) + 1 # To get the number of cells ncells = numpy.max(maxs - mins) + 1 # To get the number of cells
else: else:
mins = (0, 0, 0,) mins = [0, 0, 0]
ncells = self.inv_clength ncells = self.inv_clength
# Preallocate and fill the array # Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32) delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
fill_delta(delta, *cells, *mins, clump['M']) fill_delta(delta, *cells, *mins, clump["M"])
if smooth_kwargs is not None: if smooth_kwargs is not None:
gaussian_filter(delta, output=delta, **smooth_kwargs) gaussian_filter(delta, output=delta, **smooth_kwargs)
return delta return delta
def make_deltas(self, clump1, clump2, mins1=None, maxs1=None, def make_deltas(
mins2=None, maxs2=None, smooth_kwargs=None): self,
clump1,
clump2,
mins1=None,
maxs1=None,
mins2=None,
maxs2=None,
smooth_kwargs=None,
):
""" """
Calculate a NGP density fields of two halos on a grid that encloses Calculate a NGP density fields of two halos on a grid that encloses
them both. Optionally can be smoothed with a Gaussian kernel. them both. Optionally can be smoothed with a Gaussian kernel.
@ -513,8 +544,8 @@ class ParticleOverlap:
Indices where the lower mass clump has a non-zero density. Indices where the lower mass clump has a non-zero density.
Calculated only if no smoothing is applied, otherwise `None`. Calculated only if no smoothing is applied, otherwise `None`.
""" """
xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ('x', 'y', 'z')) xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ("x", "y", "z"))
xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ('x', 'y', 'z')) xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ("x", "y", "z"))
if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)): if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)):
# Minimum cell number of the two halos along each dimension # Minimum cell number of the two halos along each dimension
@ -529,13 +560,14 @@ class ParticleOverlap:
ymax = max(numpy.max(yc1), numpy.max(yc2)) + self.nshift ymax = max(numpy.max(yc1), numpy.max(yc2)) + self.nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + self.nshift zmax = max(numpy.max(zc1), numpy.max(zc2)) + self.nshift
# Make sure shifting does not go beyond boundaries # Make sure shifting does not go beyond boundaries
xmax, ymax, zmax = [min(px, self.inv_clength - 1) xmax, ymax, zmax = [
for px in (xmax, ymax, zmax)] min(px, self.inv_clength - 1) for px in (xmax, ymax, zmax)
]
else: else:
xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)] xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)] xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
cellmins = (xmin, ymin, zmin, ) # Cell minima cellmins = (xmin, ymin, zmin) # Cell minima
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1 # Num cells ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1 # Num cells
# Preallocate and fill the arrays # Preallocate and fill the arrays
@ -545,16 +577,18 @@ class ParticleOverlap:
# If no smoothing figure out the nonzero indices of the smaller clump # If no smoothing figure out the nonzero indices of the smaller clump
if smooth_kwargs is None: if smooth_kwargs is None:
if clump1.size > clump2.size: if clump1.size > clump2.size:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M']) fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
nonzero = fill_delta_indxs(delta2, xc2, yc2, zc2, *cellmins, nonzero = fill_delta_indxs(
clump2['M']) delta2, xc2, yc2, zc2, *cellmins, clump2["M"]
)
else: else:
nonzero = fill_delta_indxs(delta1, xc1, yc1, zc1, *cellmins, nonzero = fill_delta_indxs(
clump1['M']) delta1, xc1, yc1, zc1, *cellmins, clump1["M"]
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M']) )
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
else: else:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M']) fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M']) fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
nonzero = None nonzero = None
if smooth_kwargs is not None: if smooth_kwargs is not None:
@ -562,9 +596,19 @@ class ParticleOverlap:
gaussian_filter(delta2, output=delta2, **smooth_kwargs) gaussian_filter(delta2, output=delta2, **smooth_kwargs)
return delta1, delta2, cellmins, nonzero return delta1, delta2, cellmins, nonzero
def __call__(self, clump1, clump2, delta_bckg, mins1=None, maxs1=None, def __call__(
mins2=None, maxs2=None, mass1=None, mass2=None, self,
smooth_kwargs=None): clump1,
clump2,
delta_bckg,
mins1=None,
maxs1=None,
mins2=None,
maxs2=None,
mass1=None,
mass2=None,
smooth_kwargs=None,
):
""" """
Calculate overlap between `clump1` and `clump2`. See Calculate overlap between `clump1` and `clump2`. See
`calculate_overlap(...)` for further information. Be careful so that `calculate_overlap(...)` for further information. Be careful so that
@ -603,16 +647,17 @@ class ParticleOverlap:
overlap : float overlap : float
""" """
delta1, delta2, cellmins, nonzero = self.make_deltas( delta1, delta2, cellmins, nonzero = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2, clump1, clump2, mins1, maxs1, mins2, maxs2, smooth_kwargs=smooth_kwargs
smooth_kwargs=smooth_kwargs) )
if smooth_kwargs is not None: if smooth_kwargs is not None:
return calculate_overlap(delta1, delta2, cellmins, delta_bckg) return calculate_overlap(delta1, delta2, cellmins, delta_bckg)
# Calculate masses not given # Calculate masses not given
mass1 = numpy.sum(clump1['M']) if mass1 is None else mass1 mass1 = numpy.sum(clump1["M"]) if mass1 is None else mass1
mass2 = numpy.sum(clump2['M']) if mass2 is None else mass2 mass2 = numpy.sum(clump2["M"]) if mass2 is None else mass2
return calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, return calculate_overlap_indxs(
nonzero, mass1, mass2) delta1, delta2, cellmins, delta_bckg, nonzero, mass1, mass2
)
############################################################################### ###############################################################################
@ -682,14 +727,15 @@ def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
return cells[:count_nonzero, :] # Cutoff unassigned places return cells[:count_nonzero, :] # Cutoff unassigned places
def get_clumplims(clumps, ncells, nshift=None): def get_halolims(halo, ncells, nshift=None):
""" """
Get the lower and upper limit of clumps' positions or cell numbers. Get the lower and upper limit of a halo's positions or cell numbers.
Parameters Parameters
---------- ----------
clumps : array of arrays halo : structured array
Array of clump structured arrays. Structured array containing the particles of a given halo. Keys must
`x`, `y`, `z`.
ncells : int ncells : int
Number of grid cells of the box along a single dimension. Number of grid cells of the box along a single dimension.
nshift : int, optional nshift : int, optional
@ -697,23 +743,21 @@ def get_clumplims(clumps, ncells, nshift=None):
Returns Returns
------- -------
mins, maxs : 2-dimensional arrays of shape `(n_samples, 3)` mins, maxs : 1-dimensional arrays of shape `(3, )`
Minimum and maximum along each axis. Minimum and maximum along each axis.
""" """
dtype = clumps[0][0]['x'].dtype # dtype of the first clump's 'x' # Check that in case of `nshift` we have integer positions.
# Check that for real positions we cannot apply nshift dtype = halo["x"].dtype
if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]: if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
raise TypeError("`nshift` supported only positions are cells.") raise TypeError("`nshift` supported only positions are cells.")
nshift = 0 if nshift is None else nshift # To simplify code below nshift = 0 if nshift is None else nshift # To simplify code below
nclumps = clumps.size mins = numpy.full(3, numpy.nan, dtype=dtype)
mins = numpy.full((nclumps, 3), numpy.nan, dtype=dtype) maxs = numpy.full(3, numpy.nan, dtype=dtype)
maxs = numpy.full((nclumps, 3), numpy.nan, dtype=dtype)
for i, clump in enumerate(clumps): for i, p in enumerate(["x", "y", "z"]):
for j, p in enumerate(['x', 'y', 'z']): mins[i] = max(numpy.min(halo[p]) - nshift, 0)
mins[i, j] = max(numpy.min(clump[0][p]) - nshift, 0) maxs[i] = min(numpy.max(halo[p]) + nshift, ncells - 1)
maxs[i, j] = min(numpy.max(clump[0][p]) + nshift, ncells - 1)
return mins, maxs return mins, maxs
@ -742,8 +786,8 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
------- -------
overlap : float overlap : float
""" """
totmass = 0. # Total mass of clump 1 and clump 2 totmass = 0.0 # Total mass of clump 1 and clump 2
intersect = 0. # Weighted intersecting mass intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack things i0, j0, k0 = cellmins # Unpack things
bckg_offset = 512 # Offset of the background density field bckg_offset = 512 # Offset of the background density field
bckg_size = 1024 bckg_size = 1024
@ -770,8 +814,9 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
@jit(nopython=True) @jit(nopython=True)
def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero, def calculate_overlap_indxs(
mass1, mass2): delta1, delta2, cellmins, delta_bckg, nonzero, mass1, mass2
):
r""" r"""
Overlap between two clumps whose density fields are evaluated on the Overlap between two clumps whose density fields are evaluated on the
same grid and `nonzero1` enumerates the non-zero cells of `delta1. This is same grid and `nonzero1` enumerates the non-zero cells of `delta1. This is
@ -800,7 +845,7 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
------- -------
overlap : float overlap : float
""" """
intersect = 0. # Weighted intersecting mass intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack cell minimas i0, j0, k0 = cellmins # Unpack cell minimas
bckg_offset = 512 # Offset of the background density field bckg_offset = 512 # Offset of the background density field
bckg_size = 1024 # Size of the background density field array bckg_size = 1024 # Size of the background density field array
@ -827,7 +872,7 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
def dist_centmass(clump): def dist_centmass(clump):
""" """
Calculate the clump particles' distance from the centre of mass. Calculate the clump (or halo) particles' distance from the centre of mass.
Parameters Parameters
---------- ----------
@ -842,12 +887,15 @@ def dist_centmass(clump):
Center of mass coordinates. Center of mass coordinates.
""" """
# CM along each dimension # CM along each dimension
cmx, cmy, cmz = [numpy.average(clump[p], weights=clump['M']) cmx, cmy, cmz = [
for p in ('x', 'y', 'z')] numpy.average(clump[p], weights=clump["M"]) for p in ("x", "y", "z")
]
# Particle distance from the CM # Particle distance from the CM
dist = numpy.sqrt(numpy.square(clump['x'] - cmx) dist = numpy.sqrt(
+ numpy.square(clump['y'] - cmy) numpy.square(clump["x"] - cmx)
+ numpy.square(clump['z'] - cmz)) + numpy.square(clump["y"] - cmy)
+ numpy.square(clump["z"] - cmz)
)
return dist, numpy.asarray([cmx, cmy, cmz]) return dist, numpy.asarray([cmx, cmy, cmz])
@ -874,8 +922,9 @@ def dist_percentile(dist, qs, distmax=0.075):
return x return x
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., def radius_neighbours(
enforce_int32=False, verbose=True): knn, X, radiusX, radiusKNN, nmult=1.0, enforce_int32=False, verbose=True
):
""" """
Find all neigbours of a trained KNN model whose center of mass separation Find all neigbours of a trained KNN model whose center of mass separation
is less than `nmult` times the sum of their respective radii. is less than `nmult` times the sum of their respective radii.
@ -913,9 +962,9 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.,
patchknn_max = numpy.max(radiusKNN) # Maximum for completeness patchknn_max = numpy.max(radiusKNN) # Maximum for completeness
for i in trange(nsamples) if verbose else range(nsamples): for i in trange(nsamples) if verbose else range(nsamples):
dist, indx = knn.radius_neighbors(X[i, :].reshape(-1, 3), dist, indx = knn.radius_neighbors(
radiusX[i] + patchknn_max, X[i, :].reshape(-1, 3), radiusX[i] + patchknn_max, sort_results=True
sort_results=True) )
# Note that `dist` and `indx` are wrapped in 1-element arrays # Note that `dist` and `indx` are wrapped in 1-element arrays
# so we take the first item where appropriate # so we take the first item where appropriate
mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
@ -924,58 +973,3 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.,
indxs[i] = indxs[i].astype(numpy.int32) indxs[i] = indxs[i].astype(numpy.int32)
return numpy.asarray(indxs, dtype=object) return numpy.asarray(indxs, dtype=object)
###############################################################################
# Sky mathing #
###############################################################################
# def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
# """
# Calculate for each point in `c1` the `N` closest points in `c2`.
# Parameters
# ----------
# c1 : `astropy.coordinates.SkyCoord`
# Coordinates of the first set of points.
# c2 : `astropy.coordinates.SkyCoord`
# Coordinates of the second set of points.
# angular : bool, optional
# Whether to calculate angular separation or 3D separation. By default
# `False` and 3D separation is calculated.
# N : int, optional
# Number of closest points in `c2` to each object in `c1` to return.
# verbose : bool, optional
# Verbosity flag. By default `False`.
# Returns
# -------
# sep : 1-dimensional array
# Separation of each object in `c1` to `N` closest objects in `c2`. The
# array shape is `(c1.size, N)`. Separation is in units of `c1`.
# indxs : 1-dimensional array
# Indexes of the closest objects in `c2` for each object in `c1`. The
# array shape is `(c1.size, N)`.
# """
# if not (isinstance(c1, SkyCoord) and isinstance(c2, SkyCoord)):
# raise TypeError(
# "`c1` & `c2` must be `astropy.coordinates.SkyCoord`.")
# N1 = c1.size
# N2 = c2.size if N is None else N
# # Pre-allocate arrays
# sep = numpy.full((N1, N2), numpy.nan)
# indxs = numpy.full((N1, N2), numpy.nan, dtype=int)
# iters = tqdm(range(N1)) if verbose else range(N1)
# for i in iters:
# if angular:
# dist = c1[i].separation(c2).value
# else:
# dist = c1[i].separation_3d(c2).value
# # Sort the distances
# sort = numpy.argsort(dist)[:N2]
# indxs[i, :] = sort
# sep[i, :] = dist[sort]
# return sep, indxs

View file

@ -16,27 +16,27 @@
import numpy import numpy
def concatenate_clumps(clumps, include_velocities=False): def concatenate_parts(list_parts, include_velocities=False):
""" """
Concatenate an array of clumps to a single array containing all particles. Concatenate a list of particle arrays into a single array.
Parameters Parameters
---------- ----------
clumps : list of structured arrays list_parts : list of structured arrays
List of clumps. Each clump must be a structured array with keys List of particle arrays.
include_velocities : bool, optional include_velocities : bool, optional
Whether to include velocities in the output array. Whether to include velocities in the output array.
Returns Returns
------- -------
particles : structured array parts_out : structured array
""" """
# Count how large array will be needed # Count how large array will be needed
N = 0 N = 0
for clump, __ in clumps: for part in list_parts:
N += clump.size N += part.size
# Infer dtype of positions # Infer dtype of positions
if clumps[0][0]["x"].dtype.char in numpy.typecodes["AllInteger"]: if list_parts[0]["x"].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32 posdtype = numpy.int32
else: else:
posdtype = numpy.float32 posdtype = numpy.float32
@ -54,14 +54,14 @@ def concatenate_clumps(clumps, include_velocities=False):
"names": ["x", "y", "z", "M"], "names": ["x", "y", "z", "M"],
"formats": [posdtype] * 3 + [numpy.float32], "formats": [posdtype] * 3 + [numpy.float32],
} }
particles = numpy.full(N, numpy.nan, dtype) parts_out = numpy.full(N, numpy.nan, dtype)
# Fill it one clump by another # Fill it one clump by another
start = 0 start = 0
for clump, __ in clumps: for parts in list_parts:
end = start + clump.size end = start + parts.size
for p in dtype["names"]: for p in dtype["names"]:
particles[p][start:end] = clump[p] parts_out[p][start:end] = parts[p]
start = end start = end
return particles return parts_out

View file

@ -375,9 +375,6 @@ class HaloCatalogue(BaseCatalogue):
Halo catalogue, i.e. parent halos with summed substructure, defined in the Halo catalogue, i.e. parent halos with summed substructure, defined in the
final snapshot. final snapshot.
TODO:
Add the fitted quantities
Parameters Parameters
---------- ----------
nsim : int nsim : int
@ -391,26 +388,60 @@ class HaloCatalogue(BaseCatalogue):
minmass : len-2 tuple minmass : len-2 tuple
Minimum mass. The first element is the catalogue key and the second is Minimum mass. The first element is the catalogue key and the second is
the value. the value.
load_fitted : bool, optional
Whether to load fitted quantities.
load_initial : bool, optional
Whether to load initial positions.
rawdata : bool, optional rawdata : bool, optional
Whether to return the raw data. In this case applies no cuts and Whether to return the raw data. In this case applies no cuts and
transformations. transformations.
""" """
def __init__( def __init__(
self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12), rawdata=False self,
nsim,
paths,
maxdist=155.5 / 0.705,
minmass=("M", 1e12),
load_fitted=True,
load_initial=False,
rawdata=False,
): ):
self.nsim = nsim self.nsim = nsim
self.paths = paths self.paths = paths
# Read in the mmain catalogue of summed substructure # Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim)) mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
self._data = mmain["mmain"] self._data = mmain["mmain"]
if load_fitted:
fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos"))
cols = [col for col in fits.dtype.names if col != "index"]
X = [fits[col] for col in cols]
self._data = add_columns(self._data, X, cols)
# TODO: load initial positions
if not rawdata: if not rawdata:
# Flip positions and convert from code units to cMpc. Convert M too # Flip positions and convert from code units to cMpc. Convert M too
flip_cols(self._data, "x", "z") flip_cols(self._data, "x", "z")
for p in ("x", "y", "z"): for p in ("x", "y", "z"):
self._data[p] -= 0.5 self._data[p] -= 0.5
self._data = self.box.convert_from_boxunits( self._data = self.box.convert_from_boxunits(
self._data, ["x", "y", "z", "M"] self._data,
[
"x",
"y",
"z",
"M",
"totpartmass",
"rho0",
"r200c",
"r500c",
"m200c",
"m500c",
"r200m",
"m200m",
],
) )
if maxdist is not None: if maxdist is not None:

View file

@ -133,13 +133,13 @@ class CSiBORGPaths:
nsim : int nsim : int
IC realisation index. IC realisation index.
kind : str kind : str
Type of match. Can be either `cm` or `particles`. Type of match. Can be either `fit` or `particles`.
Returns Returns
------- -------
path : str path : str
""" """
assert kind in ["cm", "particles"] assert kind in ["fit", "particles"]
fdir = join(self.postdir, "initmatch") fdir = join(self.postdir, "initmatch")
if not isdir(fdir): if not isdir(fdir):
mkdir(fdir) mkdir(fdir)
@ -284,6 +284,28 @@ class CSiBORGPaths:
fname = "{}_out_{}_{}.npy".format(kind, str(nsim).zfill(5), str(nsnap).zfill(5)) fname = "{}_out_{}_{}.npy".format(kind, str(nsim).zfill(5), str(nsnap).zfill(5))
return join(fdir, fname) return join(fdir, fname)
def overlap_path(self, nsim0, nsimx):
"""
Path to the overlap files between two simulations.
Parameters
----------
nsim0 : int
IC realisation index of the first simulation.
nsimx : int
IC realisation index of the second simulation.
Returns
-------
path : str
"""
fdir = join(self.postdir, "overlap")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
fname = "ovelrap_{}_{}.npz".format(str(nsim0).zfill(5), str(nsimx).zfill(5))
return join(fdir, fname)
def knnauto_path(self, run, nsim=None): def knnauto_path(self, run, nsim=None):
""" """
Path to the `knn` auto-correlation files. If `nsim` is not specified Path to the `knn` auto-correlation files. If `nsim` is not specified

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1139,8 +1139,8 @@
"overlapper.clumps_pos2cell(clumps0)\n", "overlapper.clumps_pos2cell(clumps0)\n",
"overlapper.clumps_pos2cell(clumpsx)\n", "overlapper.clumps_pos2cell(clumpsx)\n",
"\n", "\n",
"mins0, maxs0 = csiborgtools.match.get_clumplims(clumps0, overlapper.inv_clength, overlapper.nshift)\n", "mins0, maxs0 = csiborgtools.match.get_halolims(clumps0, overlapper.inv_clength, overlapper.nshift)\n",
"minsx, maxsx = csiborgtools.match.get_clumplims(clumpsx, overlapper.inv_clength, overlapper.nshift)" "minsx, maxsx = csiborgtools.match.get_halolims(clumpsx, overlapper.inv_clength, overlapper.nshift)"
] ]
}, },
{ {

View file

@ -664,8 +664,8 @@
"overlapper.clumps_pos2cell(clumps0)\n", "overlapper.clumps_pos2cell(clumps0)\n",
"overlapper.clumps_pos2cell(clumpsx)\n", "overlapper.clumps_pos2cell(clumpsx)\n",
"\n", "\n",
"mins0, maxs0 = csiborgtools.match.get_clumplims(clumps0, overlapper.inv_clength, overlapper.nshift)\n", "mins0, maxs0 = csiborgtools.match.get_halolims(clumps0, overlapper.inv_clength, overlapper.nshift)\n",
"minsx, maxsx = csiborgtools.match.get_clumplims(clumpsx, overlapper.inv_clength, overlapper.nshift)" "minsx, maxsx = csiborgtools.match.get_halolims(clumpsx, overlapper.inv_clength, overlapper.nshift)"
] ]
}, },
{ {

View file

@ -1,4 +1,3 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it # This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the # under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your # Free Software Foundation; either version 3 of the License, or (at your
@ -15,7 +14,7 @@
"""A script to calculate overlap between two CSiBORG realisations.""" """A script to calculate overlap between two CSiBORG realisations."""
from argparse import ArgumentParser from argparse import ArgumentParser
from datetime import datetime from datetime import datetime
from os.path import join from distutils.util import strtobool
import numpy import numpy
from scipy.ndimage import gaussian_filter from scipy.ndimage import gaussian_filter
@ -24,71 +23,76 @@ try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
import utils
# Argument parser # Argument parser
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--nsim0", type=int) parser.add_argument("--nsim0", type=int)
parser.add_argument("--nsimx", type=int) parser.add_argument("--nsimx", type=int)
parser.add_argument("--nmult", type=float) parser.add_argument("--nmult", type=float)
parser.add_argument("--sigma", type=float) parser.add_argument("--sigma", type=float)
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)), default=False)
args = parser.parse_args() args = parser.parse_args()
# File paths
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
fout = join(utils.dumpdir, "overlap",
"cross_{}_{}.npz".format(args.nsim0, args.nsimx))
smooth_kwargs = {"sigma": args.sigma, "mode": "constant", "cval": 0.0} smooth_kwargs = {"sigma": args.sigma, "mode": "constant", "cval": 0.0}
overlapper = csiborgtools.match.ParticleOverlap() overlapper = csiborgtools.match.ParticleOverlap()
# Load catalogues
print("{}: loading catalogues {} and {}."
.format(datetime.now(), args.nsim0, args.nsimx), flush=True)
cat0 = csiborgtools.read.ClumpsCatalogue(args.nsim0, paths)
catx = csiborgtools.read.ClumpsCatalogue(args.nsimx, paths)
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsim0), flush=True)
with open(paths.initmatch_path(args.nsim0, "particles"), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumps0)
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsimx), flush=True)
with open(paths.initmatch_path(args.nsimx, "particles"), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumpsx)
print("{}: generating the background density fields.".format(datetime.now()),
flush=True)
delta_bckg = overlapper.make_bckg_delta(clumps0)
delta_bckg = overlapper.make_bckg_delta(clumpsx, delta=delta_bckg)
print("{}: crossing the simulations.".format(datetime.now()), flush=True)
matcher = csiborgtools.match.RealisationsMatcher() matcher = csiborgtools.match.RealisationsMatcher()
ref_indxs, cross_indxs, match_indxs, ngp_overlap = matcher.cross(
cat0, catx, clumps0, clumpsx, delta_bckg)
# Load the raw catalogues (i.e. no selection) including the initial CM positions
# and the particle archives.
cat0 = csiborgtools.read.HaloCatalogue(
args.nsim0, paths, load_initial=True, rawdata=True
)
catx = csiborgtools.read.HaloCatalogue(
args.nsimx, paths, load_initial=True, rawdata=True
)
halos0_archive = paths.initmatch_path(args.nsim0, "particles")
halosx_archive = paths.initmatch_path(args.nsimx, "particles")
print("{}: smoothing the background field.".format(datetime.now()), flush=True) # We generate the background density fields. Loads halos's particles one by one
# from the archive, concatenates them and calculates the NGP density field.
args.verbose and print(
"{}: generating the background density fields.".format(datetime.now()), flush=True
)
delta_bckg = overlapper.make_bckg_delta(halos0_archive, verbose=args.verbose)
delta_bckg = overlapper.make_bckg_delta(
halosx_archive, delta=delta_bckg, verbose=args.verbose
)
# We calculate the overlap between the NGP fields.
args.verbose and print(
"{}: crossing the simulations.".format(datetime.now()), flush=True
)
match_indxs, ngp_overlap = matcher.cross(
cat0, catx, halos0_archive, halosx_archive, delta_bckg
)
# We now smoothen up the background density field for the smoothed overlap calculation.
args.verbose and print(
"{}: smoothing the background field.".format(datetime.now()), flush=True
)
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs) gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
args.verbose and print(
"{}: calculating smoothed overlaps.".format(datetime.now()), flush=True
)
smoothed_overlap = matcher.smoothed_cross(
cat0, catx, halos0_archive, halosx_archive, delta_bckg, match_indxs, smooth_kwargs
)
print("{}: calculating smoothed overlaps.".format(datetime.now()), flush=True) # We save the results at long last.
smoothed_overlap = matcher.smoothed_cross(clumps0, clumpsx, delta_bckg, fout = paths.overlap_path(args.nsim0, args.nsimx)
ref_indxs, cross_indxs, match_indxs, args.verbose and print(
smooth_kwargs) "{}: saving results to `{}`.".format(datetime.now(), fout), flush=True
)
# Dump the result numpy.savez(
print("Saving results to `{}`.".format(fout), flush=True) fout,
with open(fout, "wb") as f: match_indxs=match_indxs,
numpy.savez(fout, ref_indxs=ref_indxs, cross_indxs=cross_indxs, ngp_overlap=ngp_overlap,
match_indxs=match_indxs, ngp_overlap=ngp_overlap, smoothed_overlap=smoothed_overlap,
smoothed_overlap=smoothed_overlap, sigma=args.sigma) sigma=args.sigma,
print("All finished.", flush=True) )
print("{}: all finished.".format(datetime.now()), flush=True)

View file

@ -72,9 +72,6 @@ def fit_clump(particles, clump_info, box):
obj = csiborgtools.fits.Clump(particles, clump_info, box) obj = csiborgtools.fits.Clump(particles, clump_info, box)
out = {} out = {}
if numpy.isnan(clump_info["index"]):
print("Why am I NaN?", flush=True)
out["index"] = clump_info["index"]
out["npart"] = len(obj) out["npart"] = len(obj)
out["totpartmass"] = numpy.sum(obj["M"]) out["totpartmass"] = numpy.sum(obj["M"])
for i, v in enumerate(["vx", "vy", "vz"]): for i, v in enumerate(["vx", "vy", "vz"]):
@ -121,7 +118,7 @@ def load_parent_particles(clumpid, particle_archive, clumps_cat):
if len(clumps) == 0: if len(clumps) == 0:
return None return None
return csiborgtools.match.concatenate_clumps(clumps, include_velocities=True) return csiborgtools.match.concatenate_parts(clumps, include_velocities=True)
# We now start looping over all simulations # We now start looping over all simulations
@ -152,11 +149,13 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank] jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]
out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect) out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect)
for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs): for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs):
clumpid = clumps_cat["index"][j]
out["index"][i] = clumpid
# If we are fitting halos and this clump is not a main, then continue. # If we are fitting halos and this clump is not a main, then continue.
if args.kind == "halos" and not ismain[j]: if args.kind == "halos" and not ismain[j]:
continue continue
clumpid = clumps_cat["index"][j]
if args.kind == "halos": if args.kind == "halos":
part = load_parent_particles(clumpid, particle_archive, clumps_cat) part = load_parent_particles(clumpid, particle_archive, clumps_cat)
else: else:
@ -169,9 +168,6 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
_out = fit_clump(part, clumps_cat[j], box) _out = fit_clump(part, clumps_cat[j], box)
for key in _out.keys(): for key in _out.keys():
out[key][i] = _out[key] out[key][i] = _out[key]
else:
out["index"][i] = clumpid
out["npart"][i] = 0
fout = ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), rank) fout = ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), rank)
if nproc == 0: if nproc == 0:
@ -204,7 +200,7 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
if args.kind == "halos": if args.kind == "halos":
out = out[ismain] out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, "clumps") fout = paths.structfit_path(nsnap, nsim, args.kind)
print("Saving to `{}`.".format(fout), flush=True) print("Saving to `{}`.".format(fout), flush=True)
numpy.save(fout, out) numpy.save(fout, out)

View file

@ -13,11 +13,8 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
""" """
A script to calculate the centre of mass of particles at redshift 70 that Script to calculate the particle centre of mass and Lagrangian patch size in the initial
are grouped in a clump at present redshift. snapshot. Optinally dumps the particle files, however this requires a lot of memory.
Optionally also dumps the clumps information, however watch out as this will
eat up a lot of memory.
""" """
from argparse import ArgumentParser from argparse import ArgumentParser
from datetime import datetime from datetime import datetime
@ -28,141 +25,143 @@ from os.path import join
import numpy import numpy
from mpi4py import MPI from mpi4py import MPI
from tqdm import tqdm
try: try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
# Get MPI things # Get MPI things
comm = MPI.COMM_WORLD comm = MPI.COMM_WORLD
rank = comm.Get_rank() rank = comm.Get_rank()
nproc = comm.Get_size() nproc = comm.Get_size()
verbose = nproc == 1
# Argument parser # Argument parser
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--dump_clumps", type=lambda x: bool(strtobool(x))) parser.add_argument("--dump", type=lambda x: bool(strtobool(x)))
args = parser.parse_args() args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
nsims = paths.get_ics(tonew=True) partreader = csiborgtools.read.ParticleReader(paths)
ftemp = join(paths.temp_dumpdir, "initmatch_{}_{}_{}.npy")
# Temporary output file # We loop over all particles and then use MPI when matching halos to the
ftemp = join(paths.dumpdir, "temp", "initmatch_{}_{}_{}.npy") # initial snapshot and dumping them.
for i, nsim in enumerate(paths.get_ics(tonew=True)):
for nsim in nsims:
if rank == 0: if rank == 0:
print("{}: reading simulation {}.".format(datetime.now(), nsim), print("{}: reading simulation {}.".format(datetime.now(), nsim), flush=True)
flush=True) nsnap = max(paths.get_snapshots(nsim))
nsnap_max = max(paths.get_snapshots(nsim))
reader = csiborgtools.read.ParticleReader(paths)
# Read and sort the initial particle files by their particle IDs # We first load particles in the initial and final snapshots and sort them
part0 = reader.read_particle(1, nsim, ["x", "y", "z", "M", "ID"], # by their particle IDs so that we can match them by array position.
verbose=False) # `clump_ids` are the clump IDs of particles.
part0 = partreader.read_particle(
1, nsim, ["x", "y", "z", "M", "ID"], verbose=verbose
)
part0 = part0[numpy.argsort(part0["ID"])] part0 = part0[numpy.argsort(part0["ID"])]
# Order the final snapshot clump IDs by the particle IDs pid = partreader.read_particle(nsnap, nsim, ["ID"], verbose=verbose)["ID"]
pid = reader.read_particle(nsnap_max, nsim, ["ID"], verbose=False)["ID"] clump_ids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
clump_ids = reader.read_clumpid(nsnap_max, nsim, verbose=False)
clump_ids = clump_ids[numpy.argsort(pid)] clump_ids = clump_ids[numpy.argsort(pid)]
# Release the particle IDs, we will not need them anymore now that both
# particle arrays are matched in ordering.
del pid del pid
collect() collect()
# Get rid of the clumps whose index is 0 -- those are unassigned # Particles whose clump ID is 0 are unassigned to a clump, so we can get
# rid of them to speed up subsequent operations. Again we release the mask.
mask = clump_ids > 0 mask = clump_ids > 0
clump_ids = clump_ids[mask] clump_ids = clump_ids[mask]
part0 = part0[mask] part0 = part0[mask]
del mask del mask
collect() collect()
# Calculate the centre of mass of each parent halo, the Lagrangian patch
# size and optionally the initial snapshot particles belonging to this
# parent halo. Dumping the particles will take majority of time.
if rank == 0: if rank == 0:
print("{}: dumping intermediate files.".format(datetime.now()), print(
flush=True) "{}: calculating {}th simulation {}.".format(datetime.now(), i, nsim),
flush=True,
)
# We load up the clump catalogue which contains information about the
# ultimate parent halos of each clump. We will loop only over the clump
# IDs of ultimate parent halos and add their substructure particles and at
# the end save these.
cat = csiborgtools.read.ClumpsCatalogue(
nsim, paths, load_fitted=False, rawdata=True
)
parent_ids = cat["index"][cat.ismain][:500]
jobs = csiborgtools.fits.split_jobs(parent_ids.size, nproc)[rank]
for i in tqdm(jobs) if verbose else jobs:
clid = parent_ids[i]
mmain_indxs = cat["index"][cat["parent"] == clid]
# Grab unique clump IDs and loop over them mmain_mask = numpy.isin(clump_ids, mmain_indxs, assume_unique=True)
unique_clumpids = numpy.unique(clump_ids) mmain_particles = part0[mmain_mask]
njobs = unique_clumpids.size raddist, cmpos = csiborgtools.match.dist_centmass(mmain_particles)
jobs = csiborgtools.utils.split_jobs(njobs, nproc)[rank] patchsize = csiborgtools.match.dist_percentile(raddist, [99], distmax=0.075)
for i in jobs: with open(ftemp.format(nsim, clid, "fit"), "wb") as f:
n = unique_clumpids[i] numpy.savez(f, cmpos=cmpos, patchsize=patchsize)
x0 = part0[clump_ids == n]
# Center of mass and Lagrangian patch size if args.dump:
dist, cm = csiborgtools.match.dist_centmass(x0) with open(ftemp.format(nsim, clid, "particles"), "wb") as f:
patch = csiborgtools.match.dist_percentile(dist, [99], distmax=0.075) numpy.save(f, mmain_particles)
# Dump the center of mass
with open(ftemp.format(nsim, n, "cm"), 'wb') as f:
numpy.save(f, cm)
# Dump the Lagrangian patch size
with open(ftemp.format(nsim, n, "lagpatch"), 'wb') as f:
numpy.save(f, patch)
# Dump the entire clump
if args.dump_clumps:
with open(ftemp.format(nsim, n, "clump"), "wb") as f:
numpy.save(f, x0)
# We force clean up the memory before continuing.
del part0, clump_ids del part0, clump_ids
collect() collect()
# We now wait for all processes and then use the 0th process to collect the results.
# We first collect just the Lagrangian patch size information.
comm.Barrier() comm.Barrier()
if rank == 0: if rank == 0:
print("{}: collecting summary files...".format(datetime.now()), print("{}: collecting fits...".format(datetime.now()), flush=True)
flush=True) dtype = {
# Collect the centre of masses, patch size, etc. and dump them "names": ["index", "x", "y", "z", "lagpatch"],
dtype = {"names": ['x', 'y', 'z', "lagpatch", "ID"], "formats": [numpy.int32] + [numpy.float32] * 4,
"formats": [numpy.float32] * 4 + [numpy.int32]} }
out = numpy.full(njobs, numpy.nan, dtype=dtype) out = numpy.full(parent_ids.size, numpy.nan, dtype=dtype)
for i, clid in enumerate(parent_ids):
for i, n in enumerate(unique_clumpids): fpath = ftemp.format(nsim, clid, "fit")
# Load in CM vector
fpath = ftemp.format(nsim, n, "cm")
with open(fpath, "rb") as f: with open(fpath, "rb") as f:
fin = numpy.load(f) inp = numpy.load(f)
out['x'][i] = fin[0] out["index"][i] = clid
out['y'][i] = fin[1] out["x"][i] = inp["cmpos"][0]
out['z'][i] = fin[2] out["y"][i] = inp["cmpos"][1]
out["z"][i] = inp["cmpos"][2]
out["lagpatch"][i] = inp["patchsize"]
remove(fpath) remove(fpath)
# Load in the patch size fout = paths.initmatch_path(nsim, "fit")
fpath = ftemp.format(nsim, n, "lagpatch") print("{}: dumping fits to .. `{}`.".format(datetime.now(), fout), flush=True)
with open(fpath, "rb") as f:
out["lagpatch"][i] = numpy.load(f)
remove(fpath)
# Store the halo ID
out["ID"][i] = n
print("{}: dumping to .. `{}`.".format(
datetime.now(), paths.initmatch_path(nsim, "cm")), flush=True)
with open(paths.initmatch_path(nsim, "cm"), 'wb') as f:
numpy.save(f, out)
if args.dump_clumps:
print("{}: collecting particle files...".format(datetime.now()),
flush=True)
out = [None] * unique_clumpids.size
dtype = {"names": ["clump", "ID"],
"formats": [object, numpy.int32]}
out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype)
for i, n in enumerate(unique_clumpids):
fpath = ftemp.format(nsim, n, "clump")
with open(fpath, 'rb') as f:
fin = numpy.load(f)
out["clump"][i] = fin
out["ID"][i] = n
remove(fpath)
fout = paths.initmatch_path(nsim, "particles")
print("{}: dumping to .. `{}`.".format(datetime.now(), fout),
flush=True)
with open(fout, "wb") as f: with open(fout, "wb") as f:
numpy.save(f, out) numpy.save(f, out)
# We now optionally collect the individual clumps and store them in an archive,
# which has the benefit of being a single file that can be easily read in.
if args.dump:
print("{}: collecting particles...".format(datetime.now()), flush=True)
out = {}
for clid in parent_ids:
fpath = ftemp.format(nsim, clid, "particles")
with open(fpath, "rb") as f:
out.update({str(clid): numpy.load(f)})
fout = paths.initmatch_path(nsim, "particles")
print(
"{}: dumping particles to .. `{}`.".format(datetime.now(), fout),
flush=True,
)
with open(fout, "wb") as f:
numpy.savez(f, **out)
# Again we force clean up the memory before continuing.
del out del out
collect() collect()