Update initial matching & overlaps (#47)

* pep8

* fix convention

* Update script

* enforce optimisation boundaries to be finite

* Update TODO

* Remove sky matching

* Fix a small bug

* fix bug

* Remove import

* Add halo fitted quantities

* Update nbs

* update README

* Add load_initial comments

* Rename nbs

* Delete nb

* Update imports

* Rename function

* Update matcher

* Add overlap paths

* Update the matching script

* Update verbosity

* Add verbosity flags

* Simplify make_bckg_delta

* bug fix

* fix bug
Richard Stiskalek 2023-04-21 01:35:06 +02:00 committed by GitHub
parent 39b3498621
commit 04119a5314
14 changed files with 527 additions and 2836 deletions

View File

@ -4,12 +4,13 @@
## Project Overlap
- [x] Clean up the kNN paths in the summary.
- [x] Clean up the 2PCF paths in the summary.
- [x] Update the clustering scripts to work with clumps instead.
- [x] Sort out the splitting of individual clumps.
- [x] Update the fitting scripts to work for clumps and parent halos.
- [ ] Sort out the splitting of individual clumps.
- [ ] Update the fitting scripts to work for clumps and parent halos.
- [ ] When calculating the overlap, check whether the halos have any particles; this should already be the case, otherwise their initial CM would not be defined.
- [ ] Calculate fitted quantities for clumps and parent halos and add them to the catalogues.
- [ ] Update overlap scripts to work with summed parent halos.
- [ ] Update the clustering scripts to work with clumps instead.
- [ ] Calculate the overlap between all 101 IC realisations on DiRAC.

View File

@ -349,24 +349,31 @@ class NFWPosterior(NFWProfile):
"""
assert isinstance(clump, Clump)
r = clump.r()
rmin = numpy.min(r)
rmin = numpy.min(r[r > 0]) # First particle that is not at r = 0
rmax, mtot = clump.spherical_overdensity_mass(200)
mask = (rmin <= r) & (r <= rmax)
npart = numpy.sum(mask)
r = r[mask]
# Loss function to optimize
def loss(logRs):
return -self(logRs, r, rmin, rmax, npart)
# Define optimisation boundaries. Check whether they are finite and
# that rmax > rmin. If not, then return NaN.
bounds = (numpy.log10(rmin), numpy.log10(rmax))
if not (numpy.all(numpy.isfinite(bounds)) and bounds[0] < bounds[1]):
return numpy.nan, numpy.nan
res = minimize_scalar(
loss, bounds=(numpy.log10(rmin), numpy.log10(rmax)), method="bounded"
)
# Check whether the fit converged to a radius sufficiently far from `rmax`
# and that it is a success. Otherwise return NaNs.
if numpy.log10(rmax) - res.x < eps:
res.success = False
if not res.success:
return numpy.nan, numpy.nan
# Additionally, we wish to calculate the central density from the
# mass (integral) constraint.
rho0 = self.rho0_from_Rs(10**res.x, rmin, rmax, mtot)
return 10**res.x, rho0
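
For intuition, here is a minimal standalone sketch of the bounded-fit pattern above, with a toy quadratic loss standing in for the NFW posterior (all names and values are illustrative):

import numpy
from scipy.optimize import minimize_scalar

def fit_log_rs(rmin, rmax, loss, eps=1e-4):
    """Minimise `loss` over log10(Rs) within finite bounds, returning NaN on failure."""
    bounds = (numpy.log10(rmin), numpy.log10(rmax))
    # Bail out if the bounds are not finite or are inverted.
    if not (numpy.all(numpy.isfinite(bounds)) and bounds[0] < bounds[1]):
        return numpy.nan
    res = minimize_scalar(loss, bounds=bounds, method="bounded")
    # Reject fits that converged onto the upper boundary.
    if not res.success or bounds[1] - res.x < eps:
        return numpy.nan
    return 10**res.x

# Toy loss with a minimum at log10(Rs) = -1, i.e. Rs = 0.1.
print(fit_log_rs(1e-3, 1.0, lambda log_rs: (log_rs + 1.0) ** 2))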

View File

@ -20,9 +20,6 @@ from .match import ( # noqa
cosine_similarity,
dist_centmass,
dist_percentile,
fill_delta,
fill_delta_indxs,
get_clumplims,
)
from .num_density import binned_counts, number_density # noqa
from .utils import concatenate_clumps # noqa
from .utils import concatenate_parts # noqa

View File

@ -16,15 +16,12 @@
Support for matching halos between CSiBORG IC realisations.
"""
from datetime import datetime
from gc import collect
import numpy
from numba import jit
from scipy.ndimage import gaussian_filter
from tqdm import tqdm, trange
from .utils import concatenate_clumps
###############################################################################
# Realisations matcher for calculating overlaps #
###############################################################################
@ -32,9 +29,7 @@ from .utils import concatenate_clumps
class RealisationsMatcher:
"""
A tool to match halos between IC realisations. Looks for halos 3D space
neighbours in all remaining IC realisations that are within some mass
range of it.
A tool to match halos between IC realisations.
Parameters
----------
@ -49,12 +44,13 @@ class RealisationsMatcher:
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
"""
_nmult = None
_dlogmass = None
_mass_kind = None
_overlapper = None
def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass"):
def __init__(self, nmult=1.0, dlogmass=2.0, mass_kind="totpartmass"):
assert nmult > 0
assert dlogmass > 0
assert isinstance(mass_kind, str)
@ -109,12 +105,13 @@ class RealisationsMatcher:
"""
return self._overlapper
def cross(self, cat0, catx, clumps0, clumpsx, delta_bckg, verbose=True):
def cross(
self, cat0, catx, halos0_archive, halosx_archive, delta_bckg, verbose=True
):
r"""
Find all neighbours whose CM separation is less than `nmult` times the
sum of their initial Lagrangian patch sizes and optionally calculate
their overlap. Enforces that the neighbours' are similar in mass up to
`dlogmass` dex.
sum of their initial Lagrangian patch sizes and calculate their overlap.
Enforces that the neighbours are similar in mass up to `dlogmass` dex.
Parameters
----------
@ -122,14 +119,14 @@ class RealisationsMatcher:
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the cross simulation.
clumps0 : list of structured arrays
List of clump structured arrays of the reference simulation, keys
must include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
clumpsx : list of structured arrays
List of clump structured arrays of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
halos0_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be converted
to cell numbers.
halosx_archive : `NpzFile` object
Archive of halos' particles of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be converted
to cell numbers.
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
@ -140,24 +137,27 @@ class RealisationsMatcher:
Returns
-------
ref_indxs : 1-dimensional array
Halo IDs in the reference catalogue.
cross_indxs : 1-dimensional array
Halo IDs in the cross catalogue.
match_indxs : 1-dimensional array of arrays
Indices of halo counterparts in the cross catalogue.
The outer array corresponds to halos in the reference catalogue, the
inner array corresponds to the array positions of matches in the cross
catalogue.
overlaps : 1-dimensional array of arrays
Overlaps with the cross catalogue.
Overlaps with the cross catalogue. Follows the same pattern as `match_indxs`.
"""
# Query the KNN
verbose and print("{}: querying the KNN."
.format(datetime.now()), flush=True)
# We begin by querying the kNN for the nearest neighbours of each halo
# in the reference simulation from the cross simulation in the initial
# snapshot.
verbose and print("{}: querying the KNN.".format(datetime.now()), flush=True)
match_indxs = radius_neighbours(
catx.knn(select_initial=True), cat0.positions(in_initial=True),
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
nmult=self.nmult, enforce_in32=True, verbose=verbose)
# Remove neighbours whose mass is too large/small
catx.knn(select_initial=True),
cat0.positions(in_initial=True),
radiusX=cat0["lagpatch"],
radiusKNN=catx["lagpatch"],
nmult=self.nmult,
enforce_int32=True,
verbose=verbose,
)
# We next remove neighbours whose mass is too large/small.
if self.dlogmass is not None:
for i, indx in enumerate(match_indxs):
# |log(M1 / M2)|
@ -165,68 +165,103 @@ class RealisationsMatcher:
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
match_indxs[i] = match_indxs[i][aratio < self.dlogmass]
# Min and max cells along each axis for each halo
limkwargs = {"ncells": self.overlapper.inv_clength,
"nshift": self.overlapper.nshift}
mins0, maxs0 = get_clumplims(clumps0, **limkwargs)
minsx, maxsx = get_clumplims(clumpsx, **limkwargs)
# Mapping from a halo index to the list of clumps
hid2clumps0 = {hid: n for n, hid in enumerate(clumps0["ID"])}
hid2clumpsx = {hid: n for n, hid in enumerate(clumpsx["ID"])}
# We will make a dictionary to keep in memory the halos' particles from the
# cross simulations so that they are not loaded several times and we only
# convert their positions to cells once. Possibly make an option to not do
# this to lower memory requirements?
cross_halos = {}
cross_lims = {}
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
# Loop only over halos that have neighbours
iters = numpy.arange(len(cat0))[[x.size > 0 for x in match_indxs]]
for i in tqdm(iters) if verbose else iters:
match0 = hid2clumps0[cat0["index"][i]]
# The clump, its mass and mins & maxs
cl0 = clumps0["clump"][match0]
mass0 = numpy.sum(cl0['M'])
mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Preallocate arrays to store overlap information
_cross = numpy.full(match_indxs[i].size, numpy.nan,
dtype=numpy.float32)
# Loop over matches of this halo from the other simulation
for j, ind in enumerate(match_indxs[i]):
matchx = hid2clumpsx[catx["index"][ind]]
clx = clumpsx["clump"][matchx]
for i, k0 in enumerate(tqdm(cat0["index"]) if verbose else cat0["index"]):
# If we have no matches continue to the next halo.
matches = match_indxs[i]
if matches.size == 0:
continue
# Next, we find this halo's particles, total mass and minimum/maximum cells
# and convert positions to cells.
halo0 = halos0_archive[str(k0)]
mass0 = numpy.sum(halo0["M"])
mins0, maxs0 = get_halolims(
halo0, ncells=self.overlapper.inv_clength, nshift=self.overlapper.nshift
)
for p in ("x", "y", "z"):
halo0[p] = self.overlapper.pos2cell(halo0[p])
# We now loop over matches of this halo and calculate their overlap,
# storing them in `_cross`.
_cross = numpy.full(matches.size, numpy.nan, dtype=numpy.float32)
for j, kf in enumerate(catx["index"][matches]):
# Attempt to load this cross halo from memory; if that fails, get it
# from the halo archive (and similarly for the limits) and convert the
# particle positions to cells.
try:
halox = cross_halos[kf]
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox,
ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift,
)
for p in ("x", "y", "z"):
halox[p] = self.overlapper.pos2cell(halox[p])
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
massx = numpy.sum(halox["M"])
_cross[j] = self.overlapper(
cl0, clx, delta_bckg, mins0_current, maxs0_current,
minsx[matchx], maxsx[matchx], mass1=mass0,
mass2=numpy.sum(clx['M']))
halo0,
halox,
delta_bckg,
mins0,
maxs0,
minsx,
maxsx,
mass1=mass0,
mass2=massx,
)
cross[i] = _cross
# Remove matches with exactly 0 overlap
# We remove all matches that have zero overlap to save space.
mask = cross[i] > 0
match_indxs[i] = match_indxs[i][mask]
cross[i] = cross[i][mask]
# Sort the matches by overlap
# And finally we sort the matches by their overlap.
ordering = numpy.argsort(cross[i])[::-1]
match_indxs[i] = match_indxs[i][ordering]
cross[i] = cross[i][ordering]
cross = numpy.asanyarray(cross, dtype=object)
return cat0["index"], catx["index"], match_indxs, cross
return match_indxs, cross
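
The try/except block above is a load-once cache keyed by halo ID. A minimal sketch of the same pattern, with a plain dict standing in for the `NpzFile` archive:

import numpy

def load_cached(kf, archive, cache):
    """Return the particle array for halo `kf`, reading the archive only once."""
    try:
        return cache[kf]
    except KeyError:
        halo = archive[str(kf)]  # an `NpzFile` lookup reads from disk
        cache[kf] = halo
        return halo

archive = {"7": numpy.zeros(3, dtype=[("x", numpy.int32), ("M", numpy.float32)])}
cache = {}
load_cached(7, archive, cache)  # loaded from the archive
load_cached(7, archive, cache)  # served from the in-memory cache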
def smoothed_cross(self, clumps0, clumpsx, delta_bckg, ref_indxs,
cross_indxs, match_indxs, smooth_kwargs, verbose=True):
def smoothed_cross(
self,
cat0,
catx,
halos0_archive,
halosx_archive,
delta_bckg,
match_indxs,
smooth_kwargs,
verbose=True,
):
r"""
Calculate the smoothed overlaps for pairs previously identified via
`self.cross(...)` to have a non-zero overlap.
Parameters
----------
clumps0 : list of structured arrays
List of clump structured arrays of the reference simulation, keys
must include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
clumpsx : list of structured arrays
List of clump structured arrays of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
cat0 : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the cross simulation.
halos0_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be converted
to cell numbers.
halosx_archive : `NpzFile` object
Archive of halos' particles of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be converted
to cell numbers.
delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
@ -247,33 +282,45 @@ class RealisationsMatcher:
-------
overlaps : 1-dimensional array of arrays
"""
# Min and max cells along each axis for each halo
limkwargs = {"ncells": self.overlapper.inv_clength,
"nshift": self.overlapper.nshift}
mins0, maxs0 = get_clumplims(clumps0, **limkwargs)
minsx, maxsx = get_clumplims(clumpsx, **limkwargs)
hid2clumps0 = {hid: n for n, hid in enumerate(clumps0["ID"])}
hid2clumpsx = {hid: n for n, hid in enumerate(clumpsx["ID"])}
# Preallocate arrays to store the overlap information
cross_halos = {}
cross_lims = {}
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
for i, ref_ind in enumerate(tqdm(ref_indxs) if verbose else ref_indxs):
match0 = hid2clumps0[ref_ind]
# The reference clump, its mass and mins & maxs
cl0 = clumps0["clump"][match0]
mins0_current, maxs0_current = mins0[match0], maxs0[match0]
# Preallocate
nmatches = match_indxs[i].size
_cross = numpy.full(nmatches, numpy.nan, numpy.float32)
for j, match_ind in enumerate(match_indxs[i]):
matchx = hid2clumpsx[cross_indxs[match_ind]]
clx = clumpsx["clump"][matchx]
for i, k0 in enumerate(tqdm(cat0["index"]) if verbose else cat0["index"]):
halo0 = halos0_archive[str(k0)]
mins0, maxs0 = get_halolims(
halo0, ncells=self.overlapper.inv_clength, nshift=self.overlapper.nshift
)
# Now loop over the matches and calculate the smoothed overlap.
_cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32)
for j, kf in enumerate(catx["index"][match_indxs[i]]):
# Attempt to load this cross halo from memory; if that fails, get it
# from the halo archive (and similarly for the limits).
try:
halox = cross_halos[kf]
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox,
ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift,
)
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
_cross[j] = self.overlapper(
cl0, clx, delta_bckg, mins0_current,
maxs0_current, minsx[matchx], maxsx[matchx],
smooth_kwargs=smooth_kwargs)
halo0,
halox,
delta_bckg,
mins0,
maxs0,
minsx,
maxsx,
smooth_kwargs=smooth_kwargs,
)
cross[i] = _cross
return numpy.asanyarray(cross, dtype=object)
@ -321,6 +368,7 @@ class ParticleOverlap:
the nearest grid position particle assignment scheme, with optional
Gaussian smoothing.
"""
def __init__(self):
# Inverse cell length in box units. By default :math:`2^{11}`, which
# matches the initial RAMSES grid resolution.
@ -346,92 +394,63 @@ class ParticleOverlap:
# Check whether the positions are already given as cell numbers
if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos
return numpy.floor(pos * self.inv_clength).astype(int)
return numpy.floor(pos * self.inv_clength).astype(numpy.int32)
def clumps_pos2cell(self, clumps):
def make_bckg_delta(self, halo_archive, delta=None, verbose=False):
"""
Convert clump positions directly to cell IDs. Useful to speed up
subsequent calculations. Overwrites the passed in arrays.
Calculate a NGP density field of particles belonging to halos within the
central :math:`1/2^3` high-resolution region of the simulation. Smoothing
must be applied separately.
Parameters
----------
clumps : array of arrays
Array of clump structured arrays whose `x`, `y`, `z` keys will be
converted.
Returns
-------
None
"""
# Check if clumps are probably already in cells
if any(clumps[0][0].dtype[p].char in numpy.typecodes["AllInteger"]
for p in ('x', 'y', 'z')):
raise ValueError("Positions appear to already be converted cells.")
# Get the new dtype that replaces float for int for positions
names = clumps[0][0].dtype.names # Take the first one, doesn't matter
formats = [descr[1] for descr in clumps[0][0].dtype.descr]
for i in range(len(names)):
if names[i] in ('x', 'y', 'z'):
formats[i] = numpy.int32
dtype = numpy.dtype({"names": names, "formats": formats})
# Loop switch positions for cells IDs and change dtype
for n in range(clumps.size):
for p in ('x', 'y', 'z'):
clumps[n][0][p] = self.pos2cell(clumps[n][0][p])
clumps[n][0] = clumps[n][0].astype(dtype)
def make_bckg_delta(self, clumps, delta=None):
"""
Calculate a NGP density field of clumps within the central
:math:`1/2^3` region of the simulation. Smoothing must be applied
separately.
Parameters
----------
clumps : list of structured arrays
List of clump structured array, keys must include `x`, `y`, `z`
and `M`.
halo_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`.
delta : 3-dimensional array, optional
Array to store the density field in. If `None` a new array is
created.
verbose : bool, optional
Verbosity flag for loading the files.
Returns
-------
delta : 3-dimensional array
"""
conc_clumps = concatenate_clumps(clumps)
cells = [self.pos2cell(conc_clumps[p]) for p in ('x', 'y', 'z')]
mass = conc_clumps['M']
del conc_clumps
collect() # This is a large array so force memory clean
# We obtain the minimum/maximum cell IDs and number of cells along each dim.
cellmin = self.inv_clength // 4 # The minimum cell ID
cellmax = 3 * self.inv_clength // 4 # The maximum cell ID
ncells = cellmax - cellmin
# Mask out particles outside the cubical high resolution region
mask = ((cellmin <= cells[0]) & (cells[0] < cellmax)
& (cellmin <= cells[1]) & (cells[1] < cellmax)
& (cellmin <= cells[2]) & (cells[2] < cellmax)
)
cells = [c[mask] for c in cells]
mass = mass[mask]
# Prepare the density field or check it is of the right shape
# We then pre-allocate the density field or check it is of the right shape
# if already given.
if delta is None:
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
else:
assert ((delta.shape == (ncells,) * 3)
& (delta.dtype == numpy.float32))
assert (delta.shape == (ncells,) * 3) & (delta.dtype == numpy.float32)
# We now loop one-by-one over the halos and fill the density field.
files = halo_archive.files
for file in tqdm(files) if verbose else files:
parts = halo_archive[file]
cells = [self.pos2cell(parts[p]) for p in ("x", "y", "z")]
mass = parts["M"]
# We mask out particles outside the cubical high-resolution region
mask = (
(cellmin <= cells[0])
& (cells[0] < cellmax)
& (cellmin <= cells[1])
& (cells[1] < cellmax)
& (cellmin <= cells[2])
& (cells[2] < cellmax)
)
cells = [c[mask] for c in cells]
mass = mass[mask]
fill_delta(delta, *cells, *(cellmin,) * 3, mass)
return delta
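
A NumPy-only sketch of the NGP deposition that `fill_delta` performs inside this loop (the real `fill_delta` is numba-jitted; `numpy.add.at` serves as an unoptimised stand-in here):

import numpy

def ngp_deposit(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
    """Deposit particle masses at their cell indices, shifted to the subgrid origin."""
    numpy.add.at(delta, (xcell - xmin, ycell - ymin, zcell - zmin), weights)

delta = numpy.zeros((4, 4, 4), dtype=numpy.float32)
xcell = numpy.array([0, 1, 1])
ycell = numpy.array([2, 2, 2])
zcell = numpy.array([3, 3, 3])
ngp_deposit(delta, xcell, ycell, zcell, 0, 0, 0, numpy.ones(3, dtype=numpy.float32))
print(delta[1, 2, 3])  # 2.0, since two particles share this cell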
def make_delta(self, clump, mins=None, maxs=None, subbox=False,
smooth_kwargs=None):
def make_delta(self, clump, mins=None, maxs=None, subbox=False, smooth_kwargs=None):
"""
Calculate a NGP density field of a halo on a cubic grid. Optionally can
be smoothed with a Gaussian kernel.
@ -453,7 +472,7 @@ class ParticleOverlap:
-------
delta : 3-dimensional array
"""
cells = [self.pos2cell(clump[p]) for p in ('x', 'y', 'z')]
cells = [self.pos2cell(clump[p]) for p in ("x", "y", "z")]
# Check that minima and maxima are integers
if not (mins is None and maxs is None):
@ -464,26 +483,38 @@ class ParticleOverlap:
# Minimum xcell, ycell and zcell of this clump
if mins is None or maxs is None:
mins = numpy.asanyarray(
[max(numpy.min(cell) - self.nshift, 0) for cell in cells])
[max(numpy.min(cell) - self.nshift, 0) for cell in cells]
)
maxs = numpy.asanyarray(
[min(numpy.max(cell) + self.nshift, self.inv_clength)
for cell in cells])
[
min(numpy.max(cell) + self.nshift, self.inv_clength)
for cell in cells
]
)
ncells = numpy.max(maxs - mins) + 1 # To get the number of cells
else:
mins = (0, 0, 0,)
mins = [0, 0, 0]
ncells = self.inv_clength
# Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
fill_delta(delta, *cells, *mins, clump['M'])
fill_delta(delta, *cells, *mins, clump["M"])
if smooth_kwargs is not None:
gaussian_filter(delta, output=delta, **smooth_kwargs)
return delta
def make_deltas(self, clump1, clump2, mins1=None, maxs1=None,
mins2=None, maxs2=None, smooth_kwargs=None):
def make_deltas(
self,
clump1,
clump2,
mins1=None,
maxs1=None,
mins2=None,
maxs2=None,
smooth_kwargs=None,
):
"""
Calculate the NGP density fields of two halos on a grid that encloses
them both. Optionally can be smoothed with a Gaussian kernel.
@ -513,8 +544,8 @@ class ParticleOverlap:
Indices where the lower mass clump has a non-zero density.
Calculated only if no smoothing is applied, otherwise `None`.
"""
xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ('x', 'y', 'z'))
xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ('x', 'y', 'z'))
xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ("x", "y", "z"))
xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ("x", "y", "z"))
if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)):
# Minimum cell number of the two halos along each dimension
@ -529,32 +560,35 @@ class ParticleOverlap:
ymax = max(numpy.max(yc1), numpy.max(yc2)) + self.nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + self.nshift
# Make sure shifting does not go beyond boundaries
xmax, ymax, zmax = [min(px, self.inv_clength - 1)
for px in (xmax, ymax, zmax)]
xmax, ymax, zmax = [
min(px, self.inv_clength - 1) for px in (xmax, ymax, zmax)
]
else:
xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
cellmins = (xmin, ymin, zmin, ) # Cell minima
cellmins = (xmin, ymin, zmin) # Cell minima
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1 # Num cells
# Preallocate and fill the arrays
delta1 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
delta2 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
delta1 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
delta2 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
# If no smoothing is applied, figure out the nonzero indices of the smaller clump
if smooth_kwargs is None:
if clump1.size > clump2.size:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
nonzero = fill_delta_indxs(delta2, xc2, yc2, zc2, *cellmins,
clump2['M'])
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
nonzero = fill_delta_indxs(
delta2, xc2, yc2, zc2, *cellmins, clump2["M"]
)
else:
nonzero = fill_delta_indxs(delta1, xc1, yc1, zc1, *cellmins,
clump1['M'])
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M'])
nonzero = fill_delta_indxs(
delta1, xc1, yc1, zc1, *cellmins, clump1["M"]
)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
else:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M'])
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
nonzero = None
if smooth_kwargs is not None:
@ -562,9 +596,19 @@ class ParticleOverlap:
gaussian_filter(delta2, output=delta2, **smooth_kwargs)
return delta1, delta2, cellmins, nonzero
def __call__(self, clump1, clump2, delta_bckg, mins1=None, maxs1=None,
mins2=None, maxs2=None, mass1=None, mass2=None,
smooth_kwargs=None):
def __call__(
self,
clump1,
clump2,
delta_bckg,
mins1=None,
maxs1=None,
mins2=None,
maxs2=None,
mass1=None,
mass2=None,
smooth_kwargs=None,
):
"""
Calculate overlap between `clump1` and `clump2`. See
`calculate_overlap(...)` for further information. Be careful so that
@ -603,16 +647,17 @@ class ParticleOverlap:
overlap : float
"""
delta1, delta2, cellmins, nonzero = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2,
smooth_kwargs=smooth_kwargs)
clump1, clump2, mins1, maxs1, mins2, maxs2, smooth_kwargs=smooth_kwargs
)
if smooth_kwargs is not None:
return calculate_overlap(delta1, delta2, cellmins, delta_bckg)
# Calculate the masses if not given
mass1 = numpy.sum(clump1['M']) if mass1 is None else mass1
mass2 = numpy.sum(clump2['M']) if mass2 is None else mass2
return calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg,
nonzero, mass1, mass2)
mass1 = numpy.sum(clump1["M"]) if mass1 is None else mass1
mass2 = numpy.sum(clump2["M"]) if mass2 is None else mass2
return calculate_overlap_indxs(
delta1, delta2, cellmins, delta_bckg, nonzero, mass1, mass2
)
###############################################################################
@ -682,14 +727,15 @@ def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
return cells[:count_nonzero, :] # Cutoff unassigned places
def get_clumplims(clumps, ncells, nshift=None):
def get_halolims(halo, ncells, nshift=None):
"""
Get the lower and upper limit of clumps' positions or cell numbers.
Get the lower and upper limit of a halo's positions or cell numbers.
Parameters
----------
clumps : array of arrays
Array of clump structured arrays.
halo : structured array
Structured array containing the particles of a given halo. Keys must
include `x`, `y`, `z`.
ncells : int
Number of grid cells of the box along a single dimension.
nshift : int, optional
@ -697,23 +743,21 @@ def get_clumplims(clumps, ncells, nshift=None):
Returns
-------
mins, maxs : 2-dimensional arrays of shape `(n_samples, 3)`
mins, maxs : 1-dimensional arrays of shape `(3, )`
Minimum and maximum along each axis.
"""
dtype = clumps[0][0]['x'].dtype # dtype of the first clump's 'x'
# Check that for real positions we cannot apply nshift
# Check that in case of `nshift` we have integer positions.
dtype = halo["x"].dtype
if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
raise TypeError("`nshift` is supported only when positions are cell numbers.")
nshift = 0 if nshift is None else nshift # To simplify code below
nclumps = clumps.size
mins = numpy.full((nclumps, 3), numpy.nan, dtype=dtype)
maxs = numpy.full((nclumps, 3), numpy.nan, dtype=dtype)
mins = numpy.full(3, numpy.nan, dtype=dtype)
maxs = numpy.full(3, numpy.nan, dtype=dtype)
for i, clump in enumerate(clumps):
for j, p in enumerate(['x', 'y', 'z']):
mins[i, j] = max(numpy.min(clump[0][p]) - nshift, 0)
maxs[i, j] = min(numpy.max(clump[0][p]) + nshift, ncells - 1)
for i, p in enumerate(["x", "y", "z"]):
mins[i] = max(numpy.min(halo[p]) - nshift, 0)
maxs[i] = min(numpy.max(halo[p]) + nshift, ncells - 1)
return mins, maxs
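
A short usage sketch of `get_halolims`, assuming it is imported into scope and that positions are already integer cell numbers so that `nshift` is permitted (all values are made up):

import numpy

halo = numpy.zeros(5, dtype=[("x", numpy.int32), ("y", numpy.int32),
                             ("z", numpy.int32), ("M", numpy.float32)])
halo["x"] = [10, 12, 14, 11, 13]
halo["y"] = [5, 6, 7, 8, 9]
halo["z"] = [0, 1, 2, 3, 4]

mins, maxs = get_halolims(halo, ncells=2048, nshift=5)
# mins -> [5, 0, 0] and maxs -> [19, 14, 9], clipped to [0, ncells - 1].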
@ -742,8 +786,8 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
-------
overlap : float
"""
totmass = 0. # Total mass of clump 1 and clump 2
intersect = 0. # Weighted intersecting mass
totmass = 0.0 # Total mass of clump 1 and clump 2
intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack things
bckg_offset = 512 # Offset of the background density field
bckg_size = 1024
@ -770,8 +814,9 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
@jit(nopython=True)
def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
mass1, mass2):
def calculate_overlap_indxs(
delta1, delta2, cellmins, delta_bckg, nonzero, mass1, mass2
):
r"""
Overlap between two clumps whose density fields are evaluated on the
same grid and `nonzero` enumerates the non-zero cells of `delta1`. This is
@ -800,7 +845,7 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
-------
overlap : float
"""
intersect = 0. # Weighted intersecting mass
intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack cell minima
bckg_offset = 512 # Offset of the background density field
bckg_size = 1024 # Size of the background density field array
@ -827,7 +872,7 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
def dist_centmass(clump):
"""
Calculate the clump particles' distance from the centre of mass.
Calculate the clump (or halo) particles' distance from the centre of mass.
Parameters
----------
@ -842,12 +887,15 @@ def dist_centmass(clump):
Center of mass coordinates.
"""
# CM along each dimension
cmx, cmy, cmz = [numpy.average(clump[p], weights=clump['M'])
for p in ('x', 'y', 'z')]
cmx, cmy, cmz = [
numpy.average(clump[p], weights=clump["M"]) for p in ("x", "y", "z")
]
# Particle distance from the CM
dist = numpy.sqrt(numpy.square(clump['x'] - cmx)
+ numpy.square(clump['y'] - cmy)
+ numpy.square(clump['z'] - cmz))
dist = numpy.sqrt(
numpy.square(clump["x"] - cmx)
+ numpy.square(clump["y"] - cmy)
+ numpy.square(clump["z"] - cmz)
)
return dist, numpy.asarray([cmx, cmy, cmz])
@ -874,8 +922,9 @@ def dist_percentile(dist, qs, distmax=0.075):
return x
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.,
enforce_int32=False, verbose=True):
def radius_neighbours(
knn, X, radiusX, radiusKNN, nmult=1.0, enforce_int32=False, verbose=True
):
"""
Find all neighbours of a trained KNN model whose center of mass separation
is less than `nmult` times the sum of their respective radii.
@ -913,9 +962,9 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.,
patchknn_max = numpy.max(radiusKNN) # Maximum for completeness
for i in trange(nsamples) if verbose else range(nsamples):
dist, indx = knn.radius_neighbors(X[i, :].reshape(-1, 3),
radiusX[i] + patchknn_max,
sort_results=True)
dist, indx = knn.radius_neighbors(
X[i, :].reshape(-1, 3), radiusX[i] + patchknn_max, sort_results=True
)
# Note that `dist` and `indx` are wrapped in 1-element arrays
# so we take the first item where appropriate
mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
@ -924,58 +973,3 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.,
indxs[i] = indxs[i].astype(numpy.int32)
return numpy.asarray(indxs, dtype=object)
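
A usage sketch of `radius_neighbours`, assuming it is in scope, with a freshly fitted scikit-learn model and toy positions in box units:

import numpy
from sklearn.neighbors import NearestNeighbors

rng = numpy.random.default_rng(42)
posx = rng.uniform(size=(100, 3)).astype(numpy.float32)  # cross-simulation positions
pos0 = rng.uniform(size=(10, 3)).astype(numpy.float32)   # reference positions

knn = NearestNeighbors().fit(posx)
radius0 = numpy.full(10, 0.05, dtype=numpy.float32)      # reference patch sizes
radiusx = numpy.full(100, 0.05, dtype=numpy.float32)     # cross patch sizes

indxs = radius_neighbours(knn, pos0, radius0, radiusx, nmult=1.0,
                          enforce_int32=True, verbose=False)
# indxs[i] holds the cross halos whose patch overlaps that of reference halo i.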
###############################################################################
# Sky matching #
###############################################################################
# def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
# """
# Calculate for each point in `c1` the `N` closest points in `c2`.
# Parameters
# ----------
# c1 : `astropy.coordinates.SkyCoord`
# Coordinates of the first set of points.
# c2 : `astropy.coordinates.SkyCoord`
# Coordinates of the second set of points.
# angular : bool, optional
# Whether to calculate angular separation or 3D separation. By default
# `False` and 3D separation is calculated.
# N : int, optional
# Number of closest points in `c2` to each object in `c1` to return.
# verbose : bool, optional
# Verbosity flag. By default `False`.
# Returns
# -------
# sep : 1-dimensional array
# Separation of each object in `c1` to `N` closest objects in `c2`. The
# array shape is `(c1.size, N)`. Separation is in units of `c1`.
# indxs : 1-dimensional array
# Indexes of the closest objects in `c2` for each object in `c1`. The
# array shape is `(c1.size, N)`.
# """
# if not (isinstance(c1, SkyCoord) and isinstance(c2, SkyCoord)):
# raise TypeError(
# "`c1` & `c2` must be `astropy.coordinates.SkyCoord`.")
# N1 = c1.size
# N2 = c2.size if N is None else N
# # Pre-allocate arrays
# sep = numpy.full((N1, N2), numpy.nan)
# indxs = numpy.full((N1, N2), numpy.nan, dtype=int)
# iters = tqdm(range(N1)) if verbose else range(N1)
# for i in iters:
# if angular:
# dist = c1[i].separation(c2).value
# else:
# dist = c1[i].separation_3d(c2).value
# # Sort the distances
# sort = numpy.argsort(dist)[:N2]
# indxs[i, :] = sort
# sep[i, :] = dist[sort]
# return sep, indxs

View File

@ -16,27 +16,27 @@
import numpy
def concatenate_clumps(clumps, include_velocities=False):
def concatenate_parts(list_parts, include_velocities=False):
"""
Concatenate an array of clumps to a single array containing all particles.
Concatenate a list of particle arrays into a single array.
Parameters
----------
clumps : list of structured arrays
List of clumps. Each clump must be a structured array with keys
list_parts : list of structured arrays
List of particle arrays.
include_velocities : bool, optional
Whether to include velocities in the output array.
Returns
-------
particles : structured array
parts_out : structured array
"""
# Count how large an array will be needed
N = 0
for clump, __ in clumps:
N += clump.size
for part in list_parts:
N += part.size
# Infer dtype of positions
if clumps[0][0]["x"].dtype.char in numpy.typecodes["AllInteger"]:
if list_parts[0]["x"].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32
else:
posdtype = numpy.float32
@ -54,14 +54,14 @@ def concatenate_clumps(clumps, include_velocities=False):
"names": ["x", "y", "z", "M"],
"formats": [posdtype] * 3 + [numpy.float32],
}
particles = numpy.full(N, numpy.nan, dtype)
parts_out = numpy.full(N, numpy.nan, dtype)
# Fill it one clump by another
start = 0
for clump, __ in clumps:
end = start + clump.size
for parts in list_parts:
end = start + parts.size
for p in dtype["names"]:
particles[p][start:end] = clump[p]
parts_out[p][start:end] = parts[p]
start = end
return particles
return parts_out
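
A usage sketch of the renamed `concatenate_parts`, assuming it is in scope (the dtype mirrors the keys required above):

import numpy

dtype = {"names": ["x", "y", "z", "M"], "formats": [numpy.float32] * 4}
part1 = numpy.ones(3, dtype=dtype)
part2 = numpy.ones(2, dtype=dtype)

parts = concatenate_parts([part1, part2])
assert parts.size == 5  # both arrays stacked in order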

View File

@ -375,9 +375,6 @@ class HaloCatalogue(BaseCatalogue):
Halo catalogue, i.e. parent halos with summed substructure, defined in the
final snapshot.
TODO:
Add the fitted quantities
Parameters
----------
nsim : int
@ -391,26 +388,60 @@ class HaloCatalogue(BaseCatalogue):
minmass : len-2 tuple
Minimum mass. The first element is the catalogue key and the second is
the value.
load_fitted : bool, optional
Whether to load fitted quantities.
load_initial : bool, optional
Whether to load initial positions.
rawdata : bool, optional
Whether to return the raw data. In this case applies no cuts and
transformations.
"""
def __init__(
self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12), rawdata=False
self,
nsim,
paths,
maxdist=155.5 / 0.705,
minmass=("M", 1e12),
load_fitted=True,
load_initial=False,
rawdata=False,
):
self.nsim = nsim
self.paths = paths
# Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
self._data = mmain["mmain"]
if load_fitted:
fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos"))
cols = [col for col in fits.dtype.names if col != "index"]
X = [fits[col] for col in cols]
self._data = add_columns(self._data, X, cols)
# TODO: load initial positions
if not rawdata:
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(self._data, "x", "z")
for p in ("x", "y", "z"):
self._data[p] -= 0.5
self._data = self.box.convert_from_boxunits(
self._data, ["x", "y", "z", "M"]
self._data,
[
"x",
"y",
"z",
"M",
"totpartmass",
"rho0",
"r200c",
"r500c",
"m200c",
"m500c",
"r200m",
"m200m",
],
)
if maxdist is not None:

View File

@ -133,13 +133,13 @@ class CSiBORGPaths:
nsim : int
IC realisation index.
kind : str
Type of match. Can be either `cm` or `particles`.
Type of match. Can be either `fit` or `particles`.
Returns
-------
path : str
"""
assert kind in ["cm", "particles"]
assert kind in ["fit", "particles"]
fdir = join(self.postdir, "initmatch")
if not isdir(fdir):
mkdir(fdir)
@ -284,6 +284,28 @@ class CSiBORGPaths:
fname = "{}_out_{}_{}.npy".format(kind, str(nsim).zfill(5), str(nsnap).zfill(5))
return join(fdir, fname)
def overlap_path(self, nsim0, nsimx):
"""
Path to the overlap files between two simulations.
Parameters
----------
nsim0 : int
IC realisation index of the first simulation.
nsimx : int
IC realisation index of the second simulation.
Returns
-------
path : str
"""
fdir = join(self.postdir, "overlap")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
fname = "ovelrap_{}_{}.npz".format(str(nsim0).zfill(5), str(nsimx).zfill(5))
return join(fdir, fname)
def knnauto_path(self, run, nsim=None):
"""
Path to the `knn` auto-correlation files. If `nsim` is not specified

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1139,8 +1139,8 @@
"overlapper.clumps_pos2cell(clumps0)\n",
"overlapper.clumps_pos2cell(clumpsx)\n",
"\n",
"mins0, maxs0 = csiborgtools.match.get_clumplims(clumps0, overlapper.inv_clength, overlapper.nshift)\n",
"minsx, maxsx = csiborgtools.match.get_clumplims(clumpsx, overlapper.inv_clength, overlapper.nshift)"
"mins0, maxs0 = csiborgtools.match.get_halolims(clumps0, overlapper.inv_clength, overlapper.nshift)\n",
"minsx, maxsx = csiborgtools.match.get_halolims(clumpsx, overlapper.inv_clength, overlapper.nshift)"
]
},
{

View File

@ -664,8 +664,8 @@
"overlapper.clumps_pos2cell(clumps0)\n",
"overlapper.clumps_pos2cell(clumpsx)\n",
"\n",
"mins0, maxs0 = csiborgtools.match.get_clumplims(clumps0, overlapper.inv_clength, overlapper.nshift)\n",
"minsx, maxsx = csiborgtools.match.get_clumplims(clumpsx, overlapper.inv_clength, overlapper.nshift)"
"mins0, maxs0 = csiborgtools.match.get_halolims(clumps0, overlapper.inv_clength, overlapper.nshift)\n",
"minsx, maxsx = csiborgtools.match.get_halolims(clumpsx, overlapper.inv_clength, overlapper.nshift)"
]
},
{

View File

@ -1,4 +1,3 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
@ -15,7 +14,7 @@
"""A script to calculate overlap between two CSiBORG realisations."""
from argparse import ArgumentParser
from datetime import datetime
from os.path import join
from distutils.util import strtobool
import numpy
from scipy.ndimage import gaussian_filter
@ -24,71 +23,76 @@ try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Argument parser
parser = ArgumentParser()
parser.add_argument("--nsim0", type=int)
parser.add_argument("--nsimx", type=int)
parser.add_argument("--nmult", type=float)
parser.add_argument("--sigma", type=float)
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)), default=False)
args = parser.parse_args()
# File paths
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
fout = join(utils.dumpdir, "overlap",
"cross_{}_{}.npz".format(args.nsim0, args.nsimx))
smooth_kwargs = {"sigma": args.sigma, "mode": "constant", "cval": 0.0}
overlapper = csiborgtools.match.ParticleOverlap()
# Load catalogues
print("{}: loading catalogues {} and {}."
.format(datetime.now(), args.nsim0, args.nsimx), flush=True)
cat0 = csiborgtools.read.ClumpsCatalogue(args.nsim0, paths)
catx = csiborgtools.read.ClumpsCatalogue(args.nsimx, paths)
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsim0), flush=True)
with open(paths.initmatch_path(args.nsim0, "particles"), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumps0)
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsimx), flush=True)
with open(paths.initmatch_path(args.nsimx, "particles"), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumpsx)
print("{}: generating the background density fields.".format(datetime.now()),
flush=True)
delta_bckg = overlapper.make_bckg_delta(clumps0)
delta_bckg = overlapper.make_bckg_delta(clumpsx, delta=delta_bckg)
print("{}: crossing the simulations.".format(datetime.now()), flush=True)
matcher = csiborgtools.match.RealisationsMatcher()
ref_indxs, cross_indxs, match_indxs, ngp_overlap = matcher.cross(
cat0, catx, clumps0, clumpsx, delta_bckg)
# Load the raw catalogues (i.e. no selection) including the initial CM positions
# and the particle archives.
cat0 = csiborgtools.read.HaloCatalogue(
args.nsim0, paths, load_initial=True, rawdata=True
)
catx = csiborgtools.read.HaloCatalogue(
args.nsimx, paths, load_initial=True, rawdata=True
)
halos0_archive = paths.initmatch_path(args.nsim0, "particles")
halosx_archive = paths.initmatch_path(args.nsimx, "particles")
print("{}: smoothing the background field.".format(datetime.now()), flush=True)
# We generate the background density fields. Loads halos' particles one by one
# from the archive, concatenates them and calculates the NGP density field.
args.verbose and print(
"{}: generating the background density fields.".format(datetime.now()), flush=True
)
delta_bckg = overlapper.make_bckg_delta(halos0_archive, verbose=args.verbose)
delta_bckg = overlapper.make_bckg_delta(
halosx_archive, delta=delta_bckg, verbose=args.verbose
)
# We calculate the overlap between the NGP fields.
args.verbose and print(
"{}: crossing the simulations.".format(datetime.now()), flush=True
)
match_indxs, ngp_overlap = matcher.cross(
cat0, catx, halos0_archive, halosx_archive, delta_bckg
)
# We now smooth the background density field for the smoothed overlap calculation.
args.verbose and print(
"{}: smoothing the background field.".format(datetime.now()), flush=True
)
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
args.verbose and print(
"{}: calculating smoothed overlaps.".format(datetime.now()), flush=True
)
smoothed_overlap = matcher.smoothed_cross(
cat0, catx, halos0_archive, halosx_archive, delta_bckg, match_indxs, smooth_kwargs
)
print("{}: calculating smoothed overlaps.".format(datetime.now()), flush=True)
smoothed_overlap = matcher.smoothed_cross(clumps0, clumpsx, delta_bckg,
ref_indxs, cross_indxs, match_indxs,
smooth_kwargs)
# Dump the result
print("Saving results to `{}`.".format(fout), flush=True)
with open(fout, "wb") as f:
numpy.savez(fout, ref_indxs=ref_indxs, cross_indxs=cross_indxs,
match_indxs=match_indxs, ngp_overlap=ngp_overlap,
smoothed_overlap=smoothed_overlap, sigma=args.sigma)
print("All finished.", flush=True)
# We save the results at long last.
fout = paths.overlap_path(args.nsim0, args.nsimx)
args.verbose and print(
"{}: saving results to `{}`.".format(datetime.now(), fout), flush=True
)
numpy.savez(
fout,
match_indxs=match_indxs,
ngp_overlap=ngp_overlap,
smoothed_overlap=smoothed_overlap,
sigma=args.sigma,
)
print("{}: all finished.".format(datetime.now()), flush=True)

View File

@ -72,9 +72,6 @@ def fit_clump(particles, clump_info, box):
obj = csiborgtools.fits.Clump(particles, clump_info, box)
out = {}
if numpy.isnan(clump_info["index"]):
print("Why am I NaN?", flush=True)
out["index"] = clump_info["index"]
out["npart"] = len(obj)
out["totpartmass"] = numpy.sum(obj["M"])
for i, v in enumerate(["vx", "vy", "vz"]):
@ -121,7 +118,7 @@ def load_parent_particles(clumpid, particle_archive, clumps_cat):
if len(clumps) == 0:
return None
return csiborgtools.match.concatenate_clumps(clumps, include_velocities=True)
return csiborgtools.match.concatenate_parts(clumps, include_velocities=True)
# We now start looping over all simulations
@ -152,11 +149,13 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]
out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect)
for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs):
clumpid = clumps_cat["index"][j]
out["index"][i] = clumpid
# If we are fitting halos and this clump is not a main, then continue.
if args.kind == "halos" and not ismain[j]:
continue
clumpid = clumps_cat["index"][j]
if args.kind == "halos":
part = load_parent_particles(clumpid, particle_archive, clumps_cat)
else:
@ -169,9 +168,6 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
_out = fit_clump(part, clumps_cat[j], box)
for key in _out.keys():
out[key][i] = _out[key]
else:
out["index"][i] = clumpid
out["npart"][i] = 0
fout = ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), rank)
if nproc == 0:
@ -204,7 +200,7 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
if args.kind == "halos":
out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, "clumps")
fout = paths.structfit_path(nsnap, nsim, args.kind)
print("Saving to `{}`.".format(fout), flush=True)
numpy.save(fout, out)
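
A sketch of loading the fitted quantities saved above (the path is illustrative):

import numpy

out = numpy.load("structfit_00951_07444_halos.npy")
print(out.dtype.names)  # e.g. ("index", "npart", "totpartmass", ...)
print(out["totpartmass"][:5])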

View File

@ -13,11 +13,8 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the centre of mass of particles at redshift 70 that
are grouped in a clump at present redshift.
Optionally also dumps the clumps information, however watch out as this will
eat up a lot of memory.
Script to calculate the particle centre of mass and Lagrangian patch size in the initial
snapshot. Optionally dumps the particle files; note that this requires a lot of memory.
"""
from argparse import ArgumentParser
from datetime import datetime
@ -28,141 +25,143 @@ from os.path import join
import numpy
from mpi4py import MPI
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
# Argument parser
parser = ArgumentParser()
parser.add_argument("--dump_clumps", type=lambda x: bool(strtobool(x)))
parser.add_argument("--dump", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
nsims = paths.get_ics(tonew=True)
partreader = csiborgtools.read.ParticleReader(paths)
ftemp = join(paths.temp_dumpdir, "initmatch_{}_{}_{}.npy")
# Temporary output file
ftemp = join(paths.dumpdir, "temp", "initmatch_{}_{}_{}.npy")
for nsim in nsims:
# We loop over all simulations and then use MPI when matching halos to the
# initial snapshot and dumping them.
for i, nsim in enumerate(paths.get_ics(tonew=True)):
if rank == 0:
print("{}: reading simulation {}.".format(datetime.now(), nsim),
flush=True)
nsnap_max = max(paths.get_snapshots(nsim))
reader = csiborgtools.read.ParticleReader(paths)
print("{}: reading simulation {}.".format(datetime.now(), nsim), flush=True)
nsnap = max(paths.get_snapshots(nsim))
# Read and sort the initial particle files by their particle IDs
part0 = reader.read_particle(1, nsim, ["x", "y", "z", "M", "ID"],
verbose=False)
# We first load particles in the initial and final snapshots and sort them
# by their particle IDs so that we can match them by array position.
# `clump_ids` are the clump IDs of particles.
part0 = partreader.read_particle(
1, nsim, ["x", "y", "z", "M", "ID"], verbose=verbose
)
part0 = part0[numpy.argsort(part0["ID"])]
# Order the final snapshot clump IDs by the particle IDs
pid = reader.read_particle(nsnap_max, nsim, ["ID"], verbose=False)["ID"]
clump_ids = reader.read_clumpid(nsnap_max, nsim, verbose=False)
pid = partreader.read_particle(nsnap, nsim, ["ID"], verbose=verbose)["ID"]
clump_ids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
clump_ids = clump_ids[numpy.argsort(pid)]
# Release the particle IDs; we will not need them anymore now that both
# particle arrays are matched in ordering.
del pid
collect()
# Get rid of the clumps whose index is 0 -- those are unassigned
# Particles whose clump ID is 0 are unassigned to a clump, so we can get
# rid of them to speed up subsequent operations. Again we release the mask.
mask = clump_ids > 0
clump_ids = clump_ids[mask]
part0 = part0[mask]
del mask
collect()
# Calculate the centre of mass of each parent halo and the Lagrangian patch
# size, and optionally dump the initial snapshot particles belonging to this
# parent halo. Dumping the particles takes the majority of the time.
if rank == 0:
print("{}: dumping intermediate files.".format(datetime.now()),
flush=True)
print(
"{}: calculating {}th simulation {}.".format(datetime.now(), i, nsim),
flush=True,
)
# We load up the clump catalogue, which contains information about the
# ultimate parent halos of each clump. We loop only over the clump IDs of
# ultimate parent halos, add their substructure particles, and save these
# at the end.
cat = csiborgtools.read.ClumpsCatalogue(
nsim, paths, load_fitted=False, rawdata=True
)
parent_ids = cat["index"][cat.ismain][:500]
jobs = csiborgtools.fits.split_jobs(parent_ids.size, nproc)[rank]
for i in tqdm(jobs) if verbose else jobs:
clid = parent_ids[i]
mmain_indxs = cat["index"][cat["parent"] == clid]
# Grab unique clump IDs and loop over them
unique_clumpids = numpy.unique(clump_ids)
mmain_mask = numpy.isin(clump_ids, mmain_indxs, assume_unique=True)
mmain_particles = part0[mmain_mask]
njobs = unique_clumpids.size
jobs = csiborgtools.utils.split_jobs(njobs, nproc)[rank]
for i in jobs:
n = unique_clumpids[i]
x0 = part0[clump_ids == n]
raddist, cmpos = csiborgtools.match.dist_centmass(mmain_particles)
patchsize = csiborgtools.match.dist_percentile(raddist, [99], distmax=0.075)
with open(ftemp.format(nsim, clid, "fit"), "wb") as f:
numpy.savez(f, cmpos=cmpos, patchsize=patchsize)
# Center of mass and Lagrangian patch size
dist, cm = csiborgtools.match.dist_centmass(x0)
patch = csiborgtools.match.dist_percentile(dist, [99], distmax=0.075)
# Dump the center of mass
with open(ftemp.format(nsim, n, "cm"), 'wb') as f:
numpy.save(f, cm)
# Dump the Lagrangian patch size
with open(ftemp.format(nsim, n, "lagpatch"), 'wb') as f:
numpy.save(f, patch)
# Dump the entire clump
if args.dump_clumps:
with open(ftemp.format(nsim, n, "clump"), "wb") as f:
numpy.save(f, x0)
if args.dump:
with open(ftemp.format(nsim, clid, "particles"), "wb") as f:
numpy.save(f, mmain_particles)
# We force a memory clean-up before continuing.
del part0, clump_ids
collect()
# We now wait for all processes and then use the 0th process to collect the results.
# We first collect just the Lagrangian patch size information.
comm.Barrier()
if rank == 0:
print("{}: collecting summary files...".format(datetime.now()),
flush=True)
# Collect the centre of masses, patch size, etc. and dump them
dtype = {"names": ['x', 'y', 'z', "lagpatch", "ID"],
"formats": [numpy.float32] * 4 + [numpy.int32]}
out = numpy.full(njobs, numpy.nan, dtype=dtype)
for i, n in enumerate(unique_clumpids):
# Load in CM vector
fpath = ftemp.format(nsim, n, "cm")
print("{}: collecting fits...".format(datetime.now()), flush=True)
dtype = {
"names": ["index", "x", "y", "z", "lagpatch"],
"formats": [numpy.int32] + [numpy.float32] * 4,
}
out = numpy.full(parent_ids.size, numpy.nan, dtype=dtype)
for i, clid in enumerate(parent_ids):
fpath = ftemp.format(nsim, clid, "fit")
with open(fpath, "rb") as f:
fin = numpy.load(f)
out['x'][i] = fin[0]
out['y'][i] = fin[1]
out['z'][i] = fin[2]
inp = numpy.load(f)
out["index"][i] = clid
out["x"][i] = inp["cmpos"][0]
out["y"][i] = inp["cmpos"][1]
out["z"][i] = inp["cmpos"][2]
out["lagpatch"][i] = inp["patchsize"]
remove(fpath)
# Load in the patch size
fpath = ftemp.format(nsim, n, "lagpatch")
with open(fpath, "rb") as f:
out["lagpatch"][i] = numpy.load(f)
remove(fpath)
# Store the halo ID
out["ID"][i] = n
print("{}: dumping to .. `{}`.".format(
datetime.now(), paths.initmatch_path(nsim, "cm")), flush=True)
with open(paths.initmatch_path(nsim, "cm"), 'wb') as f:
numpy.save(f, out)
if args.dump_clumps:
print("{}: collecting particle files...".format(datetime.now()),
flush=True)
out = [None] * unique_clumpids.size
dtype = {"names": ["clump", "ID"],
"formats": [object, numpy.int32]}
out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype)
for i, n in enumerate(unique_clumpids):
fpath = ftemp.format(nsim, n, "clump")
with open(fpath, 'rb') as f:
fin = numpy.load(f)
out["clump"][i] = fin
out["ID"][i] = n
remove(fpath)
fout = paths.initmatch_path(nsim, "particles")
print("{}: dumping to .. `{}`.".format(datetime.now(), fout),
flush=True)
fout = paths.initmatch_path(nsim, "fit")
print("{}: dumping fits to .. `{}`.".format(datetime.now(), fout), flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)
# We now optionally collect the individual clumps and store them in an archive,
# which has the benefit of being a single file that can be easily read in.
if args.dump:
print("{}: collecting particles...".format(datetime.now()), flush=True)
out = {}
for clid in parent_ids:
fpath = ftemp.format(nsim, clid, "particles")
with open(fpath, "rb") as f:
out.update({str(clid): numpy.load(f)})
fout = paths.initmatch_path(nsim, "particles")
print(
"{}: dumping particles to .. `{}`.".format(datetime.now(), fout),
flush=True,
)
with open(fout, "wb") as f:
numpy.savez(f, **out)
# Again, we force a memory clean-up before continuing.
del out
collect()
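
Finally, a sketch of consuming the particle archive written by this script. The keys are parent halo IDs stored as strings (the path is illustrative):

import numpy

archive = numpy.load("initmatch_07444_particles.npz")
for hid in archive.files:
    parts = archive[hid]  # structured array with keys `x`, `y`, `z`, `M`
    print(hid, parts.size, numpy.sum(parts["M"]))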