Fixing overlaps and halo definitions. (#80)

* Add imports

* Refactor code

* Rename fof velocities

* Clean up and add Quijote

* Edit docstrings

* Update submission script

* Fix bug

* Start loading fitted properties

* Edit docstrings

* Update fitting for new `halo`

* Update CM definition and R200c

* Tune the minimum number of particles

* Enforce crossing threshold & tune hypers

* Fix periodiity when calculating angmom

* Doc strings

* Relax checkip

* Minor edit

* Fix old kwarg bug

* Fix CSiBORG bounds

* Catch warnings!

* Add `mass_kind` and new boundaries
This commit is contained in:
Richard Stiskalek 2023-07-31 16:13:21 +02:00 committed by GitHub
parent 169a5e5bd7
commit 344ff8e091
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 543 additions and 388 deletions

View file

@ -19,17 +19,13 @@ import numpy
from numba import jit
from scipy.optimize import minimize
GRAV = 6.6743e-11 # m^3 kg^-1 s^-2
MSUN = 1.988409870698051e+30 # kg
MPC2M = 3.0856775814671916e+22 # 1 Mpc is this many meters
GRAV = 4.300917270069976e-09 # G in (Msun / h)^-1 (Mpc / h) (km / s)^2
class BaseStructure(ABC):
"""
Basic structure object for handling operations on its particles.
"""
_particles = None
_box = None
@ -90,94 +86,119 @@ class BaseStructure(ABC):
"""
return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
def spherical_overdensity_mass(self, delta_mult, kind="crit", rtol=1e-8,
maxiter=100, npart_min=10):
def center_of_mass(self, npart_min=30, shrink_factor=0.98):
r"""
Calculate spherical overdensity mass and radius via the iterative
shrinking sphere method.
Calculate the center of mass of a halo via the shrinking sphere
procedure. Iteratively reduces initial radius and calculates the CM of
enclosed particles while the number of enclosed particles is greater
than a set minimum.
Parameters
----------
npart_min : int, optional
Minimum number of enclosed particles above which to continue
shrinking the sphere.
shrink_factor : float, optional
Factor by which to shrink the sphere radius at each iteration.
Returns
-------
cm : 1-dimensional array of shape `(3, )`
Center of mass in box units.
dist : 1-dimensional array of shape `(n_particles, )`
Distance of each particle from the center of mass in box units.
"""
pos, mass = self.pos, self["M"]
cm = center_of_mass(pos, mass, boxsize=1)
rad = None
while True:
dist = periodic_distance(pos, cm, boxsize=1)
if rad is None:
rad = numpy.max(dist)
within_rad = dist <= rad
cm = center_of_mass(pos[within_rad], mass[within_rad], boxsize=1)
if numpy.sum(within_rad) < npart_min:
return cm, periodic_distance(pos, cm, boxsize=1)
rad *= shrink_factor
def spherical_overdensity_mass(self, dist, delta_mult, kind="crit"):
r"""
Calculate spherical overdensity mass and radius around a CM, defined as
the inner-most radius where the density falls below a given threshold.
The exact radius is found via linear interpolation between the two
particles enclosing the threshold.
Parameters
----------
dist : 1-dimensional array of shape `(n_particles, )`
Distance of each particle from the centre of mass in box units.
delta_mult : int or float
Overdensity multiple.
kind : str, optional
Either `crit` or `matter`, for critical or matter overdensity
rtol : float, optional
Tolerance for the change in the center of mass or radius.
maxiter : int, optional
Maximum number of iterations.
npart_min : int, optional
Minimum number of enclosed particles to reset the iterator.
Returns
-------
mass : float
The requested spherical overdensity mass in :math:`M_\odot / h`.
Overdensity mass in (Msun / h).
rad : float
The radius of the sphere enclosing the requested overdensity in box
units.
cm : 1-dimensional array of shape `(3, )`
The center of mass of the sphere enclosing the requested
overdensity in box units.
Overdensity radius in box units.
"""
assert kind in ["crit", "matter"]
if kind not in ["crit", "matter"]:
raise ValueError("kind must be either `crit` or `matter`.")
# Calculate density based on the provided kind
rho = delta_mult * self.box.rho_crit0
if kind == "matter":
rho *= self.box.Om
rho *= self.box.Om if kind == "matter" else 1.
pos, mass = self.pos, self["M"]
argsort = numpy.argsort(dist)
dist = self.box.box2mpc(dist[argsort])
# Initial estimates for center of mass and radius
init_cm = center_of_mass(pos, mass, boxsize=1)
init_rad = self.box.mpc2box(mass_to_radius(numpy.sum(mass), rho) * 1.5)
norm_density = numpy.cumsum(self['M'][argsort])
totmass = norm_density[-1]
with numpy.errstate(divide="ignore"):
norm_density /= (4. / 3. * numpy.pi * dist**3)
norm_density /= rho
rad, cm = init_rad, numpy.copy(init_cm)
# This ensures that the j - 1 index is also just above 1, therefore the
# expression below strictly interpolates.
j = find_first_below_threshold(norm_density, 1.)
for _ in range(maxiter):
dist = periodic_distance(pos, cm, boxsize=1)
within_rad = dist <= rad
if j is None:
return numpy.nan, numpy.nan
# Heuristic reset if too few enclosed particles
if numpy.sum(within_rad) < npart_min:
js = numpy.random.choice(len(self), len(self), replace=True)
cm = center_of_mass(pos[js], mass[js], boxsize=1)
rad = init_rad * (0.75 + numpy.random.rand())
dist = periodic_distance(pos, cm, boxsize=1)
within_rad = dist <= rad
i = j - 1
# If there are still too few particles, then skip this
# iteration.
if numpy.sum(within_rad) < npart_min:
continue
rad = (dist[j] - dist[i])
rad *= (1. - norm_density[i]) / (norm_density[j] - norm_density[i])
rad += dist[i]
enclosed_mass = numpy.sum(mass[within_rad])
new_rad = self.box.mpc2box(mass_to_radius(enclosed_mass, rho))
new_cm = center_of_mass(pos[within_rad], mass[within_rad],
boxsize=1)
mass = radius_to_mass(rad, rho)
rad = self.box.mpc2box(rad)
# Check convergence based on center of mass and radius
cm_conv = numpy.linalg.norm(cm - new_cm) < rtol
rad_conv = abs(rad - new_rad) < rtol
if mass > totmass:
return numpy.nan, numpy.nan
if cm_conv or rad_conv:
return enclosed_mass, rad, cm
return mass, rad
cm, rad = new_cm, new_rad
# Return NaN values if no convergence after max iterations
return numpy.nan, numpy.nan, numpy.full(3, numpy.nan, numpy.float32)
def angular_momentum(self, ref, rad, npart_min=10):
def angular_momentum(self, dist, cm, rad, npart_min=10):
r"""
Calculate angular momentum around a reference point using all particles
within a radius. Units are
:math:`(M_\odot / h) (\mathrm{Mpc} / h) \mathrm{km} / \mathrm{s}`.
Calculate angular momentum around a centre of mass using all particles
within a radius. Accounts for periodicity of the box and units are
(Msun / h) * (Mpc / h) * (km / s).
Parameters
----------
ref : 1-dimensional array of shape `(3, )`
dist : 1-dimensional array of shape `(n_particles, )`
Distance of each particle from center of mass in box units.
cm : 1-dimensional array of shape `(3, )`
Reference point in box units.
rad : float
Radius around the reference point in box units.
@ -189,31 +210,28 @@ class BaseStructure(ABC):
-------
angmom : 1-dimensional array or shape `(3, )`
"""
# Calculate the distance of each particle from the reference point.
distances = periodic_distance(self.pos, ref, boxsize=1)
mask = dist < rad
# Filter particles within the provided radius.
mask = distances < rad
if numpy.sum(mask) < npart_min:
return numpy.full(3, numpy.nan, numpy.float32)
mass, pos, vel = self["M"][mask], self.pos[mask], self.vel[mask]
# Convert positions to Mpc / h and center around the reference point.
pos = self.box.box2mpc(pos) - ref
# Adjust velocities to be in the CM frame.
pos = shift_to_center_of_box(pos, cm, 1.0, set_cm_to_zero=True)
pos = self.box.box2mpc(pos)
vel -= numpy.average(vel, axis=0, weights=mass)
# Calculate angular momentum.
return numpy.sum(mass[:, numpy.newaxis] * numpy.cross(pos, vel),
axis=0)
def lambda_bullock(self, ref, rad):
r"""
Bullock spin, see Eq. 5 in [1], in a given radius around a reference
point.
def lambda_bullock(self, angmom, mass, rad):
"""
Calculate the Bullock spin, see Eq. 5 in [1].
Parameters
----------
angmom : 1-dimensional array of shape `(3, )`
Angular momentum in (Msun / h) * (Mpc / h) * (km / s).
ref : 1-dimensional array of shape `(3, )`
Reference point in box units.
rad : float
@ -229,28 +247,18 @@ class BaseStructure(ABC):
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R.
"""
# Filter particles within the provided radius
mask = periodic_distance(self.pos, ref, boxsize=1) < rad
# Calculate the total mass of the enclosed particles
enclosed_mass = numpy.sum(self["M"][mask])
# Convert the radius from box units to Mpc/h
rad_mpc = self.box.box2mpc(rad)
# Circular velocity in km/s
circvel = (GRAV * enclosed_mass * MSUN / (rad_mpc * MPC2M))**0.5 * 1e-3
# Magnitude of the angular momentum
l_norm = numpy.linalg.norm(self.angular_momentum(ref, rad))
# Compute and return the Bullock spin parameter
return l_norm / (numpy.sqrt(2) * enclosed_mass * circvel * rad_mpc)
out = numpy.linalg.norm(angmom)
return out / numpy.sqrt(2 * GRAV * mass**3 * self.box.box2mpc(rad))
def nfw_concentration(self, ref, rad, conc_min=1e-3, npart_min=10):
def nfw_concentration(self, dist, rad, conc_min=1e-3, npart_min=10):
"""
Calculate the NFW concentration parameter in a given radius around a
reference point.
Parameters
----------
ref : 1-dimensional array of shape `(3, )`
Reference point in box units.
dist : 1-dimensional array of shape `(n_particles, )`
Distance of each particle from center of mass in box units.
rad : float
Radius around the reference point in box units.
conc_min : float
@ -263,36 +271,25 @@ class BaseStructure(ABC):
-------
conc : float
"""
dist = periodic_distance(self.pos, ref, boxsize=1)
mask = dist < rad
if numpy.sum(mask) < npart_min:
return numpy.nan
dist, weight = dist[mask], self["M"][mask]
weight /= numpy.mean(weight)
weight /= weight[0]
# Objective function for minimization
def negll_nfw_concentration(log_c, xs, w):
c = 10**log_c
ll = xs / (1 + c * xs)**2 * c**2
ll *= (1 + c) / ((1 + c) * numpy.log(1 + c) - c)
ll = numpy.sum(numpy.log(w * ll))
return -ll
initial_guess = 1.5
res = minimize(negll_nfw_concentration, x0=initial_guess,
res = minimize(negll_nfw_concentration, x0=1.,
args=(dist / rad, weight, ), method='Nelder-Mead',
bounds=((numpy.log10(conc_min), 5),))
if not res.success:
return numpy.nan
conc_value = 10**res["x"][0]
if conc_value < conc_min or numpy.isclose(conc_value, conc_min):
conc = 10**res["x"][0]
if conc < conc_min or numpy.isclose(conc, conc_min):
return numpy.nan
return conc_value
return conc
def __getitem__(self, key):
key_to_index = {'x': 0, 'y': 1, 'z': 2,
@ -329,11 +326,12 @@ class Halo(BaseStructure):
###############################################################################
@jit(nopython=True, fastmath=True, boundscheck=False)
def center_of_mass(points, mass, boxsize):
"""
Calculate the center of mass of a halo, while assuming for periodic
boundary conditions of a cubical box. Assuming that particle positions are
in `[0, boxsize)` range.
Calculate the center of mass of a halo while assuming periodic boundary
conditions of a cubical box. Assuming that particle positions are in
`[0, boxsize)` range. This is a JIT implementation.
Parameters
----------
@ -348,20 +346,29 @@ def center_of_mass(points, mass, boxsize):
-------
cm : 1-dimensional array of shape `(3, )`
"""
# Convert positions to unit circle coordinates in the complex plane
pos = numpy.exp(2j * numpy.pi * points / boxsize)
# Compute weighted average of these coordinates, convert it back to
# box coordinates and fix any negative positions due to angle calculations.
cm = numpy.angle(numpy.average(pos, axis=0, weights=mass))
cm *= boxsize / (2 * numpy.pi)
cm[cm < 0] += boxsize
cm = numpy.zeros(3, dtype=points.dtype)
totmass = sum(mass)
# Convert positions to unit circle coordinates in the complex plane,
# calculate the weighted average and convert it back to box coordinates.
for i in range(3):
cm_i = sum(mass * numpy.exp(2j * numpy.pi * points[:, i] / boxsize))
cm_i /= totmass
cm_i = numpy.arctan2(cm_i.imag, cm_i.real) * boxsize / (2 * numpy.pi)
if cm_i < 0:
cm_i += boxsize
cm[i] = cm_i
return cm
@jit(nopython=True)
def periodic_distance(points, reference, boxsize):
"""
Compute the periodic distance between multiple points and a reference
point.
Compute the 3D distance between multiple points and a reference point using
periodic boundary conditions. This is an optimized JIT implementation.
Parameters
----------
@ -376,9 +383,22 @@ def periodic_distance(points, reference, boxsize):
-------
dist : 1-dimensional array of shape `(n_points, )`
"""
delta = numpy.abs(points - reference)
delta = numpy.where(delta > boxsize / 2, boxsize - delta, delta)
return numpy.linalg.norm(delta, axis=1)
npoints = len(points)
half_box = boxsize / 2
dist = numpy.zeros(npoints, dtype=points.dtype)
for i in range(npoints):
for j in range(3):
dist_1d = abs(points[i, j] - reference[j])
if dist_1d > (half_box):
dist_1d = boxsize - dist_1d
dist[i] += dist_1d**2
dist[i] = dist[i]**0.5
return dist
def shift_to_center_of_box(points, cm, boxsize, set_cm_to_zero=False):
@ -407,26 +427,74 @@ def shift_to_center_of_box(points, cm, boxsize, set_cm_to_zero=False):
return pos
def mass_to_radius(mass, rho):
@jit(nopython=True, fastmath=True, boundscheck=False)
def radius_to_mass(radius, rho):
"""
Compute the radius of a sphere with a given mass and density.
Compute the mass of a sphere with a given radius and density.
Parameters
----------
mass : float
Mass of the sphere.
radius : float
Radius of the sphere.
rho : float
Density of the sphere.
Returns
-------
rad : float
Radius of the sphere.
mass : float
"""
return ((3 * mass) / (4 * numpy.pi * rho))**(1./3)
return ((4 * numpy.pi * rho) / 3) * radius**3
@jit(nopython=True)
@jit(nopython=True, fastmath=True, boundscheck=False)
def find_first_below_threshold(x, threshold):
"""
Find index of first element in `x` that is below `threshold`. The index
must be greater than 0. If no such element is found, return `None`.
Parameters
----------
x : 1-dimensional array
Array to search in.
threshold : float
Threshold value.
Returns
-------
index : int or None
"""
for i in range(1, len(x)):
if 1 < x[i - 1] and x[i] < threshold:
return i
return None
@jit(nopython=True, fastmath=True, boundscheck=False)
def negll_nfw_concentration(log_c, xs, w):
"""
Negative log-likelihood of the NFW concentration parameter.
Parameters
----------
log_c : float
Logarithm of the concentration parameter.
xs : 1-dimensional array
Normalised radii.
w : 1-dimensional array
Weights.
Returns
------
negll : float
"""
c = 10**log_c
ll = xs / (1 + c * xs)**2 * c**2
ll *= (1 + c) / ((1 + c) * numpy.log(1 + c) - c)
ll = numpy.sum(numpy.log(w * ll))
return -ll
@jit(nopython=True, fastmath=True, boundscheck=False)
def delta2ncells(delta):
"""
Calculate the number of cells in `delta` that are non-zero.
@ -451,7 +519,7 @@ def delta2ncells(delta):
return tot
@jit(nopython=True)
@jit(nopython=True, fastmath=True, boundscheck=False)
def number_counts(x, bin_edges):
"""
Calculate counts of samples in bins.

View file

@ -13,5 +13,5 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import (ParticleOverlap, RealisationsMatcher, # noqa
calculate_overlap, calculate_overlap_indxs,
cosine_similarity, find_neighbour)
calculate_overlap, calculate_overlap_indxs, pos2cell,
cosine_similarity, find_neighbour, get_halo_cell_limits)

View file

@ -21,8 +21,9 @@ from functools import lru_cache
from math import ceil
import numpy
from numba import jit
from scipy.ndimage import gaussian_filter
from numba import jit
from tqdm import tqdm, trange
from ..read import load_halo_particles
@ -45,34 +46,39 @@ class BaseMatcher(ABC):
box_size : int
"""
if self._box_size is None:
raise RuntimeError("`box_size` is not set.")
raise RuntimeError("`box_size` has not been set.")
return self._box_size
@box_size.setter
def box_size(self, value):
assert isinstance(value, int)
assert value > 0
if not (isinstance(value, int) and value > 0):
raise ValueError("`box_size` must be a positive integer.")
if not value != 0 and (value & (value - 1) == 0):
raise ValueError("`box_size` must be a power of 2.")
self._box_size = value
@property
def bckg_halfsize(self):
"""
Number of to each side of the centre of the box to calculate the
density field. This is because in CSiBORG we are only interested in the
high-resolution region.
Background half-size for density field calculation. This is the
grid distance from the center of the box to each side over which to
evaluate the background density field. Must be less than or equal to
half the box size.
Returns
-------
bckg_halfsize : int
"""
if self._bckg_halfsize is None:
raise RuntimeError("`bckg_halfsize` is not set.")
raise RuntimeError("`bckg_halfsize` has not been set.")
return self._bckg_halfsize
@bckg_halfsize.setter
def bckg_halfsize(self, value):
assert isinstance(value, int)
assert value > 0
if not (isinstance(value, int) and value > 0):
raise ValueError("`bckg_halfsize` must be a positive integer.")
if value > self.box_size // 2:
raise ValueError("`bckg_halfsize` must be <= half the box size.")
self._bckg_halfsize = value
@ -83,26 +89,26 @@ class BaseMatcher(ABC):
class RealisationsMatcher(BaseMatcher):
"""
A tool to match haloes between IC realisations.
Matches haloes between IC realisations.
Parameters
----------
box_size : int
Number of cells in the box.
bckg_halfsize : int
Number of to each side of the centre of the box to calculate the
density field. This is because in CSiBORG we are only interested in the
high-resolution region.
Background half-size for density field calculation. This is the
grid distance from the center of the box to each side over which to
evaluate the background density field. Must be less than or equal to
half the box size.
nmult : float or int, optional
Multiple of the sum of pair initial Lagrangian patch sizes
within which to return neighbours. By default 1.
Multiplier of the sum of the initial Lagrangian patch sizes of a halo
pair. Determines the range within which neighbors are returned.
dlogmass : float, optional
Tolerance on the absolute logarithmic mass difference of potential
matches. By default 2.
matches.
mass_kind : str, optional
The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
Mass kind whose similarity is to be checked. Must be a valid key in the
halo catalogue.
"""
_nmult = None
_dlogmass = None
@ -111,21 +117,19 @@ class RealisationsMatcher(BaseMatcher):
def __init__(self, box_size, bckg_halfsize, nmult=1.0, dlogmass=2.0,
mass_kind="totpartmass"):
assert nmult > 0
assert dlogmass > 0
assert isinstance(mass_kind, str)
self.box_size = box_size
self.halfsize = bckg_halfsize
self._nmult = nmult
self._dlogmass = dlogmass
self._mass_kind = mass_kind
self.bckg_halfsize = bckg_halfsize
self.nmult = nmult
self.dlogmass = dlogmass
self.mass_kind = mass_kind
self._overlapper = ParticleOverlap(box_size, bckg_halfsize)
@property
def nmult(self):
"""
Multiple of the sum of pair initial Lagrangian patch sizes within which
to return neighbours.
Multiplier of the sum of the initial Lagrangian patch sizes of a halo
pair. Determines the range within which neighbors are returned.
Returns
-------
@ -133,6 +137,12 @@ class RealisationsMatcher(BaseMatcher):
"""
return self._nmult
@nmult.setter
def nmult(self, value):
if not (value > 0 and isinstance(value, (int, float))):
raise ValueError("`nmult` must be a positive integer or float.")
self._nmult = float(value)
@property
def dlogmass(self):
"""
@ -145,10 +155,17 @@ class RealisationsMatcher(BaseMatcher):
"""
return self._dlogmass
@dlogmass.setter
def dlogmass(self, value):
if not (value > 0 and isinstance(value, (float, int))):
raise ValueError("`dlogmass` must be a positive float.")
self._dlogmass = float(value)
@property
def mass_kind(self):
"""
Mass kind whose similarity is to be checked.
Mass kind whose similarity is to be checked. Must be a valid key in the
halo catalogue.
Returns
-------
@ -156,6 +173,12 @@ class RealisationsMatcher(BaseMatcher):
"""
return self._mass_kind
@mass_kind.setter
def mass_kind(self, value):
if not isinstance(value, str):
raise ValueError("`mass_kind` must be a string.")
self._mass_kind = value
@property
def overlapper(self):
"""
@ -172,34 +195,33 @@ class RealisationsMatcher(BaseMatcher):
r"""
Find all neighbours whose CM separation is less than `nmult` times the
sum of their initial Lagrangian patch sizes and calculate their
overlap. Enforces that the neighbours' are similar in mass up to
overlap. Enforces that the neighbours are similar in mass up to
`dlogmass` dex.
Parameters
----------
cat0 : :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the cross simulation.
particles0 : 2-dimensional array
Array of particles in box units in the reference simulation.
The columns must be `x`, `y`, `z` and `M`.
Particles archive file of the reference simulation. The columns
must be `x`, `y`, `z` and `M`.
particlesx : 2-dimensional array
Array of particles in box units in the cross simulation.
The columns must be `x`, `y`, `z` and `M`.
Particles archive file of the cross simulation. The columns must be
`x`, `y`, `z` and `M`.
halo_map0 : 2-dimensional array
Halo map of the reference simulation.
halo_mapx : 2-dimensional array
Halo map of the cross simulation.
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to haloes at the
final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
simulations calculated with particles assigned to halos at the
final snapshot. Calculated on a grid determined by `bckg_halfsize`.
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional
iterator verbosity flag. by default `true`.
Iterator verbosity flag. By default `true`.
Returns
-------
@ -279,21 +301,21 @@ class RealisationsMatcher(BaseMatcher):
halo_mapx, delta_bckg, match_indxs, smooth_kwargs,
cache_size=10000, verbose=True):
r"""
Calculate the smoothed overlaps for pair previously identified via
`self.cross(...)` to have a non-zero overlap.
Calculate the smoothed overlaps for pairs previously identified via
`self.cross(...)` to have a non-zero NGP overlap.
Parameters
----------
cat0 : :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the cross simulation.
particles0 : 2-dimensional array
Array of particles in box units in the reference simulation.
The columns must be `x`, `y`, `z` and `M`.
Particles archive file of the reference simulation. The columns
must be `x`, `y`, `z` and `M`.
particlesx : 2-dimensional array
Array of particles in box units in the cross simulation.
The columns must be `x`, `y`, `z` and `M`.
Particles archive file of the cross simulation. The columns must be
`x`, `y`, `z` and `M`.
halo_map0 : 2-dimensional array
Halo map of the reference simulation.
halo_mapx : 2-dimensional array
@ -301,8 +323,7 @@ class RealisationsMatcher(BaseMatcher):
delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
final snapshot. Calculated on a grid determined by `bckg_halfsize`.
match_indxs : 1-dimensional array of arrays
Indices of halo counterparts in the cross catalogue.
smooth_kwargs : kwargs
@ -310,7 +331,7 @@ class RealisationsMatcher(BaseMatcher):
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
Iterator verbosity flag. By default `true`.
Returns
-------
@ -328,8 +349,8 @@ class RealisationsMatcher(BaseMatcher):
if verbose:
print(f"{datetime.now()}: calculating smoothed overlaps.",
flush=True)
indxs = cat0["index"]
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
indxs = cat0["index"]
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
pos0, mass0, __, mins0, maxs0 = load_processed_halo(
k0, particles0, halo_map0, hid2map0, nshift=nshift,
@ -348,41 +369,10 @@ class RealisationsMatcher(BaseMatcher):
###############################################################################
# Matching statistics #
# Overlap calculator #
###############################################################################
def cosine_similarity(x, y):
r"""
Calculate the cosine similarity between two Cartesian vectors. Defined
as :math:`\Sum_{i} x_i y_{i} / (|x| * |y|)`.
Parameters
----------
x : 1-dimensional array
The first vector.
y : 1- or 2-dimensional array
The second vector. Can be 2-dimensional of shape `(n_samples, 3)`,
in which case the calculation is broadcasted.
Returns
-------
out : float or 1-dimensional array
The cosine similarity. If y is 1-dimensinal returns only a float.
"""
# Quick check of dimensions
if x.ndim != 1:
raise ValueError("`x` must be a 1-dimensional array.")
y = y.reshape(-1, 3) if y.ndim == 1 else y
out = numpy.sum(x * y, axis=1)
out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
if out.size == 1:
return out[0]
return out
class ParticleOverlap(BaseMatcher):
r"""
Halo overlaps calculator. The density field calculation is based on the
@ -394,9 +384,10 @@ class ParticleOverlap(BaseMatcher):
box_size : int
Number of cells in the box.
bckg_halfsize : int
Number of to each side of the centre of the box to calculate the
density field. This is because in CSiBORG we are only interested in the
high-resolution region.
Background half-size for density field calculation. This is the
grid distance from the center of the box to each side over which to
evaluate the background density field. Must be less than or equal to
half the box size.
"""
def __init__(self, box_size, bckg_halfsize):
@ -414,16 +405,16 @@ class ParticleOverlap(BaseMatcher):
Parameters
----------
particles : 2-dimensional array
Array of particles.
Particles archive file. The columns must be `x`, `y`, `z` and `M`.
halo_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each halo.
hid2map : dict
Dictionary mapping halo IDs to `halo_map` array positions.
halo_cat: :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
halo_cat : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue.
delta : 3-dimensional array, optional
Array to store the density field in. If `None` a new array is
Array to store the density field. If `None` a new array is
created.
verbose : bool, optional
Verbosity flag for loading the halos' particles.
@ -449,6 +440,7 @@ class ParticleOverlap(BaseMatcher):
pos, mass = pos[:, :3], pos[:, 3]
pos = pos2cell(pos, self.box_size)
# We mask out particles outside the cubical high-resolution region
mask = numpy.all((cellmin <= pos) & (pos < cellmax), axis=1)
pos = pos[mask]
@ -465,14 +457,13 @@ class ParticleOverlap(BaseMatcher):
Parameters
----------
pos : 2-dimensional array
Halo particle position array.
Halo's particles position array.
mass : 1-dimensional array
Halo particle mass array.
Halo's particles mass array.
mins, maxs : 1-dimensional arrays of shape `(3,)`
Minimun and maximum cell numbers along each dimension.
subbox : bool, optional
Whether to calculate the density field on a grid strictly enclosing
the halo.
Whether to calculate the field on a grid enclosing the halo.
smooth_kwargs : kwargs, optional
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
If `None` no smoothing is applied.
@ -483,25 +474,25 @@ class ParticleOverlap(BaseMatcher):
"""
nshift = read_nshift(smooth_kwargs)
cells = pos2cell(pos, self.box_size)
# Check that minima and maxima are integers
if not (mins is None and maxs is None):
assert mins.dtype.char in numpy.typecodes["AllInteger"]
assert maxs.dtype.char in numpy.typecodes["AllInteger"]
if subbox:
if mins is None or maxs is None:
mins, maxs = get_halolims(cells, self.box_size, nshift)
ncells = maxs - mins + 1 # To get the number of cells
mins, maxs = get_halo_cell_limits(cells, self.box_size, nshift)
ncells = maxs - mins + 1
else:
mins = [0, 0, 0]
ncells = (self.box_size, ) * 3
# Preallocate and fill the array
delta = numpy.zeros(ncells, dtype=numpy.float32)
fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
if smooth_kwargs is not None:
gaussian_filter(delta, output=delta, **smooth_kwargs)
return delta
def make_deltas(self, pos1, pos2, mass1, mass2, mins1=None, maxs1=None,
@ -543,6 +534,7 @@ class ParticleOverlap(BaseMatcher):
nshift = read_nshift(smooth_kwargs)
pos1 = pos2cell(pos1, self.box_size)
pos2 = pos2cell(pos2, self.box_size)
xc1, yc1, zc1 = [pos1[:, i] for i in range(3)]
xc2, yc2, zc2 = [pos2[:, i] for i in range(3)]
@ -551,6 +543,7 @@ class ParticleOverlap(BaseMatcher):
xmin = min(numpy.min(xc1), numpy.min(xc2)) - nshift
ymin = min(numpy.min(yc1), numpy.min(yc2)) - nshift
zmin = min(numpy.min(zc1), numpy.min(zc2)) - nshift
# Make sure shifting does not go beyond boundaries
xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)]
@ -558,6 +551,7 @@ class ParticleOverlap(BaseMatcher):
xmax = max(numpy.max(xc1), numpy.max(xc2)) + nshift
ymax = max(numpy.max(yc1), numpy.max(yc2)) + nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + nshift
# Make sure shifting does not go beyond boundaries
xmax, ymax, zmax = [min(px, self.box_size - 1)
for px in (xmax, ymax, zmax)]
@ -565,10 +559,9 @@ class ParticleOverlap(BaseMatcher):
xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
cellmins = (xmin, ymin, zmin) # Cell minima
ncells = xmax - xmin + 1, ymax - ymin + 1, zmax - zmin + 1 # Num cells
cellmins = (xmin, ymin, zmin)
ncells = (xmax - xmin + 1, ymax - ymin + 1, zmax - zmin + 1,)
# Preallocate and fill the arrays
delta1 = numpy.zeros(ncells, dtype=numpy.float32)
delta2 = numpy.zeros(ncells, dtype=numpy.float32)
@ -590,6 +583,7 @@ class ParticleOverlap(BaseMatcher):
if smooth_kwargs is not None:
gaussian_filter(delta1, output=delta1, **smooth_kwargs)
gaussian_filter(delta2, output=delta2, **smooth_kwargs)
return delta1, delta2, cellmins, nonzero
def __call__(self, pos1, pos2, mass1, mass2, delta_bckg,
@ -644,9 +638,10 @@ class ParticleOverlap(BaseMatcher):
if smooth_kwargs is not None:
return calculate_overlap(delta1, delta2, cellmins, delta_bckg,
self.box_size, self.bckg_halfsize)
# Calculate masses not given
totmass1 = numpy.sum(mass1) if totmass1 is None else totmass1
totmass2 = numpy.sum(mass2) if totmass2 is None else totmass2
return calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg,
nonzero, totmass1, totmass2,
self.box_size, self.bckg_halfsize)
@ -681,29 +676,26 @@ def pos2cell(pos, ncells):
def read_nshift(smooth_kwargs):
"""
Read off the number of cells to pad the density field if smoothing is
applied. Defaults to the ceiling of twice of the smoothing scale.
Determine the number of cells to pad the density field if smoothing is
applied. It defaults to the ceiling of three times the smoothing scale.
Parameters
----------
smooth_kwargs : kwargs, optional
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
If `None` no smoothing is applied.
smooth_kwargs : dict or None
Arguments to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
If `None`, no smoothing is applied.
Returns
-------
nshift : int
"""
if smooth_kwargs is None:
return 0
else:
return ceil(2 * smooth_kwargs["sigma"])
return 0 if smooth_kwargs is None else ceil(3 * smooth_kwargs["sigma"])
@jit(nopython=True)
def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
"""
Fill array `delta` at the specified indices with their weights. This is a
Fill array `delta` by adding `weights` to the specified cells. This is a
JIT implementation.
Parameters
@ -715,20 +707,23 @@ def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
xmin, ymin, zmin : ints
Minimum cell IDs of particles.
weights : 1-dimensional arrays
Particle mass.
Weights
Returns
-------
None
"""
for n in range(xcell.size):
delta[xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin] += weights[n]
n_particles = xcell.size
for n in range(n_particles):
i, j, k = xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin
delta[i, j, k] += weights[n]
@jit(nopython=True)
def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
"""
Fill array `delta` at the specified indices with their weights and return
Fill array `delta` by adding `weights` to the specified cells and return
indices where `delta` was assigned a value. This is a JIT implementation.
Parameters
@ -740,36 +735,41 @@ def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
xmin, ymin, zmin : ints
Minimum cell IDs of particles.
weights : 1-dimensional arrays
Particle mass.
Weights.
Returns
-------
cells : 1-dimensional array
Indices where `delta` was assigned a value.
"""
# Array to count non-zero cells
cells = numpy.full((xcell.size, 3), numpy.nan, numpy.int32)
n_particles = xcell.size
cells = numpy.full((n_particles, 3), numpy.nan, numpy.int32)
count_nonzero = 0
for n in range(xcell.size):
for n in range(n_particles):
i, j, k = xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin
# If a cell is zero add it
if delta[i, j, k] == 0:
cells[count_nonzero, :] = i, j, k
cells[count_nonzero] = i, j, k
count_nonzero += 1
delta[i, j, k] += weights[n]
return cells[:count_nonzero, :] # Cutoff unassigned places
return cells[:count_nonzero]
def get_halolims(pos, ncells, nshift=None):
@jit(nopython=True)
def get_halo_cell_limits(pos, ncells, nshift=0):
"""
Get the lower and upper limit of a halo's positions or cell numbers.
Get the lower and upper limit of a halo's cell numbers. Optionally,
floating point positions are also supported. However, in this case `nshift`
must be 0. Be careful, no error will be raised.
Parameters
----------
pos : 2-dimensional array
Halo particle array. Columns must be `x`, `y`, `z`.
Halo particle array. The first three columns must be the cell numbers
corresponding to `x`, `y`, `z`.
ncells : int
Number of grid cells of the box along a single dimension.
nshift : int, optional
@ -778,16 +778,12 @@ def get_halolims(pos, ncells, nshift=None):
Returns
-------
mins, maxs : 1-dimensional arrays of shape `(3, )`
Minimum and maximum along each axis.
"""
# Check that in case of `nshift` we have integer positions.
dtype = pos.dtype
if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
raise TypeError("`nshift` supported only positions are cells.")
nshift = 0 if nshift is None else nshift # To simplify code below
mins = numpy.full(3, numpy.nan, dtype=dtype)
maxs = numpy.full(3, numpy.nan, dtype=dtype)
for i in range(3):
mins[i] = max(numpy.min(pos[:, i]) - nshift, 0)
maxs[i] = min(numpy.max(pos[:, i]) + nshift, ncells - 1)
@ -810,27 +806,29 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg, box_size,
delta2 : 3-dimensional array
Density field of the second halo.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
Tuple of lower cell ID in the full box.
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross simulations
calculated with particles assigned to halos at the final snapshot.
Assumed to only be sampled in cells :math:`[512, 1536)^3`.
Calculated on a grid determined by `bckg_halfsize`.
box_size : int
Number of cells in the box.
bckg_halfsize : int
Number of to each side of the centre of the box to calculate the
density field. This is because in CSiBORG we are only interested in the
high-resolution region.
Background half-size for density field calculation. This is the
grid distance from the center of the box to each side over which to
evaluate the background density field. Must be less than or equal to
half the box size.
Returns
-------
overlap : float
"""
totmass = 0.0 # Total mass of halo 1 and halo 2
intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack things
totmass = 0.0
intersect = 0.0
bckg_size = 2 * bckg_halfsize
bckg_offset = box_size // 2 - bckg_halfsize
i0, j0, k0 = cellmins
imax, jmax, kmax = delta1.shape
for i in range(imax):
@ -868,11 +866,11 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
delta2 : 3-dimensional array
Density field of the second halo.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
Tuple of lower cell ID in the full box.
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross simulations
calculated with particles assigned to halos at the final snapshot.
Assumed to only be sampled in cells :math:`[512, 1536)^3`.
Calculated on a grid determined by `bckg_halfsize`.
nonzero : 2-dimensional array of shape `(n_cells, 3)`
Indices of cells that are non-zero of the lower mass halo. Expected to
be precomputed from `fill_delta_indxs`.
@ -882,19 +880,21 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
box_size : int
Number of cells in the box.
bckg_halfsize : int
Number of to each side of the centre of the box to calculate the
density field. This is because in CSiBORG we are only interested in the
high-resolution region.
Background half-size for density field calculation. This is the
grid distance from the center of the box to each side over which to
evaluate the background density field. Must be less than or equal to
half the box size.
Returns
-------
overlap : float
"""
intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack cell minimas
intersect = 0.0
bckg_size = 2 * bckg_halfsize
bckg_offset = box_size // 2 - bckg_halfsize
i0, j0, k0 = cellmins
for n in range(nonzero.shape[0]):
i, j, k = nonzero[n, :]
m1, m2 = delta1[i, j, k], delta2[i, j, k]
@ -905,7 +905,7 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
jj = j0 + j - bckg_offset # background density field.
kk = k0 + k - bckg_offset
ishighres = 0 <= ii < bckg_size # Is this cell is in the high
ishighres = 0 <= ii < bckg_size # Is this cell is in the high
ishighres &= 0 <= jj < bckg_size # resolution region for which the
ishighres &= 0 <= kk < bckg_size # background field is calculated.
@ -933,9 +933,9 @@ def load_processed_halo(hid, particles, halo_map, hid2map, ncells, nshift):
hid2map : dict
Dictionary mapping halo IDs to `halo_map` array positions.
ncells : int
Number of cells in the original density field. Typically 2048.
Number of cells in the box density field.
nshift : int
Number of cells to pad the density field.
Cell padding for the density field.
Returns
-------
@ -952,29 +952,28 @@ def load_processed_halo(hid, particles, halo_map, hid2map, ncells, nshift):
"""
pos = load_halo_particles(hid, particles, halo_map, hid2map)
pos, mass = pos[:, :3], pos[:, 3]
pos = pos2cell(pos, ncells)
totmass = numpy.sum(mass)
mins, maxs = get_halolims(pos, ncells=ncells, nshift=nshift)
return pos, mass, totmass, mins, maxs
mins, maxs = get_halo_cell_limits(pos, ncells=ncells, nshift=nshift)
return pos, mass, numpy.sum(mass), mins, maxs
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,
enforce_int32=False, verbose=True):
"""
Find all neigbours of a trained KNN model whose center of mass separation
Find all neigbours of a fitted kNN model whose center of mass separation
is less than `nmult` times the sum of their respective radii.
Parameters
----------
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
Fitted nearest neighbour search.
X : 2-dimensional array
Array of shape `(n_samples, 3)`, where the latter axis represents
`x`, `y` and `z`.
X : 2-dimensional array of shape `(n_samples, 3)`
Array of halo positions from the cross simulation.
radiusX: 1-dimensional array of shape `(n_samples, )`
Patch radii corresponding to haloes in `X`.
Lagrangian patch radii corresponding to haloes in `X`.
radiusKNN : 1-dimensional array
Patch radii corresponding to haloes used to train `knn`.
Lagrangian patch radii corresponding to haloes used to train the kNN.
nmult : float, optional
Multiple of the sum of two radii below which to consider a match.
enforce_int32 : bool, optional
@ -988,22 +987,24 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,
indxs : 1-dimensional array `(n_samples,)` of arrays
Matches to `X` from `knn`.
"""
assert X.ndim == 2 and X.shape[1] == 3 # shape of X ok?
assert X.shape[0] == radiusX.size # patchX matches X?
assert radiusKNN.size == knn.n_samples_fit_ # patchknn matches the knn?
if X.shape != (radiusX.size, 3):
raise ValueError("Mismatch in shape of `X` or `radiusX`")
if radiusKNN.size != knn.n_samples_fit_:
raise ValueError("Mismatch in shape of `radiusKNN` or `knn`")
nsamples = X.shape[0]
nsamples = len(X)
indxs = [None] * nsamples
patchknn_max = numpy.max(radiusKNN) # Maximum for completeness
patchknn_max = numpy.max(radiusKNN)
for i in trange(nsamples) if verbose else range(nsamples):
dist, indx = knn.radius_neighbors(
X[i, :].reshape(-1, 3), radiusX[i] + patchknn_max,
X[i].reshape(1, -1), radiusX[i] + patchknn_max,
sort_results=True)
# Note that `dist` and `indx` are wrapped in 1-element arrays
# so we take the first item where appropriate
mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
indxs[i] = indx[0][mask]
if enforce_int32:
indxs[i] = indxs[i].astype(numpy.int32)
@ -1048,3 +1049,32 @@ def find_neighbour(nsim0, cats):
cross_hindxs[:, i] = catx["index"][numpy.ravel(ind)]
return dists, cross_hindxs
def cosine_similarity(x, y):
r"""
Calculate the cosine similarity between two Cartesian vectors. Defined
as :math:`\Sum_{i} x_i y_{i} / (|x| * |y|)`.
Parameters
----------
x : 1-dimensional array
The first vector.
y : 1- or 2-dimensional array
The second vector. Can be 2-dimensional of shape `(n_samples, 3)`,
in which case the calculation is broadcasted.
Returns
-------
out : float or 1-dimensional array
"""
if x.ndim != 1:
raise ValueError("`x` must be a 1-dimensional array.")
if y.ndim == 1:
y = y.reshape(1, -1)
out = numpy.sum(x * y, axis=1)
out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
return out[0] if out.size == 1 else out

View file

@ -263,8 +263,10 @@ class QuijoteBox(BaseBox):
----------
nsnap : int
Snapshot number.
**kwargs : dict
Empty keyword arguments. For backwards compatibility.
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.Paths`
Paths manager
"""
def __init__(self, nsnap, nsim, paths):

View file

@ -58,7 +58,8 @@ class BaseCatalogue(ABC):
@nsim.setter
def nsim(self, nsim):
assert isinstance(nsim, int)
if not isinstance(nsim, (int, numpy.integer)):
raise TypeError("`nsim` must be an integer!")
self._nsim = nsim
@abstractproperty
@ -614,9 +615,9 @@ class QuijoteHaloCatalogue(BaseCatalogue):
SFR=False, read_IDs=False)
cols = [("x", numpy.float32), ("y", numpy.float32),
("z", numpy.float32), ("vx", numpy.float32),
("vy", numpy.float32), ("vz", numpy.float32),
("group_mass", numpy.float32), ("npart", numpy.int32),
("z", numpy.float32), ("fof_vx", numpy.float32),
("fof_vy", numpy.float32), ("fof_vz", numpy.float32),
("group_mass", numpy.float32), ("fof_npart", numpy.int32),
("index", numpy.int32)]
data = cols_to_structured(fof.GroupLen.size, cols)
@ -624,9 +625,9 @@ class QuijoteHaloCatalogue(BaseCatalogue):
vel = fof.GroupVel * (1 + self.redshift)
for i, p in enumerate(["x", "y", "z"]):
data[p] = pos[:, i]
data["v" + p] = vel[:, i]
data["fof_v" + p] = vel[:, i]
data["group_mass"] = fof.GroupMass * 1e10
data["npart"] = fof.GroupLen
data["fof_npart"] = fof.GroupLen
# We want to start indexing from 1. Index 0 is reserved for
# particles unassigned to any FoF group.
data["index"] = 1 + numpy.arange(data.size, dtype=numpy.int32)
@ -634,7 +635,7 @@ class QuijoteHaloCatalogue(BaseCatalogue):
if load_initial:
data = self.load_initial(data, paths, "quijote")
if load_fitted:
assert nsnap == 4
data = self.load_fitted(data, paths, "quijote")
if load_initial and with_lagpatch:
data = data[numpy.isfinite(data["lagpatch_size"])]

View file

@ -366,6 +366,7 @@ class Paths:
snapshots : 1-dimensional array
"""
simpath = self.snapshots(nsim, simname, tonew=False)
if simname == "csiborg":
# Get all files in simpath that start with output_
snaps = glob(join(simpath, "output_*"))
@ -456,6 +457,8 @@ class Paths:
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
try_create_directory(fdir)
fname = f"out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
return join(fdir, fname)
@ -477,6 +480,7 @@ class Paths:
path : str
"""
fdir = join(self.postdir, "overlap")
try_create_directory(fdir)
fname = f"overlap_{str(nsim0).zfill(5)}_{str(nsimx).zfill(5)}.npz"
@ -508,9 +512,10 @@ class Paths:
-------
path : str
"""
fdir = join(self.postdir, "environment")
assert kind in ["density", "velocity", "potential", "radvel",
"environment"]
fdir = join(self.postdir, "environment")
try_create_directory(fdir)
if in_rsp:

View file

@ -37,7 +37,8 @@ except ModuleNotFoundError:
def fit_halo(particles, box):
"""
Fit a single halo from the particle array.
Fit a single halo from the particle array. Only halos with more than 100
particles are fitted.
Parameters
----------
@ -59,12 +60,17 @@ def fit_halo(particles, box):
for i, v in enumerate(["vx", "vy", "vz"]):
out[v] = numpy.average(halo.vel[:, i], weights=halo["M"])
m200c, r200c, cm = halo.spherical_overdensity_mass(200, kind="crit",
maxiter=100)
if out["npart"] < 100:
return out
cm, dist = halo.center_of_mass()
m200c, r200c = halo.spherical_overdensity_mass(dist, 200)
angmom = halo.angular_momentum(dist, cm, r200c)
out["m200c"] = m200c
out["r200c"] = r200c
out["lambda200c"] = halo.lambda_bullock(cm, r200c)
out["conc"] = halo.nfw_concentration(cm, r200c)
out["lambda200c"] = halo.lambda_bullock(angmom, m200c, r200c)
out["conc"] = halo.nfw_concentration(dist, r200c)
return out
@ -81,9 +87,6 @@ def _main(nsim, simname, verbose):
verbose : bool
Verbosity flag.
"""
# if simname == "quijote":
# raise NotImplementedError("Quijote not implemented yet.")
cols = [("index", numpy.int32),
("npart", numpy.int32),
("totpartmass", numpy.float32),
@ -116,7 +119,6 @@ def _main(nsim, simname, verbose):
for i in trange(len(cat)) if verbose else range(len(cat)):
hid = cat["index"][i]
out["index"][i] = hid
# print("i = ", i)
part = csiborgtools.read.load_halo_particles(hid, particles, halo_map,
hid2map)
# Skip if no particles.
@ -125,7 +127,7 @@ def _main(nsim, simname, verbose):
_out = fit_halo(part, box)
for key in _out.keys():
out[key][i] = _out[key]
out[key][i] = _out.get(key, numpy.nan)
fout = paths.structfit(nsnap, nsim, simname)
if verbose:

View file

@ -66,7 +66,7 @@ def _main(nsim, simname, verbose):
if simname == "csiborg":
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, rawdata=True, load_fitted=False, load_initial=False)
nsim, paths, bounds=None, load_fitted=False, load_initial=False)
else:
cat = csiborgtools.read.QuijoteHaloCatalogue(
nsim, paths, nsnap=4, load_fitted=False, load_initial=False)

View file

@ -11,10 +11,7 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to match all pairs of CSiBORG simulations. Mathches main haloes whose
mass is above 1e12 solar masses.
"""
"""A script to match all IC pairs of a simulation."""
from argparse import ArgumentParser
from distutils.util import strtobool
from itertools import combinations
@ -34,10 +31,15 @@ except ModuleNotFoundError:
import csiborgtools
def get_combs():
def get_combs(simname):
"""
Get the list of all pairs of simulations, then permute them with a known
seed to minimise loading the same files simultaneously.
Get the list of all pairs of IC indices and permute them with a fixed
seed.
Parameters
----------
simname : str
Simulation name.
Returns
-------
@ -45,38 +47,49 @@ def get_combs():
List of pairs of simulations.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
ics = paths.get_ics("csiborg")
combs = list(combinations(ics, 2))
combs = list(combinations(paths.get_ics(simname), 2))
Random(42).shuffle(combs)
return combs
def do_work(comb):
def main(comb, simname, sigma, verbose):
"""
Match a pair of simulations.
Parameters
----------
comb : tuple
Pair of simulations.
Pair of simulation IC indices.
simname : str
Simulation name.
sigma : float
Smoothing scale in number of grid cells.
verbose : bool
Verbosity flag.
Returns
-------
None
"""
nsim0, nsimx = comb
pair_match(nsim0, nsimx, args.sigma, args.smoothen, args.verbose)
pair_match(nsim0, nsimx, simname, sigma, verbose)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--sigma", type=float, default=None)
parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
default=None)
parser.add_argument("--simname", type=str, help="Simulation name.",
choices=["csiborg", "quijote"])
parser.add_argument("--sigma", type=float, default=0,
help="Smoothing scale in number of grid cells.")
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
default=False, help="Verbosity flag.")
args = parser.parse_args()
comm = MPI.COMM_WORLD
combs = get_combs()
work_delegation(do_work, combs, comm, master_verbose=True)
def _main(comb):
main(comb, args.simname, args.sigma, args.verbose)
work_delegation(_main, combs, MPI.COMM_WORLD)

View file

@ -11,7 +11,13 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate overlap between two CSiBORG realisations."""
"""
A script to calculate overlap between two IC realisations of the same
simulation. The matching is performed for haloes whose total particles mass is
- CSiBORG: > 1e13 Msun/h,
- Quijote: > 1e14 Msun/h,
since Quijote has much lower resolution than CSiBORG.
"""
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
@ -29,95 +35,123 @@ except ModuleNotFoundError:
import csiborgtools
def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
# TODO fix this.
simname = "csiborg"
overlapper_kwargs = {"box_size": 512, "bckg_halfsize": 475}
from csiborgtools.read import CSiBORGHaloCatalogue, read_h5
def pair_match(nsim0, nsimx, simname, sigma, verbose):
"""
Calculate overlaps between two simulations.
Parameters
----------
nsim0 : int
The reference simulation IC index.
nsimx : int
The cross simulation IC index.
simname : str
Simulation name.
sigma : float
Smoothing scale in number of grid cells.
verbose : bool
Verbosity flag.
Returns
-------
None
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
smooth_kwargs = {"sigma": sigma, "mode": "constant", "cval": 0.0}
overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs)
matcher = csiborgtools.match.RealisationsMatcher(**overlapper_kwargs)
smooth_kwargs = {"sigma": sigma, "mode": "wrap"}
# Load the raw catalogues (i.e. no selection) including the initial CM
# positions and the particle archives.
bounds = {"totpartmass": (1e12, None)}
cat0 = CSiBORGHaloCatalogue(nsim0, paths, load_initial=True, bounds=bounds,
with_lagpatch=True, load_clumps_cat=True)
catx = CSiBORGHaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
with_lagpatch=True, load_clumps_cat=True)
if simname == "csiborg":
overlapper_kwargs = {"box_size": 2048, "bckg_halfsize": 475}
mass_kind = "fof_totpartmass"
bounds = {mass_kind: (1e13, None)}
cat0 = csiborgtools.read.CSiBORGHaloCatalogue(
nsim0, paths, bounds=bounds, load_fitted=False,
with_lagpatch=True)
catx = csiborgtools.read.CSiBORGHaloCatalogue(
nsimx, paths, bounds=bounds, load_fitted=False,
with_lagpatch=True)
elif simname == "quijote":
overlapper_kwargs = {"box_size": 512, "bckg_halfsize": 256}
mass_kind = "group_mass"
bounds = {mass_kind: (1e14, None)}
cat0 = csiborgtools.read.QuijoteHaloCatalogue(
nsim0, paths, 4, load_fitted=False, with_lagpatch=True)
catx = csiborgtools.read.QuijoteHaloCatalogue(
nsimx, paths, 4, load_fitted=False, with_lagpatch=True)
else:
raise ValueError(f"Unknown simulation name: `{simname}`.")
clumpmap0 = read_h5(paths.particles(nsim0, simname))["clumpmap"]
parts0 = read_h5(paths.initmatch(nsim0, simname, "particles"))["particles"]
clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}
halomap0 = csiborgtools.read.read_h5(
paths.particles(nsim0, simname))["halomap"]
parts0 = csiborgtools.read.read_h5(
paths.initmatch(nsim0, simname, "particles"))["particles"]
hid2map0 = {hid: i for i, hid in enumerate(halomap0[:, 0])}
clumpmapx = read_h5(paths.particles(nsimx, simname))["clumpmap"]
partsx = read_h5(paths.initmatch(nsimx, simname, "particles"))["particles"]
clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}
halomapx = csiborgtools.read.read_h5(
paths.particles(nsimx, simname))["halomap"]
partsx = csiborgtools.read.read_h5(
paths.initmatch(nsimx, simname, "particles"))["particles"]
hid2mapx = {hid: i for i, hid in enumerate(halomapx[:, 0])}
# We generate the background density fields. Loads halos's particles one by
# one from the archive, concatenates them and calculates the NGP density
# field.
if verbose:
print(f"{datetime.now()}: generating the background density fields.",
print(f"{datetime.now()}: calculating the background density fields.",
flush=True)
delta_bckg = overlapper.make_bckg_delta(parts0, clumpmap0, clid2map0, cat0,
overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs)
delta_bckg = overlapper.make_bckg_delta(parts0, halomap0, hid2map0, cat0,
verbose=verbose)
delta_bckg = overlapper.make_bckg_delta(partsx, clumpmapx, clid2mapx, catx,
delta_bckg = overlapper.make_bckg_delta(partsx, halomapx, hid2mapx, catx,
delta=delta_bckg, verbose=verbose)
# We calculate the overlap between the NGP fields.
if verbose:
print(f"{datetime.now()}: crossing the simulations.", flush=True)
print(f"{datetime.now()}: NGP crossing the simulations.", flush=True)
matcher = csiborgtools.match.RealisationsMatcher(
mass_kind=mass_kind, **overlapper_kwargs)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx,
clumpmap0, clumpmapx, delta_bckg,
halomap0, halomapx, delta_bckg,
verbose=verbose)
# We wish to store the halo IDs of the matches, not their array positions
# in the catalogues
# We want to store the halo IDs of the matches, not their array positions
# in the catalogues.
match_hids = deepcopy(match_indxs)
for i, matches in enumerate(match_indxs):
for j, match in enumerate(matches):
match_hids[i][j] = catx["index"][match]
fout = paths.overlap(nsim0, nsimx, smoothed=False)
if verbose:
print(f"{datetime.now()}: saving to ... `{fout}`.", flush=True)
numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
ngp_overlap=ngp_overlap)
if verbose:
print(f"{datetime.now()}: calculated NGP overlap, saved to {fout}.",
flush=True)
if not smoothen:
quit()
if not sigma > 0:
return
# We now smoothen up the background density field for the smoothed overlap
# calculation.
if verbose:
print(f"{datetime.now()}: smoothing the background field.", flush=True)
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
clumpmap0, clumpmapx, delta_bckg,
halomap0, halomapx, delta_bckg,
match_indxs, smooth_kwargs,
verbose=verbose)
fout = paths.overlap(nsim0, nsimx, smoothed=True)
numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=sigma)
if verbose:
print(f"{datetime.now()}: calculated smoothing, saved to {fout}.",
flush=True)
print(f"{datetime.now()}: saving to ... `{fout}`.", flush=True)
numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=sigma)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--nsim0", type=int)
parser.add_argument("--nsimx", type=int)
parser.add_argument("--sigma", type=float, default=None)
parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
default=None)
parser.add_argument("--nsim0", type=int,
help="Reference simulation IC index.")
parser.add_argument("--nsimx", type=int,
help="Cross simulation IC index.")
parser.add_argument("--simname", type=str, help="Simulation name.")
parser.add_argument("--sigma", type=float, default=0,
help="Smoothing scale in number of grid cells.")
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
default=False, help="Verbosity flag.")
args = parser.parse_args()
pair_match(args.nsim0, args.nsimx, args.sigma, args.smoothen, args.verbose)
pair_match(args.nsim0, args.nsimx, args.simname, args.sigma, args.verbose)