Lagrangian patch + HMF calculation (#65)

* Rename lagpatch

* Fix old bug

* Fix small bug

* Add number of cells calculation

* Fix a small bug

* Rename column

* Move file

* Small changes

* Edit style

* Add plot script

* Add delta2ncells

* Add HMF calculation

* Move definition around

* Add HMF plot

* pep8

* Update HMF plotting routine

* Small edit
This commit is contained in:
Richard Stiskalek 2023-06-01 14:45:52 +01:00 committed by GitHub
parent f1dbe6f03f
commit dbf93b9416
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 376 additions and 132 deletions

View file

@ -12,6 +12,7 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .halo import Clump, Halo, dist_centmass # noqa
from .halo import (Clump, Halo, delta2ncells, dist_centmass, # noqa
number_counts)
from .haloprofile import NFWPosterior, NFWProfile # noqa
from .utils import split_jobs # noqa

View file

@ -15,9 +15,10 @@
"""A clump object."""
from abc import ABC
from numba import jit
import numpy
from numba import jit
class BaseStructure(ABC):
"""
@ -337,3 +338,51 @@ def dist_centmass(clump):
cmx, cmy, cmz = [numpy.average(xi, weights=mass) for xi in (x, y, z)]
dist = ((x - cmx)**2 + (y - cmy)**2 + (z - cmz)**2)**0.5
return dist, [cmx, cmy, cmz]
@jit(nopython=True)
def delta2ncells(delta):
    """
    Count the cells of `delta` whose value is strictly positive.

    Parameters
    ----------
    delta : 3-dimensional array
        Halo density field.

    Returns
    -------
    ncells : int
        Number of cells holding a value above zero.
    """
    # Unpacking the shape into three names requires `delta` to be 3D.
    nx, ny, nz = delta.shape
    ncells = 0
    for ix in range(nx):
        for iy in range(ny):
            for iz in range(nz):
                # Skip every cell that is not strictly positive.
                if not delta[ix, iy, iz] > 0:
                    continue
                ncells += 1
    return ncells
@jit(nopython=True)
def number_counts(x, bin_edges):
    """
    Count how many samples fall into each bin.

    A sample is assigned to bin `i` if
    `bin_edges[i] <= x < bin_edges[i + 1]`, so every bin, including the last
    one, excludes its right edge.

    Parameters
    ----------
    x : 1-dimensional array
        Samples' values.
    bin_edges : 1-dimensional array
        Bin edges.

    Returns
    -------
    counts : 1-dimensional array
        Bin counts of shape `(bin_edges.size - 1, )`, stored as 32-bit
        floats.
    """
    nbins = bin_edges.size - 1
    counts = numpy.full(nbins, numpy.nan, dtype=numpy.float32)
    for n in range(nbins):
        lower = bin_edges[n]
        upper = bin_edges[n + 1]
        # Half-open interval: include the left edge, exclude the right one.
        in_bin = (x >= lower) & (x < upper)
        counts[n] = numpy.sum(in_bin)
    return counts

View file

@ -16,5 +16,4 @@ from .match import (ParticleOverlap, RealisationsMatcher, # noqa
calculate_overlap, calculate_overlap_indxs,
cosine_similarity)
from .nearest_neighbour import find_neighbour # noqa
from .num_density import binned_counts, number_density # noqa
from .utils import concatenate_parts # noqa

View file

@ -163,7 +163,7 @@ class RealisationsMatcher:
print(f"{datetime.now()}: querying the KNN.", flush=True)
match_indxs = radius_neighbours(
catx.knn(in_initial=True), cat0.position(in_initial=True),
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
radiusX=cat0["lagpatch_size"], radiusKNN=catx["lagpatch_size"],
nmult=self.nmult, enforce_int32=True, verbose=verbose)
# We next remove neighbours whose mass is too large/small.
@ -419,7 +419,7 @@ class ParticleOverlap:
delta : 3-dimensional array
"""
nshift = read_nshift(smooth_kwargs)
cells = self.pos2cell(pos)
cells = pos2cell(pos, BOX_SIZE)
# Check that minima and maxima are integers
if not (mins is None and maxs is None):
assert mins.dtype.char in numpy.typecodes["AllInteger"]
@ -432,10 +432,10 @@ class ParticleOverlap:
ncells = maxs - mins + 1 # To get the number of cells
else:
mins = [0, 0, 0]
ncells = BOX_SIZE
ncells = (BOX_SIZE, ) * 3
# Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
delta = numpy.zeros(ncells, dtype=numpy.float32)
fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
if smooth_kwargs is not None:
gaussian_filter(delta, output=delta, **smooth_kwargs)

View file

@ -1,116 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Calculation of number density functions.
"""
import numpy
def binned_counts(x, bins):
    """
    Calculate number of samples in bins.

    A sample is assigned to bin `i` if `bins[i] <= x < bins[i + 1]`, so every
    bin, including the last one, excludes its right edge.

    Parameters
    ----------
    x : 1-dimensional array
        Samples' values.
    bins : 1-dimensional array
        Bin edges of shape `(n_edges, )`.

    Returns
    -------
    centres : 1-dimensional array
        Bin centres of shape `(n_edges - 1, )`.
    counts : 1-dimensional array
        Integer bin counts of shape `(n_edges - 1, )`.

    Raises
    ------
    TypeError
        If `bins` is not a 1-dimensional numpy array.
    """
    # The negation must cover both conditions: negating only the `isinstance`
    # test lets multi-dimensional arrays through and makes non-arrays fail
    # with an `AttributeError` on `.ndim` instead of this `TypeError`.
    if not (isinstance(bins, numpy.ndarray) and bins.ndim == 1):
        raise TypeError("`bins` must be a 1-dimensional array.")

    n_bins = bins.size
    # Bin centres are the midpoints of consecutive edges.
    centres = 0.5 * (bins[1:] + bins[:-1])

    # Start from zeros: NaN has no integer representation, so it cannot be
    # used as a placeholder in an integer array.
    counts = numpy.zeros(n_bins - 1, dtype=int)
    for i in range(n_bins - 1):
        counts[i] = numpy.sum((x >= bins[i]) & (x < bins[i + 1]))
    return centres, counts
def number_density(data, feat, bins, max_dist, to_log10, return_counts=False):
    """
    Calculate volume-limited number density of a feature `feat` from array
    `data`, normalised also by the bin width.

    Parameters
    ----------
    data : structured array
        Input array of halos. Must contain the fields `peak_x`, `peak_y` and
        `peak_z`, which are used for the distance cut, and `feat`.
    feat : str
        Parameter whose number density to calculate.
    bins : 1-dimensional array
        Bin edges. Note that if `to_log10` then the edges must be specified
        in the logarithmic space, not linear.
    max_dist : float
        Maximum radial distance of the volume limited sample.
    to_log10 : bool
        Whether to take a logarithm of base 10 of the feature. If so, then the
        bins must also be logarithmic.
    return_counts : bool, optional
        Whether to also return number counts in each bin. By default `False`.

    Returns
    -------
    centres : 1-dimensional array
        Bin centres of shape `(n_edges - 1, )`. If `to_log10` then converts the
        bin centres back to linear space.
    nd : 1-dimensional array
        Number density of shape `(n_edges - 1, )`.
    nd_err : 1-dimensional array
        Poissonian uncertainty of `nd` of shape `(n_edges - 1, )`.
    counts: 1-dimensional array, optional
        Counts in each bin of shape `(n_edges - 1, )`. Returned only if
        `return_counts`.

    Raises
    ------
    ValueError
        If the bins are not equally spaced.
    """
    # Extract the param and optionally convert to log10
    x = data[feat]
    if to_log10:
        x = numpy.log10(x)

    # Get only things within distance from the origin
    rdist = (data["peak_x"]**2 + data["peak_y"]**2 + data["peak_z"]**2)**0.5
    x = x[rdist < max_dist]

    # Make sure bins are equally spaced. Compare with a relative tolerance:
    # edges produced by e.g. `numpy.arange` or `numpy.linspace` differ in the
    # last few bits, so an exact equality test rejects valid input.
    dbins = numpy.diff(bins)
    dbin = dbins[0]
    if not numpy.allclose(dbins, dbin, rtol=1e-8, atol=0.0):
        raise ValueError("Bins must be equally spaced. Currently `{}`."
                         .format(bins))

    # Encompassed volume around the origin
    volume = 4 * numpy.pi / 3 * max_dist**3

    # Poissonian statistics
    bin_centres, counts = binned_counts(x, bins)
    nd = counts / volume / dbin
    nd_err = counts**0.5 / volume / dbin

    # Convert bins to linear space if log10
    if to_log10:
        bin_centres = 10**bin_centres

    out = (bin_centres, nd, nd_err)
    if return_counts:
        # Append `counts` as a fourth element. Adding the bare array to the
        # tuple would not extend it.
        out += (counts,)
    return out

View file

@ -27,7 +27,7 @@ from .readsim import ParticleReader
CONV_NAME = {
"length": ["x", "y", "z", "peak_x", "peak_y", "peak_z", "Rs", "rmin",
"rmax", "r200c", "r500c", "r200m", "x0", "y0", "z0",
"lagpatch"],
"lagpatch_size"],
"velocity": ["vx", "vy", "vz"],
"mass": ["mass_cl", "totpartmass", "m200c", "m500c", "mass_mmain", "M",
"m200m"],

View file

@ -540,7 +540,7 @@ class HaloCatalogue(BaseCSiBORG):
if not rawdata:
if with_lagpatch:
self._data = self._data[numpy.isfinite(self['lagpatch'])]
self._data = self._data[numpy.isfinite(self["lagpatch_size"])]
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(self._data, "x", "z")
for p in ("x", "y", "z"):
@ -551,7 +551,7 @@ class HaloCatalogue(BaseCSiBORG):
self._data = self.box.convert_from_box(self._data, names)
if load_initial:
names = ["x0", "y0", "z0", "lagpatch"]
names = ["x0", "y0", "z0", "lagpatch_size"]
self._data = self.box.convert_from_box(self._data, names)
if bounds is not None:

View file

@ -276,10 +276,10 @@ class PairOverlap:
if norm_kind == "r200c":
norm = self.cat0("r200c")
if norm_kind == "ref_patch":
norm = self.cat0("lagpatch")
norm = self.cat0("lagpatch_size")
if norm_kind == "sum_patch":
patch0 = self.cat0("lagpatch")
patchx = self.catx("lagpatch")
patch0 = self.cat0("lagpatch_size")
patchx = self.catx("lagpatch_size")
norm = [None] * len(self)
for i, ind in enumerate(self["match_indxs"]):
norm[i] = patch0[i] + patchx[ind]

View file

@ -387,6 +387,28 @@ class Paths:
fname = f"{kind}_{MAS}_{str(nsim).zfill(5)}_grid{grid}.npy"
return join(fdir, fname)
def halo_counts(self, simname, nsim):
    """
    Build the path of the file holding the binned halo counts, creating
    the `HMF` directory under `self.postdir` if it does not exist yet.

    Parameters
    ----------
    simname : str
        Simulation name. Expected to be `csiborg` or `quijote`; the value
        is not validated here.
    nsim : int
        IC realisation index. It is zero-padded to five digits in the file
        name.

    Returns
    -------
    path : str
    """
    outdir = join(self.postdir, "HMF")
    if not isdir(outdir):
        makedirs(outdir)
        warn(f"Created directory `{outdir}`.", UserWarning, stacklevel=1)

    nsim_tag = str(nsim).zfill(5)
    return join(outdir, f"halo_counts_{simname}_{nsim_tag}.npz")
def cross_nearest(self, simname, run, nsim=None, nobs=None):
"""
Path to the files containing distance from a halo in a reference