Lagrangian patch + HMF calculation (#65)

* Rename lagpatch

* Fix old bug

* Fix small bug

* Add number of cells calculation

* Fix a small bug

* Rename column

* Move file

* Small changes

* Edit style

* Add plot script

* Add delta2ncells

* Add HMF calculation

* Move definition around

* Add HMF plot

* pep8

* Update HMF plotting routine

* Small edit
This commit is contained in:
Richard Stiskalek 2023-06-01 14:45:52 +01:00 committed by GitHub
parent f1dbe6f03f
commit dbf93b9416
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 376 additions and 132 deletions

View file

@ -12,6 +12,7 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .halo import Clump, Halo, dist_centmass # noqa
from .halo import (Clump, Halo, delta2ncells, dist_centmass, # noqa
number_counts)
from .haloprofile import NFWPosterior, NFWProfile # noqa
from .utils import split_jobs # noqa

View file

@ -15,9 +15,10 @@
"""A clump object."""
from abc import ABC
from numba import jit
import numpy
from numba import jit
class BaseStructure(ABC):
"""
@ -337,3 +338,51 @@ def dist_centmass(clump):
cmx, cmy, cmz = [numpy.average(xi, weights=mass) for xi in (x, y, z)]
dist = ((x - cmx)**2 + (y - cmy)**2 + (z - cmz)**2)**0.5
return dist, [cmx, cmy, cmz]
@jit(nopython=True)
def delta2ncells(delta):
"""
Calculate the number of cells in `delta` that are non-zero.
Parameters
----------
delta : 3-dimensional array
Halo density field.
Returns
-------
ncells : int
Number of non-zero cells.
"""
tot = 0
imax, jmax, kmax = delta.shape
for i in range(imax):
for j in range(jmax):
for k in range(kmax):
if delta[i, j, k] > 0:
tot += 1
return tot
@jit(nopython=True)
def number_counts(x, bin_edges):
"""
Calculate counts of samples in bins.
Parameters
----------
x : 1-dimensional array
Samples' values.
bin_edges : 1-dimensional array
Bin edges.
Returns
-------
counts : 1-dimensional array
Bin counts.
"""
out = numpy.full(bin_edges.size - 1, numpy.nan, dtype=numpy.float32)
for i in range(bin_edges.size - 1):
out[i] = numpy.sum((x >= bin_edges[i]) & (x < bin_edges[i + 1]))
return out

View file

@ -16,5 +16,4 @@ from .match import (ParticleOverlap, RealisationsMatcher, # noqa
calculate_overlap, calculate_overlap_indxs,
cosine_similarity)
from .nearest_neighbour import find_neighbour # noqa
from .num_density import binned_counts, number_density # noqa
from .utils import concatenate_parts # noqa

View file

@ -163,7 +163,7 @@ class RealisationsMatcher:
print(f"{datetime.now()}: querying the KNN.", flush=True)
match_indxs = radius_neighbours(
catx.knn(in_initial=True), cat0.position(in_initial=True),
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
radiusX=cat0["lagpatch_size"], radiusKNN=catx["lagpatch_size"],
nmult=self.nmult, enforce_int32=True, verbose=verbose)
# We next remove neighbours whose mass is too large/small.
@ -419,7 +419,7 @@ class ParticleOverlap:
delta : 3-dimensional array
"""
nshift = read_nshift(smooth_kwargs)
cells = self.pos2cell(pos)
cells = pos2cell(pos, BOX_SIZE)
# Check that minima and maxima are integers
if not (mins is None and maxs is None):
assert mins.dtype.char in numpy.typecodes["AllInteger"]
@ -432,10 +432,10 @@ class ParticleOverlap:
ncells = maxs - mins + 1 # To get the number of cells
else:
mins = [0, 0, 0]
ncells = BOX_SIZE
ncells = (BOX_SIZE, ) * 3
# Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
delta = numpy.zeros(ncells, dtype=numpy.float32)
fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
if smooth_kwargs is not None:
gaussian_filter(delta, output=delta, **smooth_kwargs)

View file

@ -1,116 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Calculation of number density functions.
"""
import numpy
def binned_counts(x, bins):
"""
Calculate number of samples in bins.
Parameters
----------
x : 1-dimensional array
Samples' values.
bins : 1-dimensional array
Bin edges of shape `(n_edges, )`.
Returns
-------
centres : 1-dimensional array
Bin centres of shape `(n_edges - 1, )`.
counts : 1-dimensional array
Bin counts of shape `(n_edges - 1, )`.
"""
if not isinstance(bins, numpy.ndarray) and bins.ndim == 1:
raise TypeError("`bins` must a 1-dimensional array.")
n_bins = bins.size
# Bin centres
centres = numpy.asarray(
[0.5 * (bins[i + 1] + bins[i]) for i in range(n_bins - 1)])
# Bin counts
out = numpy.full(n_bins - 1, numpy.nan, dtype=int)
for i in range(n_bins - 1):
out[i] = numpy.sum((x >= bins[i]) & (x < bins[i + 1]))
return centres, out
def number_density(data, feat, bins, max_dist, to_log10, return_counts=False):
"""
Calculate volume-limited number density of a feature `feat` from array
`data`, normalised also by the bin width.
Parameters
----------
data : structured array
Input array of halos.
feat : str
Parameter whose number density to calculate.
bins : 1-dimensional array
Bin edges. Note that if `to_log10` then the edges must be specified
in the logarithmic space, not linear.
max_dist : float
Maximum radial distance of the volume limited sample.
to_log10 : bool
Whether to take a logarithm of base 10 of the feature. If so, then the
bins must also be logarithmic.
return_counts : bool, optional
Whether to also return number counts in each bin. By default `False`.
Returns
-------
centres : 1-dimensional array
Bin centres of shape `(n_edges - 1, )`. If `to_log10` then converts the
bin centres back to linear space.
nd : 1-dimensional array
Number density of shape `(n_edges - 1, )`.
nd_err : 1-dimensional array
Poissonian uncertainty of `nd` of shape `(n_edges - 1, )`.
counts: 1-dimensional array, optional
Counts in each bin of shape `(n_edges - 1, )`. Returned only if
`return_counts`.
"""
# Extract the param and optionally convert to log10
x = data[feat]
x = numpy.log10(x) if to_log10 else x
# Get only things within distance from the origin
rdist = (data["peak_x"]**2 + data["peak_y"]**2 + data["peak_z"]**2)**0.5
x = x[rdist < max_dist]
# Make sure bins equally spaced
dbins = numpy.diff(bins)
dbin = dbins[0]
if not numpy.alltrue(dbins == dbin):
raise ValueError("Bins must be equally spaced. Currently `{}`."
.format(bins))
# Encompassed volume around the origin
volume = 4 * numpy.pi / 3 * max_dist**3
# Poissonian statistics
bin_centres, counts = binned_counts(x, bins)
nd = counts / volume / dbin
nd_err = counts**0.5 / volume / dbin
# Convert bins to linear space if log10
if to_log10:
bin_centres = 10**bin_centres
out = (bin_centres, nd, nd_err)
if return_counts:
out += counts
return out

View file

@ -27,7 +27,7 @@ from .readsim import ParticleReader
CONV_NAME = {
"length": ["x", "y", "z", "peak_x", "peak_y", "peak_z", "Rs", "rmin",
"rmax", "r200c", "r500c", "r200m", "x0", "y0", "z0",
"lagpatch"],
"lagpatch_size"],
"velocity": ["vx", "vy", "vz"],
"mass": ["mass_cl", "totpartmass", "m200c", "m500c", "mass_mmain", "M",
"m200m"],

View file

@ -540,7 +540,7 @@ class HaloCatalogue(BaseCSiBORG):
if not rawdata:
if with_lagpatch:
self._data = self._data[numpy.isfinite(self['lagpatch'])]
self._data = self._data[numpy.isfinite(self["lagpatch_size"])]
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(self._data, "x", "z")
for p in ("x", "y", "z"):
@ -551,7 +551,7 @@ class HaloCatalogue(BaseCSiBORG):
self._data = self.box.convert_from_box(self._data, names)
if load_initial:
names = ["x0", "y0", "z0", "lagpatch"]
names = ["x0", "y0", "z0", "lagpatch_size"]
self._data = self.box.convert_from_box(self._data, names)
if bounds is not None:

View file

@ -276,10 +276,10 @@ class PairOverlap:
if norm_kind == "r200c":
norm = self.cat0("r200c")
if norm_kind == "ref_patch":
norm = self.cat0("lagpatch")
norm = self.cat0("lagpatch_size")
if norm_kind == "sum_patch":
patch0 = self.cat0("lagpatch")
patchx = self.catx("lagpatch")
patch0 = self.cat0("lagpatch_size")
patchx = self.catx("lagpatch_size")
norm = [None] * len(self)
for i, ind in enumerate(self["match_indxs"]):
norm[i] = patch0[i] + patchx[ind]

View file

@ -387,6 +387,28 @@ class Paths:
fname = f"{kind}_{MAS}_{str(nsim).zfill(5)}_grid{grid}.npy"
return join(fdir, fname)
def halo_counts(self, simname, nsim):
"""
Path to the files containing the binned halo counts.
Parameters
----------
simname : str
Simulation name. Must be `csiborg` or `quijote`.
nsim : int
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "HMF")
if not isdir(fdir):
makedirs(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
fname = f"halo_counts_{simname}_{str(nsim).zfill(5)}.npz"
return join(fdir, fname)
def cross_nearest(self, simname, run, nsim=None, nobs=None):
"""
Path to the files containing distance from a halo in a reference

104
scripts/fit_hmf.py Normal file
View file

@ -0,0 +1,104 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the HMF for CSIBORG and Quijote haloes.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def get_counts(nsim, bins, paths, parser_args):
"""
Calculate and save the number of haloes in each mass bin.
Parameters
----------
nsim : int
Simulation index.
bins : 1-dimensional array
Array of bin edges (in log10 mass).
paths : csiborgtools.read.Paths
Paths object.
parser_args : argparse.Namespace
Parsed command-line arguments.
Returns
-------
None
"""
simname = parser_args.simname
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
bounds = {"dist": (0, parser_args.Rmax)}
if simname == "csiborg":
cat = csiborgtools.read.HaloCatalogue(nsim, paths, bounds=bounds)
logmass = numpy.log10(cat["totpartmass"])
counts = csiborgtools.fits.number_counts(logmass, bins)
elif simname == "quijote":
cat0 = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
nmax = int(cat0.box.boxsize // (2 * parser_args.Rmax))**3
counts = numpy.full((nmax, len(bins) - 1), numpy.nan,
dtype=numpy.float32)
for nobs in range(nmax):
cat = cat0.pick_fiducial_observer(nobs, rmax=parser_args.Rmax)
logmass = numpy.log10(cat["group_mass"])
counts[nobs, :] = csiborgtools.fits.number_counts(logmass, bins)
fout = paths.halo_counts(simname, nsim)
if parser_args.verbose:
print(f"{datetime.now()}: saving halo counts to `{fout}`.")
numpy.savez(fout, counts=counts, bins=bins, rmax=parser_args.Rmax)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
parser.add_argument("--Rmax", type=float, default=155/0.705,
help="High-resolution region radius")
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
parser_args = parser.parse_args()
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(parser_args, paths)
bins = numpy.arange(11., 16., 0.2, dtype=numpy.float32)
def do_work(nsim):
get_counts(nsim, bins, paths, parser_args)
work_delegation(do_work, nsims, comm, master_verbose=parser_args.verbose)

View file

@ -56,13 +56,15 @@ cols_collect = [("index", numpy.int32),
("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("lagpatch", numpy.float32),]
("lagpatch_size", numpy.float32),
("lagpatch_ncells", numpy.int32),]
# MPI loop over simulations
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for nsim in [ics[i] for i in jobs]:
nsnap = max(paths.get_snapshots(nsim))
overlapper = csiborgtools.match.ParticleOverlap()
print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
flush=True)
@ -88,11 +90,16 @@ for nsim in [ics[i] for i in jobs]:
if part is None or part.size < 100:
continue
# Calculate the centre of mass and the Lagrangian patch size.
dist, cm = csiborgtools.fits.dist_centmass(part)
# We enforce a maximum patchsize of 0.075 in box coordinates.
patchsize = min(numpy.percentile(dist, 99), 0.075)
out["x"][i], out["y"][i], out["z"][i] = cm
out["lagpatch"][i] = patchsize
out["lagpatch_size"][i] = patchsize
# Calculate the number of cells with > 0 density.
delta = overlapper.make_delta(part[:, :3], part[:, 3], subbox=True)
out["lagpatch_ncells"][i] = csiborgtools.fits.delta2ncells(delta)
out = out[ismain]
# Now save it

178
scripts_plots/plot_data.py Normal file
View file

@ -0,0 +1,178 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from os.path import join
from argparse import ArgumentParser
import matplotlib.pyplot as plt
import numpy
import scienceplots # noqa
import utils
from cache_to_disk import cache_to_disk, delete_disk_caches_for_function # noqa
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def open_csiborg(nsim):
"""
Open a CSiBORG halo catalogue.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
bounds = {"totpartmass": (None, None), "dist": (0, 155/0.705)}
return csiborgtools.read.HaloCatalogue(nsim, paths, bounds=bounds)
def open_quijote(nsim, nobs=None):
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
if nobs is not None:
cat = cat.pick_fiducial_observer(nobs, rmax=155.5 / 0.705)
return cat
def plot_mass_vs_ncells(nsim, pdf=False):
cat = open_csiborg(nsim)
mpart = 4.38304044e+09
with plt.style.context(utils.mplstyle):
plt.figure()
plt.scatter(cat["totpartmass"], cat["lagpatch_ncells"], s=0.25,
rasterized=True)
plt.xscale("log")
plt.yscale("log")
for n in [1, 10, 100]:
plt.axvline(n * 512 * mpart, c="black", ls="--", zorder=0, lw=0.8)
plt.xlabel(r"$M_{\rm tot} / M_\odot$")
plt.ylabel(r"$N_{\rm cells}$")
for ext in ["png"] if pdf is False else ["png", "pdf"]:
fout = join(utils.fout, f"init_mass_vs_ncells_{nsim}.{ext}")
print(f"Saving to `{fout}`.")
plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
plt.close()
def process_counts(counts):
mean = numpy.mean(counts, axis=0)
std = numpy.std(counts, axis=0)
return mean, std
def plot_hmf(pdf=False):
print("Plotting the HMF...", flush=True)
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
csiborg_nsims = paths.get_ics("csiborg")
print("Loading CSiBORG halo counts.", flush=True)
for i, nsim in enumerate(tqdm(csiborg_nsims)):
data = numpy.load(paths.halo_counts("csiborg", nsim))
if i == 0:
bins = data["bins"]
csiborg_counts = numpy.full((len(csiborg_nsims), len(bins) - 1),
numpy.nan, dtype=numpy.float32)
csiborg_counts[i, :] = data["counts"]
csiborg_counts /= numpy.diff(bins).reshape(1, -1)
print("Loading Quijote halo counts.", flush=True)
quijote_nsims = paths.get_ics("quijote")
for i, nsim in enumerate(tqdm(quijote_nsims)):
data = numpy.load(paths.halo_counts("quijote", nsim))
if i == 0:
bins = data["bins"]
nmax = data["counts"].shape[0]
quijote_counts = numpy.full(
(len(quijote_nsims) * nmax, len(bins) - 1), numpy.nan,
dtype=numpy.float32)
quijote_counts[i * nmax:(i + 1) * nmax, :] = data["counts"]
quijote_counts /= numpy.diff(bins).reshape(1, -1)
x = 10**(0.5 * (bins[1:] + bins[:-1]))
# Edit lower limits
csiborg_counts[:, x < 1e12] = numpy.nan
quijote_counts[:, x < 8e12] = numpy.nan
# Edit upper limits
csiborg_counts[:, x > 4e15] = numpy.nan
quijote_counts[:, x > 4e15] = numpy.nan
with plt.style.context(utils.mplstyle):
cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]
fig, ax = plt.subplots(nrows=2, sharex=True,
figsize=(3.5, 2.625 * 1.25),
gridspec_kw={"height_ratios": [1, 0.65]})
fig.subplots_adjust(hspace=0, wspace=0)
mean_csiborg, std_csiborg = process_counts(csiborg_counts)
ax[0].plot(x, mean_csiborg, label="CSiBORG")
ax[0].fill_between(x, mean_csiborg - std_csiborg,
mean_csiborg + std_csiborg, alpha=0.5)
mean_quijote, std_quijote = process_counts(quijote_counts)
ax[0].plot(x, mean_quijote, label="Quijote")
ax[0].fill_between(x, mean_quijote - std_quijote,
mean_quijote + std_quijote, alpha=0.5)
log_y = numpy.log10(mean_csiborg / mean_quijote)
err = numpy.sqrt((std_csiborg / mean_csiborg / numpy.log(10))**2
+ (std_quijote / mean_quijote / numpy.log(10))**2)
ax[1].plot(x, 10**log_y, c=cols[2])
ax[1].fill_between(x, 10**(log_y - err), 10**(log_y + err), alpha=0.5,
color=cols[2])
ax[1].axhline(1, color="k", ls=plt.rcParams["lines.linestyle"],
lw=0.5 * plt.rcParams["lines.linewidth"], zorder=0)
ax[0].set_ylabel(r"$\frac{\mathrm{d} n}{\mathrm{d}\log M_{\rm h}}~\mathrm{dex}^{-1}$") # noqa
ax[1].set_xlabel(r"$M_{\rm h}$ [$M_\odot$]")
ax[1].set_ylabel(r"$\mathrm{CSiBORG} / \mathrm{Quijote}$")
ax[0].set_xscale("log")
ax[0].set_yscale("log")
ax[1].set_yscale("log")
ax[0].legend()
fig.tight_layout(h_pad=0, w_pad=0)
for ext in ["png"] if pdf is False else ["png", "pdf"]:
fout = join(utils.fout, f"hmf_comparison.{ext}")
print(f"Saving to `{fout}`.")
fig.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
plt.close()
###############################################################################
# Command line interface #
###############################################################################
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument('-c', '--clean', action='store_true')
args = parser.parse_args()
cached_funcs = []
if args.clean:
for func in cached_funcs:
print(f"Cleaning cache for function {func}.")
delete_disk_caches_for_function(func)
# plot_mass_vs_occupancy(7444)
# plot_mass_vs_normcells(7444 + 24 * 4, pdf=False)
# plot_mass_vs_ncells(7444, pdf=True)
plot_hmf(pdf=True)

View file

@ -15,4 +15,4 @@
dpi = 450
fout = "../plots/"
mplstyle = ["science", "notebook"]
mplstyle = ["science"]