JAX and fix conc (#6)

* change to log10 initlogRs

* add uncertainty

* add check if hessian negative

* update TODO

* update TODO

* output the error too

* save e_logRs

* add concentration calculation

* calcul concentration

* missed comma in a list

* Update TODO

* fix bug

* add box units and pretty status

* make uncertainty optional

* add BIC function

* remove BIC again

* add new overdensity calculation

* change defualt values to max dist and mass

* change to r200

* new halo find

* speed up the calculation

* add commented fucn

* update TODO

* add check whether too close to outside boundary

* Update TODO

* extract velocity as well

* calculate m200 and m500

* update nb

* update TODO
This commit is contained in:
Richard Stiskalek 2022-11-05 21:17:05 +00:00 committed by GitHub
parent ee84c12a55
commit 7f58b1f78c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 278 additions and 594 deletions

View file

@ -1,11 +1,12 @@
# CSiBORG tools
## :scroll: Short-term TODO
- [x] Calculate $M_{\rm vir}, R_{\rm vir}, c$ from $R_s, \rho_0, \ldots$
- [x] In `NFWPosterior` correct for the radius in which particles are fitted.
- [x] Calculate $M_{\rm 500c}$ by sphere shrinking
- [x] Change to log10 of the scale factor
- [ ] Calculate uncertainty on $R_{\rm s}$, switch to `JAX` and get gradients.
- [ ] Calculate mass functions
- [ ] Add functions for converting the output file to box units.
- [ ] Calculate catalogues for all realisations.
- [x] Verify the bulk flow of particles and the clump
- [x] Check why for some halos $M_{500c} > M_{200c}$
## :hourglass: Long-term TODO
@ -13,4 +14,5 @@
- [ ] Calculate the cross-correlation in CSiBORG. Should see the scale of the constraints?
## :bulb: Open questions
## :bulb: Open questions
- Propagate uncertainty of $\log R_{\rm s}$ to concentration

View file

@ -18,7 +18,6 @@ Tools for splitting the particles and a clump object.
import numpy
from scipy.optimize import minimize_scalar
from os import remove
from warnings import warn
from os.path import join
@ -467,7 +466,7 @@ class Clump:
@property
def rhoc(self):
"""
r"""
The critical density :math:`\rho_c` at this snapshot in box units.
Returns
@ -510,7 +509,7 @@ class Clump:
"""
return numpy.mean(self.pos + self.clump_pos, axis=0)
def enclosed_spherical_mass(self, rmax, rmin=None):
def enclosed_spherical_mass(self, rmax, rmin=0):
"""
The enclosed spherical mass between two radii. All quantities remain
in the box units.
@ -520,152 +519,106 @@ class Clump:
rmax : float
The maximum radial distance.
rmin : float, optional
The minimum radial distance. By default the radial distance of the
closest particle.
The minimum radial distance. By default 0.
Returns
-------
M_enclosed : float
The enclosed mass.
"""
rmin = self.rmin if rmin is None else rmin
return numpy.sum(self.m[(self.r >= rmin) & (self.r <= rmax)])
def enclosed_spherical_density(self, rmax, rmin=None):
def enclosed_spherical_volume(self, rmax, rmin=0):
"""
The enclosed spherical density between two radii. All quantities
remain in box units.
Enclosed volume within two radii.
Parameters
----------
rmax : float
The maximum radial distance.
rmin : float, optional
The minimum radial distance. By default the radial distance of the
closest particle.
The minimum radial distance. By default 0.
Returns
-------
rho_enclosed : float
The enclosed density.
vol : float
The enclosed volume.
"""
rmin = self.rmin if rmin is None else rmin
M = self.enclosed_spherical_mass(rmax, rmin)
V = 4 * numpy.pi / 3 * (rmax**3 - rmin**3)
return M / V
return 4 * numpy.pi / 3 * (rmax**3 - rmin**3)
def radius_enclosed_overdensity(self, delta):
def spherical_overdensity_mass(self, delta, n_particles_min=10):
r"""
Radius of where the mean enclosed spherical density reaches a multiple
of the critical radius at a given redshift `self.rho_c`. Returns
`numpy.nan` if the fit does not converge. Note that `rhoc` must be in
box units!
Spherical overdensity mass and radius. The mass is defined as the
enclosed mass within a radius of where the mean enclosed spherical
density reaches a multiple of the critical radius at a given redshift
`self.rho_c`.
Starts from the furthest particle, working its way inside the halo
through an ordered list of particles. The corresponding values is the
radial distance of the first particle whose addition sufficiently
increases the mean density.
Parameters
----------
delta : int or float
delta : list of int or float
The :math:`\delta_{\rm x}` parameters where :math:`\mathrm{x}` is
the overdensity multiple.
n_particles_min : int
The minimum number of enclosed particles for a radius to be
considered trustworthy.
Returns
-------
rx : float
The radius where the enclosed density reaches required value.
mx : float
The corresponding spherical enclosed mass.
"""
# Loss function to minimise
def loss(r):
return abs(self.enclosed_spherical_density(r, self.rmin)
- delta * self.rhoc)
# If single `delta` turn to list
delta = [delta] if isinstance(delta, (float, int)) else delta
# If given a list or tuple turn to array
_istlist = isinstance(delta, (list, tuple))
delta = numpy.asarray(delta, dtype=float) if _istlist else delta
res = minimize_scalar(loss, bounds=(self.rmin, self.rmax),
method='bounded')
return res.x if res.success else numpy.nan
# Ordering of deltas
order_delta = numpy.argsort(delta)
# Sort the particles
order_particles = numpy.argsort(self.r)[::-1]
# Density to aim for
n_delta = delta.size
target_density = delta * self.rhoc
@property
def r200(self):
r"""
The radius at which the mean spherical density reaches 200 times
the critical density, :math:`R_{200c}`. Returns `numpy.nan` if the
estimate fails.
# The sum of particle masses, starting from the outside
# Adds the furtherst particle ensure that the 0th index is tot mass
cummass_ordered = (self.total_particle_mass
+ self.m[order_particles][0]
- numpy.cumsum(self.m[order_particles]))
# Enclosed volumes at particle radii
volumes = self.enclosed_spherical_volume(self.r[order_particles])
densities = cummass_ordered / volumes
Returns
-------
r200 : float
The R200c radius
"""
return self.radius_enclosed_overdensity(200)
# Pre-allocate arrays
rfound = numpy.full_like(delta, numpy.nan)
mfound = numpy.full_like(rfound, numpy.nan)
@property
def r178(self):
r"""
The radius at which the mean spherical density reaches 178 times
the critical density, :math:`R_{178c}`. Returns `numpy.nan` if the
estimate fails.
for n in order_delta:
overdense_mask = densities > target_density[n]
Returns
-------
r178 : float
The R178c radius
"""
return self.radius_enclosed_overdensity(178)
# Enforce that we have at least several particles enclosed
if numpy.sum(overdense_mask) < n_particles_min:
continue
# The outermost particle radius where the overdensity is achieved
k = numpy.where(overdense_mask)[0][0]
rfound[n] = self.r[order_particles][k]
mfound[n] = cummass_ordered[k]
@property
def r500(self):
r"""
The radius at which the mean spherical density reaches 500 times
the critical density, :math:`R_{500c}`. Returns `numpy.nan` if the
estimate fails.
# If only one delta return simply numbers
if n_delta == 1:
rfound = rfound[0]
mfound = mfound[0]
Returns
-------
r500 : float
The R500c radius
"""
return self.radius_enclosed_overdensity(500)
@property
def m200(self):
r"""
The mass enclosed within the :math:`R_{200c}` region, obtained from
`self.r200`. Returns `numpy.nan` if the radius estimate fails.
Returns
-------
m200 : float
The M200 mass
"""
r200 = self.radius_enclosed_overdensity(200)
return self.enclosed_spherical_mass(r200)
@property
def m178(self):
r"""
The mass enclosed within the :math:`R_{178c}` region, obtained from
`self.r178`. This is approximately the virial mass, though this notion
depends on the dynamical state of the clump. Returns `numpy.nan` if
the radius estimate fails.
Returns
-------
m178 : float
The M178 mass
"""
r178 = self.radius_enclosed_overdensity(178)
return self.enclosed_spherical_mass(r178)
@property
def m500(self):
r"""
The mass enclosed within the :math:`R_{500c}` region, obtained from
`self.r500`. Returns `numpy.nan` if the radius estimate fails.
Returns
-------
m500 : float
The M500 mass
"""
r500 = self.radius_enclosed_overdensity(500)
return self.enclosed_spherical_mass(r500)
return rfound, mfound
@classmethod
def from_arrays(cls, particles, clump, rhoc=None):

View file

@ -18,11 +18,34 @@ Halo profiles functions and posteriors.
import numpy
from jax import numpy as jnumpy
from jax import grad
from scipy.optimize import minimize_scalar
from scipy.stats import uniform
from .halo import Clump
def jax_switch(f1, f2, use_jax):
"""
Return function `f1` if `use_jax` else return `f2`.
Parameters
----------
f1 : function
The JAX function.
f2 : function
The non-JAX function.
use_jax : bool
Whether to use the JAX function.
Returns
-------
chosen_func : function
The chosen function.
"""
return f1 if use_jax else f2
class NFWProfile:
r"""
The Navarro-Frenk-White (NFW) density profile defined as
@ -66,7 +89,7 @@ class NFWProfile:
return rho0 / (x * (1 + x)**2)
@staticmethod
def logprofile(r, Rs, rho0):
def logprofile(r, Rs, rho0, use_jax=False):
r"""
Natural logarithm of the halo profile evaluated at :math:`r`.
@ -78,17 +101,20 @@ class NFWProfile:
Scale radius :math:`R_s`.
rho0 : float
NFW density parameter :math:`\rho_0`.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
Returns
-------
logdensity : float or 1-dimensional array
Logarithmic density of the NFW profile at :math:`r`.
"""
log = jax_switch(jnumpy.log, numpy.log, use_jax)
x = r / Rs
return numpy.log(rho0) - numpy.log(x) - 2 * numpy.log(1 + x)
return log(rho0) - log(x) - 2 * log(1 + x)
@staticmethod
def enclosed_mass(r, Rs, rho0):
def enclosed_mass(r, Rs, rho0, use_jax=False):
r"""
Enclosed mass of a NFW profile in radius :math:`r`.
@ -100,6 +126,8 @@ class NFWProfile:
Scale radius :math:`R_s`.
rho0 : float
NFW density parameter :math:`\rho_0`.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
Returns
-------
@ -107,11 +135,11 @@ class NFWProfile:
The enclosed mass.
"""
x = r / Rs
out = numpy.log(1 + x) - x / (1 + x)
out = jax_switch(jnumpy.log, numpy.log, use_jax)(1 + x) - x / (1 + x)
return 4 * numpy.pi * rho0 * Rs**3 * out
def bounded_enclosed_mass(self, rmin, rmax, Rs, rho0):
"""
def bounded_enclosed_mass(self, rmin, rmax, Rs, rho0, use_jax=False):
r"""
Calculate the enclosed mass between :math:`r_min <= r <= r_max`.
Parameters
@ -124,14 +152,16 @@ class NFWProfile:
Scale radius :math:`R_s`.
rho0 : float
NFW density parameter :math:`\rho_0`.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
Returns
-------
M : float
The enclosed mass within the radial range.
"""
return (self.enclosed_mass(rmax, Rs, rho0)
- self.enclosed_mass(rmin, Rs, rho0))
return (self.enclosed_mass(rmax, Rs, rho0, use_jax)
- self.enclosed_mass(rmin, Rs, rho0, use_jax))
def pdf(self, r, Rs, rmin, rmax):
r"""
@ -319,7 +349,7 @@ class NFWPosterior(NFWProfile):
self._clump = clump
# The minimum separation
rmin = self.clump.rmin
rmax = self.clump.r200
rmax, __ = self.clump.spherical_overdensity_mass(200)
# Set the distances
self._rmin = rmin
self._rmax = rmax
@ -340,7 +370,7 @@ class NFWPosterior(NFWProfile):
gamma = 4 * numpy.pi * self.r**2 * self.m * self.Npart
self._ll0 = numpy.sum(numpy.log(gamma)) - self.Npart * self._logMtot
def rho0_from_logRs(self, logRs):
def rho0_from_Rs(self, Rs):
r"""
Obtain :math:`\rho_0` of the NFW profile from the integral constraint
on total mass. Calculated as the ratio between the total particle mass
@ -357,8 +387,7 @@ class NFWPosterior(NFWProfile):
The NFW density parameter.
"""
Mtot = numpy.exp(self._logMtot)
Mnfw_norm = self.bounded_enclosed_mass(self.rmin, self.rmax,
10**logRs, 1)
Mnfw_norm = self.bounded_enclosed_mass(self.rmin, self.rmax, Rs, 1)
return Mtot / Mnfw_norm
def logprior(self, logRs):
@ -379,7 +408,7 @@ class NFWPosterior(NFWProfile):
return - numpy.infty
return - self._logprior_volume
def loglikelihood(self, logRs):
def loglikelihood(self, logRs, use_jax=False):
"""
Logarithmic likelihood.
@ -387,6 +416,8 @@ class NFWPosterior(NFWProfile):
----------
logRs : float
Logarithmic scale factor in units matching the coordinates.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
Returns
-------
@ -394,11 +425,14 @@ class NFWPosterior(NFWProfile):
The logarithmic likelihood.
"""
Rs = 10**logRs
log = jnumpy.log if use_jax else numpy.log
# Expected enclosed mass from a NFW
Mnfw = self.bounded_enclosed_mass(self.rmin, self.rmax,
Rs, 1)
ll = self._ll0 + numpy.sum(self.logprofile(self.r, Rs, 1))
return ll - self.Npart * numpy.log(Mnfw)
Rs, 1, use_jax)
ll = jax_switch(jnumpy.sum, numpy.sum, use_jax)(self.logprofile(
self.r, Rs, 1, use_jax))
ll += self._ll0
return ll - self.Npart * log(Mnfw)
@property
def initlogRs(self):
@ -415,9 +449,9 @@ class NFWPosterior(NFWProfile):
bins = numpy.linspace(self.rmin, self.rmax,
self._binsguess)
counts, edges = numpy.histogram(self.r, bins)
return numpy.log(edges[numpy.argmax(counts)])
return numpy.log10(edges[numpy.argmax(counts)])
def __call__(self, logRs):
def __call__(self, logRs, use_jax=False):
"""
Logarithmic posterior. Sum of the logarithmic prior and likelihood.
@ -425,6 +459,8 @@ class NFWPosterior(NFWProfile):
----------
logRs : float
Logarithmic scale factor in units matching the coordinates.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
Returns
-------
@ -434,21 +470,73 @@ class NFWPosterior(NFWProfile):
lp = self.logprior(logRs)
if not numpy.isfinite(lp):
return - numpy.infty
return self.loglikelihood(logRs) + lp
return self.loglikelihood(logRs, use_jax) + lp
def maxpost_logRs(self):
def uncertainty_at_maxpost(self, logRs_max):
r"""
Maximum a-posterio estimate of the scale radius :math:`\log R_{\rm s}`.
Returns the scale radius if the fit converged, otherwise `numpy.nan`.
Calculate Gaussian approximation of the uncertainty at `logRs_max`, the
maximum a-posteriori esitmate. This is the square root of the negative
inverse 2nd derivate of the logarithimic posterior with respect to the
logarithm of the scale factor. This is only valid `logRs_max` is the
maximum of the posterior!
This uses `JAX`. The functions should be compiled but unless there is
a need for more speed this is fine as it is.
Parameters
----------
logRs_max : float
Position :math:`\log R_{\rm s}` to evaluate the uncertainty. Must
be the maximum.
Returns
-------
res : float
The scale radius.
uncertainty : float
The uncertainty calculated as outlined above.
"""
def f(x):
return self(x, use_jax=True)
# Evaluate the second derivative
h = grad(grad(f))(logRs_max)
h = float(h)
if not h < 0:
return numpy.nan
return (- 1 / h)**0.5
def maxpost_logRs(self, calc_err=False, eps=1e-4):
r"""
Maximum a-posterio estimate of the scale radius :math:`\log R_{\rm s}`.
Returns the scale radius if the fit converged, otherwise `numpy.nan`.
Checks whether :math:`log r_{\rm max} / R_{\rm s} > \epsilon`, where
to ensure that the scale radius is not too close to the boundary which
occurs if the fit fails.
Parameters
----------
calc_err : bool, optional
Optional toggle to calculate the uncertainty on the scale radius.
By default false.
Returns
-------
logRs: float
The log scale radius.
uncertainty : float
The uncertainty on the scale radius. Calculated following
`self.uncertainty_at_maxpost`.
"""
# Loss function to optimize
def loss(logRs):
return - self(logRs)
res = minimize_scalar(loss, bounds=(self._logrmin, self._logrmax),
method='bounded')
return res.x if res.success else numpy.nan
if self._logrmax - res.x < eps:
res.success = False
if not res.success:
return numpy.nan, numpy.nan
e_logRs = self.uncertainty_at_maxpost(res.x) if calc_err else numpy.nan
return res.x, e_logRs

File diff suppressed because one or more lines are too long

View file

@ -18,6 +18,7 @@ realisation must have been split in advance by `run_split_halos`.
"""
import numpy
from datetime import datetime
from os.path import join
from mpi4py import MPI
try:
@ -43,52 +44,56 @@ nproc = comm.Get_size()
dumpdir = utils.dumpdir
loaddir = join(utils.dumpdir, "temp")
cols_collect = [("npart", I64), ("totpartmass", F64), ("logRs", F64),
("rho0", F64), ("rmin", F64), ("rmax", F64),
("r200", F64), ("r178", F64), ("r500", F64),
("m200", F64), ("m178", F64), ("m500", F64)]
cols_collect = [("npart", I64), ("totpartmass", F64), ("Rs", F64),
("rho0", F64), ("conc", F64), ("rmin", F64),
("rmax", F64), ("r200", F64), ("r500", F64),
("m200", F64), ("m500", F64)]
# NOTE later loop over sims too
Nsim = Nsims[0]
simpath = csiborgtools.io.get_sim_path(Nsim)
box = csiborgtools.units.BoxUnits(Nsnap, simpath)
jobs = csiborgtools.fits.split_jobs(utils.Nsplits, nproc)[rank]
for Nsplit in jobs:
print("Rank {} working on {}.".format(rank, Nsplit))
for icount, Nsplit in enumerate(jobs):
print("{}: rank {} working {} / {} jobs.".format(datetime.now(), rank,
icount + 1, len(jobs)))
parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
Nsplit, loaddir, Nsim, Nsnap, remove_split=False)
N = clumps.size
cols = [("index", I64), ("npart", I64), ("totpartmass", F64),
("logRs", F64), ("rho0", F64), ("rmin", F64), ("rmax", F64),
("r200", F64), ("r178", F64), ("r500", F64),
("m200", F64), ("m178", F64), ("m500", F64)]
("Rs", F64), ("rho0", F64), ("conc", F64),
("rmin", F64), ("rmax", F64),
("r200", F64), ("r500", F64), ("m200", F64), ("m500", F64)]
out = csiborgtools.utils.cols_to_structured(N, cols)
out["index"] = clumps["index"]
for n in range(N):
# Pick clump and its particles
xs = csiborgtools.fits.pick_single_clump(n, parts, part_clumps, clumps)
clump = csiborgtools.fits.Clump.from_arrays(*xs)
clump = csiborgtools.fits.Clump.from_arrays(*xs, rhoc=box.box_rhoc)
out["npart"][n] = clump.Npart
out["rmin"][n] = clump.rmin
out["rmax"][n] = clump.rmax
out["totpartmass"][n] = clump.total_particle_mass
out["r200"][n] = clump.r200
out["r178"][n] = clump.r178
out["r500"][n] = clump.r500
out["m200"][n] = clump.m200
out["m178"][n] = clump.m178
out["m500"][n] = clump.m200
# Spherical overdensity radii and masses
rs, ms = clump.spherical_overdensity_mass([200, 500])
out["r200"][n] = rs[0]
out["r500"][n] = rs[1]
out["m200"][n] = ms[0]
out["m500"][n] = ms[1]
# NFW profile fit
if clump.Npart > 10 and numpy.isfinite(out["r200"][n]):
# NOTE here it calculates the r200 again, but its fast so does not
# matter anyway.
nfwpost = csiborgtools.fits.NFWPosterior(clump)
logRs = nfwpost.maxpost_logRs()
logRs, __ = nfwpost.maxpost_logRs()
Rs = 10**logRs
if not numpy.isnan(logRs):
out["logRs"][n] = logRs
out["rho0"][n] = nfwpost.rho0_from_logRs(logRs)
out["Rs"][n] = Rs
out["rho0"][n] = nfwpost.rho0_from_Rs(Rs)
out["conc"][n] = out["r200"][n] / Rs
csiborgtools.io.dump_split(out, Nsplit, Nsim, Nsnap, dumpdir)

View file

@ -29,7 +29,7 @@ import utils
Nsims = [9844]
Nsnap = 1016
partcols = ["x", "y", "z", "M", "level"]
partcols = ["x", "y", "z", "vx", "vy", "vz", "M", "level"]
dumpdir = join(utils.dumpdir, "temp")
for Nsim in tqdm(Nsims):