JAX and fix conc (#6)

* change to log10 initlogRs * add uncertainty * add check if hessian negative * update TODO * update TODO * output the error too * save e_logRs * add concentration calculation * calcul concentration * missed comma in a list * Update TODO * fix bug * add box units and pretty status * make uncertainty optional * add BIC function * remove BIC again * add new overdensity calculation * change defualt values to max dist and mass * change to r200 * new halo find * speed up the calculation * add commented fucn * update TODO * add check whether too close to outside boundary * Update TODO * extract velocity as well * calculate m200 and m500 * update nb * update TODO
2025-06-30 19:41:12 +00:00 · 2022-11-05 21:17:05 +00:00 · 2022-11-05 21:17:05 +00:00 · 7f58b1f78c
commit 7f58b1f78c
parent ee84c12a55
6 changed files with 278 additions and 594 deletions
--- a/README.md
+++ b/README.md
@ -1,11 +1,12 @@
 # CSiBORG tools

 ## :scroll: Short-term TODO
- [x] Calculate $M_{\rm vir}, R_{\rm vir}, c$ from $R_s, \rho_0, \ldots$
- [x] In `NFWPosterior` correct for the radius in which particles are fitted.
- [x] Calculate $M_{\rm 500c}$ by sphere shrinking
- [x] Change to log10 of the scale factor
- [ ] Calculate uncertainty on $R_{\rm s}$, switch to `JAX` and get gradients.
+- [ ] Calculate mass functions
+- [ ] Add functions for converting the output file to box units.
+- [ ] Calculate catalogues for all realisations.
+- [x] Verify the bulk flow of particles and the clump
+- [x] Check why for some halos $M_{500c} > M_{200c}$
+


 ## :hourglass: Long-term TODO
@ -13,4 +14,5 @@
 - [ ] Calculate the cross-correlation in CSiBORG. Should see the scale of the constraints?


-## :bulb: Open questions
+## :bulb: Open questions
+- Propagate uncertainty of $\log R_{\rm s}$ to concentration
--- a/csiborgtools/fits/halo.py
+++ b/csiborgtools/fits/halo.py
@ -18,7 +18,6 @@ Tools for splitting the particles and a clump object.


 import numpy
-from scipy.optimize import minimize_scalar
 from os import remove
 from warnings import warn
 from os.path import join
@ -467,7 +466,7 @@ class Clump:

    @property
    def rhoc(self):
-        """
+        r"""
        The critical density :math:`\rho_c` at this snapshot in box units.

        Returns
@ -510,7 +509,7 @@ class Clump:
        """
        return numpy.mean(self.pos + self.clump_pos, axis=0)

-    def enclosed_spherical_mass(self, rmax, rmin=None):
+    def enclosed_spherical_mass(self, rmax, rmin=0):
        """
        The enclosed spherical mass between two radii. All quantities remain
        in the box units.
@ -520,152 +519,106 @@ class Clump:
        rmax : float
            The maximum radial distance.
        rmin : float, optional
-            The minimum radial distance. By default the radial distance of the
-            closest particle.
+            The minimum radial distance. By default 0.

        Returns
        -------
        M_enclosed : float
            The enclosed mass.
        """
-        rmin = self.rmin if rmin is None else rmin
        return numpy.sum(self.m[(self.r >= rmin) & (self.r <= rmax)])

-    def enclosed_spherical_density(self, rmax, rmin=None):
+    def enclosed_spherical_volume(self, rmax, rmin=0):
        """
-        The enclosed spherical density between two radii. All quantities
-        remain in box units.
+        Enclosed volume within two radii.

        Parameters
        ----------
        rmax : float
            The maximum radial distance.
        rmin : float, optional
-            The minimum radial distance. By default the radial distance of the
-            closest particle.
+            The minimum radial distance. By default 0.

        Returns
        -------
-        rho_enclosed : float
-            The enclosed density.
+        vol : float
+            The enclosed volume.
        """
-        rmin = self.rmin if rmin is None else rmin
-        M = self.enclosed_spherical_mass(rmax, rmin)
-        V = 4 * numpy.pi / 3 * (rmax**3 - rmin**3)
-        return M / V
+        return 4 * numpy.pi / 3 * (rmax**3 - rmin**3)

-    def radius_enclosed_overdensity(self, delta):
+    def spherical_overdensity_mass(self, delta, n_particles_min=10):
        r"""
-        Radius of where the mean enclosed spherical density reaches a multiple
-        of the critical radius at a given redshift `self.rho_c`. Returns
-        `numpy.nan` if the fit does not converge. Note that `rhoc` must be in
-        box units!
+        Spherical overdensity mass and radius. The mass is defined as the
+        enclosed mass within a radius of where the mean enclosed spherical
+        density reaches a multiple of the critical radius at a given redshift
+        `self.rho_c`.
+
+        Starts from the furthest particle, working its way inside the halo
+        through an ordered list of particles. The corresponding values is the
+        radial distance of the first particle whose addition sufficiently
+        increases the mean density.
+

        Parameters
        ----------
-        delta : int or float
+        delta : list of int or float
            The :math:`\delta_{\rm x}` parameters where :math:`\mathrm{x}` is
            the overdensity multiple.
+        n_particles_min : int
+            The minimum number of enclosed particles for a radius to be
+            considered trustworthy.

        Returns
        -------
        rx : float
            The radius where the enclosed density reaches required value.
+        mx :  float
+            The corresponding spherical enclosed mass.
        """
-        # Loss function to minimise
-        def loss(r):
-            return abs(self.enclosed_spherical_density(r, self.rmin)
-                       - delta * self.rhoc)
+        # If single `delta` turn to list
+        delta = [delta] if isinstance(delta, (float, int)) else delta
+        # If given a list or tuple turn to array
+        _istlist = isinstance(delta, (list, tuple))
+        delta = numpy.asarray(delta, dtype=float) if _istlist else delta

-        res = minimize_scalar(loss, bounds=(self.rmin, self.rmax),
-                              method='bounded')
-        return res.x if res.success else numpy.nan
+        # Ordering of deltas
+        order_delta = numpy.argsort(delta)
+        # Sort the particles
+        order_particles = numpy.argsort(self.r)[::-1]
+        # Density to aim for
+        n_delta = delta.size
+        target_density = delta * self.rhoc

-    @property
-    def r200(self):
-        r"""
-        The radius at which the mean spherical density reaches 200 times
-        the critical density, :math:`R_{200c}`. Returns `numpy.nan` if the
-        estimate fails.
+        # The sum of particle masses, starting from the outside
+        # Adds the furtherst particle ensure that the 0th index is tot mass
+        cummass_ordered = (self.total_particle_mass
+                           + self.m[order_particles][0]
+                           - numpy.cumsum(self.m[order_particles]))
+        # Enclosed volumes at particle radii
+        volumes = self.enclosed_spherical_volume(self.r[order_particles])
+        densities = cummass_ordered / volumes

-        Returns
-        -------
-        r200 : float
-            The R200c radius
-        """
-        return self.radius_enclosed_overdensity(200)
+        # Pre-allocate arrays
+        rfound = numpy.full_like(delta, numpy.nan)
+        mfound = numpy.full_like(rfound, numpy.nan)

-    @property
-    def r178(self):
-        r"""
-        The radius at which the mean spherical density reaches 178 times
-        the critical density, :math:`R_{178c}`. Returns `numpy.nan` if the
-        estimate fails.
+        for n in order_delta:
+            overdense_mask = densities > target_density[n]

-        Returns
-        -------
-        r178 : float
-            The R178c radius
-        """
-        return self.radius_enclosed_overdensity(178)
+            # Enforce that we have at least several particles enclosed
+            if numpy.sum(overdense_mask) < n_particles_min:
+                continue
+            # The outermost particle radius where the overdensity is achieved
+            k = numpy.where(overdense_mask)[0][0]
+            rfound[n] = self.r[order_particles][k]
+            mfound[n] = cummass_ordered[k]

-    @property
-    def r500(self):
-        r"""
-        The radius at which the mean spherical density reaches 500 times
-        the critical density, :math:`R_{500c}`. Returns `numpy.nan` if the
-        estimate fails.
+        # If only one delta return simply numbers
+        if n_delta == 1:
+            rfound = rfound[0]
+            mfound = mfound[0]

-        Returns
-        -------
-        r500 : float
-            The R500c radius
-        """
-        return self.radius_enclosed_overdensity(500)
-
-    @property
-    def m200(self):
-        r"""
-        The mass enclosed within the :math:`R_{200c}` region, obtained from
-        `self.r200`. Returns `numpy.nan` if the radius estimate fails.
-
-        Returns
-        -------
-        m200 : float
-            The M200 mass
-        """
-        r200 = self.radius_enclosed_overdensity(200)
-        return self.enclosed_spherical_mass(r200)
-
-    @property
-    def m178(self):
-        r"""
-        The mass enclosed within the :math:`R_{178c}` region, obtained from
-        `self.r178`. This is approximately the virial mass, though this notion
-        depends on the dynamical state of the clump. Returns `numpy.nan` if
-        the radius estimate fails.
-
-        Returns
-        -------
-        m178 : float
-            The M178 mass
-        """
-        r178 = self.radius_enclosed_overdensity(178)
-        return self.enclosed_spherical_mass(r178)
-
-    @property
-    def m500(self):
-        r"""
-        The mass enclosed within the :math:`R_{500c}` region, obtained from
-        `self.r500`. Returns `numpy.nan` if the radius estimate fails.
-
-        Returns
-        -------
-        m500 : float
-            The M500 mass
-        """
-        r500 = self.radius_enclosed_overdensity(500)
-        return self.enclosed_spherical_mass(r500)
+        return rfound, mfound

    @classmethod
    def from_arrays(cls, particles, clump, rhoc=None):
--- a/csiborgtools/fits/haloprofile.py
+++ b/csiborgtools/fits/haloprofile.py
@ -18,11 +18,34 @@ Halo profiles functions and posteriors.


 import numpy
+from jax import numpy as jnumpy
+from jax import grad
 from scipy.optimize import minimize_scalar
 from scipy.stats import uniform
 from .halo import Clump


+def jax_switch(f1, f2, use_jax):
+    """
+    Return function `f1` if `use_jax` else return `f2`.
+
+    Parameters
+    ----------
+    f1 : function
+        The JAX function.
+    f2 : function
+        The non-JAX function.
+    use_jax : bool
+        Whether to use the JAX function.
+
+    Returns
+    -------
+    chosen_func : function
+        The chosen function.
+    """
+    return f1 if use_jax else f2
+
+
 class NFWProfile:
    r"""
    The Navarro-Frenk-White (NFW) density profile defined as
@ -66,7 +89,7 @@ class NFWProfile:
        return rho0 / (x * (1 + x)**2)

    @staticmethod
-    def logprofile(r, Rs, rho0):
+    def logprofile(r, Rs, rho0, use_jax=False):
        r"""
        Natural logarithm of the halo profile evaluated at :math:`r`.

@ -78,17 +101,20 @@ class NFWProfile:
            Scale radius :math:`R_s`.
        rho0 : float
            NFW density parameter :math:`\rho_0`.
+        use_jax : bool, optional
+            Whether to use `JAX` expressions. By default `False`.

        Returns
        -------
        logdensity : float or 1-dimensional array
            Logarithmic density of the NFW profile at :math:`r`.
        """
+        log = jax_switch(jnumpy.log, numpy.log, use_jax)
        x = r / Rs
-        return numpy.log(rho0) - numpy.log(x) - 2 * numpy.log(1 + x)
+        return log(rho0) - log(x) - 2 * log(1 + x)

    @staticmethod
-    def enclosed_mass(r, Rs, rho0):
+    def enclosed_mass(r, Rs, rho0, use_jax=False):
        r"""
        Enclosed mass  of a NFW profile in radius :math:`r`.

@ -100,6 +126,8 @@ class NFWProfile:
            Scale radius :math:`R_s`.
        rho0 : float
            NFW density parameter :math:`\rho_0`.
+        use_jax : bool, optional
+            Whether to use `JAX` expressions. By default `False`.

        Returns
        -------
@ -107,11 +135,11 @@ class NFWProfile:
            The enclosed mass.
        """
        x = r / Rs
-        out = numpy.log(1 + x) - x / (1 + x)
+        out = jax_switch(jnumpy.log, numpy.log, use_jax)(1 + x) - x / (1 + x)
        return 4 * numpy.pi * rho0 * Rs**3 * out

-    def bounded_enclosed_mass(self, rmin, rmax, Rs, rho0):
-        """
+    def bounded_enclosed_mass(self, rmin, rmax, Rs, rho0, use_jax=False):
+        r"""
        Calculate the enclosed mass between :math:`r_min <= r <= r_max`.

        Parameters
@ -124,14 +152,16 @@ class NFWProfile:
            Scale radius :math:`R_s`.
        rho0 : float
            NFW density parameter :math:`\rho_0`.
+        use_jax : bool, optional
+            Whether to use `JAX` expressions. By default `False`.

        Returns
        -------
        M : float
            The enclosed mass within the radial range.
        """
-        return (self.enclosed_mass(rmax, Rs, rho0)
-                - self.enclosed_mass(rmin, Rs, rho0))
+        return (self.enclosed_mass(rmax, Rs, rho0, use_jax)
+                - self.enclosed_mass(rmin, Rs, rho0, use_jax))

    def pdf(self, r, Rs, rmin, rmax):
        r"""
@ -319,7 +349,7 @@ class NFWPosterior(NFWProfile):
        self._clump = clump
        # The minimum separation
        rmin = self.clump.rmin
-        rmax = self.clump.r200
+        rmax, __ = self.clump.spherical_overdensity_mass(200)
        # Set the distances
        self._rmin = rmin
        self._rmax = rmax
@ -340,7 +370,7 @@ class NFWPosterior(NFWProfile):
        gamma = 4 * numpy.pi * self.r**2 * self.m * self.Npart
        self._ll0 = numpy.sum(numpy.log(gamma)) - self.Npart * self._logMtot

-    def rho0_from_logRs(self, logRs):
+    def rho0_from_Rs(self, Rs):
        r"""
        Obtain :math:`\rho_0` of the NFW profile from the integral constraint
        on total mass. Calculated as the ratio between the total particle mass
@ -357,8 +387,7 @@ class NFWPosterior(NFWProfile):
            The NFW density parameter.
        """
        Mtot = numpy.exp(self._logMtot)
-        Mnfw_norm = self.bounded_enclosed_mass(self.rmin, self.rmax,
-                                               10**logRs, 1)
+        Mnfw_norm = self.bounded_enclosed_mass(self.rmin, self.rmax, Rs, 1)
        return Mtot / Mnfw_norm

    def logprior(self, logRs):
@ -379,7 +408,7 @@ class NFWPosterior(NFWProfile):
            return - numpy.infty
        return - self._logprior_volume

-    def loglikelihood(self, logRs):
+    def loglikelihood(self, logRs, use_jax=False):
        """
        Logarithmic likelihood.

@ -387,6 +416,8 @@ class NFWPosterior(NFWProfile):
        ----------
        logRs : float
            Logarithmic scale factor in units matching the coordinates.
+        use_jax : bool, optional
+            Whether to use `JAX` expressions. By default `False`.

        Returns
        -------
@ -394,11 +425,14 @@ class NFWPosterior(NFWProfile):
            The logarithmic likelihood.
        """
        Rs = 10**logRs
+        log = jnumpy.log if use_jax else numpy.log
        # Expected enclosed mass from a NFW
        Mnfw = self.bounded_enclosed_mass(self.rmin, self.rmax,
-                                          Rs, 1)
-        ll = self._ll0 + numpy.sum(self.logprofile(self.r, Rs, 1))
-        return ll - self.Npart * numpy.log(Mnfw)
+                                          Rs, 1, use_jax)
+        ll = jax_switch(jnumpy.sum, numpy.sum, use_jax)(self.logprofile(
+            self.r, Rs, 1, use_jax))
+        ll += self._ll0
+        return ll - self.Npart * log(Mnfw)

    @property
    def initlogRs(self):
@ -415,9 +449,9 @@ class NFWPosterior(NFWProfile):
        bins = numpy.linspace(self.rmin, self.rmax,
                              self._binsguess)
        counts, edges = numpy.histogram(self.r, bins)
-        return numpy.log(edges[numpy.argmax(counts)])
+        return numpy.log10(edges[numpy.argmax(counts)])

-    def __call__(self, logRs):
+    def __call__(self, logRs, use_jax=False):
        """
        Logarithmic posterior. Sum of the logarithmic prior and likelihood.

@ -425,6 +459,8 @@ class NFWPosterior(NFWProfile):
        ----------
        logRs : float
            Logarithmic scale factor in units matching the coordinates.
+        use_jax : bool, optional
+            Whether to use `JAX` expressions. By default `False`.

        Returns
        -------
@ -434,21 +470,73 @@ class NFWPosterior(NFWProfile):
        lp = self.logprior(logRs)
        if not numpy.isfinite(lp):
            return - numpy.infty
-        return self.loglikelihood(logRs) + lp
+        return self.loglikelihood(logRs, use_jax) + lp

-    def maxpost_logRs(self):
+    def uncertainty_at_maxpost(self, logRs_max):
        r"""
-        Maximum a-posterio estimate of the scale radius :math:`\log R_{\rm s}`.
-        Returns the scale radius if the fit converged, otherwise `numpy.nan`.
+        Calculate Gaussian approximation of the uncertainty at `logRs_max`, the
+        maximum a-posteriori esitmate. This is the square root of the negative
+        inverse 2nd derivate of the logarithimic posterior with respect to the
+        logarithm of the scale factor. This is only valid `logRs_max` is the
+        maximum of the posterior!
+
+        This uses `JAX`. The functions should be compiled but unless there is
+        a need for more speed this is fine as it is.
+
+        Parameters
+        ----------
+        logRs_max : float
+            Position :math:`\log R_{\rm s}` to evaluate the uncertainty. Must
+            be the maximum.

        Returns
        -------
-        res : float
-            The scale radius.
+        uncertainty : float
+            The uncertainty calculated as outlined above.
+        """
+        def f(x):
+            return self(x, use_jax=True)
+
+        # Evaluate the second derivative
+        h = grad(grad(f))(logRs_max)
+        h = float(h)
+        if not h < 0:
+            return numpy.nan
+        return (- 1 / h)**0.5
+
+    def maxpost_logRs(self, calc_err=False, eps=1e-4):
+        r"""
+        Maximum a-posterio estimate of the scale radius :math:`\log R_{\rm s}`.
+        Returns the scale radius if the fit converged, otherwise `numpy.nan`.
+        Checks whether :math:`log r_{\rm max} / R_{\rm s} > \epsilon`, where
+        to ensure that the scale radius is not too close to the boundary which
+        occurs if the fit fails.
+
+        Parameters
+        ----------
+        calc_err : bool, optional
+            Optional toggle to calculate the uncertainty on the scale radius.
+            By default false.
+
+        Returns
+        -------
+        logRs: float
+            The log scale radius.
+        uncertainty : float
+            The uncertainty on the scale radius. Calculated following
+            `self.uncertainty_at_maxpost`.
        """
        # Loss function to optimize
        def loss(logRs):
            return - self(logRs)
+
        res = minimize_scalar(loss, bounds=(self._logrmin, self._logrmax),
                              method='bounded')
-        return res.x if res.success else numpy.nan
+
+        if self._logrmax - res.x < eps:
+            res.success = False
+
+        if not res.success:
+            return numpy.nan, numpy.nan
+        e_logRs = self.uncertainty_at_maxpost(res.x) if calc_err else numpy.nan
+        return res.x, e_logRs
--- a/scripts/plot_concentration.ipynb
+++ b/scripts/plot_concentration.ipynb
--- a/scripts/run_fit_halos.py
+++ b/scripts/run_fit_halos.py
@ -18,6 +18,7 @@ realisation must have been split in advance by `run_split_halos`.
 """

 import numpy
+from datetime import datetime
 from os.path import join
 from mpi4py import MPI
 try:
@ -43,52 +44,56 @@ nproc = comm.Get_size()

 dumpdir = utils.dumpdir
 loaddir = join(utils.dumpdir, "temp")
-cols_collect = [("npart", I64), ("totpartmass", F64), ("logRs", F64),
-                ("rho0", F64), ("rmin", F64), ("rmax", F64),
-                ("r200", F64), ("r178", F64), ("r500", F64),
-                ("m200", F64), ("m178", F64), ("m500", F64)]
+cols_collect = [("npart", I64), ("totpartmass", F64), ("Rs", F64),
+                ("rho0", F64), ("conc", F64), ("rmin", F64),
+                ("rmax", F64), ("r200", F64), ("r500", F64),
+                ("m200", F64), ("m500", F64)]

 # NOTE later loop over sims too
 Nsim = Nsims[0]
+simpath = csiborgtools.io.get_sim_path(Nsim)
+box = csiborgtools.units.BoxUnits(Nsnap, simpath)

 jobs = csiborgtools.fits.split_jobs(utils.Nsplits, nproc)[rank]
-for Nsplit in jobs:
-    print("Rank {} working on {}.".format(rank, Nsplit))
+for icount, Nsplit in enumerate(jobs):
+    print("{}: rank {} working {} / {} jobs.".format(datetime.now(), rank,
+                                                     icount + 1, len(jobs)))
    parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
        Nsplit, loaddir, Nsim, Nsnap, remove_split=False)

    N = clumps.size
    cols = [("index", I64), ("npart", I64), ("totpartmass", F64),
-            ("logRs", F64), ("rho0", F64), ("rmin", F64), ("rmax", F64),
-            ("r200", F64), ("r178", F64), ("r500", F64),
-            ("m200", F64), ("m178", F64), ("m500", F64)]
+            ("Rs", F64), ("rho0", F64), ("conc", F64),
+            ("rmin", F64), ("rmax", F64),
+            ("r200", F64), ("r500", F64), ("m200", F64), ("m500", F64)]
    out = csiborgtools.utils.cols_to_structured(N, cols)
    out["index"] = clumps["index"]

    for n in range(N):
        # Pick clump and its particles
        xs = csiborgtools.fits.pick_single_clump(n, parts, part_clumps, clumps)
-        clump = csiborgtools.fits.Clump.from_arrays(*xs)
+        clump = csiborgtools.fits.Clump.from_arrays(*xs, rhoc=box.box_rhoc)
        out["npart"][n] = clump.Npart
        out["rmin"][n] = clump.rmin
        out["rmax"][n] = clump.rmax
        out["totpartmass"][n] = clump.total_particle_mass
-        out["r200"][n] = clump.r200
-        out["r178"][n] = clump.r178
-        out["r500"][n] = clump.r500
-        out["m200"][n] = clump.m200
-        out["m178"][n] = clump.m178
-        out["m500"][n] = clump.m200
+
+        # Spherical overdensity radii and masses
+        rs, ms = clump.spherical_overdensity_mass([200, 500])
+        out["r200"][n] = rs[0]
+        out["r500"][n] = rs[1]
+        out["m200"][n] = ms[0]
+        out["m500"][n] = ms[1]

        # NFW profile fit
        if clump.Npart > 10 and numpy.isfinite(out["r200"][n]):
-            # NOTE here it calculates the r200 again, but its fast so does not
-            # matter anyway.
            nfwpost = csiborgtools.fits.NFWPosterior(clump)
-            logRs = nfwpost.maxpost_logRs()
+            logRs, __ = nfwpost.maxpost_logRs()
+            Rs = 10**logRs
            if not numpy.isnan(logRs):
-                out["logRs"][n] = logRs
-                out["rho0"][n] = nfwpost.rho0_from_logRs(logRs)
+                out["Rs"][n] = Rs
+                out["rho0"][n] = nfwpost.rho0_from_Rs(Rs)
+                out["conc"][n] = out["r200"][n] / Rs

    csiborgtools.io.dump_split(out, Nsplit, Nsim, Nsnap, dumpdir)

--- a/scripts/run_split_halos.py
+++ b/scripts/run_split_halos.py
@ -29,7 +29,7 @@ import utils

 Nsims = [9844]
 Nsnap = 1016
-partcols = ["x", "y", "z", "M", "level"]
+partcols = ["x", "y", "z", "vx", "vy", "vz", "M", "level"]
 dumpdir = join(utils.dumpdir, "temp")

 for Nsim in tqdm(Nsims):