Lagrangian patch + HMF calculation (#65)

* Rename lagpatch
* Fix old bug
* Fix small bug
* Add number of cells calculation
* Fix a small bug
* Rename column
* Move file
* Small changes
* Edit style
* Add plot script
* Add delta2ncells
* Add HMF calculation
* Move definition around
* Add HMF plot
* pep8
* Update HMF plotting routine
* Small edit
2025-06-08 18:01:11 +00:00 · 2023-06-01 14:45:52 +01:00 · 2023-06-01 14:45:52 +01:00 · dbf93b9416
commit dbf93b9416
parent f1dbe6f03f
13 changed files with 376 additions and 132 deletions
--- a/csiborgtools/fits/__init__.py
+++ b/csiborgtools/fits/__init__.py
@@ -12,6 +12,7 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-from .halo import Clump, Halo, dist_centmass  # noqa
+from .halo import (Clump, Halo, delta2ncells, dist_centmass,  # noqa
+                   number_counts)
 from .haloprofile import NFWPosterior, NFWProfile  # noqa
 from .utils import split_jobs  # noqa
--- a/csiborgtools/fits/halo.py
+++ b/csiborgtools/fits/halo.py
@@ -15,9 +15,10 @@
 """A clump object."""
 from abc import ABC

-from numba import jit
 import numpy

+from numba import jit
+

 class BaseStructure(ABC):
    """
@@ -337,3 +338,51 @@ def dist_centmass(clump):
    cmx, cmy, cmz = [numpy.average(xi, weights=mass) for xi in (x, y, z)]
    dist = ((x - cmx)**2 + (y - cmy)**2 + (z - cmz)**2)**0.5
    return dist, [cmx, cmy, cmz]
+
+
+@jit(nopython=True)
+def delta2ncells(delta):
+    """
+    Count the cells of `delta` whose value is strictly positive.
+
+    Parameters
+    ----------
+    delta : 3-dimensional array
+        Halo density field.
+
+    Returns
+    -------
+    ncells : int
+        Number of cells with a value greater than zero.
+    """
+    tot = 0
+    imax, jmax, kmax = delta.shape
+    for i in range(imax):
+        for j in range(jmax):
+            for k in range(kmax):
+                if delta[i, j, k] > 0:  # Negative cells are not counted.
+                    tot += 1
+    return tot
+
+
+@jit(nopython=True)
+def number_counts(x, bin_edges):
+    """
+    Count the samples falling in each half-open bin `[lower, upper)`.
+
+    Parameters
+    ----------
+    x : 1-dimensional array
+        Samples' values.
+    bin_edges : 1-dimensional array
+        Bin edges; each pair of consecutive edges delimits one bin.
+
+    Returns
+    -------
+    counts : 1-dimensional array of `float32`
+        Bin counts of shape `(bin_edges.size - 1, )`.
+    """
+    out = numpy.full(bin_edges.size - 1, numpy.nan, dtype=numpy.float32)
+    for i in range(bin_edges.size - 1):
+        out[i] = numpy.sum((x >= bin_edges[i]) & (x < bin_edges[i + 1]))
+    return out
--- a/csiborgtools/match/__init__.py
+++ b/csiborgtools/match/__init__.py
@@ -16,5 +16,4 @@ from .match import (ParticleOverlap, RealisationsMatcher,  # noqa
                    calculate_overlap, calculate_overlap_indxs,
                    cosine_similarity)
 from .nearest_neighbour import find_neighbour  # noqa
-from .num_density import binned_counts, number_density  # noqa
 from .utils import concatenate_parts  # noqa
--- a/csiborgtools/match/match.py
+++ b/csiborgtools/match/match.py
@@ -163,7 +163,7 @@ class RealisationsMatcher:
            print(f"{datetime.now()}: querying the KNN.", flush=True)
        match_indxs = radius_neighbours(
            catx.knn(in_initial=True), cat0.position(in_initial=True),
-            radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
+            radiusX=cat0["lagpatch_size"], radiusKNN=catx["lagpatch_size"],
            nmult=self.nmult, enforce_int32=True, verbose=verbose)

        # We next remove neighbours whose mass is too large/small.
@@ -419,7 +419,7 @@ class ParticleOverlap:
        delta : 3-dimensional array
        """
        nshift = read_nshift(smooth_kwargs)
-        cells = self.pos2cell(pos)
+        cells = pos2cell(pos, BOX_SIZE)
        # Check that minima and maxima are integers
        if not (mins is None and maxs is None):
            assert mins.dtype.char in numpy.typecodes["AllInteger"]
@@ -432,10 +432,10 @@ class ParticleOverlap:
            ncells = maxs - mins + 1  # To get the number of cells
        else:
            mins = [0, 0, 0]
-            ncells = BOX_SIZE
+            ncells = (BOX_SIZE, ) * 3

        # Preallocate and fill the array
-        delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
+        delta = numpy.zeros(ncells, dtype=numpy.float32)
        fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
        if smooth_kwargs is not None:
            gaussian_filter(delta, output=delta, **smooth_kwargs)
--- a/csiborgtools/match/num_density.py
+++ b/csiborgtools/match/num_density.py
@@ -1,116 +0,0 @@
-# Copyright (C) 2022 Richard Stiskalek
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the
-# Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
-# Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-"""
-Calculation of number density functions.
-"""
-import numpy
-
-
-def binned_counts(x, bins):
-    """
-    Calculate number of samples in bins.
-
-    Parameters
-    ----------
-    x : 1-dimensional array
-        Samples' values.
-    bins : 1-dimensional array
-        Bin edges of shape `(n_edges, )`.
-
-    Returns
-    -------
-    centres : 1-dimensional array
-        Bin centres of shape `(n_edges - 1, )`.
-    counts : 1-dimensional array
-        Bin counts of shape `(n_edges - 1, )`.
-    """
-    if not isinstance(bins, numpy.ndarray) and bins.ndim == 1:
-        raise TypeError("`bins` must a 1-dimensional array.")
-
-    n_bins = bins.size
-    # Bin centres
-    centres = numpy.asarray(
-        [0.5 * (bins[i + 1] + bins[i]) for i in range(n_bins - 1)])
-    # Bin counts
-    out = numpy.full(n_bins - 1, numpy.nan, dtype=int)
-    for i in range(n_bins - 1):
-        out[i] = numpy.sum((x >= bins[i]) & (x < bins[i + 1]))
-    return centres, out
-
-
-def number_density(data, feat, bins, max_dist, to_log10, return_counts=False):
-    """
-    Calculate volume-limited number density of a feature `feat` from array
-    `data`, normalised also by the bin width.
-
-    Parameters
-    ----------
-    data : structured array
-        Input array of halos.
-    feat : str
-        Parameter whose number density to calculate.
-    bins : 1-dimensional array
-        Bin edges. Note that if `to_log10` then the edges must be specified
-        in the logarithmic space, not linear.
-    max_dist : float
-        Maximum radial distance of the volume limited sample.
-    to_log10 : bool
-        Whether to take a logarithm of base 10 of the feature. If so, then the
-        bins must also be logarithmic.
-    return_counts : bool, optional
-        Whether to also return number counts in each bin. By default `False`.
-
-
-    Returns
-    -------
-    centres : 1-dimensional array
-        Bin centres of shape `(n_edges - 1, )`. If `to_log10` then converts the
-        bin centres back to linear space.
-    nd : 1-dimensional array
-        Number density of shape `(n_edges - 1, )`.
-    nd_err : 1-dimensional array
-        Poissonian uncertainty of `nd` of shape `(n_edges - 1, )`.
-    counts: 1-dimensional array, optional
-        Counts in each bin of shape `(n_edges - 1, )`. Returned only if
-        `return_counts`.
-    """
-    # Extract the param and optionally convert to log10
-    x = data[feat]
-    x = numpy.log10(x) if to_log10 else x
-    # Get only things within distance from the origin
-    rdist = (data["peak_x"]**2 + data["peak_y"]**2 + data["peak_z"]**2)**0.5
-    x = x[rdist < max_dist]
-
-    # Make sure bins equally spaced
-    dbins = numpy.diff(bins)
-    dbin = dbins[0]
-    if not numpy.alltrue(dbins == dbin):
-        raise ValueError("Bins must be equally spaced. Currently `{}`."
-                         .format(bins))
-
-    # Encompassed volume around the origin
-    volume = 4 * numpy.pi / 3 * max_dist**3
-    # Poissonian statistics
-    bin_centres, counts = binned_counts(x, bins)
-    nd = counts / volume / dbin
-    nd_err = counts**0.5 / volume / dbin
-    # Convert bins to linear space if log10
-    if to_log10:
-        bin_centres = 10**bin_centres
-
-    out = (bin_centres, nd, nd_err)
-    if return_counts:
-        out += counts
-    return out
--- a/csiborgtools/read/box_units.py
+++ b/csiborgtools/read/box_units.py
@@ -27,7 +27,7 @@ from .readsim import ParticleReader
 CONV_NAME = {
    "length": ["x", "y", "z", "peak_x", "peak_y", "peak_z", "Rs", "rmin",
               "rmax", "r200c", "r500c", "r200m", "x0", "y0", "z0",
-               "lagpatch"],
+               "lagpatch_size"],
    "velocity": ["vx", "vy", "vz"],
    "mass": ["mass_cl", "totpartmass", "m200c", "m500c", "mass_mmain", "M",
             "m200m"],
--- a/csiborgtools/read/halo_cat.py
+++ b/csiborgtools/read/halo_cat.py
@@ -540,7 +540,7 @@ class HaloCatalogue(BaseCSiBORG):

        if not rawdata:
            if with_lagpatch:
-                self._data = self._data[numpy.isfinite(self['lagpatch'])]
+                self._data = self._data[numpy.isfinite(self["lagpatch_size"])]
            # Flip positions and convert from code units to cMpc. Convert M too
            flip_cols(self._data, "x", "z")
            for p in ("x", "y", "z"):
@@ -551,7 +551,7 @@ class HaloCatalogue(BaseCSiBORG):
            self._data = self.box.convert_from_box(self._data, names)

            if load_initial:
-                names = ["x0", "y0", "z0", "lagpatch"]
+                names = ["x0", "y0", "z0", "lagpatch_size"]
                self._data = self.box.convert_from_box(self._data, names)

            if bounds is not None:
--- a/csiborgtools/read/overlap_summary.py
+++ b/csiborgtools/read/overlap_summary.py
@@ -276,10 +276,10 @@ class PairOverlap:
        if norm_kind == "r200c":
            norm = self.cat0("r200c")
        if norm_kind == "ref_patch":
-            norm = self.cat0("lagpatch")
+            norm = self.cat0("lagpatch_size")
        if norm_kind == "sum_patch":
-            patch0 = self.cat0("lagpatch")
-            patchx = self.catx("lagpatch")
+            patch0 = self.cat0("lagpatch_size")
+            patchx = self.catx("lagpatch_size")
            norm = [None] * len(self)
            for i, ind in enumerate(self["match_indxs"]):
                norm[i] = patch0[i] + patchx[ind]
--- a/csiborgtools/read/paths.py
+++ b/csiborgtools/read/paths.py
@@ -387,6 +387,28 @@ class Paths:
        fname = f"{kind}_{MAS}_{str(nsim).zfill(5)}_grid{grid}.npy"
        return join(fdir, fname)

+    def halo_counts(self, simname, nsim):
+        """
+        Path to the `.npz` file with the binned halo counts of a simulation.
+
+        Parameters
+        ----------
+        simname : str
+            Simulation name, e.g. `csiborg` or `quijote` (not validated here).
+        nsim : int
+            IC realisation index.
+
+        Returns
+        -------
+        path : str
+        """
+        fdir = join(self.postdir, "HMF")
+        if not isdir(fdir):  # The "HMF" folder is created on first use.
+            makedirs(fdir)
+            warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
+        fname = f"halo_counts_{simname}_{str(nsim).zfill(5)}.npz"
+        return join(fdir, fname)
+
    def cross_nearest(self, simname, run, nsim=None, nobs=None):
        """
        Path to the files containing distance from a halo in a reference