Pantheon+ covariance (#138)

* Add import * Add PP reading attempt * Remove odd step requirement * Update script * Add CF4 * Add CF4 * Fix path bug * Update scripts * Update nb * Update script
2025-07-12 00:53:02 +00:00 · 2024-07-25 14:53:18 +01:00 · 2024-07-25 14:53:18 +01:00 · b22da9893f
commit b22da9893f
parent 8d49aa071b
10 changed files with 137 additions and 289 deletions
--- a/csiborgtools/flow/flow_model.py
+++ b/csiborgtools/flow/flow_model.py
@ -22,7 +22,6 @@ References
 [1] https://arxiv.org/abs/1912.09383.
 """
 from abc import ABC, abstractmethod
-from warnings import warn

 import numpy as np
 import numpyro
@ -86,15 +85,6 @@ class DataLoader:
        self._field_rdist, self._los_density, self._los_velocity, self._rmax = self._read_field(  # noqa
            simname, ksim, catalogue, ksmooth, paths)

-        if len(self._field_rdist) % 2 == 0:
-            if verbose:
-                warn(f"The number of radial steps is even. Skipping the first "
-                     f"step at {self._field_rdist[0]} because Simpson's rule "
-                     "requires an odd number of steps.")
-            self._field_rdist = self._field_rdist[1:]
-            self._los_density = self._los_density[..., 1:]
-            self._los_velocity = self._los_velocity[..., 1:]
-
        if len(self._cat) != self._los_density.shape[1]:
            raise ValueError("The number of objects in the catalogue does not "
                             "match the number of objects in the field.")
@ -139,7 +129,7 @@ class DataLoader:
        # But some CF4 delta values are < -1. Check that CF4 really reports
        # this.
        if simname in ["CF4", "CF4gp"]:
-            self._los_density = np.clip(self._los_density, 1e-5, None,)
+            self._los_density = np.clip(self._los_density, 1e-2, None,)

        # Lilow+2024 outside of the range data is NaN. Replace it with some
        # finite values. This is OK because the PV tracers are not so far.
@ -523,7 +513,9 @@ class BaseFlowValidationModel(ABC):
        self.r_xrange = r_xrange
        self.r2_xrange = r2_xrange

-        z_xrange = z_at_value(cosmo.comoving_distance, r_xrange * u.Mpc)
+        # Require `zmin` < 0 because the first radial step is likely at 0.
+        z_xrange = z_at_value(
+            cosmo.comoving_distance, r_xrange * u.Mpc, zmin=-0.01)
        mu_xrange = cosmo.distmod(z_xrange).value
        self.z_xrange = jnp.asarray(z_xrange)
        self.mu_xrange = jnp.asarray(mu_xrange)
--- a/csiborgtools/params.py
+++ b/csiborgtools/params.py
@ -124,4 +124,5 @@ paths_glamdring = {
    "borg2_dir": "/mnt/extraspace/rstiskalek/BORG_STOPYRA_2023",
    "tng300_1_dir": "/mnt/extraspace/rstiskalek/TNG300-1/",
    "aux_cat_dir": "/mnt/extraspace/rstiskalek/catalogs",
+    "CF4_dir": "/mnt/extraspace/rstiskalek/catalogs/CF4",
    }
--- a/csiborgtools/read/init.py
+++ b/csiborgtools/read/init.py
@ -22,5 +22,5 @@ from .snapshot import (CSiBORG1Snapshot, CSiBORG2Snapshot, QuijoteSnapshot,
                       Carrick2015Field, Lilow2024Field)                        # noqa
 from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies,           # noqa
                  TwoMPPGroups, ObservedCluster, match_array_to_no_masking,     # noqa
-                  cols_to_structured)                                           # noqa
+                  cols_to_structured, read_pantheonplus_data)                   # noqa
 from .paths import Paths                                                        # noqa
--- a/csiborgtools/read/obs.py
+++ b/csiborgtools/read/obs.py
@ -19,15 +19,14 @@ from abc import ABC, abstractmethod
 from os.path import join
 from warnings import warn

-import numpy
+import numpy as np
 from astropy import units
 from astropy.coordinates import SkyCoord
-from astropy.io import fits
 from astropy.cosmology import FlatLambdaCDM
+from astropy.io import fits
 from scipy import constants

-from ..utils import radec_to_cartesian
-
+from ..utils import fprint, radec_to_cartesian

 ###############################################################################
 #                           Text survey base class                            #
@ -107,11 +106,11 @@ class TwoMPPGalaxies(TextSurvey):
        from scipy.constants import c

        # Read the catalogue and select non-fake galaxies
-        cat = numpy.genfromtxt(fpath, delimiter="|", )
+        cat = np.genfromtxt(fpath, delimiter="|", )
        cat = cat[cat[:, 12] == 0, :]
        # Pre=allocate array and fillt it
-        cols = [("RA", numpy.float64), ("DEC", numpy.float64),
-                ("Ksmag", numpy.float64), ("ZCMB", numpy.float64)]
+        cols = [("RA", np.float64), ("DEC", np.float64),
+                ("Ksmag", np.float64), ("ZCMB", np.float64)]
        data = cols_to_structured(cat.shape[0], cols)
        data["RA"] = cat[:, 1]
        data["DEC"] = cat[:, 2]
@ -151,11 +150,11 @@ class TwoMPPGroups(TextSurvey):
        self._set_data(fpath)

    def _set_data(self, fpath):
-        cat = numpy.genfromtxt(fpath, delimiter="|", )
+        cat = np.genfromtxt(fpath, delimiter="|", )
        # Pre-allocate and fill the array
-        cols = [("RA", numpy.float64), ("DEC", numpy.float64),
-                ("K2mag", numpy.float64), ("Rich", numpy.int64),
-                ("sigma", numpy.float64)]
+        cols = [("RA", np.float64), ("DEC", np.float64),
+                ("K2mag", np.float64), ("Rich", np.int64),
+                ("sigma", np.float64)]
        data = cols_to_structured(cat.shape[0], cols)
        data["K2mag"] = cat[:, 3]
        data["Rich"] = cat[:, 4]
@ -238,7 +237,7 @@ class FitsSurvey(ABC):
    def masked_size(self):
        if self.selection_mask is None:
            return self.size
-        return numpy.sum(self.selection_mask)
+        return np.sum(self.selection_mask)

    @property
    def selection_mask(self):
@ -247,7 +246,7 @@ class FitsSurvey(ABC):

    @selection_mask.setter
    def selection_mask(self, mask):
-        if not (isinstance(mask, numpy.ndarray)
+        if not (isinstance(mask, np.ndarray)
                and mask.ndim == 1
                and mask.dtype == bool):
            raise TypeError("`selection_mask` must be a 1-dimensional boolean "
@ -307,9 +306,9 @@ class FitsSurvey(ABC):
        if key == "INDEX":
            mask = self.selection_mask
            if mask is None:
-                return numpy.arange(self.size)
+                return np.arange(self.size)
            else:
-                return numpy.arange(mask.size)[mask]
+                return np.arange(mask.size)[mask]

        # Check duplicates
        if key in self.routine_keys and key in self.fits_keys:
@ -407,7 +406,7 @@ class PlanckClusters(FitsSurvey):
        -------
        indxs : list of int
            Array of MCXC indices to match the Planck array. If no counterpart
-            is found returns `numpy.nan`.
+            is found returns `np.nan`.
        """
        if not isinstance(mcxc, MCXCClusters):
            raise TypeError("`mcxc` must be `MCXCClusters` type.")
@ -416,7 +415,7 @@ class PlanckClusters(FitsSurvey):
        planck_names = [name.decode() for name in self["MCXC"]]
        mcxc_names = [name for name in mcxc["MCXC"]]

-        indxs = [numpy.nan] * len(planck_names)
+        indxs = [np.nan] * len(planck_names)
        for i, name in enumerate(planck_names):
            if name == "":
                continue
@ -585,7 +584,7 @@ class SDSS(FitsSurvey):
    def size(self):
        mask = self.selection_mask
        if mask is not None:
-            return numpy.sum(mask)
+            return np.sum(mask)
        else:
            return self.get_fitsitem("ZDIST").size

@ -598,7 +597,7 @@ class SDSS(FitsSurvey):
        self._check_in_list(band, self._bands, "band")
        k = self._bands.index(band)
        mag = self.get_fitsitem("{}_ABSMAG".format(photo))[:, k]
-        return mag + 5 * numpy.log10(self.h)
+        return mag + 5 * np.log10(self.h)

    def _kcorr(self, photo, band):
        """
@ -616,7 +615,7 @@ class SDSS(FitsSurvey):
        lumdist = (1 + self.get_fitsitem("ZDIST")) * self._dist()
        absmag = self._absmag(photo, band)
        kcorr = self._kcorr(photo, band)
-        return absmag + 25 + 5 * numpy.log10(lumdist) + kcorr
+        return absmag + 25 + 5 * np.log10(lumdist) + kcorr

    def _colour(self, photo, band1, band2):
        """
@ -698,7 +697,7 @@ class BaseSingleObservation(ABC):
    @spherical_pos.setter
    def spherical_pos(self, pos):
        if isinstance(pos, (list, tuple)):
-            pos = numpy.array(pos)
+            pos = np.array(pos)

        if not pos.shape == (3,):
            raise ValueError("`spherical_pos` must be a of shape (3,).")
@ -764,6 +763,57 @@ class ObservedCluster(BaseSingleObservation):
        self.mass = mass


+###############################################################################
+#                           Pantheon+ data                                    #
+###############################################################################
+
+
+def read_pantheonplus_covariance(fname, ww, ):
+    """
+    Read a Pantheon+ covariance matrix and keep only the selected entries.
+
+    The file is read as one leading line that is skipped (it is not a matrix
+    element; presumably it stores the matrix dimension) followed by the
+    flattened square matrix with one value per line, ordered row by row.
+
+    Parameters
+    ----------
+    fname : str
+        Path to the covariance matrix text file.
+    ww : 1-dimensional array of bool
+        Selection mask over the rows/columns of the matrix stored in the
+        file. Its length sets the assumed dimension of the stored matrix.
+
+    Returns
+    -------
+    C : 2-dimensional array of shape `(sum(ww), sum(ww))`
+        Covariance matrix restricted to the selected rows and columns.
+
+    Raises
+    ------
+    ValueError
+        If `ww` is not 1-dimensional, if the file holds fewer than
+        `len(ww)**2` values after the first line, or if a value cannot be
+        converted to a float.
+    """
+    mask = np.asarray(ww, dtype=bool)
+    if mask.ndim != 1:
+        raise ValueError("`ww` must be a 1-dimensional boolean mask.")
+    ntot = mask.size
+
+    with open(fname) as f:
+        # The first line is not a matrix element, so it must be consumed
+        # before the values are parsed.
+        f.readline()
+        # Read exactly `ntot**2` values. A truncated file yields an empty
+        # string from `readline`, which `float` rejects with a `ValueError`
+        # rather than silently leaving part of the matrix unfilled.
+        nvals = ntot * ntot
+        flat = np.fromiter((float(f.readline()) for __ in range(nvals)),
+                           dtype=np.float64, count=nvals)
+
+    # Pick the masked rows and columns in a single vectorised step instead
+    # of tracking the output indices by hand.
+    return flat.reshape(ntot, ntot)[np.ix_(mask, mask)]
+
+
+def read_pantheonplus_data(fname_data, fname_covmat_statsys, fname_covmat_vpec,
+                           subtract_vpec, verbose=True):
+    """
+    Read the Pantheon+ data table and its covariance matrices.
+
+    No subtraction is performed here: the STAT+SYS and VPEC covariance
+    matrices are returned separately.
+
+    Parameters
+    ----------
+    fname_data : str
+        Path to the Pantheon+ data table, a text file with a header row of
+        column names.
+    fname_covmat_statsys : str
+        Path to the STAT+SYS covariance matrix file.
+    fname_covmat_vpec : str
+        Path to the VPEC covariance matrix file. Only read if
+        `subtract_vpec` is `True`.
+    subtract_vpec : bool
+        Whether to also read the VPEC covariance matrix.
+    verbose : bool, optional
+        Verbosity flag passed to `fprint`.
+
+    Returns
+    -------
+    data : structured array
+        The Pantheon+ data table.
+    C : 2-dimensional array
+        The STAT+SYS covariance matrix.
+    C_vpec : 2-dimensional array or None
+        The VPEC covariance matrix, `None` if `subtract_vpec` is `False`.
+    """
+    fprint("reading the Pantheon+ data.", verbose)
+    data = np.genfromtxt(fname_data, names=True, dtype=None, encoding=None)
+    # A table with a single row is returned as a 0-dimensional array, which
+    # has no length. Promote it so that the mask below can be built.
+    data = np.atleast_1d(data)
+    # No selection is applied: keep every row/column of the matrices.
+    ww = np.ones(len(data), dtype=bool)
+
+    fprint("reading the Pantheon+ STAT+SYS covariance matrix.", verbose)
+    C = read_pantheonplus_covariance(fname_covmat_statsys, ww)
+
+    # Bind `C_vpec` on every path, otherwise the return statement raises an
+    # `UnboundLocalError` whenever `subtract_vpec` is `False`.
+    C_vpec = None
+    if subtract_vpec:
+        fprint("reading the Pantheon+ VPEC covariance matrix.", verbose)
+        C_vpec = read_pantheonplus_covariance(fname_covmat_vpec, ww)
+
+    # NOTE: Subtracting the VPEC covariance matrix from the STAT+SYS
+    # covariance matrix produces negative eigenvalues, hence the two are
+    # returned separately. Emailed Maria to ask about this.
+
+    return data, C, C_vpec
+
+
 ###############################################################################
 #                           Utility functions                                 #
 ###############################################################################
@ -786,10 +836,10 @@ def match_array_to_no_masking(arr, surv):
    dtype = arr.dtype
    if arr.ndim > 1:
        shape = arr.shape
-        out = numpy.full((surv.selection_mask.size, *shape[1:]), numpy.nan,
+        out = np.full((surv.selection_mask.size, *shape[1:]), np.nan,
                      dtype=dtype)
    else:
-        out = numpy.full(surv.selection_mask.size, numpy.nan, dtype=dtype)
+        out = np.full(surv.selection_mask.size, np.nan, dtype=dtype)

    for i, indx in enumerate(surv["INDEX"]):
        out[indx] = arr[i]
@ -808,4 +858,4 @@ def cols_to_structured(N, cols):
    names, formats = zip(*cols)
    dtype = {"names": names, "formats": formats}

-    return numpy.full(N, numpy.nan, dtype=dtype)
+    return np.full(N, np.nan, dtype=dtype)
--- a/csiborgtools/read/paths.py
+++ b/csiborgtools/read/paths.py
@ -61,6 +61,8 @@ class Paths:
        Path to the TNG300-1 simulation directory.
    aux_cat_dir : str
        Path to the directory containing auxiliary catalogues.
+    CF4_dir : str
+        Path to the CosmicFlows4 directory with density & velocity fields.
    """
    def __init__(self,
                 csiborg1_srcdir,
@ -72,7 +74,8 @@ class Paths:
                 borg1_dir,
                 borg2_dir,
                 tng300_1_dir,
-                 aux_cat_dir
+                 aux_cat_dir,
+                 CF4_dir,
                 ):
        self.csiborg1_srcdir = csiborg1_srcdir
        self.csiborg2_main_srcdir = csiborg2_main_srcdir
@ -84,6 +87,7 @@ class Paths:
        self.tng300_1_dir = tng300_1_dir
        self.postdir = postdir
        self.aux_cat_dir = aux_cat_dir
+        self.CF4_dir = CF4_dir

    def get_ics(self, simname):
        """Get available IC realisation IDs for a given simulation."""
@ -115,8 +119,11 @@ class Paths:
            files = [int(search(r'chain_(\d+)', f).group(1)) for f in files]
        elif simname == "Carrick2015":
            return [0]
-        elif simname in ["CF4", "CF4gp"]:
-            return [0]
+        elif simname == "CF4":
+            files = glob(join(self.CF4_dir, "CF4_new_128-z008_realization*_delta.fits"))  # noqa
+            files = [search(r'realization(\d+)_delta\.fits', file).group(1)
+                     for file in files if search(r'realization(\d+)_delta\.fits', file)]  # noqa
+            files = [int(file) for file in files]
        elif simname == "Lilow2024":
            return [0]
        else:
--- a/notebooks/flow/process_upglade.ipynb
+++ b/notebooks/flow/process_upglade.ipynb
--- a/scripts/field_los.py
+++ b/scripts/field_los.py
@ -160,18 +160,17 @@ def get_field(simname, nsim, kind, MAS, grid):
            return field
        else:
            raise ValueError(f"Unknown field kind: `{kind}`.")
-    elif "CF4" in simname:
-        folder = "/mnt/extraspace/rstiskalek/catalogs"
+    elif simname == "CF4":
+        folder = "/mnt/extraspace/rstiskalek/catalogs/CF4"
        warn(f"Using local paths from `{folder}`.", RuntimeWarning)

        if kind == "density":
-            fpath = join(folder, "CF4_new_64-z008_delta.fits")
+            fpath = join(folder, f"CF4_new_128-z008_realization{nsim}_delta.fits")     # noqa
        elif kind == "velocity":
-            fpath = join(folder, "CF4_new_64-z008_velocity.fits")
+            fpath = join(folder, f"CF4_new_128-z008_realization{nsim}_velocity.fits")  # noqa
        else:
            raise ValueError(f"Unknown field kind: `{kind}`.")

-        fpath = fpath.replace("CF4", "CF4gp") if "CF4gp" in simname else fpath
        field = fits.open(fpath)[0].data

        # https://projets.ip2i.in2p3.fr//cosmicflows/ says to multiply by 52
--- a/scripts/field_los.sh
+++ b/scripts/field_los.sh
@ -1,18 +1,17 @@
 nthreads=1
 memory=64
-on_login=0
+on_login=1
 queue="berg"
 env="/mnt/users/rstiskalek/csiborgtools/venv_csiborg/bin/python"
 file="field_los.py"

-simname=${1}
 nsims="-1"
 MAS="SPH"
 grid=1024


-for catalogue in "UPGLADE"; do
-# for catalogue in "Foundation"; do
+for simname in "CF4"; do
+    for catalogue in "Foundation"; do
        pythoncm="$env $file --catalogue $catalogue --nsims $nsims --simname $simname --MAS $MAS --grid $grid"
        if [ $on_login -eq 1 ]; then
            echo $pythoncm
@ -27,3 +26,4 @@ for catalogue in "UPGLADE"; do

        sleep 0.05
    done
+done
--- a/scripts/flow_validation.py
+++ b/scripts/flow_validation.py
@ -248,7 +248,7 @@ if __name__ == "__main__":
                               "beta_min": -1.0, "beta_max": 3.0,
                               "sigma_v_min": 1.0, "sigma_v_max": 750.,
                               "sample_Vmono": False,
-                               "sample_alpha": False,
+                               "sample_alpha": True,
                               "sample_beta": True,
                               "sample_sigma_v_ext": False,
                               }
--- a/scripts/flow_validation.sh
+++ b/scripts/flow_validation.sh
@ -21,7 +21,7 @@ fi
 # for simname in "Lilow2024" "CF4" "CF4gp" "csiborg1" "csiborg2_main" "csiborg2X"; do
 for simname in "Carrick2015"; do
 # for simname in "csiborg1" "csiborg2_main" "csiborg2X"; do
-    for catalogue in "Pantheon+_zSN"; do
+    for catalogue in "Foundation"; do
    # for catalogue in "2MTF"; do
        # for ksim in 0 1 2; do
        # for ksim in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do