Add simple distance flow model (#114)

* Add imports * Add field LOS paths * Add basic flow model * Edit script * Add nb * Add nb * Update nb * Add some docs * Add RA reading * Add imoprts * Updates to the flow model * Update script * Bring back A2 * Update imports * Update imports * Add Carrick to ICs * Add Carrick boxsize * Add Carrick and fix minor bugs * Add Carrick box * Update script * Edit imports * Add fixed flow! * Update omega_m and add it * Update nb * Update nb * Update nb * Remove old print statements * Update params * Add thinning of chains * Add import * Add flow validation script * Add submit script * Add ksmooth * Update nb * Update params * Update script * Update string * Move where distributions are defined * Add density bias parameter * Add lognorm mean * Update scripts * Update script
2025-06-07 17:31:13 +00:00 · 2024-03-08 10:44:19 +00:00 · 2024-03-08 10:44:19 +00:00 · a65e3cb15b
commit a65e3cb15b
parent fb93f85543
14 changed files with 1762 additions and 60 deletions
--- a/csiborgtools/init.py
+++ b/csiborgtools/init.py
@ -12,14 +12,15 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-from csiborgtools import clustering, field, halo, match, read, summary          # noqa
+from csiborgtools import clustering, field, flow, halo, match, read, summary    # noqa
 from .utils import (center_of_mass, delta2ncells, number_counts,                # noqa
                    periodic_distance, periodic_distance_two_points,            # noqa
                    binned_statistic, cosine_similarity, fprint,                # noqa
                    hms_to_degrees, dms_to_degrees, great_circle_distance,      # noqa
-                    radec_to_cartesian, cartesian_to_radec)                     # noqa
+                    radec_to_cartesian, cartesian_to_radec,                     # noqa
-from .params import paths_glamdring, simname2boxsize                            # noqa
+                    thin_samples_by_acl)                                        # noqa
 from .params import paths_glamdring, simname2boxsize, simname2Omega_m           # noqa
 ###############################################################################
--- a/csiborgtools/flow/init.py
+++ b/csiborgtools/flow/init.py
@ -0,0 +1,17 @@
 # Copyright (C) 2024 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 from .flow_model import (DataLoader, radial_velocity_los, dist2redshift,        # noqa
                         dist2distmodulus, predict_zobs, project_Vext,          # noqa
                         SD_PV_validation_model)                                # noqa
--- a/csiborgtools/flow/flow_model.py
+++ b/csiborgtools/flow/flow_model.py
@ -0,0 +1,657 @@
 # Copyright (C) 2024 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 Validation of the CSiBORG velocity field against PV measurements. Based on [1].
 References
 ----------
 [1] https://arxiv.org/abs/1912.09383.
 """
 from datetime import datetime
 from warnings import warn
 import numpy as np
 import numpyro
 import numpyro.distributions as dist
 from astropy import units as u
 from astropy.coordinates import SkyCoord
 from astropy.cosmology import FlatLambdaCDM
 from h5py import File
 from jax import numpy as jnp
 from jax import vmap
 from tqdm import tqdm, trange
 from ..params import simname2Omega_m
 from ..read import CSiBORG1Catalogue
 SPEED_OF_LIGHT = 299792.458  # km / s
 def t():
    """Shortcut to get the current time."""
    return datetime.now().strftime("%H:%M:%S")
 def radec_to_galactic(ra, dec):
    """
    Convert right ascension and declination to galactic coordinates (all in
    degrees.)
    Parameters
    ----------
    ra, dec : float or 1-dimensional array
        Right ascension and declination in degrees.
    Returns
    -------
    l, b : float or 1-dimensional array
    """
    c = SkyCoord(ra=ra*u.degree, dec=dec*u.degree, frame='icrs')
    return c.galactic.l.degree, c.galactic.b.degree
 ###############################################################################
 #                             Data loader                                     #
 ###############################################################################
 class DataLoader:
    """
    Data loader for the line of sight (LOS) interpolated fields and the
    corresponding catalogues.
    Parameters
    ----------
    simname : str
        Simulation name.
    catalogue : str
        Name of the catalogue with LOS objects.
    catalogue_fpath : str
        Path to the LOS catalogue file.
    paths : csiborgtools.read.Paths
        Paths object.
    ksmooth : int, optional
        Smoothing index.
    store_full_velocity : bool, optional
        Whether to store the full 3D velocity field. Otherwise stores only
        the radial velocity.
    """
    def __init__(self, simname, catalogue, catalogue_fpath, paths,
                 ksmooth=None, store_full_velocity=False):
        print(f"{t()}: reading the catalogue.")
        self._cat = self._read_catalogue(catalogue, catalogue_fpath)
        self._catname = catalogue
        print(f"{t()}: reading the interpolated field.")
        self._field_rdist, self._los_density, self._los_velocity = self._read_field(  # noqa
            simname, catalogue, ksmooth, paths)
        if len(self._field_rdist) % 2 == 0:
            warn(f"The number of radial steps is even. Skipping the first "
                 f"step at {self._field_rdist[0]} because Simpson's rule "
                 "requires an odd number of steps.")
            self._field_rdist = self._field_rdist[1:]
            self._los_density = self._los_density[..., 1:]
            self._los_velocity = self._los_velocity[..., 1:]
        if len(self._cat) != len(self._los_density):
            raise ValueError("The number of objects in the catalogue does not "
                             "match the number of objects in the field.")
        print(f"{t()}: calculating the radial velocity.")
        nobject, nsim = self._los_density.shape[:2]
        # In case of Carrick 2015 the box is in galactic coordinates..
        if simname == "Carrick2015":
            d1, d2 = radec_to_galactic(self._cat["RA"], self._cat["DEC"])
        else:
            d1, d2 = self._cat["RA"], self._cat["DEC"]
        radvel = np.empty((nobject, nsim, len(self._field_rdist)),
                          self._los_velocity.dtype)
        for i in trange(nobject):
            for j in range(nsim):
                radvel[i, j, :] = radial_velocity_los(
                    self._los_velocity[:, i, j, ...], d1[i], d2[i])
        self._los_radial_velocity = radvel
        if not store_full_velocity:
            self._los_velocity = None
        Omega_m = simname2Omega_m(simname)
        # Normalize the CSiBORG density by the mean matter density
        if "csiborg" in simname:
            cosmo = FlatLambdaCDM(H0=100, Om0=Omega_m)
            mean_rho_matter = cosmo.critical_density0.to("Msun/kpc^3").value
            mean_rho_matter *= Omega_m
            self._los_density /= mean_rho_matter
        # Since Carrick+2015 provide `rho / <rho> - 1`
        if simname == "Carrick2015":
            self._los_density += 1
    @property
    def cat(self):
        """
        The distance indicators catalogue.
        Returns
        -------
        structured array
        """
        return self._cat
    @property
    def catname(self):
        """
        Name of the catalogue.
        Returns
        -------
        str
        """
        return self._catname
    @property
    def rdist(self):
        """
        Radial distances where the field was interpolated for each object.
        Returns
        -------
        1-dimensional array
        """
        return self._field_rdist
    @property
    def los_density(self):
        """
        Density field along the line of sight.
        Returns
        ----------
        3-dimensional array of shape (n_objects, n_simulations, n_steps)
        """
        return self._los_density
    @property
    def los_velocity(self):
        """
        Velocity field along the line of sight.
        Returns
        -------
        4-dimensional array of shape (n_objects, n_simulations, 3, n_steps)
        """
        if self._los_velocity is None:
            raise ValueError("The 3D velocities were not stored.")
        return self._los_velocity
    @property
    def los_radial_velocity(self):
        """
        Radial velocity along the line of sight.
        Returns
        -------
        3-dimensional array of shape (n_objects, n_simulations, n_steps)
        """
        return self._los_radial_velocity
    def _read_field(self, simname, catalogue, k, paths):
        """Read in the interpolated field."""
        out_density = None
        out_velocity = None
        has_smoothed = False
        nsims = paths.get_ics(simname)
        with File(paths.field_los(simname, catalogue), 'r') as f:
            has_smoothed = True if f[f"density_{nsims[0]}"].ndim > 2 else False
            if has_smoothed and (k is None or not isinstance(k, int)):
                raise ValueError("The output contains smoothed field but "
                                 "`ksmooth` is None. It must be provided.")
            for i, nsim in enumerate(tqdm(nsims)):
                if out_density is None:
                    nobject, nstep = f[f"density_{nsim}"].shape[:2]
                    out_density = np.empty(
                        (nobject, len(nsims), nstep), dtype=np.float32)
                    out_velocity = np.empty(
                        (3, nobject, len(nsims), nstep), dtype=np.float32)
                indx = (..., k) if has_smoothed else (...)
                out_density[:, i, :] = f[f"density_{nsim}"][indx]
                out_velocity[:, :, i, :] = f[f"velocity_{nsim}"][indx]
            rdist = f[f"rdist_{nsims[0]}"][:]
        return rdist, out_density, out_velocity
    def _read_catalogue(self, catalogue, catalogue_fpath):
        """
        Read in the distance indicator catalogue.
        """
        if catalogue == "A2":
            with File(catalogue_fpath, 'r') as f:
                dtype = [(key, np.float32) for key in f.keys()]
                arr = np.empty(len(f["RA"]), dtype=dtype)
                for key in f.keys():
                    arr[key] = f[key][:]
        elif catalogue == "LOSS" or catalogue == "Foundation":
            with File(catalogue_fpath, 'r') as f:
                grp = f[catalogue]
                dtype = [(key, np.float32) for key in grp.keys()]
                arr = np.empty(len(grp["RA"]), dtype=dtype)
                for key in grp.keys():
                    arr[key] = grp[key][:]
        elif "csiborg1" in catalogue:
            nsim = int(catalogue.split("_")[-1])
            cat = CSiBORG1Catalogue(nsim, bounds={"totmass": (1e13, None)})
            seed = 42
            gen = np.random.default_rng(seed)
            mask = gen.choice(len(cat), size=100, replace=False)
            keys = ["r_hMpc", "RA", "DEC"]
            dtype = [(key, np.float32) for key in keys]
            arr = np.empty(len(mask), dtype=dtype)
            sph_pos = cat["spherical_pos"]
            arr["r_hMpc"] = sph_pos[mask, 0]
            arr["RA"] = sph_pos[mask, 1]
            arr["DEC"] = sph_pos[mask, 2]
            # TODO: add peculiar velocit
        else:
            raise ValueError(f"Unknown catalogue: `{catalogue}`.")
        return arr
 ###############################################################################
 #                       Supplementary flow functions                          #
 ###############################################################################
 def radial_velocity_los(los_velocity, ra, dec):
    """
    Calculate the radial velocity along the line of sight.
    Parameters
    ----------
    los_velocity : 2-dimensional array of shape (3, n_steps)
        Line of sight velocity field.
    ra, dec : floats
        Right ascension and declination of the line of sight.
    is_degrees : bool, optional
        Whether the angles are in degrees.
    Returns
    -------
    1-dimensional array of shape (n_steps)
    """
    types = (float, np.float32, np.float64)
    if not isinstance(ra, types) and not isinstance(dec, types):
        raise ValueError("RA and dec must be floats.")
    if los_velocity.ndim != 2 and los_velocity.shape[0] != 3:
        raise ValueError("The shape of `los_velocity` must be (3, n_steps).")
    ra_rad, dec_rad = np.deg2rad(ra), np.deg2rad(dec)
    vx, vy, vz = los_velocity
    return (vx * np.cos(ra_rad) * np.cos(dec_rad)
            + vy * np.sin(ra_rad) * np.cos(dec_rad)
            + vz * np.sin(dec_rad))
 ###############################################################################
 #                           JAX Flow model                                    #
 ###############################################################################
 def lognorm_mean_std_to_loc_scale(mu, std):
    """
    Calculate the location and scale parameters for the log-normal distribution
    from the mean and standard deviation.
    Parameters
    ----------
    mu, std : float
        Mean and standard deviation.
    Returns
    -------
    loc, scale : float
    """
    loc = np.log(mu) - 0.5 * np.log(1 + (std / mu) ** 2)
    scale = np.sqrt(np.log(1 + (std / mu) ** 2))
    return loc, scale
 def simps(y, dx):
    """
    Simpson's rule 1D integration, assuming that the number of steps is even
    and that the step size is constant.
    Parameters
    ----------
    y : 1-dimensional array
        Function values.
    dx : float
        Step size.
    Returns
    -------
    float
    """
    if len(y) % 2 == 0:
        raise ValueError("The number of steps must be odd.")
    return dx / 3 * jnp.sum(y[0:-1:2] + 4 * y[1::2] + y[2::2])
 def dist2redshift(dist, Omega_m):
    """
    Convert comoving distance to cosmological redshift if the Universe is
    flat and z << 1.
    Parameters
    ----------
    dist : float or 1-dimensional array
        Comoving distance in `Mpc / h`.
    Omega_m : float
        Matter density parameter.
    Returns
    -------
    float or 1-dimensional array
    """
    H0 = 100
    eta = 3 * Omega_m / 2
    return 1 / eta * (1 - (1 - 2 * H0 * dist / SPEED_OF_LIGHT * eta)**0.5)
 def dist2distmodulus(dist, Omega_m):
    """
    Convert comoving distance to distance modulus, assuming z << 1.
    Parameters
    ----------
    dist : float or 1-dimensional array
        Comoving distance in `Mpc / h`.
    Omega_m : float
        Matter density parameter.
    Returns
    -------
    float or 1-dimensional array
    """
    zcosmo = dist2redshift(dist, Omega_m)
    luminosity_distance = dist * (1 + zcosmo)
    return 5 * jnp.log10(luminosity_distance) + 25
 def project_Vext(Vext_x, Vext_y, Vext_z, RA, dec):
    """
    Project the external velocity onto the line of sight along direction
    specified by RA/dec. Note that the angles must be in radians.
    Parameters
    ----------
    Vext_x, Vext_y, Vext_z : floats
        Components of the external velocity.
    RA, dec : floats
        Right ascension and declination in radians
    Returns
    -------
    float
    """
    cos_dec = jnp.cos(dec)
    return (Vext_x * jnp.cos(RA) * cos_dec
            + Vext_y * jnp.sin(RA) * cos_dec
            + Vext_z * jnp.sin(dec))
 def predict_zobs(dist, beta, Vext_radial, vpec_radial, Omega_m):
    """
    Predict the observed redshift at a given comoving distance given some
    velocity field.
    Parameters
    ----------
    dist : float
        Comoving distance in `Mpc / h`.
    beta : float
        Velocity bias parameter.
    Vext_radial : float
        Radial component of the external velocity along the LOS.
    vpec_radial : float
        Radial component of the peculiar velocity along the LOS.
    Omega_m : float
        Matter density parameter.
    Returns
    -------
    float
    """
    zcosmo = dist2redshift(dist, Omega_m)
    vrad = beta * vpec_radial + Vext_radial
    return (1 + zcosmo) * (1 + vrad / SPEED_OF_LIGHT) - 1
 ###############################################################################
 #                          Flow validation models                             #
 ###############################################################################
 def calculate_ptilde_wo_bias(xrange, mu, err, r_squared_xrange=None):
    """
    Calculate `ptilde(r)` without any bias.
    Parameters
    ----------
    xrange : 1-dimensional array
        Radial distances where the field was interpolated for each object.
    mu : float
        Comoving distance in `Mpc / h`.
    err : float
        Error on the comoving distance in `Mpc / h`.
    r_squared_xrange : 1-dimensional array, optional
        Radial distances squared where the field was interpolated for each
        object. If not provided, the `r^2` correction is not applied.
    Returns
    -------
    1-dimensional array
    """
    ptilde = jnp.exp(-0.5 * ((xrange - mu) / err)**2)
    if r_squared_xrange is not None:
        ptilde *= r_squared_xrange
    return ptilde
 def calculate_ll_zobs(zobs, zobs_pred, sigma_v):
    """
    Calculate the likelihood of the observed redshift given the predicted
    redshift.
    Parameters
    ----------
    zobs : float
        Observed redshift.
    zobs_pred : float
        Predicted redshift.
    sigma_v : float
        Velocity uncertainty.
    Returns
    -------
    float
    """
    dcz = SPEED_OF_LIGHT * (zobs - zobs_pred)
    return jnp.exp(-0.5 * (dcz / sigma_v)**2) / jnp.sqrt(2 * np.pi) / sigma_v
 class SD_PV_validation_model:
    """
    Simple distance peculiar velocity (PV) validation model, assuming that
    we already have a calibrated estimate of the comoving distance to the
    objects.
    Parameters
    ----------
    los_density : 2-dimensional array of shape (n_objects, n_steps)
        LOS density field.
    los_velocity : 3-dimensional array of shape (n_objects, n_steps)
        LOS radial velocity field.
    RA, dec : 1-dimensional arrays of shape (n_objects)
        Right ascension and declination in degrees.
    z_obs : 1-dimensional array of shape (n_objects)
        Observed redshifts.
    r_hMpc : 1-dimensional array of shape (n_objects)
        Estimated comoving distances in `h^-1 Mpc`.
    e_r_hMpc : 1-dimensional array of shape (n_objects)
        Errors on the estimated comoving distances in `h^-1 Mpc`.
    r_xrange : 1-dimensional array
        Radial distances where the field was interpolated for each object.
    Omega_m : float
        Matter density parameter.
    """
    def __init__(self, los_density, los_velocity, RA, dec, z_obs,
                 r_hMpc, e_r_hMpc, r_xrange, Omega_m):
        # Convert everything to JAX arrays.
        dt = jnp.float32
        self._los_density = jnp.asarray(los_density, dtype=dt)
        self._los_velocity = jnp.asarray(los_velocity, dtype=dt)
        self._RA = jnp.asarray(np.deg2rad(RA), dtype=dt)
        self._dec = jnp.asarray(np.deg2rad(dec), dtype=dt)
        self._z_obs = jnp.asarray(z_obs, dtype=dt)
        self._r_hMpc = jnp.asarray(r_hMpc, dtype=dt)
        self._e_rhMpc = jnp.asarray(e_r_hMpc, dtype=dt)
        # Get radius squared
        r2_xrange = r_xrange**2
        r2_xrange /= r2_xrange.mean()
        # Get the stepsize, we need it to be constant for Simpson's rule.
        dr = np.diff(r_xrange)
        if not np.all(np.isclose(dr, dr[0], atol=1e-5)):
            raise ValueError("The radial step size must be constant.")
        dr = dr[0]
        # Get the various vmapped functions
        self._vmap_ptilde_wo_bias = vmap(lambda mu, err: calculate_ptilde_wo_bias(r_xrange, mu, err, r2_xrange))                        # noqa
        self._vmap_simps = vmap(lambda y: simps(y, dr))
        self._vmap_zobs = vmap(lambda beta, Vr, vpec_rad: predict_zobs(r_xrange, beta, Vr, vpec_rad, Omega_m), in_axes=(None, 0, 0))    # noqa
        self._vmap_ll_zobs = vmap(lambda zobs, zobs_pred, sigma_v: calculate_ll_zobs(zobs, zobs_pred, sigma_v), in_axes=(0, 0, None))   # noqa
        # Vext_x, Vext_y, Vext_z: external velocity components
        self._dist_Vext = dist.Uniform(-1000, 1000)
        # We want sigma_v to be 150 +- 100 km / s (lognormal)
        self._dist_sigma_v = dist.LogNormal(
            *lognorm_mean_std_to_loc_scale(150, 100))
        # Density power-law bias
        self._dist_alpha = dist.LogNormal(
            *lognorm_mean_std_to_loc_scale(1.0, 0.5))
        # Velocity bias
        self._dist_beta = dist.Normal(1., 0.5)
    def __call__(self):
        """
        The simple distance NumPyro PV validation model. Samples the following
        parameters:
            - `Vext_x`, `Vext_y`, `Vext_z`: external velocity components
            - `alpha`: density bias parameter
            - `beta`: velocity bias parameter
            - `sigma_v`: velocity uncertainty
        """
        Vx = numpyro.sample("Vext_x", self._dist_Vext)
        Vy = numpyro.sample("Vext_y", self._dist_Vext)
        Vz = numpyro.sample("Vext_z", self._dist_Vext)
        alpha = numpyro.sample("alpha", self._dist_alpha)
        beta = numpyro.sample("beta", self._dist_beta)
        sigma_v = numpyro.sample("sigma_v", self._dist_sigma_v)
        Vext_rad = project_Vext(Vx, Vy, Vz, self._RA, self._dec)
        # Calculate p(r) and multiply it by the galaxy bias
        ptilde = self._vmap_ptilde_wo_bias(self._r_hMpc, self._e_rhMpc)
        ptilde *= self._los_density**alpha
        # Normalization of p(r)
        pnorm = self._vmap_simps(ptilde)
        # Calculate p(z_obs) and multiply it by p(r)
        zobs_pred = self._vmap_zobs(beta, Vext_rad, self._los_velocity)
        ptilde *= self._vmap_ll_zobs(self._z_obs, zobs_pred, sigma_v)
        ll = jnp.sum(jnp.log(self._vmap_simps(ptilde) / pnorm))
        numpyro.factor("ll", ll)
 # def SN_PV_wcal_validation_model(los_overdensity=None, los_velocity=None,
 #                                 RA=None, dec=None, z_CMB=None,
 #                                 mB=None, x1=None, c=None,
 #                                 e_mB=None, e_x1=None, e_c=None,
 #                                 mu_xrange=None, r_xrange=None,
 #                                 norm_r2_xrange=None, Omega_m=None, dr=None):
 #     """
 #     Pass
 #     """
 #     Vx = numpyro.sample("Vext_x", dist.Uniform(-1000, 1000))
 #     Vy = numpyro.sample("Vext_y", dist.Uniform(-1000, 1000))
 #     Vz = numpyro.sample("Vext_z", dist.Uniform(-1000, 1000))
 #     beta = numpyro.sample("beta", dist.Uniform(-10, 10))
 #
 #     # TODO: Later sample these as well.
 #     e_mu_intrinsic = 0.064
 #     alpha_cal = 0.135
 #     beta_cal = 2.9
 #     mag_cal = -18.555
 #     sigma_v = 112
 #
 #     # TODO: Check these for fiducial values.
 #     mu = mB - mag_cal + alpha_cal * x1 - beta_cal * c
 #     squared_e_mu = e_mB**2 + alpha_cal**2 * e_x1**2 + beta_cal**2 * e_c**2
 #
 #     squared_e_mu += e_mu_intrinsic**2
 #     ll = 0.
 #     for i in range(len(los_overdensity)):
 #         # Project the external velocity for this galaxy.
 #         Vext_rad = project_Vext(Vx, Vy, Vz, RA[i], dec[i])
 #
 #         dmu = mu_xrange - mu[i]
 #         ptilde = norm_r2_xrange * jnp.exp(-0.5 * dmu**2 / squared_e_mu[i])
 #         # TODO: Add some bias
 #         ptilde *= (1 + los_overdensity[i])
 #
 #         zobs_pred = predict_zobs(r_xrange, beta, Vext_rad, los_velocity[i],
 #                                  Omega_m)
 #
 #         dczobs = SPEED_OF_LIGHT * (z_CMB[i] - zobs_pred)
 #
 #         ll_zobs = jnp.exp(-0.5 * (dczobs / sigma_v)**2) / sigma_v
 #
 #         ll += jnp.log(simps(ptilde * ll_zobs, dr))
 #         ll -= jnp.log(simps(ptilde, dr))
 #
 #     numpyro.factor("ll", ll)
--- a/csiborgtools/params.py
+++ b/csiborgtools/params.py
@ -13,7 +13,7 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-Various user parameters for csiborgtools.
+Various user parameters for CSiBORGTools.
 """
@ -37,7 +37,8 @@ def simname2boxsize(simname):
         "borg1": 677.7,
         "borg2": 676.6,
         "quijote": 1000.,
-         "TNG300-1": 205.
+         "TNG300-1": 205.,
         "Carrick2015": 400.,
         }
    boxsize = d.get(simname, None)
@ -48,6 +49,32 @@ def simname2boxsize(simname):
    return boxsize
 def simname2Omega_m(simname):
    """
    Return Omega_m for a given simname.
    Parameters
    ----------
    simname : str
        Simulation name.
    Returns
    -------
    Omega_m: float
    """
    d = {"csiborg1": 0.307,
         "borg1": 0.307,
         "Carrick2015": 0.3,
         }
    omega_m = d.get(simname, None)
    if omega_m is None:
        raise ValueError("Unknown simname: {}".format(simname))
    return omega_m
 paths_glamdring = {
    "csiborg1_srcdir": "/mnt/extraspace/rstiskalek/csiborg1",
    "csiborg2_main_srcdir": "/mnt/extraspace/rstiskalek/csiborg2_main",
--- a/csiborgtools/read/paths.py
+++ b/csiborgtools/read/paths.py
@ -111,6 +111,8 @@ class Paths:
            files = glob(join(self.quijote_dir, "fiducial_processed",
                              "chain_*"))
            files = [int(search(r'chain_(\d+)', f).group(1)) for f in files]
        elif simname == "Carrick2015":
            return [0]
        else:
            raise ValueError(f"Unknown simulation name `{simname}`.")
@ -653,3 +655,22 @@ class Paths:
        str
        """
        return self.tng300_1_dir
    def field_los(self, simnname, catalogue):
        """
        Path to the files containing the line-of-sight fields.
        Parameters
        ----------
        simname : str
            Simulation name.
        catalogue : str
            Catalogue name.
        Returns
        -------
        str
        """
        fdir = join(self.postdir, "field_los")
        try_create_directory(fdir)
        return join(fdir, f"los_{catalogue}_{simnname}.hdf5")
--- a/csiborgtools/utils.py
+++ b/csiborgtools/utils.py
@ -15,7 +15,9 @@
 """
 Collection of stand-off utility functions used in the scripts.
 """
-import numpy
+from copy import deepcopy
 import numpy as np
 from numba import jit
 from datetime import datetime
@ -30,17 +32,17 @@ def center_of_mass(particle_positions, particles_mass, boxsize):
    Calculate the center of mass of a halo while assuming periodic boundary
    conditions of a cubical box.
    """
-    cm = numpy.zeros(3, dtype=particle_positions.dtype)
+    cm = np.zeros(3, dtype=particle_positions.dtype)
    totmass = sum(particles_mass)
    # Convert positions to unit circle coordinates in the complex plane,
    # calculate the weighted average and convert it back to box coordinates.
    for i in range(3):
-        cm_i = sum(particles_mass * numpy.exp(
+        cm_i = sum(particles_mass * np.exp(
-            2j * numpy.pi * particle_positions[:, i] / boxsize))
+            2j * np.pi * particle_positions[:, i] / boxsize))
        cm_i /= totmass
-        cm_i = numpy.arctan2(cm_i.imag, cm_i.real) * boxsize / (2 * numpy.pi)
+        cm_i = np.arctan2(cm_i.imag, cm_i.real) * boxsize / (2 * np.pi)
        if cm_i < 0:
            cm_i += boxsize
@ -58,7 +60,7 @@ def periodic_distance(points, reference_point, boxsize):
    npoints = len(points)
    half_box = boxsize / 2
-    dist = numpy.zeros(npoints, dtype=points.dtype)
+    dist = np.zeros(npoints, dtype=points.dtype)
    for i in range(npoints):
        for j in range(3):
            dist_1d = abs(points[i, j] - reference_point[j])
@ -124,15 +126,15 @@ def cartesian_to_radec(X):
    """
    x, y, z = X[:, 0], X[:, 1], X[:, 2]
-    dist = numpy.linalg.norm(X, axis=1)
+    dist = np.linalg.norm(X, axis=1)
-    dec = numpy.arcsin(z / dist)
+    dec = np.arcsin(z / dist)
-    ra = numpy.arctan2(y, x)
+    ra = np.arctan2(y, x)
-    ra[ra < 0] += 2 * numpy.pi
+    ra[ra < 0] += 2 * np.pi
-    ra *= 180 / numpy.pi
+    ra *= 180 / np.pi
-    dec *= 180 / numpy.pi
+    dec *= 180 / np.pi
-    return numpy.vstack([dist, ra, dec]).T
+    return np.vstack([dist, ra, dec]).T
 def radec_to_cartesian(X):
@ -142,11 +144,11 @@ def radec_to_cartesian(X):
    """
    dist, ra, dec = X[:, 0], X[:, 1], X[:, 2]
-    cdec = numpy.cos(dec * numpy.pi / 180)
+    cdec = np.cos(dec * np.pi / 180)
-    return numpy.vstack([
+    return np.vstack([
-        dist * cdec * numpy.cos(ra * numpy.pi / 180),
+        dist * cdec * np.cos(ra * np.pi / 180),
-        dist * cdec * numpy.sin(ra * numpy.pi / 180),
+        dist * cdec * np.sin(ra * np.pi / 180),
-        dist * numpy.sin(dec * numpy.pi / 180)
+        dist * np.sin(dec * np.pi / 180)
        ]).T
@ -159,14 +161,14 @@ def great_circle_distance(x1, x2):
    ra1, dec1 = x1
    ra2, dec2 = x2
-    ra1 *= numpy.pi / 180
+    ra1 *= np.pi / 180
-    dec1 *= numpy.pi / 180
+    dec1 *= np.pi / 180
-    ra2 *= numpy.pi / 180
+    ra2 *= np.pi / 180
-    dec2 *= numpy.pi / 180
+    dec2 *= np.pi / 180
-    return 180 / numpy.pi * numpy.arccos(
+    return 180 / np.pi * np.arccos(
-        numpy.sin(dec1) * numpy.sin(dec2)
+        np.sin(dec1) * np.sin(dec2)
-        + numpy.cos(dec1) * numpy.cos(dec2) * numpy.cos(ra1 - ra2)
+        + np.cos(dec1) * np.cos(dec2) * np.cos(ra1 - ra2)
        )
@ -193,8 +195,8 @@ def cosine_similarity(x, y):
    if y.ndim == 1:
        y = y.reshape(1, -1)
-    out = numpy.sum(x * y, axis=1)
+    out = np.sum(x * y, axis=1)
-    out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
+    out /= np.linalg.norm(x) * np.linalg.norm(y, axis=1)
    return out[0] if out.size == 1 else out
@ -258,8 +260,8 @@ def real2redshift(pos, vel, observer_location, observer_velocity, boxsize,
        Redshift-space Cartesian position in `Mpc / h`.
    """
    if make_copy:
-        pos = numpy.copy(pos)
+        pos = np.copy(pos)
-        vel = numpy.copy(vel)
+        vel = np.copy(vel)
    H0_inv = 1. / 100
@ -267,8 +269,8 @@ def real2redshift(pos, vel, observer_location, observer_velocity, boxsize,
    pos -= observer_location
    vel -= observer_velocity
-    vr_dot = numpy.einsum('ij,ij->i', pos, vel)
+    vr_dot = np.einsum('ij,ij->i', pos, vel)
-    norm2 = numpy.einsum('ij,ij->i', pos, pos)
+    norm2 = np.einsum('ij,ij->i', pos, pos)
    pos *= (1 + H0_inv * vr_dot / norm2).reshape(-1, 1)
@ -293,9 +295,9 @@ def number_counts(x, bin_edges):
    """
    Calculate counts of samples in bins.
    """
-    out = numpy.full(bin_edges.size - 1, numpy.nan, dtype=numpy.float32)
+    out = np.full(bin_edges.size - 1, np.nan, dtype=np.float32)
    for i in range(bin_edges.size - 1):
-        out[i] = numpy.sum((x >= bin_edges[i]) & (x < bin_edges[i + 1]))
+        out[i] = np.sum((x >= bin_edges[i]) & (x < bin_edges[i + 1]))
    return out
@ -303,12 +305,12 @@ def binned_statistic(x, y, left_edges, bin_width, statistic):
    """
    Calculate a binned statistic.
    """
-    out = numpy.full(left_edges.size, numpy.nan, dtype=x.dtype)
+    out = np.full(left_edges.size, np.nan, dtype=x.dtype)
    for i in range(left_edges.size):
        mask = (x >= left_edges[i]) & (x < left_edges[i] + bin_width)
-        if numpy.any(mask):
+        if np.any(mask):
            out[i] = statistic(y[mask])
    return out
@ -317,3 +319,112 @@ def fprint(msg, verbose=True):
    """Print and flush a message with a timestamp."""
    if verbose:
        print(f"{datetime.now()}:   {msg}", flush=True)
 ###############################################################################
 #                            ACL of MCMC chains                               #
 ###############################################################################
 def calculate_acf(data):
    """
    Calculates the autocorrelation of some data. Taken from `epsie` package
    written by Collin Capano.
    Parameters
    ----------
    data : 1-dimensional array
        The data to calculate the autocorrelation of.
    Returns
    -------
    acf : 1-dimensional array
    """
    # zero the mean
    data = data - data.mean()
    # zero-pad to 2 * nearest power of 2
    newlen = int(2**(1 + np.ceil(np.log2(len(data)))))
    x = np.zeros(newlen)
    x[:len(data)] = data[:]
    # correlate
    acf = np.correlate(x, x, mode='full')
    # drop corrupted region
    acf = acf[len(acf)//2:]
    # normalize
    acf /= acf[0]
    return acf
 def calculate_acl(data):
    """
    Calculate the autocorrelation length of some data. Taken from `epsie`
    package written by Collin Capano. Algorithm used is from:
        N. Madras and A.D. Sokal, J. Stat. Phys. 50, 109 (1988).
    Parameters
    ----------
    data : 1-dimensional array
        The data to calculate the autocorrelation length of.
    Returns
    -------
    acl : int
    """
    # calculate the acf
    acf = calculate_acf(data)
    # now the ACL: Following from Sokal, this is estimated
    # as the first point where M*tau[k] <= k, where
    # tau = 2*cumsum(acf) - 1, and M is a tuneable parameter,
    # generally chosen to be = 5 (which we use here)
    m = 5
    cacf = 2. * np.cumsum(acf) - 1.
    win = m * cacf <= np.arange(len(cacf))
    if win.any():
        acl = int(np.ceil(cacf[np.where(win)[0][0]]))
    else:
        # data is too short to estimate the ACL, just choose
        # the length of the data
        acl = len(data)
    return acl
 def thin_samples_by_acl(samples):
    """
    Thin MCMC samples by the autocorrelation length of each chain.
    Parameters
    ----------
    samples : dict
        Dictionary of samples. Each value is a 2-dimensional array of shape
        `(nchains, nsamples)`.
    Returns
    -------
    thinned_samples : dict
        Dictionary of thinned samples. Each value is a 1-dimensional array of
        shape `(n_thinned_samples)`, where the samples are concatenated across
        the chain.
    """
    keys = list(samples.keys())
    nchains = 1 if samples[keys[0]].ndim == 1 else samples[keys[0]].shape[0]
    samples = deepcopy(samples)
    if nchains == 1:
        for key in keys:
            samples[key] = samples[key].reshape(1, -1)
    # Calculate the ACL of each chain.
    acl = np.zeros(nchains, dtype=int)
    for i in range(nchains):
        acl[i] = max(calculate_acl(samples[key][i]) for key in keys)
    thinned_samples = {}
    for key in keys:
        key_samples = []
        for i in range(nchains):
            key_samples.append(samples[key][i, ::acl[i]])
        thinned_samples[key] = np.hstack(key_samples)
    return thinned_samples
--- a/notebooks/flow_calibration.ipynb
+++ b/notebooks/flow_calibration.ipynb
--- a/scripts/field_los.py
+++ b/scripts/field_los.py
@ -20,22 +20,24 @@ from datetime import datetime
 from gc import collect
 from os import makedirs, remove, rmdir
 from os.path import exists, join
 from warnings import warn
 import csiborgtools
 import numpy as np
 from astropy import units as u
 from astropy.coordinates import SkyCoord
 from h5py import File
 from mpi4py import MPI
 from taskmaster import work_delegation
 from utils import get_nsims
 ###############################################################################
 #                             I/O functions                                   #
 ###############################################################################
-def get_los(catalogue_name, comm):
+def get_los(catalogue_name, simname, comm):
    """
    Get the line of sight RA/dec coordinates for the given catalogue.
@ -43,6 +45,10 @@ def get_los(catalogue_name, comm):
    ----------
    catalogue_name : str
        Catalogue name.
    simname : str
        Simulation name.
    comm : mpi4py.MPI.Comm
        MPI communicator.
    Returns
    -------
@ -50,16 +56,42 @@ def get_los(catalogue_name, comm):
        RA/dec coordinates of the line of sight.
    """
    if comm.Get_rank() == 0:
-        pv_supranta_folder = "/mnt/extraspace/rstiskalek/catalogs/PV_Supranta"
+        folder = "/mnt/extraspace/rstiskalek/catalogs"
-        if catalogue_name == "A2":
+        if catalogue_name == "LOSS" or catalogue_name == "Foundation":
-            with File(join(pv_supranta_folder, "A2.h5"), 'r') as f:
+            fpath = join(folder, "PV_compilation_Supranta2019.hdf5")
            with File(fpath, 'r') as f:
                grp = f[catalogue_name]
                RA = grp["RA"][:]
                dec = grp["DEC"][:]
        elif catalogue_name == "A2":
            fpath = join(folder, "A2.h5")
            with File(fpath, 'r') as f:
                RA = f["RA"][:]
                dec = f["DEC"][:]
        elif "csiborg1" in catalogue_name:
            nsim = int(catalogue_name.split("_")[-1])
            cat = csiborgtools.read.CSiBORG1Catalogue(
                nsim, bounds={"totmass": (1e13, None)})
            seed = 42
            gen = np.random.default_rng(seed)
            mask = gen.choice(len(cat), size=100, replace=False)
            sph_pos = cat["spherical_pos"]
            RA = sph_pos[mask, 1]
            dec = sph_pos[mask, 2]
        else:
            raise ValueError(f"Unknown field name: `{catalogue_name}`.")
-        pos = np.vstack((RA, dec)).T
+        # The Carrick+2015 is in galactic coordinates, so we need to convert
        # the RA/dec to galactic coordinates.
        if simname == "Carrick2015":
            c = SkyCoord(ra=RA*u.degree, dec=dec*u.degree, frame='icrs')
            pos = np.vstack((c.galactic.l, c.galactic.b)).T
        else:
            pos = np.vstack((RA, dec)).T
    else:
        pos = None
@ -90,6 +122,17 @@ def get_field(simname, nsim, kind, MAS, grid):
    # Open the field reader.
    if simname == "csiborg1":
        field_reader = csiborgtools.read.CSiBORG1Field(nsim)
    elif simname == "Carrick2015":
        folder = "/mnt/extraspace/rstiskalek/catalogs"
        warn(f"Using local paths from `{folder}`.", RuntimeWarning)
        if kind == "density":
            fpath = join(folder, "twompp_density_carrick2015.npy")
            return np.load(fpath).astype(np.float32)
        elif kind == "velocity":
            fpath = join(folder, "twompp_velocity_carrick2015.npy")
            return np.load(fpath).astype(np.float32)
        else:
            raise ValueError(f"Unknown field kind: `{kind}`.")
    else:
        raise ValueError(f"Unknown simulation name: `{simname}`.")
@ -230,8 +273,8 @@ if __name__ == "__main__":
    args = parser.parse_args()
    rmax = 200
-    dr = 0.1
+    dr = 0.5
-    smooth_scales = None
+    smooth_scales = [0, 2, 4, 6]
    comm = MPI.COMM_WORLD
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
@ -248,8 +291,8 @@ if __name__ == "__main__":
        dump_folder = None
    dump_folder = comm.bcast(dump_folder, root=0)
-    # Get the line of sight RA/dec coordinates.
+    # Get the line of sight sky coordinates.
-    pos = get_los(args.catalogue, comm)
+    pos = get_los(args.catalogue, args.simname, comm)
    def main(nsim):
        interpolate_field(pos, args.simname, nsim, args.MAS, args.grid,
--- a/scripts/field_los.sh
+++ b/scripts/field_los.sh
@ -1,4 +1,4 @@
-nthreads=1
+nthreads=11
 memory=64
 on_login=${1}
 queue="berg"
@ -6,7 +6,8 @@ env="/mnt/users/rstiskalek/csiborgtools/venv_csiborg/bin/python"
 file="field_los.py"
 catalogue="A2"
-nsims="7444"
+# catalogue="csiborg1_9844"
 nsims="-1"
 simname="csiborg1"
 MAS="SPH"
 grid=1024
--- a/scripts/field_sample.py
+++ b/scripts/field_sample.py
@ -134,8 +134,6 @@ def open_galaxy_positions(survey_name, comm, scatter=None):
                    if scatter < 0:
                        raise ValueError("Scatter must be positive.")
                    if scatter > 0:
                        print(f"Adding scatter of {scatter} Mpc / h.",
                              flush=True)
                        pos = scatter_along_radial_direction(pos, scatter,
                                                             boxsize)
@ -186,7 +184,6 @@ def evaluate_field(field, pos, boxsize, smooth_scales, verbose=True):
                field, scale * mpc2box, boxsize=1, make_copy=True)
        else:
            field_smoothed = numpy.copy(field)
        print("Going to evaluate the field....")
        val[:, i] = csiborgtools.field.evaluate_sky(
            field_smoothed, pos=pos, mpc2box=mpc2box)
--- a/scripts/field_sample.sh
+++ b/scripts/field_sample.sh
@ -1,5 +1,5 @@
-nthreads=1
+nthreads=11
-memory=32
+memory=64
 on_login=${1}
 queue="berg"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
@ -7,11 +7,11 @@ file="field_sample.py"
 nsims="-1"
-simname="TNG300-1"
+simname="csiborg1"
-survey="TNG300-1"
+survey="SDSS"
 smooth_scales="0 2 4 8 16"
 kind="density"
-MAS="PCS"
+MAS="SPH"
 grid=1024
 scatter=0
--- a/scripts/flow_validation.py
+++ b/scripts/flow_validation.py
@ -0,0 +1,211 @@
 # Copyright (C) 2024 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 Script to run the PV validation model on various catalogues and simulations.
 The script is MPI parallelized over the IC realizations.
 """
 from argparse import ArgumentParser
 from datetime import datetime
 from os import makedirs, remove, rmdir
 from os.path import exists, join
 import csiborgtools
 import jax
 import numpy as np
 from h5py import File
 from mpi4py import MPI
 from numpyro.infer import MCMC, NUTS
 from taskmaster import work_delegation  # noqa
 def get_model(args, nsim):
    """
    Load the data and create the NumPyro model.
    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments.
    nsim : int
        Simulation index.
    Returns
    -------
    numpyro.Primitive
    """
    folder = "/mnt/extraspace/rstiskalek/catalogs/"
    if args.catalogue == "A2":
        fpath = join(folder, "A2.h5")
    elif args.catalogue == "LOSS" or args.catalogue == "Foundation":
        raise NotImplementedError("To be implemented..")
    else:
        raise ValueError(f"Unknown catalogue: `{args.catalogue}`.")
    loader = csiborgtools.flow.DataLoader(args.simname, args.catalogue, fpath,
                                          paths, ksmooth=args.ksmooth)
    Omega_m = csiborgtools.simname2Omega_m(args.simname)
    # Read in the data from the loader.
    los_overdensity = loader.los_density[:, nsim, :]
    los_velocity = loader.los_radial_velocity[:, nsim, :]
    RA = loader.cat["RA"]
    dec = loader.cat["DEC"]
    z_obs = loader.cat["z_obs"]
    r_hMpc = loader.cat["r_hMpc"]
    e_r_hMpc = loader.cat["e_rhMpc"]
    return csiborgtools.flow.SD_PV_validation_model(
        los_overdensity, los_velocity, RA, dec, z_obs, r_hMpc, e_r_hMpc,
        loader.rdist, Omega_m)
 def run_model(model, nsteps, nchains, nsim, dump_folder, show_progress=True):
    """
    Run the NumPyro model and save the thinned samples to a temporary file.
    Parameters
    ----------
    model : jax.numpyro.Primitive
        Model to be run.
    nsteps : int
        Number of steps.
    nchains : int
        Number of chains.
    nsim : int
        Simulation index.
    dump_folder : str
        Folder where the temporary files are stored.
    show_progress : bool
        Whether to show the progress bar.
    Returns
    -------
    None
    """
    nuts_kernel = NUTS(model)
    mcmc = MCMC(nuts_kernel, num_warmup=nsteps // 2, num_samples=nsteps // 2,
                chain_method="sequential", num_chains=nchains,
                progress_bar=show_progress)
    rng_key = jax.random.PRNGKey(42)
    mcmc.run(rng_key)
    if show_progress:
        print(f"Summary of the MCMC run of simulation indexed {nsim}:")
        mcmc.print_summary()
    samples = mcmc.get_samples()
    thinned_samples = csiborgtools.thin_samples_by_acl(samples)
    # Save the samples to the temporary folder.
    fname = join(dump_folder, f"samples_{nsim}.npz")
    np.savez(fname, **thinned_samples)
 def combine_from_simulations(catalogue_name, simname, nsims, outfolder,
                             dumpfolder, ksmooth):
    """
    Combine the results from individual simulations into a single file.
    Parameters
    ----------
    catalogue_name : str
        Catalogue name.
    simname : str
        Simulation name.
    nsims : list
        List of IC realisations.
    outfolder : str
        Output folder.
    dumpfolder : str
        Dumping folder where the temporary files are stored.
    ksmooth : int
        Smoothing index.
    Returns
    -------
    None
    """
    fname_out = join(
        outfolder,
        f"flow_samples_{catalogue_name}_{simname}_smooth_{ksmooth}.hdf5")
    print(f"Combining results from invidivual simulations to `{fname_out}`.")
    if exists(fname_out):
        remove(fname_out)
    for nsim in nsims:
        fname = join(dumpfolder, f"samples_{nsim}.npz")
        data = np.load(fname)
        with File(fname_out, 'a') as f:
            grp = f.create_group(f"sim_{nsim}")
            for key in data.files:
                grp.create_dataset(key, data=data[key])
        # Remove the temporary file.
        remove(fname)
    # Remove the dumping folder.
    rmdir(dumpfolder)
    print("Finished combining results.")
 ###############################################################################
 #                        Command line interface                               #
 ###############################################################################
 if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--simname", type=str, required=True,
                        help="Simulation name.")
    parser.add_argument("--catalogue", type=str, required=True,
                        help="PV catalogue.")
    parser.add_argument("--ksmooth", type=int, required=True,
                        help="Smoothing index.")
    args = parser.parse_args()
    comm = MPI.COMM_WORLD
    rank, size = comm.Get_rank(), comm.Get_size()
    out_folder = "/mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity"  # noqa
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsims = paths.get_ics(args.simname)
    nsteps = 5000
    nchains = 1
    # Create the dumping folder.
    if comm.Get_rank() == 0:
        dump_folder = join(out_folder,
                           f"temp_{str(datetime.now())}".replace(" ", "_"))
        print(f"Creating folder `{dump_folder}`.")
        makedirs(dump_folder)
    else:
        dump_folder = None
    dump_folder = comm.bcast(dump_folder, root=0)
    def main(nsim):
        model = get_model(args, nsim)
        run_model(model, nsteps, nchains, nsim, dump_folder,
                  show_progress=size == 1)
    work_delegation(main, nsims, comm, master_verbose=True)
    comm.Barrier()
    if rank == 0:
        combine_from_simulations(args.catalogue, args.simname, nsims,
                                 out_folder, dump_folder, args.ksmooth)
--- a/scripts/flow_validation.sh
+++ b/scripts/flow_validation.sh
@ -0,0 +1,23 @@
 memory=4
 on_login=${1}
 nthreads=${2}
 queue="berg"
 env="/mnt/users/rstiskalek/csiborgtools/venv_csiborg/bin/python"
 file="flow_validation.py"
 catalogue="A2"
 simname="Carrick2015"
 ksmooth=2
 pythoncm="$env $file --catalogue $catalogue --simname $simname --ksmooth $ksmooth"
 if [ $on_login -eq 1 ]; then
    echo $pythoncm
    $pythoncm
 else
    cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
    echo "Submitting:"
    echo $cm
    echo
    eval $cm
 fi
--- a/scripts_independent/A2_to_hdf5.ipynb
+++ b/scripts_independent/A2_to_hdf5.ipynb
@ -0,0 +1,150 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copyright (C) 2024 Richard Stiskalek\n",
    "# This program is free software; you can redistribute it and/or modify it\n",
    "# under the terms of the GNU General Public License as published by the\n",
    "# Free Software Foundation; either version 3 of the License, or (at your\n",
    "# option) any later version.\n",
    "# This program is distributed in the hope that it will be useful, but\n",
    "# WITHOUT ANY WARRANTY; without even the implied warranty of\n",
    "# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General\n",
    "# Public License for more details.\n",
    "#\n",
    "# You should have received a copy of the GNU General Public License along\n",
    "# with this program; if not, write to the Free Software Foundation, Inc.,\n",
    "# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.\n",
    "from os.path import join\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from h5py import File\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Supernovae data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "a2dir = \"/Users/richard/Data/PV/A2_paper_data/A2\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### LOSS data set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "names = [\"z_CMB\", \"mB\", \"x1\", \"c\", \"e_mB\", \"e_x1\", \"e_c\", \"RA\", \"DEC\"]\n",
    "dtype = [(n, np.float32) for n in names]\n",
    "data = np.genfromtxt(join(a2dir, \"loss.csv\"), delimiter=\",\", skip_header=1,\n",
    "                     usecols=[5 + n for n in range(len(names))])\n",
    "\n",
    "loss_data = np.empty(len(data), dtype=dtype)\n",
    "for i, n in enumerate(names):\n",
    "    loss_data[n] = data[:, i]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Foundation data set "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "names = [\"z_CMB\", \"RA\", \"DEC\", \"x1\", \"mB\", \"c\", \"peak\", \"e_peak\", \"e_x1\", \"e_mB\", \"e_c\"]\n",
    "dtype = [(n, np.float32) for n in names]\n",
    "data = np.genfromtxt(join(a2dir, \"foundation.csv\"), delimiter=\",\", skip_header=1,\n",
    "                     usecols=[3 + n for n in range(len(names))])\n",
    "\n",
    "foundation_data = np.empty(len(data), dtype=dtype)\n",
    "for i, n in enumerate(names):\n",
    "    foundation_data[n] = data[:, i]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write output as HDF5 file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "outdir = \"/Users/richard/Downloads\"\n",
    "fname = \"PV_compilation_Supranta2019.hdf5\"\n",
    "\n",
    "with File(join(outdir, fname), 'w') as f:\n",
    "    # Write LOSS\n",
    "    grp = f.create_group(\"LOSS\")\n",
    "    for name in loss_data.dtype.names:\n",
    "        grp.create_dataset(name, data=loss_data[name])\n",
    "\n",
    "    # Write Foundation\n",
    "    grp = f.create_group(\"Foundation\")\n",
    "    for name in foundation_data.dtype.names:\n",
    "        grp.create_dataset(name, data=foundation_data[name])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }