Add simple distance flow model (#114)

* Add imports * Add field LOS paths * Add basic flow model * Edit script * Add nb * Add nb * Update nb * Add some docs * Add RA reading * Add imoprts * Updates to the flow model * Update script * Bring back A2 * Update imports * Update imports * Add Carrick to ICs * Add Carrick boxsize * Add Carrick and fix minor bugs * Add Carrick box * Update script * Edit imports * Add fixed flow! * Update omega_m and add it * Update nb * Update nb * Update nb * Remove old print statements * Update params * Add thinning of chains * Add import * Add flow validation script * Add submit script * Add ksmooth * Update nb * Update params * Update script * Update string * Move where distributions are defined * Add density bias parameter * Add lognorm mean * Update scripts * Update script
2025-01-30 21:41:36 +00:00 · 2024-03-08 10:44:19 +00:00 · 2024-03-08 10:44:19 +00:00 · a65e3cb15b
commit a65e3cb15b
parent fb93f85543
14 changed files with 1762 additions and 60 deletions
--- a/csiborgtools/init.py
+++ b/csiborgtools/init.py
@ -12,14 +12,15 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-from csiborgtools import clustering, field, halo, match, read, summary          # noqa
+from csiborgtools import clustering, field, flow, halo, match, read, summary    # noqa

 from .utils import (center_of_mass, delta2ncells, number_counts,                # noqa
                    periodic_distance, periodic_distance_two_points,            # noqa
                    binned_statistic, cosine_similarity, fprint,                # noqa
                    hms_to_degrees, dms_to_degrees, great_circle_distance,      # noqa
-                    radec_to_cartesian, cartesian_to_radec)                     # noqa
-from .params import paths_glamdring, simname2boxsize                            # noqa
+                    radec_to_cartesian, cartesian_to_radec,                     # noqa
+                    thin_samples_by_acl)                                        # noqa
+from .params import paths_glamdring, simname2boxsize, simname2Omega_m           # noqa


 ###############################################################################
--- a/csiborgtools/flow/init.py
+++ b/csiborgtools/flow/init.py
@ -0,0 +1,17 @@
+# Copyright (C) 2024 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+from .flow_model import (DataLoader, radial_velocity_los, dist2redshift,        # noqa
+                         dist2distmodulus, predict_zobs, project_Vext,          # noqa
+                         SD_PV_validation_model)                                # noqa
--- a/csiborgtools/flow/flow_model.py
+++ b/csiborgtools/flow/flow_model.py
@ -0,0 +1,657 @@
+# Copyright (C) 2024 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Validation of the CSiBORG velocity field against PV measurements. Based on [1].
+
+References
+----------
+[1] https://arxiv.org/abs/1912.09383.
+"""
+from datetime import datetime
+from warnings import warn
+
+import numpy as np
+import numpyro
+import numpyro.distributions as dist
+from astropy import units as u
+from astropy.coordinates import SkyCoord
+from astropy.cosmology import FlatLambdaCDM
+from h5py import File
+from jax import numpy as jnp
+from jax import vmap
+from tqdm import tqdm, trange
+
+from ..params import simname2Omega_m
+from ..read import CSiBORG1Catalogue
+
+SPEED_OF_LIGHT = 299792.458  # km / s
+
+
+def t():
+    """Return the current local time formatted as `HH:MM:SS`."""
+    now = datetime.now()
+    return now.strftime("%H:%M:%S")
+
+
+def radec_to_galactic(ra, dec):
+    """
+    Transform ICRS right ascension and declination into galactic longitude
+    and latitude, with both the input and the output expressed in degrees.
+
+    Parameters
+    ----------
+    ra, dec : float or 1-dimensional array
+        Right ascension and declination in degrees.
+
+    Returns
+    -------
+    l, b : float or 1-dimensional array
+        Galactic longitude and latitude in degrees.
+    """
+    icrs_coords = SkyCoord(ra=ra*u.degree, dec=dec*u.degree, frame='icrs')
+    galactic_coords = icrs_coords.galactic
+    return galactic_coords.l.degree, galactic_coords.b.degree
+
+
+###############################################################################
+#                             Data loader                                     #
+###############################################################################
+
+
+class DataLoader:
+    """
+    Data loader for the line of sight (LOS) interpolated fields and the
+    corresponding catalogues.
+
+    On construction the catalogue and the LOS fields are read, the 3D
+    velocity is projected onto each object's line of sight, and the density
+    is rescaled depending on `simname` (see the comments in `__init__`).
+
+    Parameters
+    ----------
+    simname : str
+        Simulation name.
+    catalogue : str
+        Name of the catalogue with LOS objects.
+    catalogue_fpath : str
+        Path to the LOS catalogue file.
+    paths : csiborgtools.read.Paths
+        Paths object.
+    ksmooth : int, optional
+        Smoothing index. Required if the stored fields carry a trailing
+        smoothing axis.
+    store_full_velocity : bool, optional
+        Whether to store the full 3D velocity field. Otherwise stores only
+        the radial velocity.
+
+    Raises
+    ------
+    ValueError
+        If the number of catalogue objects does not match the number of
+        objects in the LOS field.
+    """
+    def __init__(self, simname, catalogue, catalogue_fpath, paths,
+                 ksmooth=None, store_full_velocity=False):
+        print(f"{t()}: reading the catalogue.")
+        self._cat = self._read_catalogue(catalogue, catalogue_fpath)
+        self._catname = catalogue
+
+        print(f"{t()}: reading the interpolated field.")
+        self._field_rdist, self._los_density, self._los_velocity = self._read_field(  # noqa
+            simname, catalogue, ksmooth, paths)
+
+        # Drop the first radial step if needed so that the number of samples
+        # is odd. The radial axis is the last axis of both field arrays.
+        if len(self._field_rdist) % 2 == 0:
+            warn(f"The number of radial steps is even. Skipping the first "
+                 f"step at {self._field_rdist[0]} because Simpson's rule "
+                 "requires an odd number of steps.")
+            self._field_rdist = self._field_rdist[1:]
+            self._los_density = self._los_density[..., 1:]
+            self._los_velocity = self._los_velocity[..., 1:]
+
+        # The leading axis of the density array indexes the objects.
+        if len(self._cat) != len(self._los_density):
+            raise ValueError("The number of objects in the catalogue does not "
+                             "match the number of objects in the field.")
+
+        print(f"{t()}: calculating the radial velocity.")
+        nobject, nsim = self._los_density.shape[:2]
+
+        # In case of Carrick 2015 the box is in galactic coordinates, so the
+        # projection angles are galactic longitude and latitude instead.
+        if simname == "Carrick2015":
+            d1, d2 = radec_to_galactic(self._cat["RA"], self._cat["DEC"])
+        else:
+            d1, d2 = self._cat["RA"], self._cat["DEC"]
+
+        # Project the 3D velocity of every object and simulation onto the
+        # direction of that object's line of sight.
+        radvel = np.empty((nobject, nsim, len(self._field_rdist)),
+                          self._los_velocity.dtype)
+        for i in trange(nobject):
+            for j in range(nsim):
+                radvel[i, j, :] = radial_velocity_los(
+                    self._los_velocity[:, i, j, ...], d1[i], d2[i])
+        self._los_radial_velocity = radvel
+
+        # Release the 3D velocity unless it was explicitly requested.
+        if not store_full_velocity:
+            self._los_velocity = None
+
+        Omega_m = simname2Omega_m(simname)
+
+        # Normalize the CSiBORG density by the mean matter density, computed
+        # as `Omega_m` times the critical density for `H0 = 100` in units of
+        # `Msun / kpc^3`.
+        # NOTE(review): assumes the stored density is in matching units --
+        # TODO confirm.
+        if "csiborg" in simname:
+            cosmo = FlatLambdaCDM(H0=100, Om0=Omega_m)
+            mean_rho_matter = cosmo.critical_density0.to("Msun/kpc^3").value
+            mean_rho_matter *= Omega_m
+            self._los_density /= mean_rho_matter
+
+        # Since Carrick+2015 provide `rho / <rho> - 1`
+        if simname == "Carrick2015":
+            self._los_density += 1
+
+    @property
+    def cat(self):
+        """
+        The distance indicators catalogue.
+
+        Returns
+        -------
+        structured array
+        """
+        return self._cat
+
+    @property
+    def catname(self):
+        """
+        Name of the catalogue.
+
+        Returns
+        -------
+        str
+        """
+        return self._catname
+
+    @property
+    def rdist(self):
+        """
+        Radial distances where the field was interpolated for each object.
+
+        Returns
+        -------
+        1-dimensional array
+        """
+        return self._field_rdist
+
+    @property
+    def los_density(self):
+        """
+        Density field along the line of sight.
+
+        Returns
+        -------
+        3-dimensional array of shape (n_objects, n_simulations, n_steps)
+        """
+        return self._los_density
+
+    @property
+    def los_velocity(self):
+        """
+        Velocity field along the line of sight. Only available if the loader
+        was created with `store_full_velocity=True`.
+
+        Returns
+        -------
+        4-dimensional array of shape (3, n_objects, n_simulations, n_steps)
+
+        Raises
+        ------
+        ValueError
+            If the 3D velocities were not stored.
+        """
+        if self._los_velocity is None:
+            raise ValueError("The 3D velocities were not stored.")
+        return self._los_velocity
+
+    @property
+    def los_radial_velocity(self):
+        """
+        Radial velocity along the line of sight.
+
+        Returns
+        -------
+        3-dimensional array of shape (n_objects, n_simulations, n_steps)
+        """
+        return self._los_radial_velocity
+
+    def _read_field(self, simname, catalogue, k, paths):
+        """
+        Read in the interpolated field from the HDF5 file located at
+        `paths.field_los(simname, catalogue)`.
+
+        For every realisation index returned by `paths.get_ics(simname)` the
+        datasets `density_{nsim}` and `velocity_{nsim}` are read. If the
+        density dataset has more than two dimensions, the fields are treated
+        as smoothed and their last axis is indexed with `k`.
+
+        Returns
+        -------
+        rdist : 1-dimensional array
+            Read from the `rdist_{nsim}` dataset of the first realisation.
+        out_density : array of shape (n_objects, n_simulations, n_steps)
+        out_velocity : array of shape (3, n_objects, n_simulations, n_steps)
+
+        Raises
+        ------
+        ValueError
+            If the fields are smoothed and `k` is not an integer.
+        """
+        out_density = None
+        out_velocity = None
+        has_smoothed = False
+
+        nsims = paths.get_ics(simname)
+        with File(paths.field_los(simname, catalogue), 'r') as f:
+            # An extra trailing axis signals several smoothing scales.
+            has_smoothed = True if f[f"density_{nsims[0]}"].ndim > 2 else False
+            if has_smoothed and (k is None or not isinstance(k, int)):
+                raise ValueError("The output contains smoothed field but "
+                                 "`ksmooth` is None. It must be provided.")
+
+            for i, nsim in enumerate(tqdm(nsims)):
+                # Allocate the outputs once the shape of a dataset is known.
+                if out_density is None:
+                    nobject, nstep = f[f"density_{nsim}"].shape[:2]
+                    out_density = np.empty(
+                        (nobject, len(nsims), nstep), dtype=np.float32)
+                    out_velocity = np.empty(
+                        (3, nobject, len(nsims), nstep), dtype=np.float32)
+
+                # Select the requested smoothing scale, or read everything.
+                indx = (..., k) if has_smoothed else (...)
+                out_density[:, i, :] = f[f"density_{nsim}"][indx]
+                out_velocity[:, :, i, :] = f[f"velocity_{nsim}"][indx]
+
+            rdist = f[f"rdist_{nsims[0]}"][:]
+
+        return rdist, out_density, out_velocity
+
+    def _read_catalogue(self, catalogue, catalogue_fpath):
+        """
+        Read in the distance indicator catalogue as a structured array whose
+        fields are all `float32`.
+
+        Supported values of `catalogue` are "A2", "LOSS", "Foundation" and
+        any name containing "csiborg1"; anything else raises a `ValueError`.
+        """
+        if catalogue == "A2":
+            # Every top-level dataset of the file becomes one field.
+            with File(catalogue_fpath, 'r') as f:
+                dtype = [(key, np.float32) for key in f.keys()]
+                arr = np.empty(len(f["RA"]), dtype=dtype)
+                for key in f.keys():
+                    arr[key] = f[key][:]
+        elif catalogue == "LOSS" or catalogue == "Foundation":
+            # The datasets live in a group named after the catalogue.
+            with File(catalogue_fpath, 'r') as f:
+                grp = f[catalogue]
+
+                dtype = [(key, np.float32) for key in grp.keys()]
+                arr = np.empty(len(grp["RA"]), dtype=dtype)
+                for key in grp.keys():
+                    arr[key] = grp[key][:]
+        elif "csiborg1" in catalogue:
+            # The realisation index is the last `_`-separated token.
+            nsim = int(catalogue.split("_")[-1])
+            cat = CSiBORG1Catalogue(nsim, bounds={"totmass": (1e13, None)})
+
+            # Reproducibly pick 100 distinct objects; `mask` holds integer
+            # indices rather than a boolean mask.
+            seed = 42
+            gen = np.random.default_rng(seed)
+            mask = gen.choice(len(cat), size=100, replace=False)
+
+            keys = ["r_hMpc", "RA", "DEC"]
+            dtype = [(key, np.float32) for key in keys]
+            arr = np.empty(len(mask), dtype=dtype)
+
+            # Columns 0, 1, 2 of `spherical_pos` are used as distance, RA
+            # and declination, respectively.
+            sph_pos = cat["spherical_pos"]
+            arr["r_hMpc"] = sph_pos[mask, 0]
+            arr["RA"] = sph_pos[mask, 1]
+            arr["DEC"] = sph_pos[mask, 2]
+            # TODO: add peculiar velocity
+        else:
+            raise ValueError(f"Unknown catalogue: `{catalogue}`.")
+
+        return arr
+
+
+###############################################################################
+#                       Supplementary flow functions                          #
+###############################################################################
+
+
+def radial_velocity_los(los_velocity, ra, dec):
+    """
+    Project the 3D velocity sampled along a line of sight (LOS) onto the LOS
+    direction, yielding the radial velocity at each step.
+
+    Parameters
+    ----------
+    los_velocity : 2-dimensional array of shape (3, n_steps)
+        Cartesian velocity components along the line of sight.
+    ra, dec : floats
+        Right ascension and declination of the line of sight in degrees.
+
+    Returns
+    -------
+    1-dimensional array of shape (n_steps)
+
+    Raises
+    ------
+    ValueError
+        If either `ra` or `dec` is not a real scalar, or if `los_velocity`
+        does not have the shape (3, n_steps).
+    """
+    # Both angles must be scalars: the projection is defined for a single
+    # direction. Integers are accepted alongside Python and NumPy floats so
+    # that inputs which previously passed the check keep working.
+    types = (float, int, np.floating, np.integer)
+    if not (isinstance(ra, types) and isinstance(dec, types)):
+        raise ValueError("RA and dec must be floats.")
+
+    # Either a wrong rank or a wrong leading dimension is an error on its own.
+    if los_velocity.ndim != 2 or los_velocity.shape[0] != 3:
+        raise ValueError("The shape of `los_velocity` must be (3, n_steps).")
+
+    ra_rad, dec_rad = np.deg2rad(ra), np.deg2rad(dec)
+
+    # Dot product of the velocity with the unit vector pointing along the LOS.
+    vx, vy, vz = los_velocity
+    return (vx * np.cos(ra_rad) * np.cos(dec_rad)
+            + vy * np.sin(ra_rad) * np.cos(dec_rad)
+            + vz * np.sin(dec_rad))
+
+
+###############################################################################
+#                           JAX Flow model                                    #
+###############################################################################
+
+
+def lognorm_mean_std_to_loc_scale(mu, std):
+    """
+    Convert the mean and standard deviation of a log-normally distributed
+    variable into the location and scale of the underlying normal
+    distribution.
+
+    Parameters
+    ----------
+    mu, std : float
+        Mean and standard deviation.
+
+    Returns
+    -------
+    loc, scale : float
+    """
+    # `log(1 + (std / mu)^2)` is the variance of the underlying normal.
+    log_term = np.log(1 + (std / mu) ** 2)
+    loc = np.log(mu) - 0.5 * log_term
+    scale = np.sqrt(log_term)
+    return loc, scale
+
+
+def simps(y, dx):
+    """
+    Integrate equally spaced samples with the composite Simpson's rule. The
+    number of samples must be odd, i.e. the number of intervals is even.
+
+    Parameters
+    ----------
+    y : 1-dimensional array
+        Function values.
+    dx : float
+        Step size.
+
+    Returns
+    -------
+    float
+
+    Raises
+    ------
+    ValueError
+        If the number of samples in `y` is even.
+    """
+    nsamples = len(y)
+    if nsamples % 2 == 0:
+        raise ValueError("The number of steps must be odd.")
+
+    # One term per pair of neighbouring intervals: left edge, four times the
+    # midpoint and right edge.
+    panels = y[0:-1:2] + 4 * y[1::2] + y[2::2]
+    return dx / 3 * jnp.sum(panels)
+
+
+def dist2redshift(dist, Omega_m):
+    """
+    Approximate the cosmological redshift corresponding to a comoving
+    distance, valid for a flat Universe and z << 1.
+
+    Parameters
+    ----------
+    dist : float or 1-dimensional array
+        Comoving distance in `Mpc / h`.
+    Omega_m : float
+        Matter density parameter.
+
+    Returns
+    -------
+    float or 1-dimensional array
+    """
+    H0 = 100
+    eta = 3 * Omega_m / 2
+    # Term under the square root of the closed-form expression.
+    radicand = 1 - 2 * H0 * dist / SPEED_OF_LIGHT * eta
+    return 1 / eta * (1 - radicand**0.5)
+
+
+def dist2distmodulus(dist, Omega_m):
+    """
+    Calculate the distance modulus corresponding to a comoving distance,
+    assuming z << 1.
+
+    Parameters
+    ----------
+    dist : float or 1-dimensional array
+        Comoving distance in `Mpc / h`.
+    Omega_m : float
+        Matter density parameter.
+
+    Returns
+    -------
+    float or 1-dimensional array
+    """
+    # The luminosity distance is the comoving distance times `1 + z`.
+    one_plus_z = 1 + dist2redshift(dist, Omega_m)
+    return 5 * jnp.log10(dist * one_plus_z) + 25
+
+
+def project_Vext(Vext_x, Vext_y, Vext_z, RA, dec):
+    """
+    Calculate the component of the external velocity along the line of sight
+    pointing in the direction given by RA/dec. The angles must be in radians.
+
+    Parameters
+    ----------
+    Vext_x, Vext_y, Vext_z : floats
+        Components of the external velocity.
+    RA, dec : floats
+        Right ascension and declination in radians.
+
+    Returns
+    -------
+    float
+    """
+    cos_dec = jnp.cos(dec)
+    # Contribution of each Cartesian component to the radial projection.
+    along_x = Vext_x * jnp.cos(RA) * cos_dec
+    along_y = Vext_y * jnp.sin(RA) * cos_dec
+    along_z = Vext_z * jnp.sin(dec)
+    return along_x + along_y + along_z
+
+
+def predict_zobs(dist, beta, Vext_radial, vpec_radial, Omega_m):
+    """
+    Calculate the observed redshift expected at a given comoving distance by
+    combining the cosmological redshift with the radial velocity.
+
+    Parameters
+    ----------
+    dist : float
+        Comoving distance in `Mpc / h`.
+    beta : float
+        Velocity bias parameter.
+    Vext_radial : float
+        Radial component of the external velocity along the LOS.
+    vpec_radial : float
+        Radial component of the peculiar velocity along the LOS.
+    Omega_m : float
+        Matter density parameter.
+
+    Returns
+    -------
+    float
+    """
+    one_plus_zcosmo = 1 + dist2redshift(dist, Omega_m)
+    # Total radial velocity: scaled peculiar velocity plus the external flow.
+    one_plus_zpec = 1 + (beta * vpec_radial + Vext_radial) / SPEED_OF_LIGHT
+    return one_plus_zcosmo * one_plus_zpec - 1
+
+
+###############################################################################
+#                          Flow validation models                             #
+###############################################################################
+
+
+def calculate_ptilde_wo_bias(xrange, mu, err, r_squared_xrange=None):
+    """
+    Evaluate the unnormalized `ptilde(r)` without any bias: a Gaussian in
+    distance, optionally multiplied by the `r^2` term.
+
+    Parameters
+    ----------
+    xrange : 1-dimensional array
+        Radial distances where the field was interpolated for each object.
+    mu : float
+        Comoving distance in `Mpc / h`.
+    err : float
+        Error on the comoving distance in `Mpc / h`.
+    r_squared_xrange : 1-dimensional array, optional
+        Radial distances squared where the field was interpolated for each
+        object. If not provided, the `r^2` correction is not applied.
+
+    Returns
+    -------
+    1-dimensional array
+    """
+    gauss = jnp.exp(-0.5 * ((xrange - mu) / err)**2)
+
+    if r_squared_xrange is None:
+        return gauss
+
+    return gauss * r_squared_xrange
+
+
+def calculate_ll_zobs(zobs, zobs_pred, sigma_v):
+    """
+    Evaluate the Gaussian likelihood (not its logarithm) of the observed
+    redshift given the predicted redshift, with the residual expressed as
+    a velocity.
+
+    Parameters
+    ----------
+    zobs : float
+        Observed redshift.
+    zobs_pred : float
+        Predicted redshift.
+    sigma_v : float
+        Velocity uncertainty.
+
+    Returns
+    -------
+    float
+    """
+    # Redshift residual converted to velocity units.
+    residual = SPEED_OF_LIGHT * (zobs - zobs_pred)
+    gauss = jnp.exp(-0.5 * (residual / sigma_v)**2)
+    return gauss / jnp.sqrt(2 * np.pi) / sigma_v
+
+
+class SD_PV_validation_model:
+    """
+    Simple distance peculiar velocity (PV) validation model, assuming that
+    we already have a calibrated estimate of the comoving distance to the
+    objects.
+
+    Parameters
+    ----------
+    los_density : 2-dimensional array of shape (n_objects, n_steps)
+        LOS density field.
+    los_velocity : 2-dimensional array of shape (n_objects, n_steps)
+        LOS radial velocity field.
+    RA, dec : 1-dimensional arrays of shape (n_objects)
+        Right ascension and declination in degrees. They are converted to
+        radians on input.
+    z_obs : 1-dimensional array of shape (n_objects)
+        Observed redshifts.
+    r_hMpc : 1-dimensional array of shape (n_objects)
+        Estimated comoving distances in `h^-1 Mpc`.
+    e_r_hMpc : 1-dimensional array of shape (n_objects)
+        Errors on the estimated comoving distances in `h^-1 Mpc`.
+    r_xrange : 1-dimensional array
+        Radial distances where the field was interpolated for each object.
+    Omega_m : float
+        Matter density parameter.
+
+    Raises
+    ------
+    ValueError
+        If the spacing of `r_xrange` is not constant.
+    """
+
+    def __init__(self, los_density, los_velocity, RA, dec, z_obs,
+                 r_hMpc, e_r_hMpc, r_xrange, Omega_m):
+        # Convert everything to JAX arrays.
+        dt = jnp.float32
+        self._los_density = jnp.asarray(los_density, dtype=dt)
+        self._los_velocity = jnp.asarray(los_velocity, dtype=dt)
+        self._RA = jnp.asarray(np.deg2rad(RA), dtype=dt)
+        self._dec = jnp.asarray(np.deg2rad(dec), dtype=dt)
+        self._z_obs = jnp.asarray(z_obs, dtype=dt)
+        self._r_hMpc = jnp.asarray(r_hMpc, dtype=dt)
+        self._e_rhMpc = jnp.asarray(e_r_hMpc, dtype=dt)
+
+        # Get radius squared, rescaled by its mean. The rescaling cancels in
+        # the likelihood ratio evaluated in `__call__`.
+        r2_xrange = r_xrange**2
+        r2_xrange /= r2_xrange.mean()
+
+        # Get the stepsize, we need it to be constant for Simpson's rule.
+        dr = np.diff(r_xrange)
+        if not np.all(np.isclose(dr, dr[0], atol=1e-5)):
+            raise ValueError("The radial step size must be constant.")
+        dr = dr[0]
+
+        # Get the various vmapped functions. Each one is mapped over the
+        # objects; `beta` and `sigma_v` are shared between all objects, which
+        # is what the `None` entries of `in_axes` express.
+        self._vmap_ptilde_wo_bias = vmap(lambda mu, err: calculate_ptilde_wo_bias(r_xrange, mu, err, r2_xrange))                        # noqa
+        self._vmap_simps = vmap(lambda y: simps(y, dr))
+        self._vmap_zobs = vmap(lambda beta, Vr, vpec_rad: predict_zobs(r_xrange, beta, Vr, vpec_rad, Omega_m), in_axes=(None, 0, 0))    # noqa
+        self._vmap_ll_zobs = vmap(lambda zobs, zobs_pred, sigma_v: calculate_ll_zobs(zobs, zobs_pred, sigma_v), in_axes=(0, 0, None))   # noqa
+
+        # Vext_x, Vext_y, Vext_z: external velocity components
+        self._dist_Vext = dist.Uniform(-1000, 1000)
+        # We want sigma_v to be 150 +- 100 km / s (lognormal)
+        self._dist_sigma_v = dist.LogNormal(
+            *lognorm_mean_std_to_loc_scale(150, 100))
+        # Density power-law bias
+        self._dist_alpha = dist.LogNormal(
+            *lognorm_mean_std_to_loc_scale(1.0, 0.5))
+        # Velocity bias
+        self._dist_beta = dist.Normal(1., 0.5)
+
+    def __call__(self):
+        """
+        The simple distance NumPyro PV validation model. Samples the following
+        parameters:
+            - `Vext_x`, `Vext_y`, `Vext_z`: external velocity components
+            - `alpha`: density bias parameter
+            - `beta`: velocity bias parameter
+            - `sigma_v`: velocity uncertainty
+
+        The log-likelihood summed over all objects is added to the model
+        through the `ll` factor.
+        """
+        Vx = numpyro.sample("Vext_x", self._dist_Vext)
+        Vy = numpyro.sample("Vext_y", self._dist_Vext)
+        Vz = numpyro.sample("Vext_z", self._dist_Vext)
+        alpha = numpyro.sample("alpha", self._dist_alpha)
+        beta = numpyro.sample("beta", self._dist_beta)
+        sigma_v = numpyro.sample("sigma_v", self._dist_sigma_v)
+
+        # Radial component of the external velocity for every object.
+        Vext_rad = project_Vext(Vx, Vy, Vz, self._RA, self._dec)
+
+        # Calculate p(r) and multiply it by the galaxy bias
+        ptilde = self._vmap_ptilde_wo_bias(self._r_hMpc, self._e_rhMpc)
+        ptilde *= self._los_density**alpha
+
+        # Normalization of p(r), i.e. its integral over the radial grid
+        # for each object.
+        pnorm = self._vmap_simps(ptilde)
+
+        # Calculate p(z_obs) and multiply it by p(r)
+        zobs_pred = self._vmap_zobs(beta, Vext_rad, self._los_velocity)
+        ptilde *= self._vmap_ll_zobs(self._z_obs, zobs_pred, sigma_v)
+
+        # Marginalize over the distance and sum the logarithms over objects.
+        ll = jnp.sum(jnp.log(self._vmap_simps(ptilde) / pnorm))
+        numpyro.factor("ll", ll)
+
+
+# def SN_PV_wcal_validation_model(los_overdensity=None, los_velocity=None,
+#                                 RA=None, dec=None, z_CMB=None,
+#                                 mB=None, x1=None, c=None,
+#                                 e_mB=None, e_x1=None, e_c=None,
+#                                 mu_xrange=None, r_xrange=None,
+#                                 norm_r2_xrange=None, Omega_m=None, dr=None):
+#     """
+#     Pass
+#     """
+#     Vx = numpyro.sample("Vext_x", dist.Uniform(-1000, 1000))
+#     Vy = numpyro.sample("Vext_y", dist.Uniform(-1000, 1000))
+#     Vz = numpyro.sample("Vext_z", dist.Uniform(-1000, 1000))
+#     beta = numpyro.sample("beta", dist.Uniform(-10, 10))
+#
+#     # TODO: Later sample these as well.
+#     e_mu_intrinsic = 0.064
+#     alpha_cal = 0.135
+#     beta_cal = 2.9
+#     mag_cal = -18.555
+#     sigma_v = 112
+#
+#     # TODO: Check these for fiducial values.
+#     mu = mB - mag_cal + alpha_cal * x1 - beta_cal * c
+#     squared_e_mu = e_mB**2 + alpha_cal**2 * e_x1**2 + beta_cal**2 * e_c**2
+#
+#     squared_e_mu += e_mu_intrinsic**2
+#     ll = 0.
+#     for i in range(len(los_overdensity)):
+#         # Project the external velocity for this galaxy.
+#         Vext_rad = project_Vext(Vx, Vy, Vz, RA[i], dec[i])
+#
+#         dmu = mu_xrange - mu[i]
+#         ptilde = norm_r2_xrange * jnp.exp(-0.5 * dmu**2 / squared_e_mu[i])
+#         # TODO: Add some bias
+#         ptilde *= (1 + los_overdensity[i])
+#
+#         zobs_pred = predict_zobs(r_xrange, beta, Vext_rad, los_velocity[i],
+#                                  Omega_m)
+#
+#         dczobs = SPEED_OF_LIGHT * (z_CMB[i] - zobs_pred)
+#
+#         ll_zobs = jnp.exp(-0.5 * (dczobs / sigma_v)**2) / sigma_v
+#
+#         ll += jnp.log(simps(ptilde * ll_zobs, dr))
+#         ll -= jnp.log(simps(ptilde, dr))
+#
+#     numpyro.factor("ll", ll)
--- a/csiborgtools/params.py
+++ b/csiborgtools/params.py
@ -13,7 +13,7 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-Various user parameters for csiborgtools.
+Various user parameters for CSiBORGTools.
 """


@ -37,7 +37,8 @@ def simname2boxsize(simname):
         "borg1": 677.7,
         "borg2": 676.6,
         "quijote": 1000.,
-         "TNG300-1": 205.
+         "TNG300-1": 205.,
+         "Carrick2015": 400.,
         }

    boxsize = d.get(simname, None)
@ -48,6 +49,32 @@ def simname2boxsize(simname):
    return boxsize


+def simname2Omega_m(simname):
+    """
+    Look up the matter density parameter of a simulation.
+
+    Parameters
+    ----------
+    simname : str
+        Simulation name.
+
+    Returns
+    -------
+    Omega_m: float
+
+    Raises
+    ------
+    ValueError
+        If `simname` is not one of the known simulations.
+    """
+    known = {"csiborg1": 0.307,
+             "borg1": 0.307,
+             "Carrick2015": 0.3,
+             }
+
+    if simname not in known:
+        raise ValueError("Unknown simname: {}".format(simname))
+
+    return known[simname]
+
+
 paths_glamdring = {
    "csiborg1_srcdir": "/mnt/extraspace/rstiskalek/csiborg1",
    "csiborg2_main_srcdir": "/mnt/extraspace/rstiskalek/csiborg2_main",
--- a/csiborgtools/read/paths.py
+++ b/csiborgtools/read/paths.py
@ -111,6 +111,8 @@ class Paths:
            files = glob(join(self.quijote_dir, "fiducial_processed",
                              "chain_*"))
            files = [int(search(r'chain_(\d+)', f).group(1)) for f in files]
+        elif simname == "Carrick2015":
+            return [0]
        else:
            raise ValueError(f"Unknown simulation name `{simname}`.")

@ -653,3 +655,22 @@ class Paths:
        str
        """
        return self.tng300_1_dir
+
+    def field_los(self, simnname, catalogue):
+        """
+        Path to the HDF5 file with the line-of-sight fields of a simulation
+        and catalogue pair. The parent directory is created if needed.
+
+        Parameters
+        ----------
+        simnname : str
+            Simulation name.
+        catalogue : str
+            Catalogue name.
+
+        Returns
+        -------
+        str
+        """
+        los_dir = join(self.postdir, "field_los")
+        try_create_directory(los_dir)
+        fname = f"los_{catalogue}_{simnname}.hdf5"
+        return join(los_dir, fname)
--- a/csiborgtools/utils.py
+++ b/csiborgtools/utils.py
@ -15,7 +15,9 @@
 """
 Collection of stand-off utility functions used in the scripts.
 """
-import numpy
+from copy import deepcopy
+
+import numpy as np
 from numba import jit
 from datetime import datetime

@ -30,17 +32,17 @@ def center_of_mass(particle_positions, particles_mass, boxsize):
    Calculate the center of mass of a halo while assuming periodic boundary
    conditions of a cubical box.
    """
-    cm = numpy.zeros(3, dtype=particle_positions.dtype)
+    cm = np.zeros(3, dtype=particle_positions.dtype)
    totmass = sum(particles_mass)

    # Convert positions to unit circle coordinates in the complex plane,
    # calculate the weighted average and convert it back to box coordinates.
    for i in range(3):
-        cm_i = sum(particles_mass * numpy.exp(
-            2j * numpy.pi * particle_positions[:, i] / boxsize))
+        cm_i = sum(particles_mass * np.exp(
+            2j * np.pi * particle_positions[:, i] / boxsize))
        cm_i /= totmass

-        cm_i = numpy.arctan2(cm_i.imag, cm_i.real) * boxsize / (2 * numpy.pi)
+        cm_i = np.arctan2(cm_i.imag, cm_i.real) * boxsize / (2 * np.pi)

        if cm_i < 0:
            cm_i += boxsize
@ -58,7 +60,7 @@ def periodic_distance(points, reference_point, boxsize):
    npoints = len(points)
    half_box = boxsize / 2

-    dist = numpy.zeros(npoints, dtype=points.dtype)
+    dist = np.zeros(npoints, dtype=points.dtype)
    for i in range(npoints):
        for j in range(3):
            dist_1d = abs(points[i, j] - reference_point[j])
@ -124,15 +126,15 @@ def cartesian_to_radec(X):
    """
    x, y, z = X[:, 0], X[:, 1], X[:, 2]

-    dist = numpy.linalg.norm(X, axis=1)
-    dec = numpy.arcsin(z / dist)
-    ra = numpy.arctan2(y, x)
-    ra[ra < 0] += 2 * numpy.pi
+    dist = np.linalg.norm(X, axis=1)
+    dec = np.arcsin(z / dist)
+    ra = np.arctan2(y, x)
+    ra[ra < 0] += 2 * np.pi

-    ra *= 180 / numpy.pi
-    dec *= 180 / numpy.pi
+    ra *= 180 / np.pi
+    dec *= 180 / np.pi

-    return numpy.vstack([dist, ra, dec]).T
+    return np.vstack([dist, ra, dec]).T


 def radec_to_cartesian(X):
@ -142,11 +144,11 @@ def radec_to_cartesian(X):
    """
    dist, ra, dec = X[:, 0], X[:, 1], X[:, 2]

-    cdec = numpy.cos(dec * numpy.pi / 180)
-    return numpy.vstack([
-        dist * cdec * numpy.cos(ra * numpy.pi / 180),
-        dist * cdec * numpy.sin(ra * numpy.pi / 180),
-        dist * numpy.sin(dec * numpy.pi / 180)
+    cdec = np.cos(dec * np.pi / 180)
+    return np.vstack([
+        dist * cdec * np.cos(ra * np.pi / 180),
+        dist * cdec * np.sin(ra * np.pi / 180),
+        dist * np.sin(dec * np.pi / 180)
        ]).T


@ -159,14 +161,14 @@ def great_circle_distance(x1, x2):
    ra1, dec1 = x1
    ra2, dec2 = x2

-    ra1 *= numpy.pi / 180
-    dec1 *= numpy.pi / 180
-    ra2 *= numpy.pi / 180
-    dec2 *= numpy.pi / 180
+    ra1 *= np.pi / 180
+    dec1 *= np.pi / 180
+    ra2 *= np.pi / 180
+    dec2 *= np.pi / 180

-    return 180 / numpy.pi * numpy.arccos(
-        numpy.sin(dec1) * numpy.sin(dec2)
-        + numpy.cos(dec1) * numpy.cos(dec2) * numpy.cos(ra1 - ra2)
+    return 180 / np.pi * np.arccos(
+        np.sin(dec1) * np.sin(dec2)
+        + np.cos(dec1) * np.cos(dec2) * np.cos(ra1 - ra2)
        )


@ -193,8 +195,8 @@ def cosine_similarity(x, y):
    if y.ndim == 1:
        y = y.reshape(1, -1)

-    out = numpy.sum(x * y, axis=1)
-    out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
+    out = np.sum(x * y, axis=1)
+    out /= np.linalg.norm(x) * np.linalg.norm(y, axis=1)

    return out[0] if out.size == 1 else out

@ -258,8 +260,8 @@ def real2redshift(pos, vel, observer_location, observer_velocity, boxsize,
        Redshift-space Cartesian position in `Mpc / h`.
    """
    if make_copy:
-        pos = numpy.copy(pos)
-        vel = numpy.copy(vel)
+        pos = np.copy(pos)
+        vel = np.copy(vel)

    H0_inv = 1. / 100

@ -267,8 +269,8 @@ def real2redshift(pos, vel, observer_location, observer_velocity, boxsize,
    pos -= observer_location
    vel -= observer_velocity

-    vr_dot = numpy.einsum('ij,ij->i', pos, vel)
-    norm2 = numpy.einsum('ij,ij->i', pos, pos)
+    vr_dot = np.einsum('ij,ij->i', pos, vel)
+    norm2 = np.einsum('ij,ij->i', pos, pos)

    pos *= (1 + H0_inv * vr_dot / norm2).reshape(-1, 1)

@ -293,9 +295,9 @@ def number_counts(x, bin_edges):
    """
    Calculate counts of samples in bins.
    """
-    out = numpy.full(bin_edges.size - 1, numpy.nan, dtype=numpy.float32)
+    out = np.full(bin_edges.size - 1, np.nan, dtype=np.float32)
    for i in range(bin_edges.size - 1):
-        out[i] = numpy.sum((x >= bin_edges[i]) & (x < bin_edges[i + 1]))
+        out[i] = np.sum((x >= bin_edges[i]) & (x < bin_edges[i + 1]))
    return out


@ -303,12 +305,12 @@ def binned_statistic(x, y, left_edges, bin_width, statistic):
    """
    Calculate a binned statistic.
    """
-    out = numpy.full(left_edges.size, numpy.nan, dtype=x.dtype)
+    out = np.full(left_edges.size, np.nan, dtype=x.dtype)

    for i in range(left_edges.size):
        mask = (x >= left_edges[i]) & (x < left_edges[i] + bin_width)

-        if numpy.any(mask):
+        if np.any(mask):
            out[i] = statistic(y[mask])
    return out

@ -317,3 +319,112 @@ def fprint(msg, verbose=True):
    """Print and flush a message with a timestamp."""
    if verbose:
        print(f"{datetime.now()}:   {msg}", flush=True)
+
+
+###############################################################################
+#                            ACL of MCMC chains                               #
+###############################################################################
+
+
+def calculate_acf(data):
+    """
+    Compute the normalised autocorrelation function of a series. Adapted from
+    the `epsie` package written by Collin Capano.
+
+    Parameters
+    ----------
+    data : 1-dimensional array
+        Series whose autocorrelation is computed.
+
+    Returns
+    -------
+    acf : 1-dimensional array
+        Autocorrelation at non-negative lags divided by its zero-lag value.
+        Its length is that of the zero-padded buffer, not `len(data)`.
+    """
+    ndata = len(data)
+    # Buffer of zeros whose length is twice the next power of two.
+    padded = np.zeros(int(2**(1 + np.ceil(np.log2(ndata)))))
+    # The mean-subtracted series occupies the start of the buffer.
+    padded[:ndata] = data - data.mean()
+    # Correlate the buffer with itself over all lags.
+    corr = np.correlate(padded, padded, mode='full')
+    # Keep the zero lag and the positive lags only.
+    acf = corr[len(corr)//2:]
+    # Scale so that the zero-lag value is unity.
+    acf /= acf[0]
+    return acf
+
+
+def calculate_acl(data):
+    """
+    Calculate the autocorrelation length of some data. Taken from `epsie`
+    package written by Collin Capano. Algorithm used is from:
+        N. Madras and A.D. Sokal, J. Stat. Phys. 50, 109 (1988).
+
+    Parameters
+    ----------
+    data : 1-dimensional array
+        The data to calculate the autocorrelation length of.
+
+    Returns
+    -------
+    acl : int
+        Autocorrelation length. It is never smaller than 1, so it can be used
+        directly as a thinning stride.
+    """
+    # calculate the acf
+    acf = calculate_acf(data)
+    # now the ACL: Following from Sokal, this is estimated
+    # as the first point where M*tau[k] <= k, where
+    # tau = 2*cumsum(acf) - 1, and M is a tuneable parameter,
+    # generally chosen to be = 5 (which we use here)
+    m = 5
+    cacf = 2. * np.cumsum(acf) - 1.
+    win = m * cacf <= np.arange(len(cacf))
+    if win.any():
+        tau = cacf[np.where(win)[0][0]]
+        # Anti-correlated data yield `tau <= 0`, e.g. an alternating series
+        # gives 0 after rounding. Clamp to one sample, since a zero or
+        # negative ACL cannot be used as a slice step.
+        acl = max(1, int(np.ceil(tau)))
+    else:
+        # data is too short to estimate the ACL, just choose
+        # the length of the data
+        acl = len(data)
+    return acl
+
+
+def thin_samples_by_acl(samples):
+    """
+    Thin MCMC samples by the autocorrelation length of each chain.
+
+    Each chain is thinned with a stride equal to the largest autocorrelation
+    length found among its parameters. The input dictionary and its arrays
+    are not modified.
+
+    Parameters
+    ----------
+    samples : dict
+        Dictionary of samples. Each value is a 2-dimensional array of shape
+        `(nchains, nsamples)`, or a 1-dimensional array of shape `(nsamples)`
+        which is treated as a single chain. The number of chains is read from
+        the first entry.
+
+    Returns
+    -------
+    thinned_samples : dict
+        Dictionary of thinned samples. Each value is a 1-dimensional array of
+        shape `(n_thinned_samples)`, where the samples are concatenated across
+        the chain. An empty input yields an empty dictionary.
+    """
+    keys = list(samples.keys())
+    if not keys:
+        return {}
+
+    first = samples[keys[0]]
+    nchains = 1 if first.ndim == 1 else first.shape[0]
+
+    # `reshape` returns a new array object and leaves the input's shape
+    # alone, so no deep copy of the samples is needed.
+    if nchains == 1:
+        chains = {key: samples[key].reshape(1, -1) for key in keys}
+    else:
+        chains = {key: samples[key] for key in keys}
+
+    # Thinning stride of each chain: the largest ACL among its parameters.
+    # Clamped to 1 because a zero slice step raises and a negative one would
+    # reverse the chain.
+    strides = [
+        max(1, max(calculate_acl(chains[key][i]) for key in keys))
+        for i in range(nchains)]
+
+    thinned_samples = {}
+    for key in keys:
+        thinned_samples[key] = np.hstack(
+            [chains[key][i, ::strides[i]] for i in range(nchains)])
+
+    return thinned_samples
--- a/notebooks/flow_calibration.ipynb
+++ b/notebooks/flow_calibration.ipynb
--- a/scripts/field_los.py
+++ b/scripts/field_los.py
@ -20,22 +20,24 @@ from datetime import datetime
 from gc import collect
 from os import makedirs, remove, rmdir
 from os.path import exists, join
+from warnings import warn

 import csiborgtools
 import numpy as np
+from astropy import units as u
+from astropy.coordinates import SkyCoord
 from h5py import File
 from mpi4py import MPI
 from taskmaster import work_delegation

 from utils import get_nsims

-
 ###############################################################################
 #                             I/O functions                                   #
 ###############################################################################


-def get_los(catalogue_name, comm):
+def get_los(catalogue_name, simname, comm):
    """
    Get the line of sight RA/dec coordinates for the given catalogue.

@ -43,6 +45,10 @@ def get_los(catalogue_name, comm):
    ----------
    catalogue_name : str
        Catalogue name.
+    simname : str
+        Simulation name.
+    comm : mpi4py.MPI.Comm
+        MPI communicator.

    Returns
    -------
@ -50,16 +56,42 @@ def get_los(catalogue_name, comm):
        RA/dec coordinates of the line of sight.
    """
    if comm.Get_rank() == 0:
-        pv_supranta_folder = "/mnt/extraspace/rstiskalek/catalogs/PV_Supranta"
+        folder = "/mnt/extraspace/rstiskalek/catalogs"

-        if catalogue_name == "A2":
-            with File(join(pv_supranta_folder, "A2.h5"), 'r') as f:
+        if catalogue_name == "LOSS" or catalogue_name == "Foundation":
+            fpath = join(folder, "PV_compilation_Supranta2019.hdf5")
+            with File(fpath, 'r') as f:
+                grp = f[catalogue_name]
+                RA = grp["RA"][:]
+                dec = grp["DEC"][:]
+        elif catalogue_name == "A2":
+            fpath = join(folder, "A2.h5")
+            with File(fpath, 'r') as f:
                RA = f["RA"][:]
                dec = f["DEC"][:]
+        elif "csiborg1" in catalogue_name:
+            nsim = int(catalogue_name.split("_")[-1])
+            cat = csiborgtools.read.CSiBORG1Catalogue(
+                nsim, bounds={"totmass": (1e13, None)})
+
+            seed = 42
+            gen = np.random.default_rng(seed)
+            mask = gen.choice(len(cat), size=100, replace=False)
+
+            sph_pos = cat["spherical_pos"]
+            RA = sph_pos[mask, 1]
+            dec = sph_pos[mask, 2]
        else:
            raise ValueError(f"Unknown field name: `{catalogue_name}`.")

-        pos = np.vstack((RA, dec)).T
+        # The Carrick+2015 is in galactic coordinates, so we need to convert
+        # the RA/dec to galactic coordinates.
+        if simname == "Carrick2015":
+            c = SkyCoord(ra=RA*u.degree, dec=dec*u.degree, frame='icrs')
+            pos = np.vstack((c.galactic.l, c.galactic.b)).T
+        else:
+            pos = np.vstack((RA, dec)).T
+
    else:
        pos = None

@ -90,6 +122,17 @@ def get_field(simname, nsim, kind, MAS, grid):
    # Open the field reader.
    if simname == "csiborg1":
        field_reader = csiborgtools.read.CSiBORG1Field(nsim)
+    elif simname == "Carrick2015":
+        folder = "/mnt/extraspace/rstiskalek/catalogs"
+        warn(f"Using local paths from `{folder}`.", RuntimeWarning)
+        if kind == "density":
+            fpath = join(folder, "twompp_density_carrick2015.npy")
+            return np.load(fpath).astype(np.float32)
+        elif kind == "velocity":
+            fpath = join(folder, "twompp_velocity_carrick2015.npy")
+            return np.load(fpath).astype(np.float32)
+        else:
+            raise ValueError(f"Unknown field kind: `{kind}`.")
    else:
        raise ValueError(f"Unknown simulation name: `{simname}`.")

@ -230,8 +273,8 @@ if __name__ == "__main__":
    args = parser.parse_args()

    rmax = 200
-    dr = 0.1
-    smooth_scales = None
+    dr = 0.5
+    smooth_scales = [0, 2, 4, 6]

    comm = MPI.COMM_WORLD
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
@ -248,8 +291,8 @@ if __name__ == "__main__":
        dump_folder = None
    dump_folder = comm.bcast(dump_folder, root=0)

-    # Get the line of sight RA/dec coordinates.
-    pos = get_los(args.catalogue, comm)
+    # Get the line of sight sky coordinates.
+    pos = get_los(args.catalogue, args.simname, comm)

    def main(nsim):
        interpolate_field(pos, args.simname, nsim, args.MAS, args.grid,
--- a/scripts/field_los.sh
+++ b/scripts/field_los.sh
@ -1,4 +1,4 @@
-nthreads=1
+nthreads=11
 memory=64
 on_login=${1}
 queue="berg"
@ -6,7 +6,8 @@ env="/mnt/users/rstiskalek/csiborgtools/venv_csiborg/bin/python"
 file="field_los.py"

 catalogue="A2"
-nsims="7444"
+# catalogue="csiborg1_9844"
+nsims="-1"
 simname="csiborg1"
 MAS="SPH"
 grid=1024
--- a/scripts/field_sample.py
+++ b/scripts/field_sample.py
@ -134,8 +134,6 @@ def open_galaxy_positions(survey_name, comm, scatter=None):
                    if scatter < 0:
                        raise ValueError("Scatter must be positive.")
                    if scatter > 0:
-                        print(f"Adding scatter of {scatter} Mpc / h.",
-                              flush=True)
                        pos = scatter_along_radial_direction(pos, scatter,
                                                             boxsize)

@ -186,7 +184,6 @@ def evaluate_field(field, pos, boxsize, smooth_scales, verbose=True):
                field, scale * mpc2box, boxsize=1, make_copy=True)
        else:
            field_smoothed = numpy.copy(field)
-        print("Going to evaluate the field....")
        val[:, i] = csiborgtools.field.evaluate_sky(
            field_smoothed, pos=pos, mpc2box=mpc2box)

--- a/scripts/field_sample.sh
+++ b/scripts/field_sample.sh
@ -1,5 +1,5 @@
-nthreads=1
-memory=32
+nthreads=11
+memory=64
 on_login=${1}
 queue="berg"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
@ -7,11 +7,11 @@ file="field_sample.py"


 nsims="-1"
-simname="TNG300-1"
-survey="TNG300-1"
+simname="csiborg1"
+survey="SDSS"
 smooth_scales="0 2 4 8 16"
 kind="density"
-MAS="PCS"
+MAS="SPH"
 grid=1024
 scatter=0

--- a/scripts/flow_validation.py
+++ b/scripts/flow_validation.py
@ -0,0 +1,211 @@
+# Copyright (C) 2024 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Script to run the PV validation model on various catalogues and simulations.
+The script is MPI parallelized over the IC realizations.
+"""
+from argparse import ArgumentParser
+from datetime import datetime
+from os import makedirs, remove, rmdir
+from os.path import exists, join
+
+import csiborgtools
+import jax
+import numpy as np
+from h5py import File
+from mpi4py import MPI
+from numpyro.infer import MCMC, NUTS
+from taskmaster import work_delegation  # noqa
+
+
+def get_model(args, nsim):
+    """
+    Read the line-of-sight data of a single IC realisation and build the
+    NumPyro peculiar velocity validation model.
+
+    The data loader is given the module-level `paths` object, which is only
+    defined when the file is run as a script.
+
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Command line arguments; `catalogue`, `simname` and `ksmooth` are read.
+    nsim : int
+        Simulation index, used to select the second axis of the LOS fields.
+
+    Returns
+    -------
+    numpyro.Primitive
+
+    Raises
+    ------
+    NotImplementedError
+        If the catalogue is `LOSS` or `Foundation`.
+    ValueError
+        If the catalogue name is not recognised.
+    """
+    catalogue = args.catalogue
+    if catalogue in ("LOSS", "Foundation"):
+        raise NotImplementedError("To be implemented..")
+    if catalogue != "A2":
+        raise ValueError(f"Unknown catalogue: `{args.catalogue}`.")
+
+    # Only the A2 catalogue is supported at the moment.
+    fpath = join("/mnt/extraspace/rstiskalek/catalogs/", "A2.h5")
+
+    loader = csiborgtools.flow.DataLoader(
+        args.simname, catalogue, fpath, paths, ksmooth=args.ksmooth)
+    Omega_m = csiborgtools.simname2Omega_m(args.simname)
+
+    # LOS density and radial velocity of the requested realisation.
+    los_fields = (loader.los_density[:, nsim, :],
+                  loader.los_radial_velocity[:, nsim, :])
+
+    # Catalogue columns, in the order the model expects them.
+    cat = loader.cat
+    columns = ("RA", "DEC", "z_obs", "r_hMpc", "e_rhMpc")
+    observables = [cat[col] for col in columns]
+
+    return csiborgtools.flow.SD_PV_validation_model(
+        *los_fields, *observables, loader.rdist, Omega_m)
+
+
+def run_model(model, nsteps, nchains, nsim, dump_folder, show_progress=True):
+    """
+    Run the NumPyro model and save the thinned samples to a temporary file.
+
+    The sampler is NUTS with a fixed random seed of 42. The samples are
+    thinned chain by chain and written to `samples_{nsim}.npz` inside
+    `dump_folder`.
+
+    Parameters
+    ----------
+    model : jax.numpyro.Primitive
+        Model to be run.
+    nsteps : int
+        Number of steps. Half of them (integer division) are used for warm-up
+        and the other half are kept as samples.
+    nchains : int
+        Number of chains, run sequentially.
+    nsim : int
+        Simulation index.
+    dump_folder : str
+        Folder where the temporary files are stored.
+    show_progress : bool
+        Whether to show the progress bar and print the run summary.
+
+    Returns
+    -------
+    None
+    """
+    nuts_kernel = NUTS(model)
+    mcmc = MCMC(nuts_kernel, num_warmup=nsteps // 2, num_samples=nsteps // 2,
+                chain_method="sequential", num_chains=nchains,
+                progress_bar=show_progress)
+    rng_key = jax.random.PRNGKey(42)
+    mcmc.run(rng_key)
+
+    if show_progress:
+        print(f"Summary of the MCMC run of simulation indexed {nsim}:")
+        mcmc.print_summary()
+
+    # Keep the chain axis so that every chain is thinned by its own ACL. By
+    # default the chains are flattened into one axis, which would make the
+    # thinning treat several chains as a single long one.
+    samples = mcmc.get_samples(group_by_chain=True)
+    thinned_samples = csiborgtools.thin_samples_by_acl(samples)
+
+    # Save the samples to the temporary folder.
+    fname = join(dump_folder, f"samples_{nsim}.npz")
+    np.savez(fname, **thinned_samples)
+
+
+def combine_from_simulations(catalogue_name, simname, nsims, outfolder,
+                             dumpfolder, ksmooth):
+    """
+    Combine the results from individual simulations into a single file.
+
+    The output HDF5 file holds one group `sim_{nsim}` per simulation, with
+    one dataset per array stored in the matching temporary `.npz` file. Any
+    existing output file is overwritten. Each temporary file is deleted once
+    it has been copied, and the dumping folder is removed at the end.
+
+    Parameters
+    ----------
+    catalogue_name : str
+        Catalogue name.
+    simname : str
+        Simulation name.
+    nsims : list
+        List of IC realisations.
+    outfolder : str
+        Output folder.
+    dumpfolder : str
+        Dumping folder where the temporary files are stored.
+    ksmooth : int
+        Smoothing index.
+
+    Returns
+    -------
+    None
+    """
+    fname_out = join(
+        outfolder,
+        f"flow_samples_{catalogue_name}_{simname}_smooth_{ksmooth}.hdf5")
+    print(f"Combining results from invidivual simulations to `{fname_out}`.")
+
+    # Mode 'w' truncates an existing file. A single handle is kept open
+    # instead of reopening the output for every simulation.
+    with File(fname_out, 'w') as f:
+        for nsim in nsims:
+            fname = join(dumpfolder, f"samples_{nsim}.npz")
+
+            # Close the archive before deleting the file it was read from.
+            with np.load(fname) as data:
+                grp = f.create_group(f"sim_{nsim}")
+                for key in data.files:
+                    grp.create_dataset(key, data=data[key])
+
+            # Remove the temporary file.
+            remove(fname)
+
+    # Remove the dumping folder.
+    rmdir(dumpfolder)
+    print("Finished combining results.")
+
+###############################################################################
+#                        Command line interface                               #
+###############################################################################
+
+
+if __name__ == "__main__":
+    # Command line arguments: simulation name, PV catalogue and the index of
+    # the smoothing scale. All three are required.
+    parser = ArgumentParser()
+    parser.add_argument("--simname", type=str, required=True,
+                        help="Simulation name.")
+    parser.add_argument("--catalogue", type=str, required=True,
+                        help="PV catalogue.")
+    parser.add_argument("--ksmooth", type=int, required=True,
+                        help="Smoothing index.")
+    args = parser.parse_args()
+
+    comm = MPI.COMM_WORLD
+    rank, size = comm.Get_rank(), comm.Get_size()
+    out_folder = "/mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity"  # noqa
+
+    # `paths` is also read as a module-level global inside `get_model`.
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    nsims = paths.get_ics(args.simname)
+
+    # Sampler settings, passed to `run_model` for every simulation.
+    nsteps = 5000
+    nchains = 1
+
+    # Create the dumping folder. Only rank 0 creates it; its timestamped path
+    # is then broadcast so that every rank writes to the same folder.
+    if comm.Get_rank() == 0:
+        dump_folder = join(out_folder,
+                           f"temp_{str(datetime.now())}".replace(" ", "_"))
+        print(f"Creating folder `{dump_folder}`.")
+        makedirs(dump_folder)
+    else:
+        dump_folder = None
+    dump_folder = comm.bcast(dump_folder, root=0)
+
+    def main(nsim):
+        """Build and sample the model of a single IC realisation."""
+        model = get_model(args, nsim)
+        # The progress bar and summary are shown only when running on a
+        # single MPI process.
+        run_model(model, nsteps, nchains, nsim, dump_folder,
+                  show_progress=size == 1)
+
+    work_delegation(main, nsims, comm, master_verbose=True)
+    # Wait for all ranks before the temporary files are combined.
+    comm.Barrier()
+
+    # Rank 0 merges the per-simulation files into a single output file.
+    if rank == 0:
+        combine_from_simulations(args.catalogue, args.simname, nsims,
+                                 out_folder, dump_folder, args.ksmooth)
--- a/scripts/flow_validation.sh
+++ b/scripts/flow_validation.sh
@ -0,0 +1,23 @@
+# Run `flow_validation.py` directly or submit it to the queue.
+#
+# Arguments:
+#   $1  1 to run the command directly, any other integer to submit it to the
+#       queue (default: 0).
+#   $2  Number of threads requested from the queue (default: 1).
+memory=4
+# Defaults keep the test below and the `addqueue` command well-formed when
+# an argument is omitted.
+on_login=${1:-0}
+nthreads=${2:-1}
+queue="berg"
+env="/mnt/users/rstiskalek/csiborgtools/venv_csiborg/bin/python"
+file="flow_validation.py"
+
+catalogue="A2"
+simname="Carrick2015"
+ksmooth=2
+
+
+pythoncm="$env $file --catalogue $catalogue --simname $simname --ksmooth $ksmooth"
+# Quoted so that the test still receives an operand if the value is empty.
+if [ "$on_login" -eq 1 ]; then
+    echo $pythoncm
+    $pythoncm
+else
+    cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
+    echo "Submitting:"
+    echo $cm
+    echo
+    eval $cm
+fi
--- a/scripts_independent/A2_to_hdf5.ipynb
+++ b/scripts_independent/A2_to_hdf5.ipynb
@ -0,0 +1,150 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright (C) 2024 Richard Stiskalek\n",
+    "# This program is free software; you can redistribute it and/or modify it\n",
+    "# under the terms of the GNU General Public License as published by the\n",
+    "# Free Software Foundation; either version 3 of the License, or (at your\n",
+    "# option) any later version.\n",
+    "# This program is distributed in the hope that it will be useful, but\n",
+    "# WITHOUT ANY WARRANTY; without even the implied warranty of\n",
+    "# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General\n",
+    "# Public License for more details.\n",
+    "#\n",
+    "# You should have received a copy of the GNU General Public License along\n",
+    "# with this program; if not, write to the Free Software Foundation, Inc.,\n",
+    "# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.\n",
+    "from os.path import join\n",
+    "\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from h5py import File\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Supernovae data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "a2dir = \"/Users/richard/Data/PV/A2_paper_data/A2\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### LOSS data set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "names = [\"z_CMB\", \"mB\", \"x1\", \"c\", \"e_mB\", \"e_x1\", \"e_c\", \"RA\", \"DEC\"]\n",
+    "dtype = [(n, np.float32) for n in names]\n",
+    "data = np.genfromtxt(join(a2dir, \"loss.csv\"), delimiter=\",\", skip_header=1,\n",
+    "                     usecols=[5 + n for n in range(len(names))])\n",
+    "\n",
+    "loss_data = np.empty(len(data), dtype=dtype)\n",
+    "for i, n in enumerate(names):\n",
+    "    loss_data[n] = data[:, i]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Foundation data set "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "names = [\"z_CMB\", \"RA\", \"DEC\", \"x1\", \"mB\", \"c\", \"peak\", \"e_peak\", \"e_x1\", \"e_mB\", \"e_c\"]\n",
+    "dtype = [(n, np.float32) for n in names]\n",
+    "data = np.genfromtxt(join(a2dir, \"foundation.csv\"), delimiter=\",\", skip_header=1,\n",
+    "                     usecols=[3 + n for n in range(len(names))])\n",
+    "\n",
+    "foundation_data = np.empty(len(data), dtype=dtype)\n",
+    "for i, n in enumerate(names):\n",
+    "    foundation_data[n] = data[:, i]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Write output as HDF5 file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "outdir = \"/Users/richard/Downloads\"\n",
+    "fname = \"PV_compilation_Supranta2019.hdf5\"\n",
+    "\n",
+    "with File(join(outdir, fname), 'w') as f:\n",
+    "    # Write LOSS\n",
+    "    grp = f.create_group(\"LOSS\")\n",
+    "    for name in loss_data.dtype.names:\n",
+    "        grp.create_dataset(name, data=loss_data[name])\n",
+    "\n",
+    "    # Write Foundation\n",
+    "    grp = f.create_group(\"Foundation\")\n",
+    "    for name in foundation_data.dtype.names:\n",
+    "        grp.create_dataset(name, data=foundation_data[name])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}