Direct distance sampling (#155)

* Update nb * Update dependency * Pass marg arg * Add mu sampling * Update imprts * Move cosmography to a ceparate module * Add mock void * Check Vext likelihoo * Add void mock * Add void mocks
2025-06-07 09:31:10 +00:00 · 2024-10-07 22:16:35 +02:00 · 2024-10-07 22:16:35 +02:00 · 76a7609f7f
commit 76a7609f7f
parent 34f997cd33
8 changed files with 685 additions and 555 deletions
--- a/csiborgtools/flow/init.py
+++ b/csiborgtools/flow/init.py
@ -18,4 +18,5 @@ from .flow_model import (PV_LogLikelihood, PV_validation_model, dist2redshift,
                         Observed2CosmologicalRedshift, predict_zobs,           # noqa
                         project_Vext, stack_pzosmo_over_realizations)          # noqa
 from .selection import ToyMagnitudeSelection                                    # noqa
-from .void_model import load_void_data, interpolate_void                        # noqa
+from .void_model import (load_void_data, interpolate_void, select_void_h,       # noqa
+                         mock_void)                                             # noqa
--- a/csiborgtools/flow/cosmography.py
+++ b/csiborgtools/flow/cosmography.py
@ -0,0 +1,94 @@
+# Copyright (C) 2024 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""Various cosmography functions for converting between distance indicators."""
+from jax import numpy as jnp
+
+from ..params import SPEED_OF_LIGHT
+
+H0 = 100  # km / s / Mpc
+
+
+def dist2redshift(dist, Omega_m, h=1.):
+    """
+    Convert comoving distance to cosmological redshift if the Universe is
+    flat and z << 1.
+    """
+    eta = 3 * Omega_m / 2
+    return 1 / eta * (1 - (1 - 2 * 100 * h * dist / SPEED_OF_LIGHT * eta)**0.5)
+
+
+def redshift2dist(z, Omega_m):
+    """
+    Convert cosmological redshift to comoving distance if the Universe is
+    flat and z << 1.
+    """
+    q0 = 3 * Omega_m / 2 - 1
+    return SPEED_OF_LIGHT * z / (2 * H0) * (2 - z * (1 + q0))
+
+
+def gradient_redshift2dist(z, Omega_m):
+    """
+    Gradient of the redshift to comoving distance conversion if the Universe is
+    flat and z << 1.
+    """
+    q0 = 3 * Omega_m / 2 - 1
+    return SPEED_OF_LIGHT / H0 * (1 - z * (1 + q0))
+
+
+def distmod2dist(mu, Om0):
+    """
+    Convert distance modulus to distance in `Mpc / h`. The expression is valid
+    for a flat universe over the range of 0.00001 < z < 0.1.
+    """
+    term1 = jnp.exp((0.443288 * mu) + (-14.286531))
+    term2 = (0.506973 * mu) + 12.954633
+    term3 = ((0.028134 * mu) ** (
+        ((0.684713 * mu)
+         + ((0.151020 * mu) + (1.235158 * Om0))) - jnp.exp(0.072229 * mu)))
+    term4 = (-0.045160) * mu
+    return (-0.000301) + (term1 * (term2 - (term3 - term4)))
+
+
+def distmod2dist_gradient(mu, Om0):
+    """
+    Calculate the derivative of comoving distance in `Mpc / h` with respect to
+    the distance modulus. The expression is valid for a flat universe over the
+    range of 0.00001 < z < 0.1.
+    """
+    term1 = jnp.exp((0.443288 * mu) + (-14.286531))
+    dterm1 = 0.443288 * term1
+
+    term2 = (0.506973 * mu) + 12.954633
+    dterm2 = 0.506973
+
+    term3 = ((0.028134 * mu)**(((0.684713 * mu) + ((0.151020 * mu) + (1.235158 * Om0))) - jnp.exp(0.072229 * mu)))  # noqa
+    ln_base = jnp.log(0.028134) + jnp.log(mu)
+    exponent = 0.835733 * mu + 1.235158 * Om0 - jnp.exp(0.072229 * mu)
+    exponent_derivative = 0.835733 - 0.072229 * jnp.exp(0.072229 * mu)
+    dterm3 = term3 * ((1 / mu) * exponent + exponent_derivative * ln_base)
+
+    term4 = (-0.045160) * mu
+    dterm4 = -0.045160
+
+    return (dterm1 * (term2 - (term3 - term4))
+            + term1 * (dterm2 - (dterm3 - dterm4)))
+
+
+def distmod2redshift(mu, Om0):
+    """
+    Convert distance modulus to redshift, assuming `h = 1`. The expression is
+    valid for a flat universe over the range of 0.00001 < z < 0.1.
+    """
+    return jnp.exp(((0.461108 * mu) - ((0.022187 * Om0) + (((0.022347 * mu)** (12.631788 - ((-6.708757) * Om0))) + 19.529852))))  # noqa
--- a/csiborgtools/flow/flow_model.py
+++ b/csiborgtools/flow/flow_model.py
@ -25,10 +25,11 @@ from abc import ABC, abstractmethod

 import numpy as np
 from astropy import units as u
-from astropy.coordinates import SkyCoord, angular_separation
 from astropy.cosmology import FlatLambdaCDM, z_at_value
+from interpax import interp1d
 from jax import jit
 from jax import numpy as jnp
+from jax import vmap
 from jax.scipy.special import erf, logsumexp
 from numpyro import factor, plate, sample
 from numpyro.distributions import MultivariateNormal, Normal, Uniform
@ -37,57 +38,19 @@ from tqdm import trange

 from ..params import SPEED_OF_LIGHT
 from ..utils import fprint
+from .cosmography import (dist2redshift, distmod2dist, distmod2dist_gradient,
+                          distmod2redshift, gradient_redshift2dist)
 from .selection import toy_log_magnitude_selection
-from .void_model import interpolate_void, load_void_data
+from .void_model import (angular_distance_from_void_axis, interpolate_void,
+                         load_void_data)

 H0 = 100  # km / s / Mpc


 ###############################################################################
-#                           JAX Flow model                                    #
+#                       Various flow utilities                                #
 ###############################################################################

-def dist2redshift(dist, Omega_m, h=1.):
-    """
-    Convert comoving distance to cosmological redshift if the Universe is
-    flat and z << 1.
-    """
-    eta = 3 * Omega_m / 2
-    return 1 / eta * (1 - (1 - 2 * 100 * h * dist / SPEED_OF_LIGHT * eta)**0.5)
-
-
-def redshift2dist(z, Omega_m):
-    """
-    Convert cosmological redshift to comoving distance if the Universe is
-    flat and z << 1.
-    """
-    q0 = 3 * Omega_m / 2 - 1
-    return SPEED_OF_LIGHT * z / (2 * H0) * (2 - z * (1 + q0))
-
-
-def gradient_redshift2dist(z, Omega_m):
-    """
-    Gradient of the redshift to comoving distance conversion if the Universe is
-    flat and z << 1.
-    """
-    q0 = 3 * Omega_m / 2 - 1
-    return SPEED_OF_LIGHT / H0 * (1 - z * (1 + q0))
-
-
-def distmod2dist(mu, Om0):
-    """
-    Convert distance modulus to distance in `Mpc / h`. The expression is valid
-    for a flat universe over the range of 0.00001 < z < 0.1.
-    """
-    term1 = jnp.exp((0.443288 * mu) + (-14.286531))
-    term2 = (0.506973 * mu) + 12.954633
-    term3 = ((0.028134 * mu) ** (
-        ((0.684713 * mu)
-         + ((0.151020 * mu) + (1.235158 * Om0))) - jnp.exp(0.072229 * mu)))
-    term4 = (-0.045160) * mu
-    return (-0.000301) + (term1 * (term2 - (term3 - term4)))
-
-
 def project_Vext(Vext_x, Vext_y, Vext_z, RA_radians, dec_radians):
    """Project the external velocity vector onto the line of sight."""
    cos_dec = jnp.cos(dec_radians)
@ -150,6 +113,37 @@ def upper_truncated_normal_logpdf(x, loc, scale, xmax):
    return normal_logpdf(x, loc, scale) - jnp.log(norm)


+###############################################################################
+#                            LOS interpolation                                #
+###############################################################################
+
+
+def interpolate_los(r, los, rgrid, method="cubic"):
+    """
+    Interpolate the LOS field at a given radial distance.
+
+    Parameters
+    ----------
+    r : 1-dimensional array of shape `(n_gal, )`
+        Radial distances at which to interpolate the LOS field.
+    los : 3-dimensional array of shape `(n_sims, n_gal, n_steps)`
+        LOS field.
+    rmin, rmax : float
+        Minimum and maximum radial distances in the data.
+    order : int, optional
+        The order of the interpolation. Default is 1, can be 0.
+
+    Returns
+    -------
+    2-dimensional array of shape `(n_sims, n_gal)`
+    """
+    # Vectorize over the inner loop (ngal) first, then the outer loop (nsim)
+    def f(rn, los_row):
+        return interp1d(rn, rgrid, los_row, method=method)
+
+    return vmap(vmap(f, in_axes=(0, 0)), in_axes=(None, 0))(r, los)
+
+
 ###############################################################################
 #                          Base flow validation                               #
 ###############################################################################
@ -232,17 +226,12 @@ class BaseFlowValidationModel(ABC):
        rLG_grid *= h
        rLG_min, rLG_max = rLG_grid.min(), rLG_grid.max()
        rgrid_min, rgrid_max = 0, 250
-        fprint(f"setting radial grid from {rLG_min} to {rLG_max} Mpc.")
+        fprint(f"setting radial grid from {rLG_min} to {rLG_max} Mpc / h.")
        rgrid_max *= h

-        # Get angular separation (in degrees) of each object from the model
-        # axis.
-        model_axis = SkyCoord(l=117, b=4, frame='galactic', unit='deg').icrs
-        coords = SkyCoord(ra=RA, dec=dec, unit='deg').icrs
-
-        phi = angular_separation(coords.ra.rad, coords.dec.rad,
-                                 model_axis.ra.rad, model_axis.dec.rad)
-        phi = jnp.asarray(phi * 180 / np.pi, dtype=jnp.float32)
+        # Get angular separation of each object from the model axis.
+        phi = angular_distance_from_void_axis(RA, dec)
+        phi = jnp.asarray(phi, dtype=jnp.float32)

        if kind == "density":
            void_grid = jnp.log(void_grid)
@ -291,6 +280,12 @@ class BaseFlowValidationModel(ABC):

        return self._los_velocity

+    def log_los_density_at_r(self, r):
+        return interpolate_los(r, self.log_los_density(), self.r_xrange, )
+
+    def los_velocity_at_r(self, r):
+        return interpolate_los(r, self.los_velocity(), self.r_xrange, )
+
    @abstractmethod
    def __call__(self, **kwargs):
        pass
@ -514,16 +509,16 @@ class PV_LogLikelihood(BaseFlowValidationModel):
        Name of the catalogue.
    void_kwargs : dict, optional
        Void data parameters. If `None` the data is not void data.
-    with_num_dist_marginalisation : bool, optional
-        Whether to use numerical distance marginalisation, in which case
-        the tracers cannot be coupled by a covariance matrix. By default
-        `True`.
+    wo_num_dist_marginalisation : bool, optional
+        Whether to directly sample the distance without numerical
+        marginalisation. in which case the tracers can be coupled by a
+        covariance matrix. By default `False`.
    """

    def __init__(self, los_density, los_velocity, RA, dec, z_obs, e_zobs,
                 calibration_params, abs_calibration_params, mag_selection,
                 r_xrange, Omega_m, kind, name, void_kwargs=None,
-                 with_num_dist_marginalisation=True):
+                 wo_num_dist_marginalisation=False):
        if e_zobs is not None:
            e2_cz_obs = jnp.asarray((SPEED_OF_LIGHT * e_zobs)**2)
        else:
@ -549,7 +544,7 @@ class PV_LogLikelihood(BaseFlowValidationModel):
            values += [jnp.log(los_density), los_velocity]

            # Density required only if not numerically marginalising.
-            if not with_num_dist_marginalisation:
+            if not wo_num_dist_marginalisation:
                names += ["_los_density"]
                values += [los_density]

@ -561,12 +556,9 @@ class PV_LogLikelihood(BaseFlowValidationModel):
        self.kind = kind
        self.name = name
        self.Omega_m = Omega_m
-        self.with_num_dist_marginalisation = with_num_dist_marginalisation
+        self.wo_num_dist_marginalisation = wo_num_dist_marginalisation
        self.norm = - self.ndata * jnp.log(self.num_sims)

-        # TODO: Somewhere here prepare the interpolators in case of no
-        # numerical marginalisation.
-
        if mag_selection is not None:
            self.mag_selection_kind = mag_selection["kind"]

@ -767,30 +759,20 @@ class PV_LogLikelihood(BaseFlowValidationModel):
        else:
            raise ValueError(f"Unknown kind: `{self.kind}`.")

-        # h = field_calibration_params["h"]
        # ----------------------------------------------------------------
        # 2. Log-likelihood of the true distance and observed redshifts.
        # The marginalisation of the true distance can be done numerically.
        # ----------------------------------------------------------------
-        if self.with_num_dist_marginalisation:
+        if not self.wo_num_dist_marginalisation:

            if field_calibration_params["sample_h"]:
-                raise NotImplementedError("Sampling of h not implemented.")
-                # Rescale the grid to account for the sampled H0. For distance
-                # modulus going from Mpc / h to Mpc implies larger numerical
-                # values, so there has to be a minus sign since h < 1.
-                # mu_xrange = self.mu_xrange - 5 * jnp.log(h)
-
-                # The redshift should also be boosted since now the object are
-                # further away?
-
-                # Actually, the redshift ought to remain the same?
-            else:
-                mu_xrange = self.mu_xrange
+                raise NotImplementedError(
+                    "Sampling of 'h' is not supported if numerically "
+                    "marginalising the true distance.")

            # Calculate p(r) (Malmquist bias). Shape is (ndata, nxrange)
            log_ptilde = log_ptilde_wo_bias(
-                mu_xrange[None, :], mu[:, None], e2_mu[:, None],
+                self.mu_xrange[None, :], mu[:, None], e2_mu[:, None],
                self.log_r2_xrange[None, :])

            if self.is_void_data:
@ -832,56 +814,52 @@ class PV_LogLikelihood(BaseFlowValidationModel):
            return ll0 + jnp.sum(logsumexp(ll, axis=0)) + self.norm
        else:
            if field_calibration_params["sample_h"]:
-                raise NotImplementedError("Sampling of h not implemented.")
-
-            raise NotImplementedError(
-                "Sampling of distance is not implemented. Work in progress.")
+                raise NotImplementedError(
+                    "Sampling of h is not yet implemented.")

            e_mu = jnp.sqrt(e2_mu)
            # True distance modulus, shape is `(n_data)``
            with plate("plate_mu", self.ndata):
                mu_true = sample("mu", Normal(mu, e_mu))

-            # True distance, shape is `(n_data)``
+            # True distance and redshift, shape is `(n_data)`.
            r_true = distmod2dist(mu_true, self.Omega_m)
-            # TODO:
-            z_true = None
+            z_true = distmod2redshift(mu_true, self.Omega_m)

            if self.is_void_data:
                raise NotImplementedError(
                    "Void data not implemented yet for distance sampling.")
            else:
-                # grid log(density), shape is `(n_sims, n_data, n_rad)`
-                log_los_density_grid = self.los_density()
-
-                # TODO: Need to add the interpolators for these
+                # Grid log(density), shape is `(n_sims, n_data, n_rad)`
+                log_los_density_grid = self.log_los_density()
                # Densities and velocities at the true distances, shape is
                # `(n_sims, n_data)`
-                log_density = None
-                los_velocity = None
+                log_density = self.log_los_density_at_r(r_true)
+                los_velocity = self.los_velocity_at_r(r_true)

            alpha = distmod_params["alpha"]

-            # Check dimensions of all this
-
            # Normalisation of p(mu), shape is `(n_sims, n_data, n_rad)`
            pnorm = (
-                self.log_r2_xrange[None, None, :]
+                + self.log_r2_xrange[None, None, :]
                + alpha * log_los_density_grid
                + normal_logpdf(
                    self.mu_xrange[None, :], mu[:, None], e_mu[:, None])[None, ...])  # noqa
-
            pnorm = jnp.exp(pnorm)
-
-            # Normalization of p(mu). Shape is now (nsims, ndata)
+            # Now integrate over the radial steps. Shape is `(nsims, ndata)`.
+            # No Jacobian here because I integrate over distance, not the
+            # distance modulus.
            pnorm = simpson(pnorm, x=self.r_xrange, axis=-1)

-            # TODO: There should be a Jacobian?
+            # Jacobian |dr / dmu|_(mu_true), shape is `(n_data)`.
+            jac = jnp.abs(distmod2dist_gradient(mu_true, self.Omega_m))
+
            # Calculate unnormalized log p(mu). Shape is (nsims, ndata)
            ll = (
-                2 * (jnp.log(r_true) - self.log_r2_xrange_mean)[None, :]
+                + jnp.log(jac)[None, :]
+                + (2 * jnp.log(r_true) - self.log_r2_xrange_mean)[None, :]
                + alpha * log_density
-                + normal_logpdf(mu_true, mu, e_mu)[None, :])
+                )

            # Subtract the normalization. Shape remains (nsims, ndata)
            ll -= jnp.log(pnorm)
@ -933,7 +911,7 @@ def PV_validation_model(models, distmod_hyperparams_per_model,
    # We sample the components of Vext with a uniform prior, which means
    # there is a |Vext|^2 prior, we correct for this so that the sampling
    # is effecitvely uniformly in magnitude of Vext and angles.
-    if "Vext" in field_calibration_params:
+    if "Vext" in field_calibration_params and not field_calibration_hyperparams["no_Vext"]:  # noqa
        ll -= jnp.log(jnp.sum(field_calibration_params["Vext"]**2))

    for n in range(len(models)):
--- a/csiborgtools/flow/io.py
+++ b/csiborgtools/flow/io.py
@ -20,6 +20,7 @@ from h5py import File
 from ..params import SPEED_OF_LIGHT, simname2Omega_m
 from ..utils import fprint, radec_to_galactic, radec_to_supergalactic
 from .flow_model import PV_LogLikelihood
+from .void_model import load_void_data, mock_void, select_void_h

 H0 = 100  # km / s / Mpc

@ -242,6 +243,25 @@ class DataLoader:
                arr = np.empty(len(f["RA"]), dtype=dtype)
                for key in f.keys():
                    arr[key] = f[key][:]
+        elif "IndranilVoidTFRMock" in catalogue:
+            # The name can be e.g. "IndranilVoidTFRMock_exp_34_0", where the
+            # first and second number are the LG observer index and random
+            # seed.
+            profile, rLG_index, seed = catalogue.split("_")[1:]
+            rLG_index = int(rLG_index)
+            seed = int(seed)
+            rLG, vrad_data = load_void_data(profile, "vrad")
+            h = select_void_h(profile)
+            print(f"Mock observed galaxies for LG observer with index "
+                  f"{rLG_index} at {rLG[rLG_index] * h} Mpc / h and "
+                  f"seed {seed}.")
+            mock_data = mock_void(vrad_data, rLG_index, profile, seed=seed)[0]
+
+            # Convert the dictionary to a structured array
+            dtype = [(key, np.float32) for key in mock_data.keys()]
+            arr = np.empty(len(mock_data["RA"]), dtype=dtype)
+            for key in mock_data.keys():
+                arr[key] = mock_data[key]
        elif "UPGLADE" in catalogue:
            with File(catalogue_fpath, 'r') as f:
                dtype = [(key, np.float32) for key in f.keys()]
@ -354,8 +374,8 @@ def mask_fields(density, velocity, mask, return_none):


 def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
-              absolute_calibration=None, calibration_fpath=None,
-              void_kwargs=None):
+              wo_num_dist_marginalisation=False, absolute_calibration=None,
+              calibration_fpath=None, void_kwargs=None):
    """
    Get a model and extract the relevant data from the loader.

@ -369,9 +389,14 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
        Maximum observed redshift in the CMB frame to include.
    mag_selection : dict, optional
        Magnitude selection parameters.
+    wo_num_dist_marginalisation : bool, optional
+        Whether to directly sample the distance without numerical
+        marginalisation. in which case the tracers can be coupled by a
+        covariance matrix. By default `False`.
    add_absolute_calibration : bool, optional
        Whether to add an absolute calibration for CF4 TFRs.
    calibration_fpath : str, optional
+        Path to the file containing the absolute calibration of CF4 TFR.

    Returns
    -------
@ -418,7 +443,8 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
            los_overdensity, los_velocity,
            RA[mask], dec[mask], zCMB[mask], e_zCMB, calibration_params,
            None, mag_selection, loader.rdist, loader._Omega_m, "SN",
-            name=kind, void_kwargs=void_kwargs)
+            name=kind, void_kwargs=void_kwargs,
+            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
    elif "Pantheon+" in kind:
        keys = ["RA", "DEC", "zCMB", "mB", "x1", "c", "biasCor_m_b", "mBERR",
                "x1ERR", "cERR", "biasCorErr_m_b", "zCMB_SN", "zCMB_Group",
@ -451,8 +477,9 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
            los_overdensity, los_velocity,
            RA[mask], dec[mask], zCMB[mask], e_zCMB[mask], calibration_params,
            None, mag_selection, loader.rdist, loader._Omega_m, "SN",
-            name=kind, void_kwargs=void_kwargs)
-    elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"]:
+            name=kind, void_kwargs=void_kwargs,
+            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
+    elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"] or "IndranilVoidTFRMock" in kind:  # noqa
        keys = ["RA", "DEC", "z_CMB", "mag", "eta", "e_mag", "e_eta"]
        RA, dec, zCMB, mag, eta, e_mag, e_eta = (loader.cat[k] for k in keys)

@ -467,7 +494,8 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
            los_overdensity, los_velocity,
            RA[mask], dec[mask], zCMB[mask], None, calibration_params, None,
            mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind,
-            void_kwargs=void_kwargs)
+            void_kwargs=void_kwargs,
+            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
    elif "CF4_TFR_" in kind:
        # The full name can be e.g. "CF4_TFR_not2MTForSFI_i" or "CF4_TFR_i".
        band = kind.split("_")[-1]
@ -535,7 +563,8 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
            los_overdensity, los_velocity,
            RA[mask], dec[mask], z_obs[mask], None, calibration_params,
            abs_calibration_params, mag_selection, loader.rdist,
-            loader._Omega_m, "TFR", name=kind, void_kwargs=void_kwargs)
+            loader._Omega_m, "TFR", name=kind, void_kwargs=void_kwargs,
+            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
    elif kind in ["CF4_GroupAll"]:
        # Note, this for some reason works terribly.
        keys = ["RA", "DE", "Vcmb", "DMzp", "eDM"]
@ -556,7 +585,8 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
            los_overdensity, los_velocity,
            RA[mask], dec[mask], zCMB[mask], None, calibration_params, None,
            mag_selection,  loader.rdist, loader._Omega_m, "simple",
-            name=kind, void_kwargs=void_kwargs)
+            name=kind, void_kwargs=void_kwargs,
+            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
    else:
        raise ValueError(f"Catalogue `{kind}` not recognized.")

--- a/csiborgtools/flow/void_model.py
+++ b/csiborgtools/flow/void_model.py
@ -19,11 +19,44 @@ from os.path import join
 from re import search

 import numpy as np
+from astropy.coordinates import SkyCoord, angular_separation
 from jax import numpy as jnp
 from jax import vmap
 from jax.scipy.ndimage import map_coordinates
+from scipy.interpolate import RegularGridInterpolator
 from tqdm import tqdm

+from ..utils import galactic_to_radec
+from ..params import SPEED_OF_LIGHT
+from .cosmography import distmod2dist, distmod2redshift
+
+###############################################################################
+#                         Basic void computations                             #
+###############################################################################
+
+
+def angular_distance_from_void_axis(RA, dec):
+    """
+    Calculate the angular distance of a galaxy from the void axis, all in
+    degrees.
+    """
+    # Calculate the separation angle between the galaxy and the model axis.
+    model_axis = SkyCoord(l=117, b=4, frame='galactic', unit='deg').icrs
+    coords = SkyCoord(ra=RA, dec=dec, unit='deg').icrs
+    return angular_separation(
+        coords.ra.rad, coords.dec.rad,
+        model_axis.ra.rad, model_axis.dec.rad) * 180 / np.pi
+
+
+def select_void_h(kind):
+    """Select 'little h' for void profile `kind`."""
+    hs = {"mb": 0.7615, "gauss": 0.7724, "exp": 0.7725}
+    try:
+        return hs[kind]
+    except KeyError:
+        raise ValueError(f"Unknown void kind: `{kind}`.")
+
+
 ###############################################################################
 #                            I/O of the void data                             #
 ###############################################################################
@ -97,9 +130,9 @@ def interpolate_void(rLG, r, phi, data, rgrid_min, rgrid_max, rLG_min, rLG_max,
    ----------
    rLG : float
        The observer's distance from the center of the void.
-    r : 1-dimensional array
+    r : 1-dimensional array of shape `(nsteps,)
        The radial distances at which to interpolate the velocities.
-    phi : 1-dimensional array
+    phi : 1-dimensional array of shape `(ngal,)`
        The angles at which to interpolate the velocities, in degrees,
        defining the galaxy position.
    data : 3-dimensional array of shape (nLG, nrad, nphi)
@ -114,7 +147,7 @@ def interpolate_void(rLG, r, phi, data, rgrid_min, rgrid_max, rLG_min, rLG_max,

    Returns
    -------
-    vel : 2-dimensional array of shape (len(phi), len(r))
+    vel : 2-dimensional array of shape `(ngal, nsteps)`
    """
    nLG, nrad, nphi = data.shape

@ -139,3 +172,106 @@ def interpolate_void(rLG, r, phi, data, rgrid_min, rgrid_max, rLG_min, rLG_max,
        return map_coordinates(data, X, order=order, mode='nearest')

    return vmap(interpolate_single_phi)(phi)
+
+
+###############################################################################
+#                          Mock void data                                     #
+###############################################################################
+
+
+def mock_void(vrad_data, rLG_index, profile,
+              a_TF=-22.8, b_TF=-7.2, sigma_TF=0.1, sigma_v=100.,
+              mean_eta=0.069, std_eta=0.078, mean_e_eta=0.012,
+              mean_mag=10.31, std_mag=0.83, mean_e_mag=0.044,
+              bmin=None, add_malmquist=False, nsamples=2000, seed=42,
+              Om0=0.3175, verbose=False, **kwargs):
+    """Mock 2MTF-like TFR data with void velocities."""
+    truths = {"a": a_TF, "b": b_TF, "e_mu": sigma_TF, "sigma_v": sigma_v,
+              "mean_eta": mean_eta, "std_eta": std_eta,
+              "mean_mag": mean_mag, "std_mag": std_mag,
+              }
+
+    gen = np.random.default_rng(seed)
+
+    # Sample the sky-distribution, either full-sky or mask out the Galactic
+    # plane.
+    l = gen.uniform(0, 360, size=nsamples)  # noqa
+    if bmin is None:
+        b = np.arcsin(gen.uniform(-1, 1, size=nsamples))
+    else:
+        b = np.arcsin(gen.uniform(np.sin(np.deg2rad(bmin)), 1,
+                                  size=nsamples))
+        b[gen.rand(nsamples) < 0.5] *= -1
+
+    b = np.rad2deg(b)
+
+    RA, DEC = galactic_to_radec(l, b)
+    # Calculate the angular separation from the void axis, in degrees.
+    phi = angular_distance_from_void_axis(RA, DEC)
+
+    # Sample the linewidth of each galaxy from a Gaussian distribution to mimic
+    # the MNR procedure.
+    eta_true = gen.normal(mean_eta, std_eta, nsamples)
+    eta_obs = gen.normal(eta_true, mean_e_eta)
+
+    # Subtract the mean of the observed linewidths, so that they are
+    # centered around zero. For consistency subtract from both observed
+    # and true values.
+    eta_mean_sampled = np.mean(eta_obs)
+    eta_true -= eta_mean_sampled
+    eta_obs -= eta_mean_sampled
+
+    # Sample the magnitude from some Gaussian distribution to replicate MNR.
+    mag_true = gen.normal(mean_mag, std_mag, nsamples)
+    mag_obs = gen.normal(mag_true, mean_e_mag)
+
+    # Calculate the 'true' distance modulus and redshift from the TFR distance.
+    mu_TFR = mag_true - (a_TF + b_TF * eta_true)
+    if add_malmquist:
+        raise NotImplementedError("Malmquist bias not implemented yet.")
+    else:
+        mu_true = gen.normal(mu_TFR, sigma_TF)
+
+    # Convert the true distance modulus to true distance and cosmological
+    # redshift.
+    r = distmod2dist(mu_true, Om0)
+    zcosmo = distmod2redshift(mu_true, Om0)
+
+    # Little h of this void profile
+    h = select_void_h(profile)
+
+    # Extract the velocities for the galaxies from the grid for this LG
+    # index.
+    vrad_data_rLG = vrad_data[rLG_index]
+
+    r_grid = np.arange(0, 251) * h
+    phi_grid = np.arange(0, 181)
+    Vr = RegularGridInterpolator((r_grid, phi_grid), vrad_data_rLG,
+                                 fill_value=np.nan, bounds_error=False,
+                                 method="cubic")(np.vstack([r, phi]).T)
+
+    # The true redshift of the source.
+    zCMB_true = (1 + zcosmo) * (1 + Vr / SPEED_OF_LIGHT) - 1
+    zCMB_obs = gen.normal(zCMB_true, sigma_v / SPEED_OF_LIGHT)
+
+    sample = {"RA": RA,
+              "DEC": DEC,
+              "z_CMB": zCMB_obs,
+              "eta": eta_obs,
+              "mag": mag_obs,
+              "e_eta": np.ones(nsamples) * mean_e_eta,
+              "e_mag": np.ones(nsamples) * mean_e_mag,
+              "r": r,
+              "distmod_true": mu_true,
+              "distmod_TFR": mu_TFR}
+
+    # Apply a true distance cut to the mocks.
+    mask = r < np.max(r_grid)
+    for key in sample:
+        sample[key] = sample[key][mask]
+
+    if verbose and np.any(~mask):
+        print(f"Removed {(~mask).sum()} out of {mask.size} samples "
+              "due to the true distance cutoff.")
+
+    return sample, truths
--- a/notebooks/flow/reconstruction_comparison.ipynb
+++ b/notebooks/flow/reconstruction_comparison.ipynb
--- a/scripts/flow/flow_validation.py
+++ b/scripts/flow/flow_validation.py
@ -88,7 +88,7 @@ def print_variables(names, variables):


 def get_models(ksim, get_model_kwargs, mag_selection, void_kwargs,
-               verbose=True):
+               wo_num_dist_marginalisation, verbose=True):
    """Load the data and create the NumPyro models."""
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    folder = "/mnt/extraspace/rstiskalek/catalogs/"
@ -120,6 +120,8 @@ def get_models(ksim, get_model_kwargs, mag_selection, void_kwargs,
            fpath = join(folder, "PV/CF4/CF4_TF-distances.hdf5")
        elif cat in ["CF4_GroupAll"]:
            fpath = join(folder, "PV/CF4/CF4_GroupAll.hdf5")
+        elif "IndranilVoidTFRMock" in cat:
+            fpath = None
        else:
            raise ValueError(f"Unsupported catalogue: `{ARGS.catalogue}`.")

@ -128,20 +130,13 @@ def get_models(ksim, get_model_kwargs, mag_selection, void_kwargs,
                                              ksmooth=ARGS.ksmooth)
        models[i] = csiborgtools.flow.get_model(
            loader, mag_selection=mag_selection[i], void_kwargs=void_kwargs,
+            wo_num_dist_marginalisation=wo_num_dist_marginalisation,
            **get_model_kwargs)

    fprint(f"num. radial steps is {len(loader.rdist)}")
    return models


-def select_void_h(kind):
-    hs = {"mb": 0.7615, "gauss": 0.7724, "exp": 0.7725}
-    try:
-        return hs[kind]
-    except KeyError:
-        raise ValueError(f"Unknown void kind: `{kind}`.")
-
-
 def get_harmonic_evidence(samples, log_posterior, nchains_harmonic, epoch_num):
    """Compute evidence using the `harmonic` package."""
    data, names = csiborgtools.dict_samples_to_array(samples)
@ -243,7 +238,7 @@ def get_distmod_hyperparams(catalogue, sample_alpha, sample_mag_dipole):
                "alpha_min": alpha_min, "alpha_max": alpha_max,
                "sample_alpha": sample_alpha
                }
-    elif catalogue in ["SFI_gals", "2MTF"] or "CF4_TFR" in catalogue:
+    elif catalogue in ["SFI_gals", "2MTF"] or "CF4_TFR" in catalogue or "IndranilVoidTFRMock" in catalogue:  # noqa
        return {"e_mu_min": 0.001, "e_mu_max": 1.0,
                "a_mean": -21., "a_std": 5.0,
                "b_mean": -5.95, "b_std": 4.0,
@ -299,7 +294,7 @@ if __name__ == "__main__":
    ###########################################################################

    # `None` means default behaviour
-    nsteps = 10_000
+    nsteps = 2_000
    nburn = 2_000
    zcmb_min = None
    zcmb_max = 0.05
@ -313,8 +308,9 @@ if __name__ == "__main__":
    sample_Vmag_vax = False
    sample_Vmono = False
    sample_mag_dipole = False
+    wo_num_dist_marginalisation = False
    absolute_calibration = None
-    calculate_harmonic = False if inference_method == "bayes" else True
+    calculate_harmonic = (False if inference_method == "bayes" else True) and (not wo_num_dist_marginalisation)  # noqa
    sample_h = True if absolute_calibration is not None else False

    fname_kwargs = {"inference_method": inference_method,
@ -341,6 +337,7 @@ if __name__ == "__main__":
                   "num_epochs": num_epochs,
                   "inference_method": inference_method,
                   "sample_mag_dipole": sample_mag_dipole,
+                   "wo_dist_marg": wo_num_dist_marginalisation,
                   "absolute_calibration": absolute_calibration,
                   "sample_h": sample_h,
                   }
@ -358,7 +355,7 @@ if __name__ == "__main__":
                "`IndranilVoid` does not have multiple realisations.")

        profile = ARGS.simname.split("_")[-1]
-        h = select_void_h(profile)
+        h = csiborgtools.flow.select_void_h(profile)
        rdist = np.arange(0, 165, 0.5)
        void_kwargs = {"profile": profile, "h": h, "order": 1, "rdist": rdist}
    else:
@ -377,7 +374,7 @@ if __name__ == "__main__":
    calibration_hyperparams = {"Vext_min": -3000, "Vext_max": 3000,
                               "Vmono_min": -1000, "Vmono_max": 1000,
                               "beta_min": -10.0, "beta_max": 10.0,
-                               "sigma_v_min": 1.0, "sigma_v_max": 5000 if "IndranilVoid_" in ARGS.simname else 750.,  # noqa
+                               "sigma_v_min": 1.0, "sigma_v_max": 1000 if "IndranilVoid_" in ARGS.simname else 750.,  # noqa
                               "h_min": 0.01, "h_max": 5.0,
                               "no_Vext": False if no_Vext is None else no_Vext,        # noqa
                               "sample_Vmag_vax": sample_Vmag_vax,
@ -420,7 +417,8 @@ if __name__ == "__main__":
            print(f"{'Current simulation:':<20} {i + 1} ({ksim}) out of {len(ksim_iterator)}.")  # noqa

        fname_kwargs["nsim"] = ksim
-        models = get_models(ksim, get_model_kwargs, mag_selection, void_kwargs)
+        models = get_models(ksim, get_model_kwargs, mag_selection, void_kwargs,
+                            wo_num_dist_marginalisation)
        model_kwargs = {
            "models": models,
            "field_calibration_hyperparams": calibration_hyperparams,
--- a/setup.py
+++ b/setup.py
@ -11,6 +11,7 @@ INSTALL_REQ += [
    "mpi4py",
    "numba",
    "numpyro",
+    "interpax"
    "quadax",
    "scikit-learn",
    "tqdm",