Mocks with absolute calibration (#156)

* Rename nb * Add Carrick 2MTF mocks * Add more 2MTF mock support * Add mocks generator * Add mock gen nb * Control over MAlmquist * Control over Malmquist * Update imports * Update script * Add H0 to TFR mocks * Clear up saving * Add h sampling * Update saving * Update mocks * Add calibration to mocks * More support to read in absmag * Add absmag * Update script
2025-07-05 14:01:12 +00:00 · 2024-10-09 20:41:36 +02:00 · 2024-10-09 20:41:36 +02:00 · 4167ba4fb1
commit 4167ba4fb1
parent 76a7609f7f
8 changed files with 902 additions and 118 deletions
--- a/csiborgtools/flow/init.py
+++ b/csiborgtools/flow/init.py
@ -17,6 +17,7 @@ from .io import (DataLoader, get_model, read_absolute_calibration,
 from .flow_model import (PV_LogLikelihood, PV_validation_model, dist2redshift,  # noqa
                         Observed2CosmologicalRedshift, predict_zobs,           # noqa
                         project_Vext, stack_pzosmo_over_realizations)          # noqa
+from .mocks import mock_Carrick2MTF                                             # noqa
 from .selection import ToyMagnitudeSelection                                    # noqa
 from .void_model import (load_void_data, interpolate_void, select_void_h,       # noqa
                         mock_void)                                             # noqa
--- a/csiborgtools/flow/flow_model.py
+++ b/csiborgtools/flow/flow_model.py
@ -170,28 +170,6 @@ class BaseFlowValidationModel(ABC):

        self._setattr_as_jax(names, values)

-    def _set_abs_calibration_params(self, abs_calibration_params):
-        self.with_absolute_calibration = abs_calibration_params is not None
-
-        if abs_calibration_params is None:
-            self.with_absolute_calibration = False
-            return
-
-        self.calibration_distmod = jnp.asarray(
-            abs_calibration_params["calibration_distmod"][..., 0])
-        self.calibration_edistmod = jnp.asarray(
-            abs_calibration_params["calibration_distmod"][..., 1])
-        self.data_with_calibration = jnp.asarray(
-            abs_calibration_params["data_with_calibration"])
-        self.data_wo_calibration = ~self.data_with_calibration
-
-        # Calculate the log of the number of calibrators. Where there is no
-        # calibrator set the number of calibrators to 1 to avoid log(0) and
-        # this way only zeros are being added.
-        length_calibration = abs_calibration_params["length_calibration"]
-        length_calibration[length_calibration == 0] = 1
-        self.log_length_calibration = jnp.log(length_calibration)
-
    def _set_radial_spacing(self, r_xrange, Omega_m):
        cosmo = FlatLambdaCDM(H0=H0, Om0=Omega_m)

@ -356,7 +334,8 @@ def e2_distmod_TFR(e2_mag, e2_eta, eta, b, c, e_mu_intrinsic):

 def sample_TFR(e_mu_min, e_mu_max, a_mean, a_std, b_mean, b_std,
               c_mean, c_std, alpha_min, alpha_max, sample_alpha,
-               a_dipole_mean, a_dipole_std, sample_a_dipole, name):
+               a_dipole_mean, a_dipole_std, sample_a_dipole,
+               sample_curvature, name):
    """Sample Tully-Fisher calibration parameters."""
    e_mu = sample(f"e_mu_{name}", Uniform(e_mu_min, e_mu_max))
    a = sample(f"a_{name}", Normal(a_mean, a_std))
@ -367,7 +346,11 @@ def sample_TFR(e_mu_min, e_mu_max, a_mean, a_std, b_mean, b_std,
        ax, ay, az = 0.0, 0.0, 0.0

    b = sample(f"b_{name}", Normal(b_mean, b_std))
-    c = sample(f"c_{name}", Normal(c_mean, c_std))
+
+    if sample_curvature:
+        c = sample(f"c_{name}", Normal(c_mean, c_std))
+    else:
+        c = 0.

    alpha = sample_alpha_bias(name, alpha_min, alpha_max, sample_alpha)

@ -495,8 +478,6 @@ class PV_LogLikelihood(BaseFlowValidationModel):
        Errors on the observed redshifts.
    calibration_params : dict
        Calibration parameters of each object.
-    abs_calibration_params : dict
-        Absolute calibration parameters.
    mag_selection : dict
        Magnitude selection parameters, optional.
    r_xrange : 1-dimensional array
@ -513,12 +494,17 @@ class PV_LogLikelihood(BaseFlowValidationModel):
        Whether to directly sample the distance without numerical
        marginalisation. in which case the tracers can be coupled by a
        covariance matrix. By default `False`.
+    with_homogeneous_malmquist : bool, optional
+        Whether to include the homogeneous Malmquist bias. By default `True`.
+    with_inhomogeneous_malmquist : bool, optional
+        Whether to include the inhomogeneous Malmquist bias. By default `True`.
    """

    def __init__(self, los_density, los_velocity, RA, dec, z_obs, e_zobs,
-                 calibration_params, abs_calibration_params, mag_selection,
-                 r_xrange, Omega_m, kind, name, void_kwargs=None,
-                 wo_num_dist_marginalisation=False):
+                 calibration_params, mag_selection, r_xrange, Omega_m, kind,
+                 name, void_kwargs=None, wo_num_dist_marginalisation=False,
+                 with_homogeneous_malmquist=True,
+                 with_inhomogeneous_malmquist=True):
        if e_zobs is not None:
            e2_cz_obs = jnp.asarray((SPEED_OF_LIGHT * e_zobs)**2)
        else:
@ -550,13 +536,14 @@ class PV_LogLikelihood(BaseFlowValidationModel):

        self._setattr_as_jax(names, values)
        self._set_calibration_params(calibration_params)
-        self._set_abs_calibration_params(abs_calibration_params)
        self._set_radial_spacing(r_xrange, Omega_m)

        self.kind = kind
        self.name = name
        self.Omega_m = Omega_m
        self.wo_num_dist_marginalisation = wo_num_dist_marginalisation
+        self.with_homogeneous_malmquist = with_homogeneous_malmquist
+        self.with_inhomogeneous_malmquist = with_inhomogeneous_malmquist
        self.norm = - self.ndata * jnp.log(self.num_sims)

        if mag_selection is not None:
@ -771,9 +758,14 @@ class PV_LogLikelihood(BaseFlowValidationModel):
                    "marginalising the true distance.")

            # Calculate p(r) (Malmquist bias). Shape is (ndata, nxrange)
-            log_ptilde = log_ptilde_wo_bias(
-                self.mu_xrange[None, :], mu[:, None], e2_mu[:, None],
-                self.log_r2_xrange[None, :])
+            if self.with_homogeneous_malmquist:
+                log_ptilde = log_ptilde_wo_bias(
+                    self.mu_xrange[None, :], mu[:, None], e2_mu[:, None],
+                    self.log_r2_xrange[None, :])
+            else:
+                log_ptilde = log_ptilde_wo_bias(
+                    self.mu_xrange[None, :], mu[:, None], e2_mu[:, None],
+                    0.)

            if self.is_void_data:
                rLG = field_calibration_params["rLG"]
@ -785,7 +777,9 @@ class PV_LogLikelihood(BaseFlowValidationModel):

            # Inhomogeneous Malmquist bias. Shape: (nsims, ndata, nxrange)
            alpha = distmod_params["alpha"]
-            log_ptilde = log_ptilde[None, ...] + alpha * log_los_density
+            log_ptilde = log_ptilde[None, ...]
+            if self.with_inhomogeneous_malmquist:
+                log_ptilde += alpha * log_los_density

            ptilde = jnp.exp(log_ptilde)
            # Normalization of p(r). Shape: (nsims, ndata)
@ -802,29 +796,51 @@ class PV_LogLikelihood(BaseFlowValidationModel):
            ptilde *= likelihood_zobs(
                self.z_obs[None, :, None], zobs, e2_cz[None, :, None])

-            if self.with_absolute_calibration:
-                raise NotImplementedError(
-                    "Absolute calibration not implemented for this model. "
-                    "Use `PV_LogLikelihood_NoDistMarg` instead.")
-
            # Integrate over the radial distance. Shape: (nsims, ndata)
            ll = jnp.log(simpson(ptilde, x=self.r_xrange, axis=-1))
            ll -= jnp.log(pnorm)

            return ll0 + jnp.sum(logsumexp(ll, axis=0)) + self.norm
        else:
-            if field_calibration_params["sample_h"]:
-                raise NotImplementedError(
-                    "Sampling of h is not yet implemented.")
-
            e_mu = jnp.sqrt(e2_mu)
-            # True distance modulus, shape is `(n_data)``
+            # True distance modulus, shape is `(n_data)`. If we have absolute
+            # calibration, then this distance modulus assumes a particular h.
            with plate("plate_mu", self.ndata):
                mu_true = sample("mu", Normal(mu, e_mu))

-            # True distance and redshift, shape is `(n_data)`.
-            r_true = distmod2dist(mu_true, self.Omega_m)
-            z_true = distmod2redshift(mu_true, self.Omega_m)
+            # Likelihood of the true distance modulii given the calibration.
+            if field_calibration_params["sample_h"]:
+                raise RuntimeError(
+                    "Sampling of 'h' has not yet been thoroughly tested.")
+                h = field_calibration_params["h"]
+
+                # Now, the rest of the code except the calibration likelihood
+                # uses the distance modulus in units of h
+                mu_true_h = mu_true + 5 * jnp.log10(h)
+
+                # Calculate the log-likelihood of the calibration, but the
+                # shape is `(n_calibrators, n_data)`. Where there is no data
+                # we set the likelihood to 0 (or the log-likelihood to -inf)
+                ll_calibration = jnp.where(
+                   self.is_finite_calibrator,
+                   normal_logpdf(self.mu_calibration, mu_true[None, :],
+                                 self.e_mu_calibration),
+                   -jnp.inf)
+
+                # Now average out over the calibrators, however only if the
+                # there is at least one calibrator. If there isn't, then we
+                # just assing a log-likelihood of 0.
+                ll_calibration = jnp.where(
+                    self.any_calibrator,
+                    logsumexp(ll_calibration, axis=0) - jnp.log(self.counts_calibrators),  # noqa
+                    0.)
+            else:
+                mu_true_h = mu_true
+
+            # True distance and redshift, shape is `(n_data)`. The distance
+            # here is in units of `Mpc / h``.
+            r_true = distmod2dist(mu_true_h, self.Omega_m)
+            z_true = distmod2redshift(mu_true_h, self.Omega_m)

            if self.is_void_data:
                raise NotImplementedError(
@ -840,26 +856,29 @@ class PV_LogLikelihood(BaseFlowValidationModel):
            alpha = distmod_params["alpha"]

            # Normalisation of p(mu), shape is `(n_sims, n_data, n_rad)`
-            pnorm = (
-                + self.log_r2_xrange[None, None, :]
-                + alpha * log_los_density_grid
-                + normal_logpdf(
-                    self.mu_xrange[None, :], mu[:, None], e_mu[:, None])[None, ...])  # noqa
+            pnorm = normal_logpdf(
+                self.mu_xrange[None, :], mu[:, None], e_mu[:, None])[None, ...]
+            if self.with_homogeneous_malmquist:
+                pnorm += self.log_r2_xrange[None, None, :]
+            if self.with_inhomogeneous_malmquist:
+                pnorm += alpha * log_los_density_grid
+
            pnorm = jnp.exp(pnorm)
            # Now integrate over the radial steps. Shape is `(nsims, ndata)`.
            # No Jacobian here because I integrate over distance, not the
            # distance modulus.
            pnorm = simpson(pnorm, x=self.r_xrange, axis=-1)

-            # Jacobian |dr / dmu|_(mu_true), shape is `(n_data)`.
-            jac = jnp.abs(distmod2dist_gradient(mu_true, self.Omega_m))
+            # Jacobian |dr / dmu|_(mu_true_h), shape is `(n_data)`.
+            jac = jnp.abs(distmod2dist_gradient(mu_true_h, self.Omega_m))

            # Calculate unnormalized log p(mu). Shape is (nsims, ndata)
-            ll = (
-                + jnp.log(jac)[None, :]
-                + (2 * jnp.log(r_true) - self.log_r2_xrange_mean)[None, :]
-                + alpha * log_density
-                )
+            ll = 0.0
+            if self.with_homogeneous_malmquist:
+                ll += (+ jnp.log(jac)
+                       + (2 * jnp.log(r_true) - self.log_r2_xrange_mean))
+            if self.with_inhomogeneous_malmquist:
+                ll += alpha * log_density

            # Subtract the normalization. Shape remains (nsims, ndata)
            ll -= jnp.log(pnorm)
@ -871,13 +890,13 @@ class PV_LogLikelihood(BaseFlowValidationModel):
            zobs *= 1 + vrad / SPEED_OF_LIGHT
            zobs -= 1.

+            # Add the log-likelihood of observed redshifts. Shape remains
+            # `(nsims, ndata)`
            ll += log_likelihood_zobs(
                self.z_obs[None, :], zobs, e2_cz[None, :])

-            if self.with_absolute_calibration:
-                raise NotImplementedError(
-                    "Absolute calibration not implemented for this model. "
-                    "Use `PV_LogLikelihood_NoDistMarg` instead.")
+            if field_calibration_params["sample_h"]:
+                ll += ll_calibration[None, :]

            return ll0 + jnp.sum(logsumexp(ll, axis=0)) + self.norm

--- a/csiborgtools/flow/io.py
+++ b/csiborgtools/flow/io.py
@ -58,7 +58,8 @@ class DataLoader:
    def __init__(self, simname, ksim, catalogue, catalogue_fpath, paths,
                 ksmooth=None, store_full_velocity=False, verbose=True):
        fprint("reading the catalogue,", verbose)
-        self._cat = self._read_catalogue(catalogue, catalogue_fpath)
+        self._cat, self._absmag_calibration = self._read_catalogue(
+            catalogue, catalogue_fpath)
        self._catname = catalogue

        fprint("reading the interpolated field.", verbose)
@ -132,6 +133,15 @@ class DataLoader:
        """The distance indicators catalogue (structured array)."""
        return self._cat[self._mask]

+    @property
+    def absmag_calibration(self):
+        """Returns the absolute magnitude calibration with masking applied."""
+        if self._absmag_calibration is None:
+            return None
+
+        return {key: val[:, self._mask]
+                for key, val in self._absmag_calibration.items()}
+
    @property
    def catname(self):
        """Catalogue name."""
@ -187,6 +197,8 @@ class DataLoader:
            fpath = paths.field_los(simname, "Pantheon+")
        elif "CF4_TFR" in catalogue:
            fpath = paths.field_los(simname, "CF4_TFR")
+        elif "Carrick2MTFmock" in catalogue:
+            fpath = paths.field_los(simname, "2MTF")
        else:
            fpath = paths.field_los(simname, catalogue)

@ -217,6 +229,8 @@ class DataLoader:
        return rdist, los_density, los_velocity

    def _read_catalogue(self, catalogue, catalogue_fpath):
+        absmag_calibration = None
+
        if catalogue == "A2":
            with File(catalogue_fpath, 'r') as f:
                dtype = [(key, np.float32) for key in f.keys()]
@ -262,6 +276,20 @@ class DataLoader:
            arr = np.empty(len(mock_data["RA"]), dtype=dtype)
            for key in mock_data.keys():
                arr[key] = mock_data[key]
+        elif "Carrick2MTFmock" in catalogue:
+            with File(catalogue_fpath, 'r') as f:
+                keys_skip = ["mu_calibration", "e_mu_calibration"]
+                dtype = [(key, np.float32) for key in f.keys()
+                         if key not in keys_skip]
+                arr = np.empty(len(f["RA"]), dtype=dtype)
+                for key in f.keys():
+                    if key not in keys_skip:
+                        arr[key] = f[key][:]
+
+                absmag_calibration = {
+                    "mu_calibration": f["mu_calibration"][...],
+                    "e_mu_calibration": f["e_mu_calibration"][...]}
+
        elif "UPGLADE" in catalogue:
            with File(catalogue_fpath, 'r') as f:
                dtype = [(key, np.float32) for key in f.keys()]
@ -286,7 +314,7 @@ class DataLoader:
        else:
            raise ValueError(f"Unknown catalogue: `{catalogue}`.")

-        return arr
+        return arr, absmag_calibration


 ###############################################################################
@ -322,7 +350,7 @@ def radial_velocity_los(los_velocity, ra, dec):
 def read_absolute_calibration(kind, data_length, calibration_fpath):
    """
    Read the absolute calibration for the CF4 TFR sample from LEDA but
-    preprocessed by me.
+    preprocessed by me. Missing values are replaced with NaN.

    Parameters
    ----------
@ -335,13 +363,13 @@ def read_absolute_calibration(kind, data_length, calibration_fpath):

    Returns
    -------
-    data : 3-dimensional array of shape (data_length, max_calib, 2)
+    mu : 2-dimensional array of shape `(ncalib, ngalaxies)`
        Absolute calibration data.
-    with_calibration : 1-dimensional array of shape (data_length)
-        Whether the sample has a calibration.
-    length_calibration : 1-dimensional array of shape (data_length)
-        Number of calibration points per sample.
+    e_mu : 2-dimensional array of shape `(ncalib, ngalaxies)`
+        Uncertainties of the absolute calibration.
    """
+    raise RuntimeError("The read-in functions are not guaranteed to work "
+                       "properly.")
    data = {}
    with File(calibration_fpath, 'r') as f:
        for key in f[kind].keys():
@ -355,14 +383,14 @@ def read_absolute_calibration(kind, data_length, calibration_fpath):
    max_calib = max(len(val) for val in data.values())

    out = np.full((data_length, max_calib, 2), np.nan)
-    with_calibration = np.full(data_length, False)
-    length_calibration = np.full(data_length, 0)
    for i in data.keys():
        out[int(i), :len(data[i]), :] = data[i]
-        with_calibration[int(i)] = True
-        length_calibration[int(i)] = len(data[i])

-    return out, with_calibration, length_calibration
+    # Unpack from this the distsance modulus and its uncertainty.
+    mu = out[:, :, 0].T
+    e_mu = out[:, :, 1].T
+
+    return mu, e_mu


 def mask_fields(density, velocity, mask, return_none):
@ -422,8 +450,9 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,

        loader._field_rdist = rdist

-    if absolute_calibration is not None and "CF4_TFR_" not in kind:
-        raise ValueError("Absolute calibration supported only for the CF4 TFR sample.")  # noqa
+    if absolute_calibration is not None and not ("CF4_TFR_" in kind or "Carrick2MTFmock" in kind):  # noqa
+        raise ValueError("Absolute calibration supported only for either "
+                         "the CF4 TFR sample or Carrick 2MTF mocks.")

    if kind in ["LOSS", "Foundation"]:
        keys = ["RA", "DEC", "z_CMB", "mB", "x1", "c", "e_mB", "e_x1", "e_c"]
@ -442,7 +471,7 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
        model = PV_LogLikelihood(
            los_overdensity, los_velocity,
            RA[mask], dec[mask], zCMB[mask], e_zCMB, calibration_params,
-            None, mag_selection, loader.rdist, loader._Omega_m, "SN",
+            mag_selection, loader.rdist, loader._Omega_m, "SN",
            name=kind, void_kwargs=void_kwargs,
            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
    elif "Pantheon+" in kind:
@ -476,26 +505,62 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
        model = PV_LogLikelihood(
            los_overdensity, los_velocity,
            RA[mask], dec[mask], zCMB[mask], e_zCMB[mask], calibration_params,
-            None, mag_selection, loader.rdist, loader._Omega_m, "SN",
+            mag_selection, loader.rdist, loader._Omega_m, "SN",
            name=kind, void_kwargs=void_kwargs,
            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
-    elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"] or "IndranilVoidTFRMock" in kind:  # noqa
+    elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"] or "IndranilVoidTFRMock" in kind or "Carrick2MTFmock" in kind:  # noqa
        keys = ["RA", "DEC", "z_CMB", "mag", "eta", "e_mag", "e_eta"]
        RA, dec, zCMB, mag, eta, e_mag, e_eta = (loader.cat[k] for k in keys)

        mask = (zCMB < zcmb_max) & (zCMB > zcmb_min)
+        if "Carrick2MTFmock" in kind:
+            # For the mock we only want to select objects with the '2M++'
+            # volume.
+            mask &= loader.cat["r"] < 130
+            # The mocks are generated without Malmquist.
+            fprint("disabling homogeneous and inhomogeneous Malmquist bias for the mock.")  # noqa
+            with_homogeneous_malmquist = False
+            with_inhomogeneous_malmquist = False
+        else:
+            with_homogeneous_malmquist = True
+            with_inhomogeneous_malmquist = True
+
        calibration_params = {"mag": mag[mask], "eta": eta[mask],
                              "e_mag": e_mag[mask], "e_eta": e_eta[mask]}

+        # Append the calibration data
+        if "Carrick2MTFmock" in kind:
+            absmag_calibration = loader.absmag_calibration
+
+            # The shape of these is (`ncalibrators, nobjects`).
+            mu_calibration = absmag_calibration["mu_calibration"][:, mask]
+            e_mu_calibration = absmag_calibration["e_mu_calibration"][:, mask]
+            # Auxiliary parameters.
+            m = np.isfinite(mu_calibration)
+
+            # NumPyro refuses to start if any inputs are not finite, so we
+            # replace with some ficutive mean and very large standard
+            # deviation.
+            mu_calibration[~m] = 0.0
+            e_mu_calibration[~m] = 1000.0
+
+            calibration_params["mu_calibration"] = mu_calibration
+            calibration_params["e_mu_calibration"] = e_mu_calibration
+            calibration_params["is_finite_calibrator"] = m
+            calibration_params["counts_calibrators"] = np.sum(m, axis=0)
+            calibration_params["any_calibrator"] = np.any(m, axis=0)
+
        los_overdensity, los_velocity = mask_fields(
            los_overdensity, los_velocity, mask, void_kwargs is not None)

        model = PV_LogLikelihood(
            los_overdensity, los_velocity,
-            RA[mask], dec[mask], zCMB[mask], None, calibration_params, None,
+            RA[mask], dec[mask], zCMB[mask], None, calibration_params,
            mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind,
            void_kwargs=void_kwargs,
-            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
+            wo_num_dist_marginalisation=wo_num_dist_marginalisation,
+            with_homogeneous_malmquist=with_homogeneous_malmquist,
+            with_inhomogeneous_malmquist=with_inhomogeneous_malmquist)
    elif "CF4_TFR_" in kind:
        # The full name can be e.g. "CF4_TFR_not2MTForSFI_i" or "CF4_TFR_i".
        band = kind.split("_")[-1]
@ -532,38 +597,39 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
        else:
            mask &= Qs == 5

-        # Read the absolute calibration
-        if absolute_calibration is not None:
-            CF4_length = len(RA)
-            distmod, with_calibration, length_calibration = read_absolute_calibration(  # noqa
-                "Cepheids", CF4_length, calibration_fpath)
-
-            distmod = distmod[mask]
-            with_calibration = with_calibration[mask]
-            length_calibration = length_calibration[mask]
-            fprint(f"found {np.sum(with_calibration)} galaxies with absolute calibration.")  # noqa
-
-            distmod = distmod[with_calibration]
-            length_calibration = length_calibration[with_calibration]
-
-            abs_calibration_params = {
-                "calibration_distmod": distmod,
-                "data_with_calibration": with_calibration,
-                "length_calibration": length_calibration}
-        else:
-            abs_calibration_params = None
-
        calibration_params = {"mag": mag[mask], "eta": eta[mask],
                              "e_mag": e_mag[mask], "e_eta": e_eta[mask]}

+        # Read the absolute calibration
+        mu_calibration, e_mu_calibration = read_absolute_calibration(
+            absolute_calibration, len(RA), calibration_fpath)
+
+        # The shape of these is (`ncalibrators, nobjects`).
+        mu_calibration = mu_calibration[:, mask]
+        e_mu_calibration = e_mu_calibration[:, mask]
+        # Auxiliary parameters.
+        m = np.isfinite(mu_calibration)
+
+        # NumPyro refuses to start if any inputs are not finite, so we
+        # replace with some ficutive mean and very large standard
+        # deviation.
+        mu_calibration[~m] = 0.0
+        e_mu_calibration[~m] = 1000.0
+
+        calibration_params["mu_calibration"] = mu_calibration
+        calibration_params["e_mu_calibration"] = e_mu_calibration
+        calibration_params["is_finite_calibrator"] = m
+        calibration_params["counts_calibrators"] = np.sum(m, axis=0)
+        calibration_params["any_calibrator"] = np.any(m, axis=0)
+
        los_overdensity, los_velocity = mask_fields(
            los_overdensity, los_velocity, mask, void_kwargs is not None)

        model = PV_LogLikelihood(
            los_overdensity, los_velocity,
            RA[mask], dec[mask], z_obs[mask], None, calibration_params,
-            abs_calibration_params, mag_selection, loader.rdist,
-            loader._Omega_m, "TFR", name=kind, void_kwargs=void_kwargs,
+            mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind,
+            void_kwargs=void_kwargs,
            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
    elif kind in ["CF4_GroupAll"]:
        # Note, this for some reason works terribly.
@ -583,7 +649,7 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,

        model = PV_LogLikelihood(
            los_overdensity, los_velocity,
-            RA[mask], dec[mask], zCMB[mask], None, calibration_params, None,
+            RA[mask], dec[mask], zCMB[mask], None, calibration_params,
            mag_selection,  loader.rdist, loader._Omega_m, "simple",
            name=kind, void_kwargs=void_kwargs,
            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
--- a/csiborgtools/flow/mocks.py
+++ b/csiborgtools/flow/mocks.py
@ -0,0 +1,150 @@
+# Copyright (C) 2024 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""Mock data generators."""
+import numpy as np
+
+from ..field.interp import evaluate_cartesian_regular
+from ..params import SPEED_OF_LIGHT
+from ..utils import cartesian_to_radec, radec_to_cartesian, radec_to_galactic
+from .cosmography import distmod2dist, distmod2redshift
+
+###############################################################################
+#                        Mock Quijote observations                            #
+###############################################################################
+
+
+def mock_Carrick2MTF(velocity_field, boxsize, RA_2MTF, DEC_2MTF,
+                     a_TF=-22.8, b_TF=-7.2, sigma_TF=0.25, sigma_v=100.,
+                     Vext=[150.0, 10.0, -100.0], h=1.0, beta=0.4,
+                     mean_eta=0.069, std_eta=0.078, mean_e_eta=0.012,
+                     mean_mag=10.31, std_mag=0.83, mean_e_mag=0.044,
+                     sigma_calibration=0.05, calibration_max_percentile=10,
+                     calibration_rand_fraction=0.5, nrepeat_calibration=1,
+                     seed=42, Om0=0.3, verbose=True, **kwargs):
+    """
+    Mock TFR catalogue build against the Carrick velocity field and the
+    2MTF sky distribution to avoid recomputing the LOS velocities.
+    """
+    nsamples = len(RA_2MTF)
+
+    # Convert Vext from ICRS to Galactic coordinates.
+    Vext = np.asarray(Vext).reshape(1, 3)
+    Vext_mag, Vext_RA, Vext_DEC = cartesian_to_radec(Vext).reshape(-1, )
+    Vext_l, Vext_b = radec_to_galactic(Vext_RA, Vext_DEC)
+    Vext_galactic = np.asanyarray([Vext_mag, Vext_l, Vext_b]).reshape(1, 3)
+    Vext = radec_to_cartesian(Vext_galactic).reshape(-1, )
+
+    truths = {"a": a_TF, "b": b_TF, "e_mu": sigma_TF, "sigma_v": sigma_v,
+              "Vext": Vext,
+              "mean_eta": mean_eta, "std_eta": std_eta,
+              "mean_mag": mean_mag, "std_mag": std_mag,
+              }
+
+    gen = np.random.default_rng(seed)
+
+    # The Carrick box is in the Galactic coordinates.
+    l, b = radec_to_galactic(RA_2MTF, DEC_2MTF)
+    gal_phi = np.deg2rad(l)
+    gal_theta = np.pi / 2 - np.deg2rad(b)
+
+    # Sample the linewidth of each galaxy from a Gaussian distribution to mimic
+    # the MNR procedure.
+    eta_true = gen.normal(mean_eta, std_eta, nsamples)
+    eta_obs = gen.normal(eta_true, mean_e_eta)
+
+    # Subtract the mean of the observed linewidths, so that they are
+    # centered around zero. For consistency subtract from both observed
+    # and true values.
+    eta_mean_sampled = np.mean(eta_obs)
+    eta_true -= eta_mean_sampled
+    eta_obs -= eta_mean_sampled
+
+    # Sample the magnitude from some Gaussian distribution to replicate MNR.
+    mag_true = gen.normal(mean_mag, std_mag, nsamples)
+    mag_obs = gen.normal(mag_true, mean_e_mag)
+
+    # Calculate the 'true' distance modulus and redshift from the TFR distance.
+
+    # If h != 1, then these distance modulii are in physical units.
+    mu_TFR = mag_true - (a_TF + b_TF * eta_true)
+    mu_true = gen.normal(mu_TFR, sigma_TF)
+    # This is the distance modulus in units of little h.
+    mu_true_h = mu_true + 5 * np.log10(h)
+
+    # Select a fraction of nearby galaxies.
+    mu_max = np.percentile(mu_true, calibration_max_percentile)
+    ks = np.where(mu_true < mu_max)[0]
+    nsel = int(calibration_rand_fraction * len(ks))
+    if verbose:
+        print(f"Assigning calibration to {nsel}/{nsamples} galaxies.")
+
+    mu_calibration = np.full((nrepeat_calibration, nsamples), np.nan)
+    e_mu_calibration = np.full((nrepeat_calibration, nsamples), np.nan)
+
+    for n in range(nrepeat_calibration):
+        ks_n = gen.choice(ks, nsel, replace=False)
+
+        mu_calibration[n, ks_n] = gen.normal(mu_true[ks_n], sigma_calibration)
+        e_mu_calibration[n, ks_n] = np.ones(len(ks_n)) * sigma_calibration
+
+    # Convert the true distance modulus to true distance and cosmological
+    # redshift. The distance is in Mpc/h because the box is in Mpc / h.
+    r = distmod2dist(mu_true_h, Om0)
+    zcosmo = distmod2redshift(mu_true_h, Om0)
+
+    # Calculate the Cartesian coordinates of each galaxy. This is initially
+    # centered at (0, 0, 0).
+    pos = r * np.asarray([
+        np.sin(gal_theta) * np.cos(gal_phi),
+        np.sin(gal_theta) * np.sin(gal_phi),
+        np.cos(gal_theta)])
+    pos = pos.T
+    pos_box = pos / boxsize + 0.5
+
+    vel = evaluate_cartesian_regular(
+        velocity_field[0], velocity_field[1], velocity_field[2],
+        pos=pos_box, smooth_scales=None, method="cubic")
+    vel = beta * np.vstack(vel).T
+
+    for i in range(3):
+        vel[:, i] += Vext[i]
+
+    # Compute the radial velocity.
+    Vr = np.sum(vel * pos, axis=1) / np.linalg.norm(pos, axis=1)
+
+    # The true redshift of the source.
+    zCMB_true = (1 + zcosmo) * (1 + Vr / SPEED_OF_LIGHT) - 1
+    zCMB_obs = gen.normal(zCMB_true, sigma_v / SPEED_OF_LIGHT)
+
+    # These galaxies will be masked out when LOS is read it because they are
+    # too far away.
+    distance_mask = r < 125
+    truths["distance_mask"] = distance_mask
+
+    sample = {"RA": RA_2MTF,
+              "DEC": DEC_2MTF,
+              "z_CMB": zCMB_obs,
+              "eta": eta_obs,
+              "mag": mag_obs,
+              "e_eta": np.ones(nsamples) * mean_e_eta,
+              "e_mag": np.ones(nsamples) * mean_e_mag,
+              "mu_true": mu_true,
+              "mu_TFR": mu_TFR,
+              "mu_calibration": mu_calibration,
+              "e_mu_calibration": e_mu_calibration,
+              "r": r,
+              }
+
+    return sample, truths
--- a/notebooks/flow/mock_carrick.ipynb
+++ b/notebooks/flow/mock_carrick.ipynb
--- a/notebooks/flow/mock_csiborg.ipynb
+++ b/notebooks/flow/mock_csiborg.ipynb
--- a/scripts/flow/flow_validation.py
+++ b/scripts/flow/flow_validation.py
@ -116,6 +116,9 @@ def get_models(ksim, get_model_kwargs, mag_selection, void_kwargs,
                     "Pantheon+_groups", "Pantheon+_groups_zSN",
                     "Pantheon+_zSN"]:
            fpath = join(folder, "PV_compilation.hdf5")
+        elif "Carrick2MTFmock" in cat:
+            ki = cat.split("_")[-1]
+            fpath =f"/mnt/extraspace/rstiskalek/csiborg_postprocessing/flow_mock/Carrick2MTFmock_seed{ki}.hdf5"  # noqa
        elif "CF4_TFR" in cat:
            fpath = join(folder, "PV/CF4/CF4_TF-distances.hdf5")
        elif cat in ["CF4_GroupAll"]:
@ -186,7 +189,7 @@ def run_model(model, nsteps, nburn,  model_kwargs, out_folder,
        neg_ln_evidence_err = (jax.numpy.nan, jax.numpy.nan)

    fname = join(out_folder, fname)
-    print(f"Saving results to `{fname}`.")
+    print(f"Saving results: `{fname}`.")
    with File(fname, "w") as f:
        # Write samples
        grp = f.create_group("samples")
@ -203,15 +206,22 @@ def run_model(model, nsteps, nburn,  model_kwargs, out_folder,
        grp.create_dataset("neg_lnZ_harmonic", data=neg_ln_evidence)
        grp.create_dataset("neg_lnZ_harmonic_err", data=neg_ln_evidence_err)

-    fname_summary = fname.replace(".hdf5", ".txt")
-    print(f"Saving summary to `{fname_summary}`.")
-    with open(fname_summary, 'w') as f:
+    fname_config = fname.replace(".hdf5", "_config.txt")
+    print(f"Saving configuration: `{fname_config}`.")
+    with open(fname_config, 'w') as f:
        original_stdout = sys.stdout
        sys.stdout = f

        print("User parameters:")
        for kwargs in kwargs_print:
            print_variables(kwargs.keys(), kwargs.values())
+        sys.stdout = original_stdout
+
+    fname_summary = fname.replace(".hdf5", "_summary.txt")
+    print(f"Saving summary: `{fname_summary}`.")
+    with open(fname_summary, 'w') as f:
+        original_stdout = sys.stdout
+        sys.stdout = f

        print("HMC summary:")
        print(f"{'BIC':<20} {BIC}")
@ -238,7 +248,7 @@ def get_distmod_hyperparams(catalogue, sample_alpha, sample_mag_dipole):
                "alpha_min": alpha_min, "alpha_max": alpha_max,
                "sample_alpha": sample_alpha
                }
-    elif catalogue in ["SFI_gals", "2MTF"] or "CF4_TFR" in catalogue or "IndranilVoidTFRMock" in catalogue:  # noqa
+    elif catalogue in ["SFI_gals", "2MTF"] or "CF4_TFR" in catalogue or "IndranilVoidTFRMock" in catalogue or "Carrick2MTFmock" in catalogue:  # noqa
        return {"e_mu_min": 0.001, "e_mu_max": 1.0,
                "a_mean": -21., "a_std": 5.0,
                "b_mean": -5.95, "b_std": 4.0,
@ -247,6 +257,7 @@ def get_distmod_hyperparams(catalogue, sample_alpha, sample_mag_dipole):
                "sample_a_dipole": sample_mag_dipole,
                "alpha_min": alpha_min, "alpha_max": alpha_max,
                "sample_alpha": sample_alpha,
+                "sample_curvature": False if "Carrick2MTFmock" in catalogue else True,  # noqa
                }
    elif catalogue in ["CF4_GroupAll"]:
        return {"e_mu_min": 0.001, "e_mu_max": 1.0,
@ -294,8 +305,8 @@ if __name__ == "__main__":
    ###########################################################################

    # `None` means default behaviour
-    nsteps = 2_000
-    nburn = 2_000
+    nsteps = 1_500
+    nburn = 1_500
    zcmb_min = None
    zcmb_max = 0.05
    nchains_harmonic = 10
@ -310,9 +321,16 @@ if __name__ == "__main__":
    sample_mag_dipole = False
    wo_num_dist_marginalisation = False
    absolute_calibration = None
-    calculate_harmonic = (False if inference_method == "bayes" else True) and (not wo_num_dist_marginalisation)  # noqa
+    calculate_harmonic = (False if (inference_method == "bayes") else True) and (not wo_num_dist_marginalisation)  # noqa
    sample_h = True if absolute_calibration is not None else False

+    # These mocks are generated without a density field, so there is no
+    # inhomogeneous Malmquist and we also do not need evidences.
+    for catalogue in ARGS.catalogue:
+        if "Carrick2MTFmock" in catalogue:
+            sample_alpha = False
+            calculate_harmonic = False
+
    fname_kwargs = {"inference_method": inference_method,
                    "smooth": ARGS.ksmooth,
                    "nsim": ARGS.ksim,
@ -375,7 +393,7 @@ if __name__ == "__main__":
                               "Vmono_min": -1000, "Vmono_max": 1000,
                               "beta_min": -10.0, "beta_max": 10.0,
                               "sigma_v_min": 1.0, "sigma_v_max": 1000 if "IndranilVoid_" in ARGS.simname else 750.,  # noqa
-                               "h_min": 0.01, "h_max": 5.0,
+                               "h_min": 0.25, "h_max": 1.,
                               "no_Vext": False if no_Vext is None else no_Vext,        # noqa
                               "sample_Vmag_vax": sample_Vmag_vax,
                               "sample_Vmono": sample_Vmono,
--- a/scripts/flow/flow_validation.sh
+++ b/scripts/flow/flow_validation.sh
@ -38,10 +38,10 @@ fi


 # for simname in "IndranilVoid_exp" "IndranilVoid_gauss" "IndranilVoid_mb"; do
-for simname in "IndranilVoid_exp"; do
+for simname in "Carrick2015"; do
 # for simname in "Carrick2015" "Lilow2024" "csiborg2_main" "csiborg2X" "manticore_2MPP_N128_DES_V1" "CF4" "CLONES"; do
    # for catalogue in "LOSS" "Foundation" "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do
-    for catalogue in "2MTF"; do
+    for catalogue in "CF4_TFR_w1"; do
    # for catalogue in "CF4_TFR_i" "CF4_TFR_w1"; do
    # for catalogue in "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do
        for ksim in "none"; do