More LOS (#137)

* Switch to CB2 * Update for extrapolation * Add 'nan' extrapolation * Update nb * Update submits * Add Rmax to the models * Update nb * Add print statement * Update script settings * Update flow model to new method * Update printing * Update path * Update so that it works * Update nb * Update submit * Add Rmin for hollow bulk flows * Update script * Update script * Update scripts back * Update scripts back * Fix normalization bug * Update script * pep8
2025-06-07 17:31:13 +00:00 · 2024-07-25 11:48:37 +01:00 · 2024-07-25 11:48:37 +01:00 · 8d49aa071b
commit 8d49aa071b
parent 73ffffb826
15 changed files with 379 additions and 198 deletions
--- a/csiborgtools/field/interp.py
+++ b/csiborgtools/field/interp.py
@ -16,16 +16,15 @@
 Tools for interpolating 3D fields at arbitrary positions.
 """
 import MAS_library as MASL
-import numpy
+import numpy as np
 import smoothing_library as SL
-from scipy.interpolate import RegularGridInterpolator
 from numba import jit
+from scipy.interpolate import RegularGridInterpolator
 from tqdm import tqdm, trange

 from ..utils import periodic_wrap_grid, radec_to_cartesian
 from .utils import divide_nonzero, force_single_precision, nside2radec

-
 ###############################################################################
 #                       Cartesian interpolation                               #
 ###############################################################################
@ -61,7 +60,7 @@ def evaluate_cartesian_cic(*fields, pos, smooth_scales=None, verbose=False):
    else:
        shape = (pos.shape[0], len(smooth_scales))

-    interp_fields = [numpy.full(shape, numpy.nan, dtype=numpy.float32)
+    interp_fields = [np.full(shape, np.nan, dtype=np.float32)
                     for __ in range(len(fields))]

    for i, field in enumerate(fields):
@ -73,7 +72,7 @@ def evaluate_cartesian_cic(*fields, pos, smooth_scales=None, verbose=False):

            for j, scale in enumerate(iterator):
                if not scale > 0:
-                    fsmooth = numpy.copy(field)
+                    fsmooth = np.copy(field)
                else:
                    fsmooth = smoothen_field(field, scale, 1., make_copy=True)
                MASL.CIC_interp(fsmooth, 1., pos, interp_fields[i][:, j])
@ -121,15 +120,15 @@ def evaluate_cartesian_regular(*fields, pos, smooth_scales=None,
    ngrid = fields[0].shape[0]
    cellsize = 1. / ngrid

-    X = numpy.linspace(0.5 * cellsize, 1 - 0.5 * cellsize, ngrid)
-    Y, Z = numpy.copy(X), numpy.copy(X)
+    X = np.linspace(0.5 * cellsize, 1 - 0.5 * cellsize, ngrid)
+    Y, Z = np.copy(X), np.copy(X)

-    interp_fields = [numpy.full(shape, numpy.nan, dtype=numpy.float32)
+    interp_fields = [np.full(shape, np.nan, dtype=np.float32)
                     for __ in range(len(fields))]
    for i, field in enumerate(fields):
        if smooth_scales is None:
            field_interp = RegularGridInterpolator(
-                (X, Y, Z), field, fill_value=None, bounds_error=False,
+                (X, Y, Z), field, fill_value=np.nan, bounds_error=False,
                method=method)
            interp_fields[i] = field_interp(pos)
        else:
@ -138,12 +137,12 @@ def evaluate_cartesian_regular(*fields, pos, smooth_scales=None,

            for j, scale in enumerate(iterator):
                if not scale > 0:
-                    fsmooth = numpy.copy(field)
+                    fsmooth = np.copy(field)
                else:
                    fsmooth = smoothen_field(field, scale, 1., make_copy=True)

                field_interp = RegularGridInterpolator(
-                    (X, Y, Z), fsmooth, fill_value=None, bounds_error=False,
+                    (X, Y, Z), fsmooth, fill_value=np.nan, bounds_error=False,
                    method=method)
                interp_fields[i][:, j] = field_interp(pos)

@ -175,9 +174,9 @@ def observer_peculiar_velocity(velocity_field, smooth_scales=None,
    vpec : 1-dimensional array of shape `(3,)` or `(len(smooth_scales), 3)`
    """
    if observer is None:
-        pos = numpy.asanyarray([0.5, 0.5, 0.5]).reshape(1, 3)
+        pos = np.asanyarray([0.5, 0.5, 0.5]).reshape(1, 3)
    else:
-        pos = numpy.asanyarray(observer).reshape(1, 3)
+        pos = np.asanyarray(observer).reshape(1, 3)

    vx, vy, vz = evaluate_cartesian_cic(
        *velocity_field, pos=pos, smooth_scales=smooth_scales, verbose=verbose)
@ -188,9 +187,9 @@ def observer_peculiar_velocity(velocity_field, smooth_scales=None,
    vz = vz.reshape(-1, )

    if smooth_scales is None:
-        return numpy.array([vx[0], vy[0], vz[0]])
+        return np.array([vx[0], vy[0], vz[0]])

-    return numpy.vstack([vx, vy, vz]).T
+    return np.vstack([vx, vy, vz]).T

 ###############################################################################
 #                   Evaluating the fields along a LOS                         #
@ -233,19 +232,19 @@ def evaluate_los(*fields, sky_pos, boxsize, rmax, dr, smooth_scales=None,
    """
    mpc2box = 1. / boxsize

-    if not isinstance(sky_pos, numpy.ndarray) and sky_pos.ndim != 2:
+    if not isinstance(sky_pos, np.ndarray) and sky_pos.ndim != 2:
        raise ValueError("`sky_pos` must be a 2D array.")
    nsamples = len(sky_pos)

-    if rmax > 0.5 * boxsize:
+    if interpolation_method == "cic" and rmax > 0.5 * boxsize:
        raise ValueError("The maximum radius must be within the box.")

    # Radial positions to evaluate for each line of sight.
-    rdist = numpy.arange(0, rmax, dr, dtype=fields[0].dtype)
+    rdist = np.arange(0, rmax, dr, dtype=fields[0].dtype)

    # Create an array of radial positions and sky coordinates of each line of
    # sight.
-    pos = numpy.empty((nsamples * len(rdist), 3), dtype=fields[0].dtype)
+    pos = np.empty((nsamples * len(rdist), 3), dtype=fields[0].dtype)
    for i in range(nsamples):
        start, end = i * len(rdist), (i + 1) * len(rdist)
        pos[start:end, 0] = rdist * mpc2box
@ -261,7 +260,7 @@ def evaluate_los(*fields, sky_pos, boxsize, rmax, dr, smooth_scales=None,
            smooth_scales = [smooth_scales]

        if isinstance(smooth_scales, list):
-            smooth_scales = numpy.array(smooth_scales, dtype=numpy.float32)
+            smooth_scales = np.array(smooth_scales, dtype=np.float32)

        smooth_scales *= mpc2box

@ -286,8 +285,7 @@ def evaluate_los(*fields, sky_pos, boxsize, rmax, dr, smooth_scales=None,

    field_interp_reshaped = [None] * len(fields)
    for i in range(len(fields)):
-        samples = numpy.full(shape_single, numpy.nan,
-                             dtype=field_interp[i].dtype)
+        samples = np.full(shape_single, np.nan, dtype=field_interp[i].dtype)
        for j in range(nsamples):
            start, end = j * len(rdist), (j + 1) * len(rdist)
            samples[j] = field_interp[i][start:end, ...]
@ -328,7 +326,7 @@ def evaluate_sky(*fields, pos, mpc2box, smooth_scales=None, verbose=False):
    (list of) 1-dimensional array of shape `(n_samples, len(smooth_scales))`
    """
    # Make a copy of the positions to avoid modifying the input.
-    pos = numpy.copy(pos)
+    pos = np.copy(pos)

    pos = force_single_precision(pos)
    pos[:, 0] *= mpc2box
@ -340,7 +338,7 @@ def evaluate_sky(*fields, pos, mpc2box, smooth_scales=None, verbose=False):
            smooth_scales = [smooth_scales]

        if isinstance(smooth_scales, list):
-            smooth_scales = numpy.array(smooth_scales, dtype=numpy.float32)
+            smooth_scales = np.array(smooth_scales, dtype=np.float32)

        smooth_scales *= mpc2box

@ -374,26 +372,26 @@ def make_sky(field, angpos, dist, boxsize, verbose=True):
    interp_field : 1-dimensional array of shape `(n_pos, )`.
    """
    dx = dist[1] - dist[0]
-    assert numpy.allclose(dist[1:] - dist[:-1], dx)
+    assert np.allclose(dist[1:] - dist[:-1], dx)
    assert angpos.ndim == 2 and dist.ndim == 1

    # We loop over the angular directions, at each step evaluating a vector
    # of distances. We pre-allocate arrays for speed.
-    dir_loop = numpy.full((dist.size, 3), numpy.nan, dtype=numpy.float32)
+    dir_loop = np.full((dist.size, 3), np.nan, dtype=np.float32)

    ndir = angpos.shape[0]
-    out = numpy.full(ndir, numpy.nan, dtype=numpy.float32)
+    out = np.full(ndir, np.nan, dtype=np.float32)
    for i in trange(ndir) if verbose else range(ndir):
        dir_loop[:, 0] = dist
        dir_loop[:, 1] = angpos[i, 0]
        dir_loop[:, 2] = angpos[i, 1]

-        out[i] = numpy.sum(
+        out[i] = np.sum(
            dist**2 * evaluate_sky(field, pos=dir_loop, mpc2box=1 / boxsize))

    # Assuming the field is in h^2 Msun / kpc**3, we need to convert Mpc / h
    # to kpc / h and multiply by the pixel area.
-    out *= dx * 1e9 * 4 * numpy.pi / len(angpos)
+    out *= dx * 1e9 * 4 * np.pi / len(angpos)
    return out


@ -432,8 +430,7 @@ def field_at_distance(field, distance, boxsize, smooth_scales=None, nside=128,
    # box Cartesian coordinates. We take HEALPix pixels because they are
    # uniformly distributed on the sky.
    angpos = nside2radec(nside)
-    X = numpy.hstack([numpy.ones(len(angpos)).reshape(-1, 1) * distance,
-                      angpos])
+    X = np.hstack([np.ones(len(angpos)).reshape(-1, 1) * distance, angpos])
    X = radec_to_cartesian(X) / boxsize + 0.5

    return evaluate_cartesian_cic(field, pos=X, smooth_scales=smooth_scales,
@ -448,8 +445,8 @@ def field_at_distance(field, distance, boxsize, smooth_scales=None, nside=128,
@jit(nopython=True)
 def make_gridpos(grid_size):
    """Make a regular grid of positions and distances from the center."""
-    grid_pos = numpy.full((grid_size**3, 3), numpy.nan, dtype=numpy.float32)
-    grid_dist = numpy.full(grid_size**3, numpy.nan, dtype=numpy.float32)
+    grid_pos = np.full((grid_size**3, 3), np.nan, dtype=np.float32)
+    grid_dist = np.full(grid_size**3, np.nan, dtype=np.float32)

    n = 0
    for i in range(grid_size):
@ -507,8 +504,8 @@ def field2rsp(field, radvel_field, box, MAS, init_value=0.):
    grid_pos += 0.5
    grid_pos = periodic_wrap_grid(grid_pos)

-    rsp_field = numpy.full(field.shape, init_value, dtype=numpy.float32)
-    cell_counts = numpy.zeros(rsp_field.shape, dtype=numpy.float32)
+    rsp_field = np.full(field.shape, init_value, dtype=np.float32)
+    cell_counts = np.zeros(rsp_field.shape, dtype=np.float32)

    # Interpolate the field to the grid positions.
    MASL.MA(grid_pos, rsp_field, 1., MAS, W=field.reshape(-1,))
@ -554,6 +551,6 @@ def smoothen_field(field, smooth_scale, boxsize, threads=1, make_copy=False):
                       threads)

    if make_copy:
-        field = numpy.copy(field)
+        field = np.copy(field)

    return SL.field_smoothing(field, W_k, threads)
--- a/csiborgtools/flow/flow_model.py
+++ b/csiborgtools/flow/flow_model.py
@ -82,8 +82,8 @@ class DataLoader:
        self._cat = self._read_catalogue(catalogue, catalogue_fpath)
        self._catname = catalogue

-        fprint("reading the interpolated field,", verbose)
-        self._field_rdist, self._los_density, self._los_velocity = self._read_field(  # noqa
+        fprint("reading the interpolated field.", verbose)
+        self._field_rdist, self._los_density, self._los_velocity, self._rmax = self._read_field(  # noqa
            simname, ksim, catalogue, ksmooth, paths)

        if len(self._field_rdist) % 2 == 0:
@ -183,6 +183,14 @@ class DataLoader:

        return self._los_velocity[:, :, self._mask, ...]

+    @property
+    def rmax(self):
+        """
+        Radial distance above which the underlying reconstruction is
+        extrapolated `(n_sims, n_objects)`.
+        """
+        return self._rmax[:, self._mask]
+
    @property
    def los_radial_velocity(self):
        """
@ -205,6 +213,7 @@ class DataLoader:

        los_density = [None] * len(ksims)
        los_velocity = [None] * len(ksims)
+        rmax = [None] * len(ksims)

        for n, ksim in enumerate(ksims):
            nsim = nsims[ksim]
@ -219,11 +228,13 @@ class DataLoader:
                los_density[n] = f[f"density_{nsim}"][indx]
                los_velocity[n] = f[f"velocity_{nsim}"][indx]
                rdist = f[f"rdist_{nsim}"][...]
+                rmax[n] = f[f"rmax_{nsim}"][indx]

        los_density = np.stack(los_density)
        los_velocity = np.stack(los_velocity)
+        rmax = np.stack(rmax)

-        return rdist, los_density, los_velocity
+        return rdist, los_density, los_velocity, rmax

    def _read_catalogue(self, catalogue, catalogue_fpath):
        if catalogue == "A2":
@ -456,7 +467,6 @@ def predict_zobs(dist, beta, Vext_radial, vpec_radial, Omega_m):
    velocity field.
    """
    zcosmo = dist2redshift(dist, Omega_m)
-
    vrad = beta * vpec_radial + Vext_radial
    return (1 + zcosmo) * (1 + vrad / SPEED_OF_LIGHT) - 1

@ -473,14 +483,13 @@ def calculate_ptilde_wo_bias(xrange, mu, err_squared, r_squared_xrange):
    return ptilde


-def calculate_likelihood_zobs(zobs, zobs_pred, sigma_v):
+def calculate_likelihood_zobs(zobs, zobs_pred, e2_cz):
    """
    Calculate the likelihood of the observed redshift given the predicted
    redshift.
    """
    dcz = SPEED_OF_LIGHT * (zobs[:, None] - zobs_pred)
-    sigma_v = sigma_v[:, None]
-    return jnp.exp(-0.5 * (dcz / sigma_v)**2) / jnp.sqrt(2 * np.pi) / sigma_v
+    return jnp.exp(-0.5 * dcz**2 / e2_cz) / jnp.sqrt(2 * np.pi * e2_cz)

 ###############################################################################
 #                          Base flow validation                               #
@ -598,27 +607,18 @@ def sample_TFR(e_mu_min, e_mu_max, a_mean, a_std, b_mean, b_std):

 def sample_calibration(Vext_min, Vext_max, Vmono_min, Vmono_max,
                       alpha_min, alpha_max, beta_min, beta_max, sigma_v_min,
-                       sigma_v_max, sample_Vmono, sample_alpha, sample_beta):
+                       sigma_v_max, sample_Vmono, sample_alpha, sample_beta,
+                       sample_sigma_v_ext):
    """Sample the flow calibration."""
    Vext = sample("Vext", Uniform(Vext_min, Vext_max).expand([3]))
    sigma_v = sample("sigma_v", Uniform(sigma_v_min, sigma_v_max))

-    if sample_Vmono:
-        Vmono = sample("Vmono", Uniform(Vmono_min, Vmono_max))
-    else:
-        Vmono = 0.0
+    alpha = sample("alpha", Uniform(alpha_min, alpha_max)) if sample_alpha else 1.0                            # noqa
+    beta = sample("beta", Uniform(beta_min, beta_max)) if sample_beta else 1.0                                 # noqa
+    Vmono = sample("Vmono", Uniform(Vmono_min, Vmono_max)) if sample_Vmono else 0.0                            # noqa
+    sigma_v_ext = sample("sigma_v_ext", Uniform(sigma_v_min, sigma_v_max)) if sample_sigma_v_ext else sigma_v  # noqa

-    if sample_alpha:
-        alpha = sample("alpha", Uniform(alpha_min, alpha_max))
-    else:
-        alpha = 1.0
-
-    if sample_beta:
-        beta = sample("beta", Uniform(beta_min, beta_max))
-    else:
-        beta = 1.0
-
-    return Vext, Vmono, sigma_v, alpha, beta
+    return Vext, Vmono, sigma_v, sigma_v_ext, alpha, beta


 ###############################################################################
@ -626,6 +626,23 @@ def sample_calibration(Vext_min, Vext_max, Vmono_min, Vmono_max,
 ###############################################################################


+def find_extrap_mask(rmax, rdist):
+    """
+    Make a mask of shape `(nsim, ngal, nrdist)` of which velocity field values
+    are extrapolated. above which the
+    """
+    nsim, ngal = rmax.shape
+    extrap_mask = np.zeros((nsim, ngal, len(rdist)), dtype=bool)
+    extrap_weights = np.ones((nsim, ngal, len(rdist)))
+    for i in range(nsim):
+        for j in range(ngal):
+            k = np.searchsorted(rdist, rmax[i, j])
+            extrap_mask[i, j, k:] = True
+            extrap_weights[i, j, k:] = rmax[i, j] / rdist[k:]
+
+    return extrap_mask, extrap_weights
+
+
 class PV_validation_model(BaseFlowValidationModel):
    """
    Peculiar velocity validation model.
@ -636,6 +653,9 @@ class PV_validation_model(BaseFlowValidationModel):
        LOS density field.
    los_velocity : 3-dimensional array of shape (n_sims, n_objects, n_steps)
        LOS radial velocity field.
+    rmax : 1-dimensional array of shape (n_sims, n_objects)
+        Radial distance above which the underlying reconstruction is
+        extrapolated.
    RA, dec : 1-dimensional arrays of shape (n_objects)
        Right ascension and declination in degrees.
    z_obs : 1-dimensional array of shape (n_objects)
@ -650,7 +670,7 @@ class PV_validation_model(BaseFlowValidationModel):
        Matter density parameter.
    """

-    def __init__(self, los_density, los_velocity, RA, dec, z_obs,
+    def __init__(self, los_density, los_velocity, rmax, RA, dec, z_obs,
                 e_zobs, calibration_params, r_xrange, Omega_m, kind):
        if e_zobs is not None:
            e2_cz_obs = jnp.asarray((SPEED_OF_LIGHT * e_zobs)**2)
@ -661,9 +681,9 @@ class PV_validation_model(BaseFlowValidationModel):
        RA = np.deg2rad(RA)
        dec = np.deg2rad(dec)

-        names = ["los_density", "los_velocity", "RA", "dec", "z_obs",
+        names = ["los_density", "los_velocity", "rmax", "RA", "dec", "z_obs",
                 "e2_cz_obs"]
-        values = [los_density, los_velocity, RA, dec, z_obs, e2_cz_obs]
+        values = [los_density, los_velocity, rmax, RA, dec, z_obs, e2_cz_obs]
        self._setattr_as_jax(names, values)
        self._set_calibration_params(calibration_params)
        self._set_radial_spacing(r_xrange, Omega_m)
@ -672,11 +692,23 @@ class PV_validation_model(BaseFlowValidationModel):
        self.Omega_m = Omega_m
        self.norm = - self.ndata * jnp.log(self.num_sims)

+        extrap_mask, extrap_weights = find_extrap_mask(rmax, r_xrange)
+        self.extrap_mask = jnp.asarray(extrap_mask)
+        self.extrap_weights = jnp.asarray(extrap_weights)
+
    def __call__(self, calibration_hyperparams, distmod_hyperparams,
                 store_ll_all=False):
        """NumPyro PV validation model."""
-        Vext, Vmono, sigma_v, alpha, beta = sample_calibration(**calibration_hyperparams)  # noqa
-        cz_err = jnp.sqrt(sigma_v**2 + self.e2_cz_obs)
+        Vext, Vmono, sigma_v, sigma_v_ext, alpha, beta = sample_calibration(**calibration_hyperparams)  # noqa
+        # Turn e2_cz to be of shape (nsims, ndata, nxrange) and apply
+        # sigma_v_ext where applicable
+        e2_cz = jnp.full_like(self.extrap_mask, sigma_v**2, dtype=jnp.float32)
+        if calibration_hyperparams["sample_sigma_v_ext"]:
+            e2_cz = e2_cz.at[self.extrap_mask].set(sigma_v_ext**2)
+
+        # Now add the observational errors
+        e2_cz += self.e2_cz_obs[None, :, None]
+
        Vext_rad = project_Vext(Vext[0], Vext[1], Vext[2], self.RA, self.dec)

        if self.kind == "SN":
@ -700,10 +732,13 @@ class PV_validation_model(BaseFlowValidationModel):
        pnorm = simpson(ptilde, dx=self.dr, axis=-1)

        # Calculate z_obs at each distance. Shape is (n_sims, ndata, nxrange)
-        vrad = beta * self.los_velocity + Vext_rad[None, :, None] + Vmono
+        # The weights are related to the extrapolation of the velocity field.
+        vrad = beta * self.los_velocity
+        vrad += (Vext_rad[None, :, None] + Vmono) * self.extrap_weights
        zobs = (1 + self.z_xrange[None, None, :]) * (1 + vrad / SPEED_OF_LIGHT) - 1  # noqa

-        ptilde *= calculate_likelihood_zobs(self.z_obs, zobs, cz_err)
+        ptilde *= calculate_likelihood_zobs(self.z_obs, zobs, e2_cz)
+        # ptilde *= calculate_likelihood_zobs(self.z_obs, zobs, sigma_v)
        ll = jnp.log(simpson(ptilde, dx=self.dr, axis=-1)) - jnp.log(pnorm)

        if store_ll_all:
@ -740,6 +775,7 @@ def get_model(loader, zcmb_max=None, verbose=True):

    los_overdensity = loader.los_density
    los_velocity = loader.los_radial_velocity
+    rmax = loader.rmax
    kind = loader._catname

    if kind in ["LOSS", "Foundation"]:
@ -753,10 +789,9 @@ def get_model(loader, zcmb_max=None, verbose=True):
                              "e_c": e_c[mask]}

        model = PV_validation_model(
-            los_overdensity[:, mask], los_velocity[:, mask], RA[mask],
-            dec[mask], zCMB[mask], e_zCMB, calibration_params,
+            los_overdensity[:, mask], los_velocity[:, mask], rmax[:, mask],
+            RA[mask], dec[mask], zCMB[mask], e_zCMB, calibration_params,
            loader.rdist, loader._Omega_m, "SN")
-        # return model_old, model
    elif "Pantheon+" in kind:
        keys = ["RA", "DEC", "zCMB", "mB", "x1", "c", "biasCor_m_b", "mBERR",
                "x1ERR", "cERR", "biasCorErr_m_b", "zCMB_SN", "zCMB_Group",
@ -766,7 +801,7 @@ def get_model(loader, zcmb_max=None, verbose=True):
        mB -= bias_corr_mB
        e_mB = np.sqrt(e_mB**2 + e_bias_corr_mB**2)

-        mask = (zCMB < zcmb_max)
+        mask = zCMB < zcmb_max

        if kind == "Pantheon+_groups":
            mask &= np.isfinite(zCMB_Group)
@ -782,8 +817,8 @@ def get_model(loader, zcmb_max=None, verbose=True):
                              "e_mB": e_mB[mask], "e_x1": e_x1[mask],
                              "e_c": e_c[mask]}
        model = PV_validation_model(
-            los_overdensity[:, mask], los_velocity[:, mask], RA[mask],
-            dec[mask], zCMB[mask], e_zCMB[mask], calibration_params,
+            los_overdensity[:, mask], los_velocity[:, mask], rmax[:, mask],
+            RA[mask], dec[mask], zCMB[mask], e_zCMB[mask], calibration_params,
            loader.rdist, loader._Omega_m, "SN")
    elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"]:
        keys = ["RA", "DEC", "z_CMB", "mag", "eta", "e_mag", "e_eta"]
@ -798,14 +833,13 @@ def get_model(loader, zcmb_max=None, verbose=True):
        calibration_params = {"mag": mag[mask], "eta": eta[mask],
                              "e_mag": e_mag[mask], "e_eta": e_eta[mask]}
        model = PV_validation_model(
-            los_overdensity[:, mask], los_velocity[:, mask], RA[mask],
-            dec[mask], zCMB[mask], None, calibration_params, loader.rdist,
-            loader._Omega_m, "TFR")
+            los_overdensity[:, mask], los_velocity[:, mask], rmax[:, mask],
+            RA[mask], dec[mask], zCMB[mask], None, calibration_params,
+            loader.rdist, loader._Omega_m, "TFR")
    else:
        raise ValueError(f"Catalogue `{kind}` not recognized.")

-    if verbose:
-        print(f"Selected {np.sum(mask)}/{len(mask)} galaxies.", flush=True)
+    fprint(f"selected {np.sum(mask)}/{len(mask)} galaxies.")

    return model

@ -822,9 +856,13 @@ def _posterior_element(r, beta, Vext_radial, los_velocity, Omega_m, zobs,
    `Observed2CosmologicalRedshift`.
    """
    zobs_pred = predict_zobs(r, beta, Vext_radial, los_velocity, Omega_m)
-    likelihood = calculate_likelihood_zobs(zobs, zobs_pred, sigma_v)
-    prior = dVdOmega * los_density**alpha
-    return likelihood * prior
+    # Likelihood term
+    dcz = SPEED_OF_LIGHT * (zobs - zobs_pred)
+    posterior = jnp.exp(-0.5 * dcz**2 / sigma_v**2) / jnp.sqrt(2 * jnp.pi * sigma_v**2)  # noqa
+    # Prior term
+    posterior *= dVdOmega * los_density**alpha
+
+    return posterior


 class BaseObserved2CosmologicalRedshift(ABC):
@ -834,10 +872,10 @@ class BaseObserved2CosmologicalRedshift(ABC):
        for i, key in enumerate(calibration_samples.keys()):
            x = calibration_samples[key]
            if not isinstance(x, (np.ndarray, jnp.ndarray)):
-                raise ValueError(f"Calibration sample {x} must be an array.")
+                raise ValueError(f"Calibration sample `{key}` must be an array.")  # noqa

-            if x.ndim != 1:
-                raise ValueError(f"Calibration samples {x} must be 1D.")
+            if x.ndim != 1 and key != "Vext":
+                raise ValueError(f"Calibration samples `{key}` must be 1D.")

            if i == 0:
                ncalibratrion = len(x)
@ -863,12 +901,7 @@ class BaseObserved2CosmologicalRedshift(ABC):
        self._ncalibration_samples = ncalibratrion

        # It is best to JIT compile the functions right here.
-        self._vmap_simps = jit(vmap(lambda y: simpson(y, dx=dr)))
-        axs = (0, None, None, 0, None, None, None, None, 0, 0)
-        self._vmap_posterior_element = vmap(_posterior_element, in_axes=axs)
-        self._vmap_posterior_element = jit(self._vmap_posterior_element)
-
-        self._simps = jit(lambda y: simpson(y, dx=dr))
+        self._jit_posterior_element = jit(_posterior_element)

    def get_calibration_samples(self, key):
        """Get calibration samples for a given key."""
@ -896,8 +929,8 @@ class Observed2CosmologicalRedshift(BaseObserved2CosmologicalRedshift):
    Parameters
    ----------
    calibration_samples : dict
-        Dictionary of flow calibration samples (`alpha`, `beta`, `Vext_x`,
-        `Vext_y`, `Vext_z`, `sigma_v`, ...).
+        Dictionary of flow calibration samples (`alpha`, `beta`, `Vext`,
+        `sigma_v`, ...).
    r_xrange : 1-dimensional array
        Radial comoving distances where the fields are interpolated for each
        object.
@ -919,11 +952,9 @@ class Observed2CosmologicalRedshift(BaseObserved2CosmologicalRedshift):
    def posterior_mean_std(self, x, px):
        """
        Calculate the mean and standard deviation of a 1-dimensional PDF.
-        Assumes that the PDF is already normalized and that the spacing is that
-        of `r_xrange` which is inferred when initializing this class.
        """
-        mu = self._simps(x * px)
-        std = (self._simps(x**2 * px) - mu**2)**0.5
+        mu = simpson(x * px, x=x)
+        std = (simpson(x**2 * px, x=x) - mu**2)**0.5
        return mu, std

    def posterior_zcosmo(self, zobs, RA, dec, los_density, los_velocity,
@ -953,11 +984,8 @@ class Observed2CosmologicalRedshift(BaseObserved2CosmologicalRedshift):
        posterior : 1-dimensional array
            Posterior PDF.
        """
-        Vext_radial = project_Vext(
-            self.get_calibration_samples("Vext_x"),
-            self.get_calibration_samples("Vext_y"),
-            self.get_calibration_samples("Vext_z"),
-            RA, dec)
+        Vext = self.get_calibration_samples("Vext")
+        Vext_radial = project_Vext(*[Vext[:, i] for i in range(3)], RA, dec)

        alpha = self.get_calibration_samples("alpha")
        beta = self.get_calibration_samples("beta")
@ -970,13 +998,13 @@ class Observed2CosmologicalRedshift(BaseObserved2CosmologicalRedshift):
                             dtype=np.float32)
        for i in trange(self.ncalibration_samples, desc="Marginalizing",
                        disable=not verbose):
-            posterior[i] = self._vmap_posterior_element(
+            posterior[i] = self._jit_posterior_element(
                self._r_xrange, beta[i], Vext_radial[i], los_velocity,
                self._Omega_m, zobs, sigma_v[i], alpha[i], self._dVdOmega,
                los_density)

-        # Normalize the posterior for each flow sample and then stack them.
-        posterior /= self._vmap_simps(posterior).reshape(-1, 1)
+        # # Normalize the posterior for each flow sample and then stack them.
+        posterior /= simpson(posterior, x=self._zcos_xrange, axis=-1)[:, None]
        posterior = jnp.nanmean(posterior, axis=0)

        return self._zcos_xrange, posterior
--- a/csiborgtools/utils.py
+++ b/csiborgtools/utils.py
@ -352,7 +352,8 @@ def binned_statistic(x, y, left_edges, bin_width, statistic):
 def fprint(msg, verbose=True):
    """Print and flush a message with a timestamp."""
    if verbose:
-        print(f"{datetime.now()}:   {msg}", flush=True)
+        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        print(f"{timestamp}   {msg}", flush=True)


 ###############################################################################
--- a/notebooks/flow/flow_bulk.ipynb
+++ b/notebooks/flow/flow_bulk.ipynb
--- a/notebooks/flow/flow_los.ipynb
+++ b/notebooks/flow/flow_los.ipynb
--- a/notebooks/flow/process_upglade.ipynb
+++ b/notebooks/flow/process_upglade.ipynb
--- a/scripts/field_los.py
+++ b/scripts/field_los.py
@ -26,10 +26,11 @@ import csiborgtools
 import numpy as np
 from astropy import units as u
 from astropy.coordinates import SkyCoord
+from astropy.io import fits
 from h5py import File
 from mpi4py import MPI
+from numba import jit
 from taskmaster import work_delegation  # noqa
-from astropy.io import fits

 from utils import get_nsims

@ -79,7 +80,7 @@ def get_los(catalogue_name, simname, comm):
                RA = f["RA"][:]
                dec = f["DEC"][:]
        elif catalogue_name == "UPGLADE":
-            fname = "/mnt/users/rstiskalek/csiborgtools/data/upglade_z_0p05_all_PROCESSED.h5"  # noqa
+            fname = "/mnt/users/rstiskalek/csiborgtools/data/upglade_all_z0p05_new_PROCESSED.h5"  # noqa
            with File(fname, 'r') as f:
                RA = f["RA"][:]
                dec = f["DEC"][:]
@ -242,6 +243,7 @@ def combine_from_simulations(catalogue_name, simname, nsims, outfolder,
            f_out.create_dataset(f"rdist_{nsim}", data=f["rdist"][:])
            f_out.create_dataset(f"density_{nsim}", data=f["density"][:])
            f_out.create_dataset(f"velocity_{nsim}", data=f["velocity"][:])
+            f_out.create_dataset(f"rmax_{nsim}", data=f["rmax"][:])

        # Remove the temporary file.
        remove(fname)
@ -256,6 +258,30 @@ def combine_from_simulations(catalogue_name, simname, nsims, outfolder,
 #                       Main interpolating function                           #
 ###############################################################################

+@jit(nopython=True)
+def find_index_of_first_nan(y):
+    for n in range(1, len(y)):
+        if np.isnan(y[n]):
+            return n
+
+    return None
+
+
+def replace_nan_with_last_finite(x, y, apply_decay):
+    n = find_index_of_first_nan(y)
+
+    if n is None:
+        return y, x[-1]
+
+    y[n:] = y[n-1]
+    rmax = x[n-1]
+
+    if apply_decay:
+        # Optionally aply 1 / r decay
+        y[n:] *= rmax / x[n:]
+
+    return y, rmax
+

 def interpolate_field(pos, simname, nsim, MAS, grid, dump_folder, rmax,
                      dr, smooth_scales, verbose=False):
@ -300,6 +326,14 @@ def interpolate_field(pos, simname, nsim, MAS, grid, dump_folder, rmax,
        smooth_scales=smooth_scales, verbose=verbose,
        interpolation_method="linear")

+    rmax_density = np.full((len(pos), len(smooth_scales)), np.nan)
+    for i in range(len(pos)):
+        for j in range(len(smooth_scales)):
+            y, current_rmax = replace_nan_with_last_finite(rdist, finterp[i, :, j], False)  # noqa
+            finterp[i, :, j] = y
+            if current_rmax is not None:
+                rmax_density[i, j] = current_rmax
+
    print(f"Writing temporary file `{fname_out}`.")
    with File(fname_out, 'w') as f:
        f.create_dataset("rdist", data=rdist)
@ -318,8 +352,20 @@ def interpolate_field(pos, simname, nsim, MAS, grid, dump_folder, rmax,
        smooth_scales=smooth_scales, verbose=verbose,
        interpolation_method="linear")

+    rmax_velocity = np.full((3, len(pos), len(smooth_scales)), np.nan)
+    for k in range(3):
+        for i in range(len(pos)):
+            for j in range(len(smooth_scales)):
+                y, current_rmax = replace_nan_with_last_finite(rdist, finterp[k][i, :, j], True)  # noqa
+                finterp[k][i, :, j] = y
+                if current_rmax is not None:
+                    rmax_velocity[k, i, j] = current_rmax
+    rmax_velocity = np.min(rmax_velocity, axis=0)
+
+    rmax = np.minimum(rmax_density, rmax_velocity)
    with File(fname_out, 'a') as f:
        f.create_dataset("velocity", data=finterp)
+        f.create_dataset("rmax", data=rmax)


 ###############################################################################
@ -339,8 +385,8 @@ if __name__ == "__main__":
    parser.add_argument("--grid", type=int, help="Grid resolution.")
    args = parser.parse_args()

-    rmax = 200
-    dr = 0.25
+    rmax = 300
+    dr = 0.5
    smooth_scales = [0]

    comm = MPI.COMM_WORLD
--- a/scripts/field_los.sh
+++ b/scripts/field_los.sh
@ -11,8 +11,7 @@ MAS="SPH"
 grid=1024


-# for catalogue in "LOSS" "Foundation" "Pantheon+" "2MTF" "SFI_gals"; do
-for catalogue in "LOSS"; do
+for catalogue in "UPGLADE"; do
 # for catalogue in "Foundation"; do
    pythoncm="$env $file --catalogue $catalogue --nsims $nsims --simname $simname --MAS $MAS --grid $grid"
    if [ $on_login -eq 1 ]; then
--- a/scripts/flow_validation.py
+++ b/scripts/flow_validation.py
@ -100,6 +100,8 @@ def get_model(paths, get_model_kwargs, verbose=True):
                                          ARGS.catalogue, fpath, paths,
                                          ksmooth=ARGS.ksmooth)

+    print(f"\n{'Num. radial steps':<20} {len(loader.rdist)}\n", flush=True)
+
    return csiborgtools.flow.get_model(loader, **get_model_kwargs)


@ -227,8 +229,6 @@ if __name__ == "__main__":
    nsteps = 5000
    nburn = 1000
    zcmb_max = 0.06
-    sample_alpha = True
-    sample_beta = True
    calculate_evidence = False
    nchains_harmonic = 10
    num_epochs = 30
@ -237,7 +237,6 @@ if __name__ == "__main__":
        raise ValueError("The number of steps must be divisible by the number of chains.")  # noqa

    main_params = {"nsteps": nsteps, "nburn": nburn, "zcmb_max": zcmb_max,
-                   "sample_alpha": sample_alpha, "sample_beta": sample_beta,
                   "calculate_evidence": calculate_evidence,
                   "nchains_harmonic": nchains_harmonic,
                   "num_epochs": num_epochs}
@ -247,10 +246,11 @@ if __name__ == "__main__":
                               "Vmono_min": -1000, "Vmono_max": 1000,
                               "alpha_min": -1.0, "alpha_max": 3.0,
                               "beta_min": -1.0, "beta_max": 3.0,
-                               "sigma_v_min": 5.0, "sigma_v_max": 750.,
-                               "sample_Vmono": True,
-                               "sample_alpha": sample_alpha,
-                               "sample_beta": sample_beta,
+                               "sigma_v_min": 1.0, "sigma_v_max": 750.,
+                               "sample_Vmono": False,
+                               "sample_alpha": False,
+                               "sample_beta": True,
+                               "sample_sigma_v_ext": False,
                               }
    print_variables(
        calibration_hyperparams.keys(), calibration_hyperparams.values())
@ -280,5 +280,6 @@ if __name__ == "__main__":
    get_model_kwargs = {"zcmb_max": zcmb_max}

    model = get_model(paths, get_model_kwargs, )
-    run_model(model, nsteps, nburn, model_kwargs, out_folder, sample_beta,
-              calculate_evidence, nchains_harmonic, num_epochs, kwargs_print)
+    run_model(model, nsteps, nburn, model_kwargs, out_folder,
+              calibration_hyperparams["sample_beta"], calculate_evidence,
+              nchains_harmonic, num_epochs, kwargs_print)
--- a/scripts/flow_validation.sh
+++ b/scripts/flow_validation.sh
@ -19,9 +19,9 @@ fi

 # Submit a job for each combination of simname, catalogue, ksim
 # for simname in "Lilow2024" "CF4" "CF4gp" "csiborg1" "csiborg2_main" "csiborg2X"; do
-for simname in "csiborg2X"; do
+for simname in "Carrick2015"; do
 # for simname in "csiborg1" "csiborg2_main" "csiborg2X"; do
-    for catalogue in "Pantheon+"; do
+    for catalogue in "Pantheon+_zSN"; do
    # for catalogue in "2MTF"; do
        # for ksim in 0 1 2; do
        # for ksim in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do
--- a/scripts/output/csiborg_halocatalogue.py
+++ b/scripts/output/csiborg_halocatalogue.py
@ -21,19 +21,21 @@ from tqdm import tqdm

 if __name__ == "__main__":
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-    simname = "csiborg1"
+    simname = "csiborg2_random"

    nsims = paths.get_ics(simname)
    print(f"Number of simulations: {nsims}.")

-    fname_out = f"/mnt/users/rstiskalek/csiborgtools/data/halos_{simname}.hdf5"
+    fname_out = f"/mnt/users/rstiskalek/csiborgtools/data/random_halos_{simname}.hdf5"  # noqa

    print(f"Writing to `{fname_out}`.")

    with File(fname_out, 'w') as f:
        for nsim in tqdm(nsims, desc="Simulations"):
            grp = f.create_group(f"sim_{nsim}")
-            cat = csiborgtools.read.CSiBORG1Catalogue(nsim, paths)
+            # cat = csiborgtools.read.CSiBORG1Catalogue(nsim, paths)
+            cat = csiborgtools.read.CSiBORG2Catalogue(
+                nsim, 99, "random", paths, )

            grp["pos"] = cat["cartesian_pos"]
            grp["totmass"] = cat["totmass"]
--- a/scripts/post_upglade.py
+++ b/scripts/post_upglade.py
@ -23,6 +23,7 @@ from os.path import join

 import csiborgtools
 import numpy as np
+from csiborgtools import fprint
 from h5py import File
 from mpi4py import MPI
 from taskmaster import work_delegation  # noqa
@ -35,27 +36,35 @@ def t():
    return datetime.now().strftime("%H:%M:%S")


-def load_calibration(catalogue, simname, nsim, ksmooth, verbose=False):
+def load_calibration(catalogue, simname, ksmooth, sample_beta,
+                     verbose=False):
    """Load the pre-computed calibration samples."""
-    fname = f"/mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity/flow_samples_{catalogue}_{simname}_smooth_{ksmooth}.hdf5"  # noqa
-    keys = ["Vext_x", "Vext_y", "Vext_z", "alpha", "beta", "sigma_v"]
+    fname = f"/mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity/samples_{simname}_{catalogue}_ksmooth{ksmooth}.hdf5"  # noqa
+    if sample_beta:
+        fname = fname.replace(".hdf5", "_sample_beta.hdf5")

+    keys = ["Vext", "sigma_v", "alpha", "beta"]
    calibration_samples = {}
    with File(fname, 'r') as f:
-        for key in keys:
+        for n, key in enumerate(keys):
+            # In case alpha wasn't sampled just set to 1
+            if key == "alpha" and "alpha" not in f["samples"].keys():
+                calibration_samples[key] = np.full_like(
+                    calibration_samples["sigma_v"], 1.0)
+                continue
+
            # NOTE: here the posterior samples are down-sampled
-            calibration_samples[key] = f[f"sim_{nsim}/{key}"][:][::10]
+            calibration_samples[key] = f[f"samples/{key}"][:][::10]

-    if verbose:
-        k = list(calibration_samples.keys())[0]
-        nsamples = len(calibration_samples[k])
-        print(f"{t()}: found {nsamples} calibration posterior samples.",
-              flush=True)
+            if n == 0:
+                num_samples_original = len(f[f"samples/{key}"])
+                num_samples_final = len(calibration_samples[key])

+    fprint(f"downsampling calibration samples from {num_samples_original} to {num_samples_final}.", verbose=verbose)  # noqa
    return calibration_samples


-def main(loader, model, indxs, fdir, fname, num_split, verbose):
+def main(loader, nsim, model, indxs, fdir, fname, num_split, verbose):
    out = np.full(
        len(indxs), np.nan,
        dtype=[("mean_zcosmo", float), ("std_zcosmo", float)])
@ -65,7 +74,7 @@ def main(loader, model, indxs, fdir, fname, num_split, verbose):
                               disable=not verbose)):
        x, y = model.posterior_zcosmo(
            loader.cat["zcmb"][n], loader.cat["RA"][n], loader.cat["DEC"][n],
-            loader.los_density[n], loader.los_radial_velocity[n],
+            loader.los_density[nsim, n], loader.los_radial_velocity[nsim, n],
            extra_sigma_v=loader.cat["e_zcmb"][n] * SPEED_OF_LIGHT,
            verbose=False)

@ -98,7 +107,7 @@ if __name__ == "__main__":

    # Galaxy sample parameters
    catalogue = "UPGLADE"
-    fpath_data = "/mnt/users/rstiskalek/csiborgtools/data/upglade_z_0p05_all_PROCESSED.h5"  # noqa
+    fpath_data = "/mnt/users/rstiskalek/csiborgtools/data/upglade_all_z0p05_new_PROCESSED.h5"  # noqa

    # Number of splits for MPI
    nsplits = 1000
@ -112,12 +121,14 @@ if __name__ == "__main__":
        simname, nsim, catalogue, fpath_data, paths, ksmooth=ksmooth,
        verbose=rank == 0)
    calibration_samples = load_calibration(
-        catalogue_calibration, simname, nsim, ksmooth, verbose=rank == 0)
+        catalogue_calibration, simname, ksmooth, sample_beta=True,
+        verbose=rank == 0)
+
    model = csiborgtools.flow.Observed2CosmologicalRedshift(
        calibration_samples, loader.rdist, loader._Omega_m)
-    if rank == 0:
-        print(f"{t()}: the catalogue size is {loader.cat['zcmb'].size}.")
-        print(f"{t()}: loaded calibration samples and model.", flush=True)
+
+    fprint(f"catalogue size is {loader.cat['zcmb'].size}.", verbose=rank == 0)
+    fprint("loaded calibration samples and model.", verbose=rank == 0)

    # Decide how to split up the job
    if rank == 0:
@ -131,7 +142,8 @@ if __name__ == "__main__":

    # Process all splits with MPI, the rank 0 delegates the jobs.
    def main_wrapper(n):
-        main(loader, model, split_indxs[n], fdir, fname, n, verbose=size == 1)
+        main(loader, nsim, model, split_indxs[n], fdir, fname, n,
+             verbose=size == 1)

    comm.Barrier()
    work_delegation(
--- a/scripts/post_upglade.sh
+++ b/scripts/post_upglade.sh
@ -1,6 +1,6 @@
 nthreads=${1}
 on_login=${2}
-memory=4
+memory=12
 queue="redwood"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
 file="post_upglade.py"
--- a/scripts/quijote_bulkflow.py
+++ b/scripts/quijote_bulkflow.py
@ -15,6 +15,9 @@
 """
 A script to calculate the bulk flow in Quijote simulations from either
 particles or FoF haloes and to also save the resulting smaller halo catalogues.
+
+If `Rmin > 0` the bulk flows computed from projected radial velocities are
+wrong, but the 3D volume average bulk flows are still correct.
 """
 from datetime import datetime
 from os.path import join
@ -70,7 +73,7 @@ def volume_bulk_flow(rdist, mass, vel, distances):
 ###############################################################################


-def main(nsim, folder, fname_basis, Rmax, subtract_observer_velocity,
+def main(nsim, folder, fname_basis, Rmin, Rmax, subtract_observer_velocity,
         verbose=True):
    boxsize = csiborgtools.simname2boxsize("quijote")
    observers = csiborgtools.read.fiducial_observers(boxsize, Rmax)
@ -100,6 +103,11 @@ def main(nsim, folder, fname_basis, Rmax, subtract_observer_velocity,
            return_distance=True, sort_results=True)
        rdist_part, indxs = rdist_part[0], indxs[0]

+        # And only the ones that are above Rmin
+        mask = rdist_part > Rmin
+        rdist_part = rdist_part[mask]
+        indxs = indxs[mask]
+
        part_pos_current = part_pos[indxs] - observers[i]
        part_vel_current = part_vel[indxs]
        # Quijote particle masses are all equal
@ -110,13 +118,16 @@ def main(nsim, folder, fname_basis, Rmax, subtract_observer_velocity,
            np.asarray(observers[i]).reshape(1, -1), Rmax,
            return_distance=True, sort_results=True)
        rdist_halo, indxs = rdist_halo[0], indxs[0]
+        mask = rdist_halo > Rmin
+        rdist_halo = rdist_halo[mask]
+        indxs = indxs[mask]

        halo_pos_current = halo_pos[indxs] - observers[i]
        halo_vel_current = halo_vel[indxs]
        halo_mass_current = halo_mass[indxs]

        # Subtract the observer velocity
-        rscale = 0.5  # Mpc / h
+        rscale = 2.0  # Mpc / h
        weights = np.exp(-0.5 * (rdist_part / rscale)**2)
        obs_vel_x = np.average(part_vel_current[:, 0], weights=weights)
        obs_vel_y = np.average(part_vel_current[:, 1], weights=weights)
@ -183,6 +194,7 @@ def main(nsim, folder, fname_basis, Rmax, subtract_observer_velocity,


 if __name__ == "__main__":
+    Rmin = 0
    Rmax = 150
    subtract_observer_velocity = True
    folder = "/mnt/extraspace/rstiskalek/quijote/BulkFlow_fiducial"
@ -195,7 +207,7 @@ if __name__ == "__main__":
    nsims = list(paths.get_ics("quijote"))

    def main_wrapper(nsim):
-        main(nsim, folder, fname_basis, Rmax, subtract_observer_velocity,
+        main(nsim, folder, fname_basis, Rmin, Rmax, subtract_observer_velocity,
             verbose=rank == 0)

    if rank == 0:
--- a/scripts/quijote_bulkflow.sh
+++ b/scripts/quijote_bulkflow.sh
@ -1,4 +1,4 @@
-nthreads=20
+nthreads=12
 memory=24
 on_login=0
 queue="berg"