Add bulk velocity to TFR inference

2025-02-07 13:23:27 +01:00 · 2025-02-07 13:23:27 +01:00 · dd60456580
commit dd60456580
parent e42d590e67
10 changed files with 55 additions and 27 deletions
--- a/tests/corner.png
+++ b/tests/corner.png
--- a/tests/likelihood_scan_a_TFR.png
+++ b/tests/likelihood_scan_a_TFR.png
--- a/tests/likelihood_scan_alpha.png
+++ b/tests/likelihood_scan_alpha.png
--- a/tests/likelihood_scan_b_TFR.png
+++ b/tests/likelihood_scan_b_TFR.png
--- a/tests/likelihood_scan_sigma_TFR.png
+++ b/tests/likelihood_scan_sigma_TFR.png
--- a/tests/likelihood_scan_sigma_v.png
+++ b/tests/likelihood_scan_sigma_v.png
--- a/tests/tfr_inference.py
+++ b/tests/tfr_inference.py
@ -199,6 +199,7 @@ def create_mock(Nt, L, xmin, cpar, dens, vel, Rmax, alpha, mthresh,
        - all_mobs (np.ndarrary): Observed apparent magnitudes of the tracers (shape = (Nt,))
        - all_etaobs (np.ndarrary): Observed linewidths of the tracers (shape = (Nt,))
        - all_xtrue (np.ndarrary): True comoving coordinates of the tracers (Mpc/h) (shape = (3, Nt))
+        - vbulk (np.ndarray): The bulk velocity of the box (km/s)
    
    """
    
@ -292,6 +293,12 @@ def create_mock(Nt, L, xmin, cpar, dens, vel, Rmax, alpha, mthresh,
        
        myprint(f'\tMade {accepted_count} of {Nt}')
        
+    # Obtain a bulk velocity
+    vhat = np.random.randn(3)
+    vhat = vhat / np.linalg.norm(vhat)
+    vbulk = np.random.randn() * utils.get_sigma_bulk(L, cpar)
+    vbulk = vhat * vbulk
+        
    # Get the radial component of the peculiar velocity at the positions of the objects
    myprint('Obtaining peculiar velocities')
    tracer_vel = projection.interp_field(
@ -301,6 +308,8 @@ def create_mock(Nt, L, xmin, cpar, dens, vel, Rmax, alpha, mthresh,
                            np.array([xmin, xmin, xmin]),
                            interp_order
                            ) # km/s
+    myprint('Adding bulk velocity')
+    tracer_vel = tracer_vel + vbulk[:,None,None]
    myprint('Radial projection')
    vr_true = np.squeeze(projection.project_radial(
                            tracer_vel,
@ -316,7 +325,7 @@ def create_mock(Nt, L, xmin, cpar, dens, vel, Rmax, alpha, mthresh,
    vr_noised = vr_true + sigma_v * np.random.randn(Nt)
    czCMB = ((1 + zcosmo) * (1 + vr_noised / utils.speed_of_light) - 1) * utils.speed_of_light
    
-    return all_RA, all_Dec, czCMB, all_mtrue, all_etatrue, all_mobs, all_etaobs, all_xtrue
+    return all_RA, all_Dec, czCMB, all_mtrue, all_etatrue, all_mobs, all_etaobs, all_xtrue, vbulk


 def estimate_data_parameters():
@ -378,7 +387,7 @@ def generateMBData(RA, Dec, cz_obs, L, N, R_lim, Nsig, Nint_points, sigma_v, fra
        - L (float): Box length (Mpc/h)
        - N (int): Number of grid cells per side
        - R_lim (float): Maximum allowed (true) comoving distance of a tracer (Mpc/h)
-        - Nsig (float): ???
+        - Nsig (float): How many standard deviations away from the predicted radius to use
        - Nint_points (int): Number of radii over which to integrate the likelihood
        - sigma_v (float): Uncertainty on the velocity field (km/s)
        - frac_sigma_r (float): An estimate of the fractional uncertainty on the positions of tracers
@ -411,7 +420,7 @@ def generateMBData(RA, Dec, cz_obs, L, N, R_lim, Nsig, Nint_points, sigma_v, fra
    return MB_pos


-def likelihood_vel(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,
+def likelihood_vel(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk,
               dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
               cz_obs, MB_pos, mthresh):
    """
@ -425,6 +434,7 @@ def likelihood_vel(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,
        - sigma_v (float): Uncertainty on the velocity field (km/s)
        - m_true (np.ndarray): True apparent magnitudes of the tracers (shape = (Nt,))
        - eta_true (np.ndarray): True linewidths of the tracers (shape = (Nt,))
+        - vbulk (np.ndarray): Bulk velocity of the box (km/s) (shape=(3,))
        - dens (np.ndarray): Over-density field (shape = (N, N, N))
        - vel (np.ndarray): Velocity field (km/s) (shape = (3, N, N, N))
        - omega_m (float): Matter density parameter Om
@ -475,6 +485,7 @@ def likelihood_vel(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,
                                interp_order,
                                use_jitted=True,
                                )
+    tracer_vel = tracer_vel + jnp.squeeze(vbulk)[...,None,None]
    tracer_vr = projection.project_radial(
                    tracer_vel,
                    MB_pos,
@ -541,7 +552,7 @@ def likelihood_eta(eta_true, eta_obs, sigma_eta):
    return loglike


-def likelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,
+def likelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk,
               dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
               cz_obs, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh):
    """
@ -555,6 +566,7 @@ def likelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,
        - sigma_v (float): Uncertainty on the velocity field (km/s)
        - m_true (np.ndarray): True apparent magnitudes of the tracers (shape = (Nt,))
        - eta_true (np.ndarray): True linewidths of the tracers (shape = (Nt,))
+        - vbulk (np.ndarray): Bulk velocity of the box (km/s) (shape=(3,))
        - dens (np.ndarray): Over-density field (shape = (N, N, N))
        - vel (np.ndarray): Velocity field (km/s) (shape = (3, N, N, N))
        - omega_m (float): Matter density parameter Om
@ -577,7 +589,7 @@ def likelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,
    """

    
-    loglike_vel = likelihood_vel(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,
+    loglike_vel = likelihood_vel(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk,
                   dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
                   cz_obs, MB_pos, mthresh)
    loglike_m = likelihood_m(m_true, m_obs, sigma_m, mthresh)
@ -588,7 +600,7 @@ def likelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,
    return loglike


-def test_likelihood_scan(prior, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,
+def test_likelihood_scan(prior, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk,
                         dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
                         czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh):
    """
@ -627,7 +639,7 @@ def test_likelihood_scan(prior, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true,
    pars = [alpha, a_TFR, b_TFR, sigma_TFR, sigma_v]
    par_names = ['alpha', 'a_TFR', 'b_TFR', 'sigma_TFR', 'sigma_v']
    
-    orig_ll = - likelihood(*pars, m_true, eta_true,
+    orig_ll = - likelihood(*pars, m_true, eta_true, vbulk,
                         dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
                         czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh)
    
@ -646,7 +658,7 @@ def test_likelihood_scan(prior, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true,
        orig_x = pars[i]
        for j, xx in enumerate(x):
            pars[i] = xx
-            all_ll[j] = - likelihood(*pars, m_true, eta_true,
+            all_ll[j] = - likelihood(*pars, m_true, eta_true, vbulk,
                         dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
                         czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh)
        pars[i] = orig_x
@ -665,7 +677,7 @@ def test_likelihood_scan(prior, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true,
    return


-def run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
+def run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, cpar, L, xmin, interp_order, bias_epsilon,
             czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh,):
    """
    Run MCMC over the model parameters
@ -677,8 +689,7 @@ def run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, omega_m, h, L,
        - initial
        - dens (np.ndarray): Over-density field (shape = (N, N, N))
        - vel (np.ndarray): Velocity field (km/s) (shape = (3, N, N, N))
-        - omega_m (float): Matter density parameter Om
-        - h (float): Hubble constant H0 = 100 h km/s/Mpc
+        - cpar (borg.cosmo.CosmologicalParameters): Cosmological parameters to use
        - L (float): Comoving box size (Mpc/h)
        - xmin (float): Coordinate of corner of the box (Mpc/h)
        - interp_order (int): Order of interpolation from grid points to the line of sight
@ -693,11 +704,14 @@ def run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, omega_m, h, L,
        - mthresh (float): Threshold absolute magnitude in selection
        
    Returns:
-        - mcmc
+        - mcmc (numpyro.infer.MCMC): MCMC object which has been run
        
    """
    
    Nt = eta_obs.shape[0]
+    omega_m = cpar.omega_m
+    h = cpar.h
+    sigma_bulk = utils.get_sigma_bulk(L, cpar)
    
    def tfr_model():

@ -727,15 +741,20 @@ def run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, omega_m, h, L,
        m_true = numpyro.deterministic("m_true", x[:, 0])
        eta_true = numpyro.deterministic("eta_true", x[:, 1])
        
+        # Sample bulk velocity
+        vbulk_x = numpyro.sample("vbulk_x", dist.Normal(0, sigma_bulk / jnp.sqrt(3)))
+        vbulk_y = numpyro.sample("vbulk_y", dist.Normal(0, sigma_bulk / jnp.sqrt(3)))
+        vbulk_z = numpyro.sample("vbulk_z", dist.Normal(0, sigma_bulk / jnp.sqrt(3)))
+        
        # Evaluate the likelihood
-        numpyro.sample("obs", TFRLikelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, hyper_mean_eta, hyper_sigma_eta, m_true, eta_true), obs=jnp.array([m_obs, eta_obs]))
+        numpyro.sample("obs", TFRLikelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, hyper_mean_eta, hyper_sigma_eta, m_true, eta_true, vbulk_x, vbulk_y, vbulk_z), obs=jnp.array([m_obs, eta_obs]))


    class TFRLikelihood(dist.Distribution):
        support = dist.constraints.real

-        def __init__(self, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, hyper_mean_eta, hyper_sigma_eta, m_true, eta_true):
-            self.alpha, self.a_TFR, self.b_TFR, self.sigma_TFR, self.sigma_v, self.hyper_mean_eta, self.hyper_sigma_eta, self.m_true, self.eta_true = dist.util.promote_shapes(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, hyper_mean_eta, hyper_sigma_eta, m_true, eta_true)
+        def __init__(self, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, hyper_mean_eta, hyper_sigma_eta, m_true, eta_true, vbulk_x, vbulk_y, vbulk_z):
+            self.alpha, self.a_TFR, self.b_TFR, self.sigma_TFR, self.sigma_v, self.hyper_mean_eta, self.hyper_sigma_eta, self.m_true, self.eta_true, self.vbulk_x, self.vbulk_y, self.vbulk_z = dist.util.promote_shapes(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, hyper_mean_eta, hyper_sigma_eta, m_true, eta_true, vbulk_x, vbulk_y, vbulk_z)
            batch_shape = lax.broadcast_shapes(
                    jnp.shape(alpha),
                    jnp.shape(a_TFR),
@ -746,6 +765,9 @@ def run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, omega_m, h, L,
                    jnp.shape(hyper_sigma_eta),
                    jnp.shape(m_true),
                    jnp.shape(eta_true),
+                    jnp.shape(vbulk_x),
+                    jnp.shape(vbulk_y),
+                    jnp.shape(vbulk_z),
                    )
            super(TFRLikelihood, self).__init__(batch_shape = batch_shape)

@ -753,8 +775,9 @@ def run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, omega_m, h, L,
            raise NotImplementedError

        def log_prob(self, value):
+            vbulk = jnp.array([self.vbulk_x, self.vbulk_y, self.vbulk_z])
            loglike =  likelihood(self.alpha, self.a_TFR, self.b_TFR, self.sigma_TFR, self.sigma_v, 
-                             self.m_true, self.eta_true,
+                             self.m_true, self.eta_true, vbulk,
                             dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
                             czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh)
            return loglike
@ -764,6 +787,9 @@ def run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, omega_m, h, L,
    values = initial
    values['true_vars'] = jnp.array([m_obs, eta_obs]).T
    values['L_corr'] = jnp.identity(2)
+    values['vbulk_x'] = 0.
+    values['vbulk_y'] = 0.
+    values['vbulk_z'] = 0.
    myprint('Preparing MCMC kernel')
    kernel = numpyro.infer.NUTS(tfr_model,
            init_strategy=numpyro.infer.initialization.init_to_value(values=initial)
@ -781,7 +807,7 @@ def process_mcmc_run(mcmc, param_labels, truths, true_vars):
    Make summary plots from the MCMC and save these to file
    
    Args:
-        - mcmc 
+        - mcmc (numpyro.infer.MCMC): MCMC object which has been run
        - param_labels (list[str]): Names of the parameters to plot
        - truths (list[float]): True values of the parameters to plot. If unknown, then entry is None
        - true_vars (dict): True values of the observables to compare against inferred ones
@ -812,7 +838,7 @@ def process_mcmc_run(mcmc, param_labels, truths, true_vars):
    fig1.savefig('trace.png')
    
    # Corner plot
-    fig2, axs2 = plt.subplots(samps.shape[1], samps.shape[1], figsize=(15,15))
+    fig2, axs2 = plt.subplots(samps.shape[1], samps.shape[1], figsize=(20,20))
    corner.corner(
        np.array(samps),
        labels=param_labels,
@ -849,7 +875,6 @@ def process_mcmc_run(mcmc, param_labels, truths, true_vars):
            fig3.tight_layout()
            fig3.savefig(f'true_predicted_{var}.png')
        
-        
    return


@ -879,7 +904,7 @@ def main():
    interp_order = 1
    bias_epsilon = 1.e-7
    num_warmup = 1000
-    num_samples = 1000
+    num_samples = 2000
    
    prior = {
        'alpha': [0.5, 2.5],
@ -904,7 +929,7 @@ def main():
    # Make mock
    np.random.seed(123)
    cpar, dens, vel = get_fields(L, N, xmin)
-    RA, Dec, czCMB, m_true, eta_true, m_obs, eta_obs, xtrue = create_mock(
+    RA, Dec, czCMB, m_true, eta_true, m_obs, eta_obs, xtrue, vbulk = create_mock(
            Nt, L, xmin, cpar, dens, vel, Rmax, alpha, mthresh,
            a_TFR, b_TFR, sigma_TFR, sigma_m, sigma_eta, 
            hyper_eta_mu, hyper_eta_sigma, sigma_v, 
@ -912,22 +937,26 @@ def main():
    MB_pos = generateMBData(RA, Dec, czCMB, L, N, R_lim, Nsig, Nint_points, sigma_v, frac_sigma_r)
    
    # Test likelihood
-    loglike = likelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,
+    loglike = likelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk,
                         dens, vel, cpar.omega_m, cpar.h, L, xmin, interp_order, bias_epsilon,
                         czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh)
    myprint(f'loglike {loglike}')
    
    # Scan over parameters to make plots verifying behaviour
-    test_likelihood_scan(prior, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,
+    test_likelihood_scan(prior, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,  vbulk,
                         dens, vel, cpar.omega_m, cpar.h, L, xmin, interp_order, bias_epsilon,
                         czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh)
    
    # Run a MCMC
-    mcmc = run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, cpar.omega_m, cpar.h, L, xmin, interp_order, bias_epsilon,
+    mcmc = run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, cpar, L, xmin, interp_order, bias_epsilon,
             czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh,)
    
-    param_labels = ['alpha', 'a_TFR', 'b_TFR', 'sigma_TFR', 'sigma_v', 'hyper_mean_m', 'hyper_sigma_m', 'hyper_mean_eta', 'hyper_sigma_eta', 'hyper_corr']
-    truths = [alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, None, None, hyper_eta_mu, hyper_eta_sigma, None]
+    param_labels = ['alpha', 'a_TFR', 'b_TFR', 'sigma_TFR', 'sigma_v', 
+                    'hyper_mean_m', 'hyper_sigma_m', 'hyper_mean_eta', 'hyper_sigma_eta', 'hyper_corr', 
+                    'vbulk_x', 'vbulk_y', 'vbulk_z']
+    truths = [alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, 
+              None, None, hyper_eta_mu, hyper_eta_sigma, None, 
+              vbulk[0], vbulk[1], vbulk[2]]
    true_vars = {'m':m_true, 'eta':eta_true}
    process_mcmc_run(mcmc, param_labels, truths, true_vars)

@ -938,7 +967,6 @@ if __name__ == "__main__":
 TO DO

 - Reinsert magnitude cut
- Add bulk velocity
 - Deal with case where sigma_eta and sigma_m could be floats vs arrays

 """
--- a/tests/trace.png
+++ b/tests/trace.png
--- a/tests/true_predicted_eta.png
+++ b/tests/true_predicted_eta.png
--- a/tests/true_predicted_m.png
+++ b/tests/true_predicted_m.png