Add z_obs output (#119)

* Update SN model to output zobs too

* Add TF predicted zobs

* Add imoprt

* Update nb

* Calculate chi2

* Update nb

* Update script
This commit is contained in:
Richard Stiskalek 2024-03-22 18:10:40 +01:00 committed by GitHub
parent f7285b2600
commit 4093186f9a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 462 additions and 47 deletions

View file

@ -17,4 +17,4 @@ from .flow_model import (DataLoader, radial_velocity_los, dist2redshift,
SD_PV_validation_model, SN_PV_validation_model, # noqa
TF_PV_validation_model, radec_to_galactic, # noqa
sample_prior, make_loss, get_model, # noqa
optimize_model_with_jackknife) # noqa
optimize_model_with_jackknife, distmodulus2dist) # noqa

View file

@ -19,7 +19,7 @@ References
----------
[1] https://arxiv.org/abs/1912.09383.
"""
from abc import ABC
from abc import ABC, abstractmethod
from datetime import datetime
from warnings import catch_warnings, simplefilter, warn
@ -35,11 +35,12 @@ from jax import numpy as jnp
from jax import vmap
from jax.lax import cond, scan
from jax.random import PRNGKey
from numdifftools import Hessian
from numpyro.infer import Predictive, util
from scipy.interpolate import interp1d
from scipy.optimize import fmin_powell
from sklearn.model_selection import KFold
from tqdm import trange
from numdifftools import Hessian
from ..params import simname2Omega_m
@ -426,17 +427,45 @@ def dist2distmodulus(dist, Omega_m):
return 5 * jnp.log10(luminosity_distance) + 25
# def distmodulus2dist(distmodulus, Omega_m):
# """
# Copied from Supranta. Make sure this actually works.
#
#
# """
# dL = 10 ** ((distmodulus - 25.) / 5.)
# r_hMpc = dL
# for i in range(4):
# r_hMpc = dL / (1.0 + dist2redshift(r_hMpc, Omega_m))
# return r_hMpc
def distmodulus2dist(mu, Omega_m, ninterp=10000, zmax=0.1, mu2comoving=None,
return_interpolator=False):
"""
Convert distance modulus to comoving distance. Note that this is a costly
implementation, as it builts up the interpolator every time it is called
unless it is provided.
Parameters
----------
mu : float or 1-dimensional array
Distance modulus.
Omega_m : float
Matter density parameter.
ninterp : int, optional
Number of points to interpolate the mapping from distance modulus to
comoving distance.
zmax : float, optional
Maximum redshift for the interpolation.
mu2comoving : callable, optional
Interpolator from distance modulus to comoving distance. If not
provided, it is built up every time the function is called.
return_interpolator : bool, optional
Whether to return the interpolator as well.
Returns
-------
float (or 1-dimensional array) and callable (optional)
"""
if mu2comoving is None:
zrange = np.linspace(1e-15, zmax, ninterp)
cosmo = FlatLambdaCDM(H0=100, Om0=Omega_m)
mu2comoving = interp1d(
cosmo.distmod(zrange).value, cosmo.comoving_distance(zrange).value,
kind="cubic")
if return_interpolator:
return mu2comoving(mu), mu2comoving
return mu2comoving(mu)
def project_Vext(Vext_x, Vext_y, Vext_z, RA, dec):
@ -549,6 +578,29 @@ def calculate_ll_zobs(zobs, zobs_pred, sigma_v):
return jnp.exp(-0.5 * (dcz / sigma_v)**2) / jnp.sqrt(2 * np.pi) / sigma_v
def stack_normal(mus, stds):
"""
Stack the normal distributions and approximate the stacked distribution
by a single Gaussian.
Parameters
----------
mus : 1-dimensional array
Means of the normal distributions.
stds : 1-dimensional array
Standard deviations of the normal distributions.
Returns
-------
mu, std : floats
"""
if mus.ndim > 1 or stds.ndim > 1 and mus.shape != stds.shape:
raise ValueError("Shape of `mus` and `stds` must be the same and 1D.")
mu = np.mean(mus)
std = (np.sum(stds**2 + (mus - mu)**2) / len(mus))**0.5
return mu, std
class BaseFlowValidationModel(ABC):
"""
Base class for the flow validation models.
@ -556,8 +608,90 @@ class BaseFlowValidationModel(ABC):
@property
def ndata(self):
"""
Number of PV objects in the catalogue.
Returns
-------
int
"""
return len(self._RA)
@abstractmethod
def predict_zobs_single(self, **kwargs):
pass
def predict_zobs(self, samples):
"""
Predict the observed redshifts given the samples from the posterior.
Parameters
----------
samples : dict of 1-dimensional arrays
Posterior samples.
Returns
-------
zobs_mean : 2-dimensional array of shape (ndata, nsamples)
Mean of the predicted redshifts.
zobs_std : 2-dimensional array of shape (ndata, nsamples)
Standard deviation of the predicted redshifts.
"""
keys = list(samples.keys())
nsamples = len(samples[keys[0]])
zobs_mean = np.empty((self.ndata, nsamples), dtype=np.float32)
zobs_std = np.empty_like(zobs_mean)
# JIT compile the function, it is called many times.
f = jit(self.predict_zobs_single)
for i in trange(nsamples):
x = {key: samples[key][i] for key in keys}
if "alpha" not in keys:
x["alpha"] = 1.0
e_z = samples["sigma_v"][i] / SPEED_OF_LIGHT
mu, var = f(**x)
zobs_mean[:, i] = mu
zobs_std[:, i] = (var + e_z**2)**0.5
return zobs_mean, zobs_std
def summarize_zobs_pred(self, zobs_mean, zobs_pred):
"""
Summarize the predicted observed redshifts from each posterior sample
by stacking their Gaussian distribution and approximating the stacked
distribution by a single Gaussian.
Parameters
----------
zobs_mean : 2-dimensional array of shape (ndata, nsamples)
Mean of the predicted redshifts.
zobs_pred : 2-dimensional array of shape (ndata, nsamples)
Predicted redshifts.
Returns
-------
mu : 1-dimensional array
Mean of predicted redshift, averaged over the posterior samples.
std : 1-dimensional array
Standard deviation of the predicted redshift, averaged over the
posterior samples.
"""
mu = np.empty(self.ndata, dtype=np.float32)
std = np.empty_like(mu)
for i in range(self.ndata):
mu[i], std[i] = stack_normal(zobs_mean[i], zobs_pred[i])
return mu, std
@abstractmethod
def __call__(self, **kwargs):
pass
class SD_PV_validation_model(BaseFlowValidationModel):
"""
@ -625,6 +759,9 @@ class SD_PV_validation_model(BaseFlowValidationModel):
# Distribution of velocity uncertainty sigma_v
self._sv = dist.LogNormal(*lognorm_mean_std_to_loc_scale(150, 100))
def predict_zobs_single(self, **kwargs):
raise NotImplementedError("This method is not implemented yet.")
def __call__(self, sample_alpha=False):
"""
The simple distance NumPyro PV validation model.
@ -714,11 +851,15 @@ class SN_PV_validation_model(BaseFlowValidationModel):
raise ValueError("The radial step size must be constant.")
dr = dr[0]
# Get the various vmapped functions
# Get the various functions, also vmapped
self._f_ptilde_wo_bias = lambda mu, err: calculate_ptilde_wo_bias(mu_xrange, mu, err, r2_xrange, True) # noqa
self._f_simps = lambda y: simps(y, dr) # noqa
self._f_zobs = lambda beta, Vr, vpec_rad: predict_zobs(r_xrange, beta, Vr, vpec_rad, Omega_m) # noqa
self._vmap_ptilde_wo_bias = vmap(lambda mu, err: calculate_ptilde_wo_bias(mu_xrange, mu, err, r2_xrange, True)) # noqa
self._vmap_simps = vmap(lambda y: simps(y, dr))
self._vmap_zobs = vmap(lambda beta, Vr, vpec_rad: predict_zobs(r_xrange, beta, Vr, vpec_rad, Omega_m), in_axes=(None, 0, 0)) # noqa
# Distribution of external velocity components
self._Vext = dist.Uniform(-500, 500)
# Distribution of velocity and density bias parameters
@ -733,6 +874,83 @@ class SN_PV_validation_model(BaseFlowValidationModel):
self._beta_cal = dist.Normal(3.112, 1.0)
self._e_mu = dist.LogNormal(*lognorm_mean_std_to_loc_scale(0.1, 0.05))
self._Omega_m = Omega_m
self._r_xrange = r_xrange
def mu(self, mag_cal, alpha_cal, beta_cal):
"""
Distance modulus of each object the given SALT2 calibration parameters.
Parameters
----------
mag_cal, alpha_cal, beta_cal : floats
SALT2 calibration parameters.
Returns
-------
1-dimensional array
"""
return self._mB - mag_cal + alpha_cal * self._x1 - beta_cal * self._c
def squared_e_mu(self, alpha_cal, beta_cal, e_mu_intrinsic):
"""
Linearly-propagated squared error on the SALT2 distance modulus.
Parameters
----------
alpha_cal, beta_cal, e_mu_intrinsic : floats
SALT2 calibration parameters.
Returns
-------
1-dimensional array
"""
return (self._e2_mB + alpha_cal**2 * self._e2_x1
+ beta_cal**2 * self._e2_c + e_mu_intrinsic**2)
def predict_zobs_single(self, Vext_x, Vext_y, Vext_z, alpha, beta,
e_mu_intrinsic, mag_cal, alpha_cal, beta_cal,
**kwargs):
"""
Predict the observed redshifts given the samples from the posterior.
Parameters
----------
Vext_x, Vext_y, Vext_z : floats
Components of the external velocity.
alpha, beta : floats
Density and velocity bias parameters.
e_mu_intrinsic, mag_cal, alpha_cal, beta_cal : floats
Calibration parameters.
kwargs : dict
Additional arguments (for compatibility).
Returns
-------
zobs_mean : 1-dimensional array
Mean of the predicted redshifts.
zobs_var : 1-dimensional array
Variance of the predicted redshifts.
"""
mu = self.mu(mag_cal, alpha_cal, beta_cal)
squared_e_mu = self.squared_e_mu(alpha_cal, beta_cal, e_mu_intrinsic)
Vext_rad = project_Vext(Vext_x, Vext_y, Vext_z, self._RA, self._dec)
# Calculate p(r) (Malmquist bias)
ptilde = self._vmap_ptilde_wo_bias(mu, squared_e_mu)
ptilde *= self._los_density**alpha
ptilde /= self._vmap_simps(ptilde).reshape(-1, 1)
# Predicted mean z_obs
zobs_pred = self._vmap_zobs(beta, Vext_rad, self._los_velocity)
zobs_mean = self._vmap_simps(zobs_pred * ptilde)
# Variance of the predicted z_obs
zobs_pred -= zobs_mean.reshape(-1, 1)
zobs_var = self._vmap_simps(zobs_pred**2 * ptilde)
return zobs_mean, zobs_var
def __call__(self, sample_alpha=True, fix_calibration=False):
"""
The supernova NumPyro PV validation model with SALT2 calibration.
@ -777,9 +995,8 @@ class SN_PV_validation_model(BaseFlowValidationModel):
Vext_rad = project_Vext(Vx, Vy, Vz, self._RA, self._dec)
mu = self._mB - mag_cal + alpha_cal * self._x1 - beta_cal * self._c
squared_e_mu = (self._e2_mB + alpha_cal**2 * self._e2_x1
+ beta_cal**2 * self._e2_c + e_mu_intrinsic**2)
mu = self.mu(mag_cal, alpha_cal, beta_cal)
squared_e_mu = self.squared_e_mu(alpha_cal, beta_cal, e_mu_intrinsic)
def scan_body(ll, i):
# Calculate p(r) and multiply it by the galaxy bias
@ -857,6 +1074,10 @@ class TF_PV_validation_model(BaseFlowValidationModel):
self._f_simps = lambda y: simps(y, dr) # noqa
self._f_zobs = lambda beta, Vr, vpec_rad: predict_zobs(r_xrange, beta, Vr, vpec_rad, Omega_m) # noqa
self._vmap_ptilde_wo_bias = vmap(lambda mu, err: calculate_ptilde_wo_bias(mu_xrange, mu, err, r2_xrange, True)) # noqa
self._vmap_simps = vmap(lambda y: simps(y, dr))
self._vmap_zobs = vmap(lambda beta, Vr, vpec_rad: predict_zobs(r_xrange, beta, Vr, vpec_rad, Omega_m), in_axes=(None, 0, 0)) # noqa
# Distribution of external velocity components
self._Vext = dist.Uniform(-1000, 1000)
# Distribution of velocity and density bias parameters
@ -870,6 +1091,83 @@ class TF_PV_validation_model(BaseFlowValidationModel):
self._b = dist.Normal(-5.95, 0.1)
self._e_mu = dist.LogNormal(*lognorm_mean_std_to_loc_scale(0.3, 0.1)) # noqa
self._Omega_m = Omega_m
self._r_xrange = r_xrange
def mu(self, a, b):
"""
Distance modulus of each object the given Tully-Fisher calibration.
Parameters
----------
a, b : floats
Tully-Fisher calibration parameters.
Returns
-------
1-dimensional array
"""
return self._mag - (a + b * self._eta)
def squared_e_mu(self, b, e_mu_intrinsic):
"""
Squared error on the Tully-Fisher distance modulus.
Parameters
----------
b, e_mu_intrinsic : floats
Tully-Fisher calibration parameters.
Returns
-------
1-dimensional array
"""
return (self._e2_mag + b**2 * self._e2_eta + e_mu_intrinsic**2)
def predict_zobs_single(self, Vext_x, Vext_y, Vext_z, alpha, beta,
e_mu_intrinsic, a, b, **kwargs):
"""
Predict the observed redshifts given the samples from the posterior.
Parameters
----------
Vext_x, Vext_y, Vext_z : floats
Components of the external velocity.
alpha, beta : floats
Density and velocity bias parameters.
e_mu_intrinsic, a, b : floats
Calibration parameters.
kwargs : dict
Additional arguments (for compatibility).
Returns
-------
zobs_mean : 1-dimensional array
Mean of the predicted redshifts.
zobs_var : 1-dimensional array
Variance of the predicted redshifts.
"""
mu = self.mu(a, b)
squared_e_mu = self.squared_e_mu(b, e_mu_intrinsic)
Vext_rad = project_Vext(Vext_x, Vext_y, Vext_z, self._RA, self._dec)
# Calculate p(r) (Malmquist bias)
ptilde = self._vmap_ptilde_wo_bias(mu, squared_e_mu)
ptilde *= self._los_density**alpha
ptilde /= self._vmap_simps(ptilde).reshape(-1, 1)
# Predicted mean z_obs
zobs_pred = self._vmap_zobs(beta, Vext_rad, self._los_velocity)
zobs_mean = self._vmap_simps(zobs_pred * ptilde)
# Variance of the predicted z_obs
zobs_pred -= zobs_mean.reshape(-1, 1)
zobs_var = self._vmap_simps(zobs_pred**2 * ptilde)
return zobs_mean, zobs_var
def __call__(self, sample_alpha=True):
"""
The Tully-Fisher NumPyro PV validation model.
@ -893,9 +1191,8 @@ class TF_PV_validation_model(BaseFlowValidationModel):
Vext_rad = project_Vext(Vx, Vy, Vz, self._RA, self._dec)
mu = self._mag - (a + b * self._eta)
squared_e_mu = (self._e2_mag + b**2 * self._e2_eta
+ e_mu_intrinsic**2)
mu = self.mu(a, b)
squared_e_mu = self.squared_e_mu(b, e_mu_intrinsic)
def scan_body(ll, i):
# Calculate p(r) and multiply it by the galaxy bias

File diff suppressed because one or more lines are too long

View file

@ -30,7 +30,7 @@ def read_samples(catalogue, simname, ksmooth, include_calibration=False,
nsims = paths.get_ics(simname)
Vx, Vy, Vz, beta, sigma_v, alpha = [], [], [], [], [], []
BIC, AIC, logZ = [], [], []
BIC, AIC, logZ, chi2 = [], [], [], []
if catalogue in ["LOSS", "Foundation", "Pantheon+"]:
alpha_cal, beta_cal, mag_cal, e_mu_intrinsic = [], [], [], []
@ -69,6 +69,10 @@ def read_samples(catalogue, simname, ksmooth, include_calibration=False,
BIC.append(f[f"sim_{nsim}/BIC"][...])
AIC.append(f[f"sim_{nsim}/AIC"][...])
logZ.append(f[f"sim_{nsim}/logZ"][...])
try:
chi2.append(f[f"sim_{nsim}/chi2"][...])
except KeyError:
chi2.append([0.])
if catalogue in ["LOSS", "Foundation", "Pantheon+"]:
alpha_cal.append(f[f"sim_{nsim}/alpha_cal"][:])
@ -84,7 +88,7 @@ def read_samples(catalogue, simname, ksmooth, include_calibration=False,
Vx, Vy, Vz, alpha, beta, sigma_v = np.hstack(Vx), np.hstack(Vy), np.hstack(Vz), np.hstack(alpha), np.hstack(beta), np.hstack(sigma_v) # noqa
gof = np.hstack(BIC), np.hstack(AIC), np.hstack(logZ)
gof = np.hstack(BIC), np.hstack(AIC), np.hstack(logZ), np.hstack(chi2)
if catalogue in ["LOSS", "Foundation", "Pantheon+"]:
alpha_cal, beta_cal, mag_cal, e_mu_intrinsic = np.hstack(alpha_cal), np.hstack(beta_cal), np.hstack(mag_cal), np.hstack(e_mu_intrinsic) # noqa
@ -116,6 +120,7 @@ def read_samples(catalogue, simname, ksmooth, include_calibration=False,
print("BIC = {:4f} +- {:4f}".format(np.mean(gof[0]), np.std(gof[0])))
print("AIC = {:4f} +- {:4f}".format(np.mean(gof[1]), np.std(gof[1])))
print("logZ = {:4f} +- {:4f}".format(np.mean(gof[2]), np.std(gof[2])))
print("chi2 = {:4f} +- {:4f}".format(np.mean(gof[3]), np.std(gof[3])))
data = np.vstack(data).T

View file

@ -104,11 +104,19 @@ def run_model(model, nsteps, nburn, nchains, nsim, dump_folder,
samples = mcmc.get_samples()
thinned_samples = csiborgtools.thin_samples_by_acl(samples)
# Calculate the chi2
keys = list(thinned_samples.keys())
nsamples = len(thinned_samples[keys[0]])
zobs_mean, zobs_std = model.predict_zobs(thinned_samples)
nu = model.ndata - len(keys)
chi2 = [np.sum((zobs_mean[:, i] - model._z_obs)**2 / zobs_std[:, i]**2) / nu # noqa
for i in range(nsamples)]
gof = csiborgtools.numpyro_gof(model, mcmc, model_kwargs)
# Save the samples to the temporary folder.
fname = join(dump_folder, f"samples_{nsim}.npz")
np.savez(fname, **thinned_samples, **gof)
np.savez(fname, **thinned_samples, **gof, chi2=chi2)
def combine_from_simulations(catalogue_name, simname, nsims, outfolder,

View file

@ -1,5 +1,5 @@
memory=4
on_login=0
on_login=1
nthreads=${1}
ksmooth=${2}
@ -7,8 +7,8 @@ queue="berg"
env="/mnt/users/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="flow_validation.py"
catalogue="Pantheon+"
simname="csiborg2_main"
catalogue="LOSS"
simname="Carrick2015"
pythoncm="$env $file --catalogue $catalogue --simname $simname --ksmooth $ksmooth"