Mocks with absolute calibration (#156)

* Rename nb

* Add Carrick 2MTF mocks

* Add more 2MTF mock support

* Add mocks generator

* Add mock gen nb

* Control over MAlmquist

* Control over Malmquist

* Update imports

* Update script

* Add H0 to TFR mocks

* Clear up saving

* Add h sampling

* Update saving

* Update mocks

* Add calibration to mocks

* More support to read in absmag

* Add absmag

* Update script
This commit is contained in:
Richard Stiskalek 2024-10-09 20:41:36 +02:00 committed by GitHub
parent 76a7609f7f
commit 4167ba4fb1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 902 additions and 118 deletions

View file

@ -17,6 +17,7 @@ from .io import (DataLoader, get_model, read_absolute_calibration,
from .flow_model import (PV_LogLikelihood, PV_validation_model, dist2redshift, # noqa from .flow_model import (PV_LogLikelihood, PV_validation_model, dist2redshift, # noqa
Observed2CosmologicalRedshift, predict_zobs, # noqa Observed2CosmologicalRedshift, predict_zobs, # noqa
project_Vext, stack_pzosmo_over_realizations) # noqa project_Vext, stack_pzosmo_over_realizations) # noqa
from .mocks import mock_Carrick2MTF # noqa
from .selection import ToyMagnitudeSelection # noqa from .selection import ToyMagnitudeSelection # noqa
from .void_model import (load_void_data, interpolate_void, select_void_h, # noqa from .void_model import (load_void_data, interpolate_void, select_void_h, # noqa
mock_void) # noqa mock_void) # noqa

View file

@ -170,28 +170,6 @@ class BaseFlowValidationModel(ABC):
self._setattr_as_jax(names, values) self._setattr_as_jax(names, values)
def _set_abs_calibration_params(self, abs_calibration_params):
self.with_absolute_calibration = abs_calibration_params is not None
if abs_calibration_params is None:
self.with_absolute_calibration = False
return
self.calibration_distmod = jnp.asarray(
abs_calibration_params["calibration_distmod"][..., 0])
self.calibration_edistmod = jnp.asarray(
abs_calibration_params["calibration_distmod"][..., 1])
self.data_with_calibration = jnp.asarray(
abs_calibration_params["data_with_calibration"])
self.data_wo_calibration = ~self.data_with_calibration
# Calculate the log of the number of calibrators. Where there is no
# calibrator set the number of calibrators to 1 to avoid log(0) and
# this way only zeros are being added.
length_calibration = abs_calibration_params["length_calibration"]
length_calibration[length_calibration == 0] = 1
self.log_length_calibration = jnp.log(length_calibration)
def _set_radial_spacing(self, r_xrange, Omega_m): def _set_radial_spacing(self, r_xrange, Omega_m):
cosmo = FlatLambdaCDM(H0=H0, Om0=Omega_m) cosmo = FlatLambdaCDM(H0=H0, Om0=Omega_m)
@ -356,7 +334,8 @@ def e2_distmod_TFR(e2_mag, e2_eta, eta, b, c, e_mu_intrinsic):
def sample_TFR(e_mu_min, e_mu_max, a_mean, a_std, b_mean, b_std, def sample_TFR(e_mu_min, e_mu_max, a_mean, a_std, b_mean, b_std,
c_mean, c_std, alpha_min, alpha_max, sample_alpha, c_mean, c_std, alpha_min, alpha_max, sample_alpha,
a_dipole_mean, a_dipole_std, sample_a_dipole, name): a_dipole_mean, a_dipole_std, sample_a_dipole,
sample_curvature, name):
"""Sample Tully-Fisher calibration parameters.""" """Sample Tully-Fisher calibration parameters."""
e_mu = sample(f"e_mu_{name}", Uniform(e_mu_min, e_mu_max)) e_mu = sample(f"e_mu_{name}", Uniform(e_mu_min, e_mu_max))
a = sample(f"a_{name}", Normal(a_mean, a_std)) a = sample(f"a_{name}", Normal(a_mean, a_std))
@ -367,7 +346,11 @@ def sample_TFR(e_mu_min, e_mu_max, a_mean, a_std, b_mean, b_std,
ax, ay, az = 0.0, 0.0, 0.0 ax, ay, az = 0.0, 0.0, 0.0
b = sample(f"b_{name}", Normal(b_mean, b_std)) b = sample(f"b_{name}", Normal(b_mean, b_std))
c = sample(f"c_{name}", Normal(c_mean, c_std))
if sample_curvature:
c = sample(f"c_{name}", Normal(c_mean, c_std))
else:
c = 0.
alpha = sample_alpha_bias(name, alpha_min, alpha_max, sample_alpha) alpha = sample_alpha_bias(name, alpha_min, alpha_max, sample_alpha)
@ -495,8 +478,6 @@ class PV_LogLikelihood(BaseFlowValidationModel):
Errors on the observed redshifts. Errors on the observed redshifts.
calibration_params : dict calibration_params : dict
Calibration parameters of each object. Calibration parameters of each object.
abs_calibration_params : dict
Absolute calibration parameters.
mag_selection : dict mag_selection : dict
Magnitude selection parameters, optional. Magnitude selection parameters, optional.
r_xrange : 1-dimensional array r_xrange : 1-dimensional array
@ -513,12 +494,17 @@ class PV_LogLikelihood(BaseFlowValidationModel):
Whether to directly sample the distance without numerical Whether to directly sample the distance without numerical
marginalisation. in which case the tracers can be coupled by a marginalisation. in which case the tracers can be coupled by a
covariance matrix. By default `False`. covariance matrix. By default `False`.
with_homogeneous_malmquist : bool, optional
Whether to include the homogeneous Malmquist bias. By default `True`.
with_inhomogeneous_malmquist : bool, optional
Whether to include the inhomogeneous Malmquist bias. By default `True`.
""" """
def __init__(self, los_density, los_velocity, RA, dec, z_obs, e_zobs, def __init__(self, los_density, los_velocity, RA, dec, z_obs, e_zobs,
calibration_params, abs_calibration_params, mag_selection, calibration_params, mag_selection, r_xrange, Omega_m, kind,
r_xrange, Omega_m, kind, name, void_kwargs=None, name, void_kwargs=None, wo_num_dist_marginalisation=False,
wo_num_dist_marginalisation=False): with_homogeneous_malmquist=True,
with_inhomogeneous_malmquist=True):
if e_zobs is not None: if e_zobs is not None:
e2_cz_obs = jnp.asarray((SPEED_OF_LIGHT * e_zobs)**2) e2_cz_obs = jnp.asarray((SPEED_OF_LIGHT * e_zobs)**2)
else: else:
@ -550,13 +536,14 @@ class PV_LogLikelihood(BaseFlowValidationModel):
self._setattr_as_jax(names, values) self._setattr_as_jax(names, values)
self._set_calibration_params(calibration_params) self._set_calibration_params(calibration_params)
self._set_abs_calibration_params(abs_calibration_params)
self._set_radial_spacing(r_xrange, Omega_m) self._set_radial_spacing(r_xrange, Omega_m)
self.kind = kind self.kind = kind
self.name = name self.name = name
self.Omega_m = Omega_m self.Omega_m = Omega_m
self.wo_num_dist_marginalisation = wo_num_dist_marginalisation self.wo_num_dist_marginalisation = wo_num_dist_marginalisation
self.with_homogeneous_malmquist = with_homogeneous_malmquist
self.with_inhomogeneous_malmquist = with_inhomogeneous_malmquist
self.norm = - self.ndata * jnp.log(self.num_sims) self.norm = - self.ndata * jnp.log(self.num_sims)
if mag_selection is not None: if mag_selection is not None:
@ -771,9 +758,14 @@ class PV_LogLikelihood(BaseFlowValidationModel):
"marginalising the true distance.") "marginalising the true distance.")
# Calculate p(r) (Malmquist bias). Shape is (ndata, nxrange) # Calculate p(r) (Malmquist bias). Shape is (ndata, nxrange)
log_ptilde = log_ptilde_wo_bias( if self.with_homogeneous_malmquist:
self.mu_xrange[None, :], mu[:, None], e2_mu[:, None], log_ptilde = log_ptilde_wo_bias(
self.log_r2_xrange[None, :]) self.mu_xrange[None, :], mu[:, None], e2_mu[:, None],
self.log_r2_xrange[None, :])
else:
log_ptilde = log_ptilde_wo_bias(
self.mu_xrange[None, :], mu[:, None], e2_mu[:, None],
0.)
if self.is_void_data: if self.is_void_data:
rLG = field_calibration_params["rLG"] rLG = field_calibration_params["rLG"]
@ -785,7 +777,9 @@ class PV_LogLikelihood(BaseFlowValidationModel):
# Inhomogeneous Malmquist bias. Shape: (nsims, ndata, nxrange) # Inhomogeneous Malmquist bias. Shape: (nsims, ndata, nxrange)
alpha = distmod_params["alpha"] alpha = distmod_params["alpha"]
log_ptilde = log_ptilde[None, ...] + alpha * log_los_density log_ptilde = log_ptilde[None, ...]
if self.with_inhomogeneous_malmquist:
log_ptilde += alpha * log_los_density
ptilde = jnp.exp(log_ptilde) ptilde = jnp.exp(log_ptilde)
# Normalization of p(r). Shape: (nsims, ndata) # Normalization of p(r). Shape: (nsims, ndata)
@ -802,29 +796,51 @@ class PV_LogLikelihood(BaseFlowValidationModel):
ptilde *= likelihood_zobs( ptilde *= likelihood_zobs(
self.z_obs[None, :, None], zobs, e2_cz[None, :, None]) self.z_obs[None, :, None], zobs, e2_cz[None, :, None])
if self.with_absolute_calibration:
raise NotImplementedError(
"Absolute calibration not implemented for this model. "
"Use `PV_LogLikelihood_NoDistMarg` instead.")
# Integrate over the radial distance. Shape: (nsims, ndata) # Integrate over the radial distance. Shape: (nsims, ndata)
ll = jnp.log(simpson(ptilde, x=self.r_xrange, axis=-1)) ll = jnp.log(simpson(ptilde, x=self.r_xrange, axis=-1))
ll -= jnp.log(pnorm) ll -= jnp.log(pnorm)
return ll0 + jnp.sum(logsumexp(ll, axis=0)) + self.norm return ll0 + jnp.sum(logsumexp(ll, axis=0)) + self.norm
else: else:
if field_calibration_params["sample_h"]:
raise NotImplementedError(
"Sampling of h is not yet implemented.")
e_mu = jnp.sqrt(e2_mu) e_mu = jnp.sqrt(e2_mu)
# True distance modulus, shape is `(n_data)`` # True distance modulus, shape is `(n_data)`. If we have absolute
# calibration, then this distance modulus assumes a particular h.
with plate("plate_mu", self.ndata): with plate("plate_mu", self.ndata):
mu_true = sample("mu", Normal(mu, e_mu)) mu_true = sample("mu", Normal(mu, e_mu))
# True distance and redshift, shape is `(n_data)`. # Likelihood of the true distance modulii given the calibration.
r_true = distmod2dist(mu_true, self.Omega_m) if field_calibration_params["sample_h"]:
z_true = distmod2redshift(mu_true, self.Omega_m) raise RuntimeError(
"Sampling of 'h' has not yet been thoroughly tested.")
h = field_calibration_params["h"]
# Now, the rest of the code except the calibration likelihood
# uses the distance modulus in units of h
mu_true_h = mu_true + 5 * jnp.log10(h)
# Calculate the log-likelihood of the calibration, but the
# shape is `(n_calibrators, n_data)`. Where there is no data
# we set the likelihood to 0 (or the log-likelihood to -inf)
ll_calibration = jnp.where(
self.is_finite_calibrator,
normal_logpdf(self.mu_calibration, mu_true[None, :],
self.e_mu_calibration),
-jnp.inf)
# Now average out over the calibrators, however only if the
# there is at least one calibrator. If there isn't, then we
# just assing a log-likelihood of 0.
ll_calibration = jnp.where(
self.any_calibrator,
logsumexp(ll_calibration, axis=0) - jnp.log(self.counts_calibrators), # noqa
0.)
else:
mu_true_h = mu_true
# True distance and redshift, shape is `(n_data)`. The distance
# here is in units of `Mpc / h``.
r_true = distmod2dist(mu_true_h, self.Omega_m)
z_true = distmod2redshift(mu_true_h, self.Omega_m)
if self.is_void_data: if self.is_void_data:
raise NotImplementedError( raise NotImplementedError(
@ -840,26 +856,29 @@ class PV_LogLikelihood(BaseFlowValidationModel):
alpha = distmod_params["alpha"] alpha = distmod_params["alpha"]
# Normalisation of p(mu), shape is `(n_sims, n_data, n_rad)` # Normalisation of p(mu), shape is `(n_sims, n_data, n_rad)`
pnorm = ( pnorm = normal_logpdf(
+ self.log_r2_xrange[None, None, :] self.mu_xrange[None, :], mu[:, None], e_mu[:, None])[None, ...]
+ alpha * log_los_density_grid if self.with_homogeneous_malmquist:
+ normal_logpdf( pnorm += self.log_r2_xrange[None, None, :]
self.mu_xrange[None, :], mu[:, None], e_mu[:, None])[None, ...]) # noqa if self.with_inhomogeneous_malmquist:
pnorm += alpha * log_los_density_grid
pnorm = jnp.exp(pnorm) pnorm = jnp.exp(pnorm)
# Now integrate over the radial steps. Shape is `(nsims, ndata)`. # Now integrate over the radial steps. Shape is `(nsims, ndata)`.
# No Jacobian here because I integrate over distance, not the # No Jacobian here because I integrate over distance, not the
# distance modulus. # distance modulus.
pnorm = simpson(pnorm, x=self.r_xrange, axis=-1) pnorm = simpson(pnorm, x=self.r_xrange, axis=-1)
# Jacobian |dr / dmu|_(mu_true), shape is `(n_data)`. # Jacobian |dr / dmu|_(mu_true_h), shape is `(n_data)`.
jac = jnp.abs(distmod2dist_gradient(mu_true, self.Omega_m)) jac = jnp.abs(distmod2dist_gradient(mu_true_h, self.Omega_m))
# Calculate unnormalized log p(mu). Shape is (nsims, ndata) # Calculate unnormalized log p(mu). Shape is (nsims, ndata)
ll = ( ll = 0.0
+ jnp.log(jac)[None, :] if self.with_homogeneous_malmquist:
+ (2 * jnp.log(r_true) - self.log_r2_xrange_mean)[None, :] ll += (+ jnp.log(jac)
+ alpha * log_density + (2 * jnp.log(r_true) - self.log_r2_xrange_mean))
) if self.with_inhomogeneous_malmquist:
ll += alpha * log_density
# Subtract the normalization. Shape remains (nsims, ndata) # Subtract the normalization. Shape remains (nsims, ndata)
ll -= jnp.log(pnorm) ll -= jnp.log(pnorm)
@ -871,13 +890,13 @@ class PV_LogLikelihood(BaseFlowValidationModel):
zobs *= 1 + vrad / SPEED_OF_LIGHT zobs *= 1 + vrad / SPEED_OF_LIGHT
zobs -= 1. zobs -= 1.
# Add the log-likelihood of observed redshifts. Shape remains
# `(nsims, ndata)`
ll += log_likelihood_zobs( ll += log_likelihood_zobs(
self.z_obs[None, :], zobs, e2_cz[None, :]) self.z_obs[None, :], zobs, e2_cz[None, :])
if self.with_absolute_calibration: if field_calibration_params["sample_h"]:
raise NotImplementedError( ll += ll_calibration[None, :]
"Absolute calibration not implemented for this model. "
"Use `PV_LogLikelihood_NoDistMarg` instead.")
return ll0 + jnp.sum(logsumexp(ll, axis=0)) + self.norm return ll0 + jnp.sum(logsumexp(ll, axis=0)) + self.norm

View file

@ -58,7 +58,8 @@ class DataLoader:
def __init__(self, simname, ksim, catalogue, catalogue_fpath, paths, def __init__(self, simname, ksim, catalogue, catalogue_fpath, paths,
ksmooth=None, store_full_velocity=False, verbose=True): ksmooth=None, store_full_velocity=False, verbose=True):
fprint("reading the catalogue,", verbose) fprint("reading the catalogue,", verbose)
self._cat = self._read_catalogue(catalogue, catalogue_fpath) self._cat, self._absmag_calibration = self._read_catalogue(
catalogue, catalogue_fpath)
self._catname = catalogue self._catname = catalogue
fprint("reading the interpolated field.", verbose) fprint("reading the interpolated field.", verbose)
@ -132,6 +133,15 @@ class DataLoader:
"""The distance indicators catalogue (structured array).""" """The distance indicators catalogue (structured array)."""
return self._cat[self._mask] return self._cat[self._mask]
@property
def absmag_calibration(self):
"""Returns the absolute magnitude calibration with masking applied."""
if self._absmag_calibration is None:
return None
return {key: val[:, self._mask]
for key, val in self._absmag_calibration.items()}
@property @property
def catname(self): def catname(self):
"""Catalogue name.""" """Catalogue name."""
@ -187,6 +197,8 @@ class DataLoader:
fpath = paths.field_los(simname, "Pantheon+") fpath = paths.field_los(simname, "Pantheon+")
elif "CF4_TFR" in catalogue: elif "CF4_TFR" in catalogue:
fpath = paths.field_los(simname, "CF4_TFR") fpath = paths.field_los(simname, "CF4_TFR")
elif "Carrick2MTFmock" in catalogue:
fpath = paths.field_los(simname, "2MTF")
else: else:
fpath = paths.field_los(simname, catalogue) fpath = paths.field_los(simname, catalogue)
@ -217,6 +229,8 @@ class DataLoader:
return rdist, los_density, los_velocity return rdist, los_density, los_velocity
def _read_catalogue(self, catalogue, catalogue_fpath): def _read_catalogue(self, catalogue, catalogue_fpath):
absmag_calibration = None
if catalogue == "A2": if catalogue == "A2":
with File(catalogue_fpath, 'r') as f: with File(catalogue_fpath, 'r') as f:
dtype = [(key, np.float32) for key in f.keys()] dtype = [(key, np.float32) for key in f.keys()]
@ -262,6 +276,20 @@ class DataLoader:
arr = np.empty(len(mock_data["RA"]), dtype=dtype) arr = np.empty(len(mock_data["RA"]), dtype=dtype)
for key in mock_data.keys(): for key in mock_data.keys():
arr[key] = mock_data[key] arr[key] = mock_data[key]
elif "Carrick2MTFmock" in catalogue:
with File(catalogue_fpath, 'r') as f:
keys_skip = ["mu_calibration", "e_mu_calibration"]
dtype = [(key, np.float32) for key in f.keys()
if key not in keys_skip]
arr = np.empty(len(f["RA"]), dtype=dtype)
for key in f.keys():
if key not in keys_skip:
arr[key] = f[key][:]
absmag_calibration = {
"mu_calibration": f["mu_calibration"][...],
"e_mu_calibration": f["e_mu_calibration"][...]}
elif "UPGLADE" in catalogue: elif "UPGLADE" in catalogue:
with File(catalogue_fpath, 'r') as f: with File(catalogue_fpath, 'r') as f:
dtype = [(key, np.float32) for key in f.keys()] dtype = [(key, np.float32) for key in f.keys()]
@ -286,7 +314,7 @@ class DataLoader:
else: else:
raise ValueError(f"Unknown catalogue: `{catalogue}`.") raise ValueError(f"Unknown catalogue: `{catalogue}`.")
return arr return arr, absmag_calibration
############################################################################### ###############################################################################
@ -322,7 +350,7 @@ def radial_velocity_los(los_velocity, ra, dec):
def read_absolute_calibration(kind, data_length, calibration_fpath): def read_absolute_calibration(kind, data_length, calibration_fpath):
""" """
Read the absolute calibration for the CF4 TFR sample from LEDA but Read the absolute calibration for the CF4 TFR sample from LEDA but
preprocessed by me. preprocessed by me. Missing values are replaced with NaN.
Parameters Parameters
---------- ----------
@ -335,13 +363,13 @@ def read_absolute_calibration(kind, data_length, calibration_fpath):
Returns Returns
------- -------
data : 3-dimensional array of shape (data_length, max_calib, 2) mu : 2-dimensional array of shape `(ncalib, ngalaxies)`
Absolute calibration data. Absolute calibration data.
with_calibration : 1-dimensional array of shape (data_length) e_mu : 2-dimensional array of shape `(ncalib, ngalaxies)`
Whether the sample has a calibration. Uncertainties of the absolute calibration.
length_calibration : 1-dimensional array of shape (data_length)
Number of calibration points per sample.
""" """
raise RuntimeError("The read-in functions are not guaranteed to work "
"properly.")
data = {} data = {}
with File(calibration_fpath, 'r') as f: with File(calibration_fpath, 'r') as f:
for key in f[kind].keys(): for key in f[kind].keys():
@ -355,14 +383,14 @@ def read_absolute_calibration(kind, data_length, calibration_fpath):
max_calib = max(len(val) for val in data.values()) max_calib = max(len(val) for val in data.values())
out = np.full((data_length, max_calib, 2), np.nan) out = np.full((data_length, max_calib, 2), np.nan)
with_calibration = np.full(data_length, False)
length_calibration = np.full(data_length, 0)
for i in data.keys(): for i in data.keys():
out[int(i), :len(data[i]), :] = data[i] out[int(i), :len(data[i]), :] = data[i]
with_calibration[int(i)] = True
length_calibration[int(i)] = len(data[i])
return out, with_calibration, length_calibration # Unpack from this the distsance modulus and its uncertainty.
mu = out[:, :, 0].T
e_mu = out[:, :, 1].T
return mu, e_mu
def mask_fields(density, velocity, mask, return_none): def mask_fields(density, velocity, mask, return_none):
@ -422,8 +450,9 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
loader._field_rdist = rdist loader._field_rdist = rdist
if absolute_calibration is not None and "CF4_TFR_" not in kind: if absolute_calibration is not None and not ("CF4_TFR_" in kind or "Carrick2MTFmock" in kind): # noqa
raise ValueError("Absolute calibration supported only for the CF4 TFR sample.") # noqa raise ValueError("Absolute calibration supported only for either "
"the CF4 TFR sample or Carrick 2MTF mocks.")
if kind in ["LOSS", "Foundation"]: if kind in ["LOSS", "Foundation"]:
keys = ["RA", "DEC", "z_CMB", "mB", "x1", "c", "e_mB", "e_x1", "e_c"] keys = ["RA", "DEC", "z_CMB", "mB", "x1", "c", "e_mB", "e_x1", "e_c"]
@ -442,7 +471,7 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
model = PV_LogLikelihood( model = PV_LogLikelihood(
los_overdensity, los_velocity, los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], e_zCMB, calibration_params, RA[mask], dec[mask], zCMB[mask], e_zCMB, calibration_params,
None, mag_selection, loader.rdist, loader._Omega_m, "SN", mag_selection, loader.rdist, loader._Omega_m, "SN",
name=kind, void_kwargs=void_kwargs, name=kind, void_kwargs=void_kwargs,
wo_num_dist_marginalisation=wo_num_dist_marginalisation) wo_num_dist_marginalisation=wo_num_dist_marginalisation)
elif "Pantheon+" in kind: elif "Pantheon+" in kind:
@ -476,26 +505,62 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
model = PV_LogLikelihood( model = PV_LogLikelihood(
los_overdensity, los_velocity, los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], e_zCMB[mask], calibration_params, RA[mask], dec[mask], zCMB[mask], e_zCMB[mask], calibration_params,
None, mag_selection, loader.rdist, loader._Omega_m, "SN", mag_selection, loader.rdist, loader._Omega_m, "SN",
name=kind, void_kwargs=void_kwargs, name=kind, void_kwargs=void_kwargs,
wo_num_dist_marginalisation=wo_num_dist_marginalisation) wo_num_dist_marginalisation=wo_num_dist_marginalisation)
elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"] or "IndranilVoidTFRMock" in kind: # noqa elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"] or "IndranilVoidTFRMock" in kind or "Carrick2MTFmock" in kind: # noqa
keys = ["RA", "DEC", "z_CMB", "mag", "eta", "e_mag", "e_eta"] keys = ["RA", "DEC", "z_CMB", "mag", "eta", "e_mag", "e_eta"]
RA, dec, zCMB, mag, eta, e_mag, e_eta = (loader.cat[k] for k in keys) RA, dec, zCMB, mag, eta, e_mag, e_eta = (loader.cat[k] for k in keys)
mask = (zCMB < zcmb_max) & (zCMB > zcmb_min) mask = (zCMB < zcmb_max) & (zCMB > zcmb_min)
if "Carrick2MTFmock" in kind:
# For the mock we only want to select objects with the '2M++'
# volume.
mask &= loader.cat["r"] < 130
# The mocks are generated without Malmquist.
fprint("disabling homogeneous and inhomogeneous Malmquist bias for the mock.") # noqa
with_homogeneous_malmquist = False
with_inhomogeneous_malmquist = False
else:
with_homogeneous_malmquist = True
with_inhomogeneous_malmquist = True
calibration_params = {"mag": mag[mask], "eta": eta[mask], calibration_params = {"mag": mag[mask], "eta": eta[mask],
"e_mag": e_mag[mask], "e_eta": e_eta[mask]} "e_mag": e_mag[mask], "e_eta": e_eta[mask]}
# Append the calibration data
if "Carrick2MTFmock" in kind:
absmag_calibration = loader.absmag_calibration
# The shape of these is (`ncalibrators, nobjects`).
mu_calibration = absmag_calibration["mu_calibration"][:, mask]
e_mu_calibration = absmag_calibration["e_mu_calibration"][:, mask]
# Auxiliary parameters.
m = np.isfinite(mu_calibration)
# NumPyro refuses to start if any inputs are not finite, so we
# replace with some ficutive mean and very large standard
# deviation.
mu_calibration[~m] = 0.0
e_mu_calibration[~m] = 1000.0
calibration_params["mu_calibration"] = mu_calibration
calibration_params["e_mu_calibration"] = e_mu_calibration
calibration_params["is_finite_calibrator"] = m
calibration_params["counts_calibrators"] = np.sum(m, axis=0)
calibration_params["any_calibrator"] = np.any(m, axis=0)
los_overdensity, los_velocity = mask_fields( los_overdensity, los_velocity = mask_fields(
los_overdensity, los_velocity, mask, void_kwargs is not None) los_overdensity, los_velocity, mask, void_kwargs is not None)
model = PV_LogLikelihood( model = PV_LogLikelihood(
los_overdensity, los_velocity, los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], None, calibration_params, None, RA[mask], dec[mask], zCMB[mask], None, calibration_params,
mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind, mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind,
void_kwargs=void_kwargs, void_kwargs=void_kwargs,
wo_num_dist_marginalisation=wo_num_dist_marginalisation) wo_num_dist_marginalisation=wo_num_dist_marginalisation,
with_homogeneous_malmquist=with_homogeneous_malmquist,
with_inhomogeneous_malmquist=with_inhomogeneous_malmquist)
elif "CF4_TFR_" in kind: elif "CF4_TFR_" in kind:
# The full name can be e.g. "CF4_TFR_not2MTForSFI_i" or "CF4_TFR_i". # The full name can be e.g. "CF4_TFR_not2MTForSFI_i" or "CF4_TFR_i".
band = kind.split("_")[-1] band = kind.split("_")[-1]
@ -532,38 +597,39 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
else: else:
mask &= Qs == 5 mask &= Qs == 5
# Read the absolute calibration
if absolute_calibration is not None:
CF4_length = len(RA)
distmod, with_calibration, length_calibration = read_absolute_calibration( # noqa
"Cepheids", CF4_length, calibration_fpath)
distmod = distmod[mask]
with_calibration = with_calibration[mask]
length_calibration = length_calibration[mask]
fprint(f"found {np.sum(with_calibration)} galaxies with absolute calibration.") # noqa
distmod = distmod[with_calibration]
length_calibration = length_calibration[with_calibration]
abs_calibration_params = {
"calibration_distmod": distmod,
"data_with_calibration": with_calibration,
"length_calibration": length_calibration}
else:
abs_calibration_params = None
calibration_params = {"mag": mag[mask], "eta": eta[mask], calibration_params = {"mag": mag[mask], "eta": eta[mask],
"e_mag": e_mag[mask], "e_eta": e_eta[mask]} "e_mag": e_mag[mask], "e_eta": e_eta[mask]}
# Read the absolute calibration
mu_calibration, e_mu_calibration = read_absolute_calibration(
absolute_calibration, len(RA), calibration_fpath)
# The shape of these is (`ncalibrators, nobjects`).
mu_calibration = mu_calibration[:, mask]
e_mu_calibration = e_mu_calibration[:, mask]
# Auxiliary parameters.
m = np.isfinite(mu_calibration)
# NumPyro refuses to start if any inputs are not finite, so we
# replace with some ficutive mean and very large standard
# deviation.
mu_calibration[~m] = 0.0
e_mu_calibration[~m] = 1000.0
calibration_params["mu_calibration"] = mu_calibration
calibration_params["e_mu_calibration"] = e_mu_calibration
calibration_params["is_finite_calibrator"] = m
calibration_params["counts_calibrators"] = np.sum(m, axis=0)
calibration_params["any_calibrator"] = np.any(m, axis=0)
los_overdensity, los_velocity = mask_fields( los_overdensity, los_velocity = mask_fields(
los_overdensity, los_velocity, mask, void_kwargs is not None) los_overdensity, los_velocity, mask, void_kwargs is not None)
model = PV_LogLikelihood( model = PV_LogLikelihood(
los_overdensity, los_velocity, los_overdensity, los_velocity,
RA[mask], dec[mask], z_obs[mask], None, calibration_params, RA[mask], dec[mask], z_obs[mask], None, calibration_params,
abs_calibration_params, mag_selection, loader.rdist, mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind,
loader._Omega_m, "TFR", name=kind, void_kwargs=void_kwargs, void_kwargs=void_kwargs,
wo_num_dist_marginalisation=wo_num_dist_marginalisation) wo_num_dist_marginalisation=wo_num_dist_marginalisation)
elif kind in ["CF4_GroupAll"]: elif kind in ["CF4_GroupAll"]:
# Note, this for some reason works terribly. # Note, this for some reason works terribly.
@ -583,7 +649,7 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
model = PV_LogLikelihood( model = PV_LogLikelihood(
los_overdensity, los_velocity, los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], None, calibration_params, None, RA[mask], dec[mask], zCMB[mask], None, calibration_params,
mag_selection, loader.rdist, loader._Omega_m, "simple", mag_selection, loader.rdist, loader._Omega_m, "simple",
name=kind, void_kwargs=void_kwargs, name=kind, void_kwargs=void_kwargs,
wo_num_dist_marginalisation=wo_num_dist_marginalisation) wo_num_dist_marginalisation=wo_num_dist_marginalisation)

150
csiborgtools/flow/mocks.py Normal file
View file

@ -0,0 +1,150 @@
# Copyright (C) 2024 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Mock data generators."""
import numpy as np
from ..field.interp import evaluate_cartesian_regular
from ..params import SPEED_OF_LIGHT
from ..utils import cartesian_to_radec, radec_to_cartesian, radec_to_galactic
from .cosmography import distmod2dist, distmod2redshift
###############################################################################
# Mock Quijote observations #
###############################################################################
def mock_Carrick2MTF(velocity_field, boxsize, RA_2MTF, DEC_2MTF,
a_TF=-22.8, b_TF=-7.2, sigma_TF=0.25, sigma_v=100.,
Vext=[150.0, 10.0, -100.0], h=1.0, beta=0.4,
mean_eta=0.069, std_eta=0.078, mean_e_eta=0.012,
mean_mag=10.31, std_mag=0.83, mean_e_mag=0.044,
sigma_calibration=0.05, calibration_max_percentile=10,
calibration_rand_fraction=0.5, nrepeat_calibration=1,
seed=42, Om0=0.3, verbose=True, **kwargs):
"""
Mock TFR catalogue build against the Carrick velocity field and the
2MTF sky distribution to avoid recomputing the LOS velocities.
"""
nsamples = len(RA_2MTF)
# Convert Vext from ICRS to Galactic coordinates.
Vext = np.asarray(Vext).reshape(1, 3)
Vext_mag, Vext_RA, Vext_DEC = cartesian_to_radec(Vext).reshape(-1, )
Vext_l, Vext_b = radec_to_galactic(Vext_RA, Vext_DEC)
Vext_galactic = np.asanyarray([Vext_mag, Vext_l, Vext_b]).reshape(1, 3)
Vext = radec_to_cartesian(Vext_galactic).reshape(-1, )
truths = {"a": a_TF, "b": b_TF, "e_mu": sigma_TF, "sigma_v": sigma_v,
"Vext": Vext,
"mean_eta": mean_eta, "std_eta": std_eta,
"mean_mag": mean_mag, "std_mag": std_mag,
}
gen = np.random.default_rng(seed)
# The Carrick box is in the Galactic coordinates.
l, b = radec_to_galactic(RA_2MTF, DEC_2MTF)
gal_phi = np.deg2rad(l)
gal_theta = np.pi / 2 - np.deg2rad(b)
# Sample the linewidth of each galaxy from a Gaussian distribution to mimic
# the MNR procedure.
eta_true = gen.normal(mean_eta, std_eta, nsamples)
eta_obs = gen.normal(eta_true, mean_e_eta)
# Subtract the mean of the observed linewidths, so that they are
# centered around zero. For consistency subtract from both observed
# and true values.
eta_mean_sampled = np.mean(eta_obs)
eta_true -= eta_mean_sampled
eta_obs -= eta_mean_sampled
# Sample the magnitude from some Gaussian distribution to replicate MNR.
mag_true = gen.normal(mean_mag, std_mag, nsamples)
mag_obs = gen.normal(mag_true, mean_e_mag)
# Calculate the 'true' distance modulus and redshift from the TFR distance.
# If h != 1, then these distance modulii are in physical units.
mu_TFR = mag_true - (a_TF + b_TF * eta_true)
mu_true = gen.normal(mu_TFR, sigma_TF)
# This is the distance modulus in units of little h.
mu_true_h = mu_true + 5 * np.log10(h)
# Select a fraction of nearby galaxies.
mu_max = np.percentile(mu_true, calibration_max_percentile)
ks = np.where(mu_true < mu_max)[0]
nsel = int(calibration_rand_fraction * len(ks))
if verbose:
print(f"Assigning calibration to {nsel}/{nsamples} galaxies.")
mu_calibration = np.full((nrepeat_calibration, nsamples), np.nan)
e_mu_calibration = np.full((nrepeat_calibration, nsamples), np.nan)
for n in range(nrepeat_calibration):
ks_n = gen.choice(ks, nsel, replace=False)
mu_calibration[n, ks_n] = gen.normal(mu_true[ks_n], sigma_calibration)
e_mu_calibration[n, ks_n] = np.ones(len(ks_n)) * sigma_calibration
# Convert the true distance modulus to true distance and cosmological
# redshift. The distance is in Mpc/h because the box is in Mpc / h.
r = distmod2dist(mu_true_h, Om0)
zcosmo = distmod2redshift(mu_true_h, Om0)
# Calculate the Cartesian coordinates of each galaxy. This is initially
# centered at (0, 0, 0).
pos = r * np.asarray([
np.sin(gal_theta) * np.cos(gal_phi),
np.sin(gal_theta) * np.sin(gal_phi),
np.cos(gal_theta)])
pos = pos.T
pos_box = pos / boxsize + 0.5
vel = evaluate_cartesian_regular(
velocity_field[0], velocity_field[1], velocity_field[2],
pos=pos_box, smooth_scales=None, method="cubic")
vel = beta * np.vstack(vel).T
for i in range(3):
vel[:, i] += Vext[i]
# Compute the radial velocity.
Vr = np.sum(vel * pos, axis=1) / np.linalg.norm(pos, axis=1)
# The true redshift of the source.
zCMB_true = (1 + zcosmo) * (1 + Vr / SPEED_OF_LIGHT) - 1
zCMB_obs = gen.normal(zCMB_true, sigma_v / SPEED_OF_LIGHT)
# These galaxies will be masked out when LOS is read it because they are
# too far away.
distance_mask = r < 125
truths["distance_mask"] = distance_mask
sample = {"RA": RA_2MTF,
"DEC": DEC_2MTF,
"z_CMB": zCMB_obs,
"eta": eta_obs,
"mag": mag_obs,
"e_eta": np.ones(nsamples) * mean_e_eta,
"e_mag": np.ones(nsamples) * mean_e_mag,
"mu_true": mu_true,
"mu_TFR": mu_TFR,
"mu_calibration": mu_calibration,
"e_mu_calibration": e_mu_calibration,
"r": r,
}
return sample, truths

File diff suppressed because one or more lines are too long

View file

@ -116,6 +116,9 @@ def get_models(ksim, get_model_kwargs, mag_selection, void_kwargs,
"Pantheon+_groups", "Pantheon+_groups_zSN", "Pantheon+_groups", "Pantheon+_groups_zSN",
"Pantheon+_zSN"]: "Pantheon+_zSN"]:
fpath = join(folder, "PV_compilation.hdf5") fpath = join(folder, "PV_compilation.hdf5")
elif "Carrick2MTFmock" in cat:
ki = cat.split("_")[-1]
fpath =f"/mnt/extraspace/rstiskalek/csiborg_postprocessing/flow_mock/Carrick2MTFmock_seed{ki}.hdf5" # noqa
elif "CF4_TFR" in cat: elif "CF4_TFR" in cat:
fpath = join(folder, "PV/CF4/CF4_TF-distances.hdf5") fpath = join(folder, "PV/CF4/CF4_TF-distances.hdf5")
elif cat in ["CF4_GroupAll"]: elif cat in ["CF4_GroupAll"]:
@ -186,7 +189,7 @@ def run_model(model, nsteps, nburn, model_kwargs, out_folder,
neg_ln_evidence_err = (jax.numpy.nan, jax.numpy.nan) neg_ln_evidence_err = (jax.numpy.nan, jax.numpy.nan)
fname = join(out_folder, fname) fname = join(out_folder, fname)
print(f"Saving results to `{fname}`.") print(f"Saving results: `{fname}`.")
with File(fname, "w") as f: with File(fname, "w") as f:
# Write samples # Write samples
grp = f.create_group("samples") grp = f.create_group("samples")
@ -203,15 +206,22 @@ def run_model(model, nsteps, nburn, model_kwargs, out_folder,
grp.create_dataset("neg_lnZ_harmonic", data=neg_ln_evidence) grp.create_dataset("neg_lnZ_harmonic", data=neg_ln_evidence)
grp.create_dataset("neg_lnZ_harmonic_err", data=neg_ln_evidence_err) grp.create_dataset("neg_lnZ_harmonic_err", data=neg_ln_evidence_err)
fname_summary = fname.replace(".hdf5", ".txt") fname_config = fname.replace(".hdf5", "_config.txt")
print(f"Saving summary to `{fname_summary}`.") print(f"Saving configuration: `{fname_config}`.")
with open(fname_summary, 'w') as f: with open(fname_config, 'w') as f:
original_stdout = sys.stdout original_stdout = sys.stdout
sys.stdout = f sys.stdout = f
print("User parameters:") print("User parameters:")
for kwargs in kwargs_print: for kwargs in kwargs_print:
print_variables(kwargs.keys(), kwargs.values()) print_variables(kwargs.keys(), kwargs.values())
sys.stdout = original_stdout
fname_summary = fname.replace(".hdf5", "_summary.txt")
print(f"Saving summary: `{fname_summary}`.")
with open(fname_summary, 'w') as f:
original_stdout = sys.stdout
sys.stdout = f
print("HMC summary:") print("HMC summary:")
print(f"{'BIC':<20} {BIC}") print(f"{'BIC':<20} {BIC}")
@ -238,7 +248,7 @@ def get_distmod_hyperparams(catalogue, sample_alpha, sample_mag_dipole):
"alpha_min": alpha_min, "alpha_max": alpha_max, "alpha_min": alpha_min, "alpha_max": alpha_max,
"sample_alpha": sample_alpha "sample_alpha": sample_alpha
} }
elif catalogue in ["SFI_gals", "2MTF"] or "CF4_TFR" in catalogue or "IndranilVoidTFRMock" in catalogue: # noqa elif catalogue in ["SFI_gals", "2MTF"] or "CF4_TFR" in catalogue or "IndranilVoidTFRMock" in catalogue or "Carrick2MTFmock" in catalogue: # noqa
return {"e_mu_min": 0.001, "e_mu_max": 1.0, return {"e_mu_min": 0.001, "e_mu_max": 1.0,
"a_mean": -21., "a_std": 5.0, "a_mean": -21., "a_std": 5.0,
"b_mean": -5.95, "b_std": 4.0, "b_mean": -5.95, "b_std": 4.0,
@ -247,6 +257,7 @@ def get_distmod_hyperparams(catalogue, sample_alpha, sample_mag_dipole):
"sample_a_dipole": sample_mag_dipole, "sample_a_dipole": sample_mag_dipole,
"alpha_min": alpha_min, "alpha_max": alpha_max, "alpha_min": alpha_min, "alpha_max": alpha_max,
"sample_alpha": sample_alpha, "sample_alpha": sample_alpha,
"sample_curvature": False if "Carrick2MTFmock" in catalogue else True, # noqa
} }
elif catalogue in ["CF4_GroupAll"]: elif catalogue in ["CF4_GroupAll"]:
return {"e_mu_min": 0.001, "e_mu_max": 1.0, return {"e_mu_min": 0.001, "e_mu_max": 1.0,
@ -294,8 +305,8 @@ if __name__ == "__main__":
########################################################################### ###########################################################################
# `None` means default behaviour # `None` means default behaviour
nsteps = 2_000 nsteps = 1_500
nburn = 2_000 nburn = 1_500
zcmb_min = None zcmb_min = None
zcmb_max = 0.05 zcmb_max = 0.05
nchains_harmonic = 10 nchains_harmonic = 10
@ -310,9 +321,16 @@ if __name__ == "__main__":
sample_mag_dipole = False sample_mag_dipole = False
wo_num_dist_marginalisation = False wo_num_dist_marginalisation = False
absolute_calibration = None absolute_calibration = None
calculate_harmonic = (False if inference_method == "bayes" else True) and (not wo_num_dist_marginalisation) # noqa calculate_harmonic = (False if (inference_method == "bayes") else True) and (not wo_num_dist_marginalisation) # noqa
sample_h = True if absolute_calibration is not None else False sample_h = True if absolute_calibration is not None else False
# These mocks are generated without a density field, so there is no
# inhomogeneous Malmquist and we also do not need evidences.
for catalogue in ARGS.catalogue:
if "Carrick2MTFmock" in catalogue:
sample_alpha = False
calculate_harmonic = False
fname_kwargs = {"inference_method": inference_method, fname_kwargs = {"inference_method": inference_method,
"smooth": ARGS.ksmooth, "smooth": ARGS.ksmooth,
"nsim": ARGS.ksim, "nsim": ARGS.ksim,
@ -375,7 +393,7 @@ if __name__ == "__main__":
"Vmono_min": -1000, "Vmono_max": 1000, "Vmono_min": -1000, "Vmono_max": 1000,
"beta_min": -10.0, "beta_max": 10.0, "beta_min": -10.0, "beta_max": 10.0,
"sigma_v_min": 1.0, "sigma_v_max": 1000 if "IndranilVoid_" in ARGS.simname else 750., # noqa "sigma_v_min": 1.0, "sigma_v_max": 1000 if "IndranilVoid_" in ARGS.simname else 750., # noqa
"h_min": 0.01, "h_max": 5.0, "h_min": 0.25, "h_max": 1.,
"no_Vext": False if no_Vext is None else no_Vext, # noqa "no_Vext": False if no_Vext is None else no_Vext, # noqa
"sample_Vmag_vax": sample_Vmag_vax, "sample_Vmag_vax": sample_Vmag_vax,
"sample_Vmono": sample_Vmono, "sample_Vmono": sample_Vmono,

View file

@ -38,10 +38,10 @@ fi
# for simname in "IndranilVoid_exp" "IndranilVoid_gauss" "IndranilVoid_mb"; do # for simname in "IndranilVoid_exp" "IndranilVoid_gauss" "IndranilVoid_mb"; do
for simname in "IndranilVoid_exp"; do for simname in "Carrick2015"; do
# for simname in "Carrick2015" "Lilow2024" "csiborg2_main" "csiborg2X" "manticore_2MPP_N128_DES_V1" "CF4" "CLONES"; do # for simname in "Carrick2015" "Lilow2024" "csiborg2_main" "csiborg2X" "manticore_2MPP_N128_DES_V1" "CF4" "CLONES"; do
# for catalogue in "LOSS" "Foundation" "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do # for catalogue in "LOSS" "Foundation" "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do
for catalogue in "2MTF"; do for catalogue in "CF4_TFR_w1"; do
# for catalogue in "CF4_TFR_i" "CF4_TFR_w1"; do # for catalogue in "CF4_TFR_i" "CF4_TFR_w1"; do
# for catalogue in "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do # for catalogue in "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do
for ksim in "none"; do for ksim in "none"; do