Mirror of https://github.com/Richard-Sti/csiborgtools_public.git (synced 2025-05-13 22:21:12 +00:00)
Mocks with absolute calibration (#156)
* Rename nb
* Add Carrick 2MTF mocks
* Add more 2MTF mock support
* Add mocks generator
* Add mock gen nb
* Control over Malmquist
* Control over Malmquist
* Update imports
* Update script
* Add H0 to TFR mocks
* Clear up saving
* Add h sampling
* Update saving
* Update mocks
* Add calibration to mocks
* More support to read in absmag
* Add absmag
* Update script
This commit is contained in:
parent 76a7609f7f
commit 4167ba4fb1
8 changed files with 902 additions and 118 deletions
@@ -17,6 +17,7 @@ from .io import (DataLoader, get_model, read_absolute_calibration,
from .flow_model import (PV_LogLikelihood, PV_validation_model, dist2redshift,  # noqa
                         Observed2CosmologicalRedshift, predict_zobs,  # noqa
                         project_Vext, stack_pzosmo_over_realizations)  # noqa
from .mocks import mock_Carrick2MTF  # noqa
from .selection import ToyMagnitudeSelection  # noqa
from .void_model import (load_void_data, interpolate_void, select_void_h,  # noqa
                         mock_void)  # noqa
@@ -170,28 +170,6 @@ class BaseFlowValidationModel(ABC):

        self._setattr_as_jax(names, values)

    def _set_abs_calibration_params(self, abs_calibration_params):
        self.with_absolute_calibration = abs_calibration_params is not None

        if abs_calibration_params is None:
            self.with_absolute_calibration = False
            return

        self.calibration_distmod = jnp.asarray(
            abs_calibration_params["calibration_distmod"][..., 0])
        self.calibration_edistmod = jnp.asarray(
            abs_calibration_params["calibration_distmod"][..., 1])
        self.data_with_calibration = jnp.asarray(
            abs_calibration_params["data_with_calibration"])
        self.data_wo_calibration = ~self.data_with_calibration

        # Calculate the log of the number of calibrators. Where there is no
        # calibrator set the number of calibrators to 1 to avoid log(0) and
        # this way only zeros are being added.
        length_calibration = abs_calibration_params["length_calibration"]
        length_calibration[length_calibration == 0] = 1
        self.log_length_calibration = jnp.log(length_calibration)

    def _set_radial_spacing(self, r_xrange, Omega_m):
        cosmo = FlatLambdaCDM(H0=H0, Om0=Omega_m)
@@ -356,7 +334,8 @@ def e2_distmod_TFR(e2_mag, e2_eta, eta, b, c, e_mu_intrinsic):

def sample_TFR(e_mu_min, e_mu_max, a_mean, a_std, b_mean, b_std,
               c_mean, c_std, alpha_min, alpha_max, sample_alpha,
               a_dipole_mean, a_dipole_std, sample_a_dipole, name):
               a_dipole_mean, a_dipole_std, sample_a_dipole,
               sample_curvature, name):
    """Sample Tully-Fisher calibration parameters."""
    e_mu = sample(f"e_mu_{name}", Uniform(e_mu_min, e_mu_max))
    a = sample(f"a_{name}", Normal(a_mean, a_std))
@@ -367,7 +346,11 @@ def sample_TFR(e_mu_min, e_mu_max, a_mean, a_std, b_mean, b_std,
        ax, ay, az = 0.0, 0.0, 0.0

    b = sample(f"b_{name}", Normal(b_mean, b_std))

    if sample_curvature:
        c = sample(f"c_{name}", Normal(c_mean, c_std))
    else:
        c = 0.

    alpha = sample_alpha_bias(name, alpha_min, alpha_max, sample_alpha)
@@ -495,8 +478,6 @@ class PV_LogLikelihood(BaseFlowValidationModel):
        Errors on the observed redshifts.
    calibration_params : dict
        Calibration parameters of each object.
    abs_calibration_params : dict
        Absolute calibration parameters.
    mag_selection : dict
        Magnitude selection parameters, optional.
    r_xrange : 1-dimensional array
@@ -513,12 +494,17 @@ class PV_LogLikelihood(BaseFlowValidationModel):
        Whether to directly sample the distance without numerical
        marginalisation, in which case the tracers can be coupled by a
        covariance matrix. By default `False`.
    with_homogeneous_malmquist : bool, optional
        Whether to include the homogeneous Malmquist bias. By default `True`.
    with_inhomogeneous_malmquist : bool, optional
        Whether to include the inhomogeneous Malmquist bias. By default `True`.
    """

    def __init__(self, los_density, los_velocity, RA, dec, z_obs, e_zobs,
                 calibration_params, abs_calibration_params, mag_selection,
                 r_xrange, Omega_m, kind, name, void_kwargs=None,
                 wo_num_dist_marginalisation=False):
                 calibration_params, mag_selection, r_xrange, Omega_m, kind,
                 name, void_kwargs=None, wo_num_dist_marginalisation=False,
                 with_homogeneous_malmquist=True,
                 with_inhomogeneous_malmquist=True):
        if e_zobs is not None:
            e2_cz_obs = jnp.asarray((SPEED_OF_LIGHT * e_zobs)**2)
        else:
@@ -550,13 +536,14 @@ class PV_LogLikelihood(BaseFlowValidationModel):

        self._setattr_as_jax(names, values)
        self._set_calibration_params(calibration_params)
        self._set_abs_calibration_params(abs_calibration_params)
        self._set_radial_spacing(r_xrange, Omega_m)

        self.kind = kind
        self.name = name
        self.Omega_m = Omega_m
        self.wo_num_dist_marginalisation = wo_num_dist_marginalisation
        self.with_homogeneous_malmquist = with_homogeneous_malmquist
        self.with_inhomogeneous_malmquist = with_inhomogeneous_malmquist
        self.norm = - self.ndata * jnp.log(self.num_sims)

        if mag_selection is not None:
@@ -771,9 +758,14 @@ class PV_LogLikelihood(BaseFlowValidationModel):
                "marginalising the true distance.")

            # Calculate p(r) (Malmquist bias). Shape is (ndata, nxrange)
            if self.with_homogeneous_malmquist:
                log_ptilde = log_ptilde_wo_bias(
                    self.mu_xrange[None, :], mu[:, None], e2_mu[:, None],
                    self.log_r2_xrange[None, :])
            else:
                log_ptilde = log_ptilde_wo_bias(
                    self.mu_xrange[None, :], mu[:, None], e2_mu[:, None],
                    0.)

            if self.is_void_data:
                rLG = field_calibration_params["rLG"]
@@ -785,7 +777,9 @@ class PV_LogLikelihood(BaseFlowValidationModel):

            # Inhomogeneous Malmquist bias. Shape: (nsims, ndata, nxrange)
            alpha = distmod_params["alpha"]
            log_ptilde = log_ptilde[None, ...] + alpha * log_los_density
            log_ptilde = log_ptilde[None, ...]
            if self.with_inhomogeneous_malmquist:
                log_ptilde += alpha * log_los_density

            ptilde = jnp.exp(log_ptilde)
            # Normalization of p(r). Shape: (nsims, ndata)
@@ -802,29 +796,51 @@ class PV_LogLikelihood(BaseFlowValidationModel):
            ptilde *= likelihood_zobs(
                self.z_obs[None, :, None], zobs, e2_cz[None, :, None])

            if self.with_absolute_calibration:
                raise NotImplementedError(
                    "Absolute calibration not implemented for this model. "
                    "Use `PV_LogLikelihood_NoDistMarg` instead.")

            # Integrate over the radial distance. Shape: (nsims, ndata)
            ll = jnp.log(simpson(ptilde, x=self.r_xrange, axis=-1))
            ll -= jnp.log(pnorm)

            return ll0 + jnp.sum(logsumexp(ll, axis=0)) + self.norm
        else:
            if field_calibration_params["sample_h"]:
                raise NotImplementedError(
                    "Sampling of h is not yet implemented.")

            e_mu = jnp.sqrt(e2_mu)
            # True distance modulus, shape is `(n_data)`
            # True distance modulus, shape is `(n_data)`. If we have absolute
            # calibration, then this distance modulus assumes a particular h.
            with plate("plate_mu", self.ndata):
                mu_true = sample("mu", Normal(mu, e_mu))

            # True distance and redshift, shape is `(n_data)`.
            r_true = distmod2dist(mu_true, self.Omega_m)
            z_true = distmod2redshift(mu_true, self.Omega_m)
            # Likelihood of the true distance moduli given the calibration.
            if field_calibration_params["sample_h"]:
                raise RuntimeError(
                    "Sampling of 'h' has not yet been thoroughly tested.")
                h = field_calibration_params["h"]

                # Now, the rest of the code except the calibration likelihood
                # uses the distance modulus in units of h
                mu_true_h = mu_true + 5 * jnp.log10(h)

                # Calculate the log-likelihood of the calibration, but the
                # shape is `(n_calibrators, n_data)`. Where there is no data
                # we set the likelihood to 0 (or the log-likelihood to -inf)
                ll_calibration = jnp.where(
                    self.is_finite_calibrator,
                    normal_logpdf(self.mu_calibration, mu_true[None, :],
                                  self.e_mu_calibration),
                    -jnp.inf)

                # Now average out over the calibrators, however only if
                # there is at least one calibrator. If there isn't, then we
                # just assign a log-likelihood of 0.
                ll_calibration = jnp.where(
                    self.any_calibrator,
                    logsumexp(ll_calibration, axis=0) - jnp.log(self.counts_calibrators),  # noqa
                    0.)
            else:
                mu_true_h = mu_true

            # True distance and redshift, shape is `(n_data)`. The distance
            # here is in units of `Mpc / h`.
            r_true = distmod2dist(mu_true_h, self.Omega_m)
            z_true = distmod2redshift(mu_true_h, self.Omega_m)

            if self.is_void_data:
                raise NotImplementedError(
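For reference, a minimal sketch (not part of the diff) of the mask-then-average trick used in the calibration term above. The toy arrays are invented, and `jax.scipy.stats.norm.logpdf` stands in for the model's own `normal_logpdf` helper:

import jax.numpy as jnp
from jax.scipy.special import logsumexp
from jax.scipy.stats import norm

# Toy setup: 2 calibrator repeats, 3 galaxies; the middle galaxy has no
# calibrator at all (NaN placeholders, as written by the mock generator).
mu_calibration = jnp.array([[33.1, jnp.nan, 30.2],
                            [33.0, jnp.nan, jnp.nan]])
e_mu_calibration = jnp.array([[0.05, 1000.0, 0.05],
                              [0.05, 1000.0, 1000.0]])
is_finite = jnp.isfinite(mu_calibration)
counts = is_finite.sum(axis=0)              # calibrators per galaxy
any_cal = is_finite.any(axis=0)

mu_true = jnp.array([33.05, 35.0, 30.25])   # sampled distance moduli

# Per-calibrator log-likelihood; missing entries are sent to -inf.
ll = jnp.where(is_finite,
               norm.logpdf(mu_calibration, mu_true[None, :], e_mu_calibration),
               -jnp.inf)
# Average over calibrators; galaxies without any calibrator contribute 0.
ll = jnp.where(any_cal, logsumexp(ll, axis=0) - jnp.log(counts), 0.0)
print(ll)   # shape (3,): finite, 0., finite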
@@ -840,26 +856,29 @@ class PV_LogLikelihood(BaseFlowValidationModel):
            alpha = distmod_params["alpha"]

            # Normalisation of p(mu), shape is `(n_sims, n_data, n_rad)`
            pnorm = (
                + self.log_r2_xrange[None, None, :]
                + alpha * log_los_density_grid
                + normal_logpdf(
                    self.mu_xrange[None, :], mu[:, None], e_mu[:, None])[None, ...])  # noqa
            pnorm = normal_logpdf(
                self.mu_xrange[None, :], mu[:, None], e_mu[:, None])[None, ...]
            if self.with_homogeneous_malmquist:
                pnorm += self.log_r2_xrange[None, None, :]
            if self.with_inhomogeneous_malmquist:
                pnorm += alpha * log_los_density_grid

            pnorm = jnp.exp(pnorm)
            # Now integrate over the radial steps. Shape is `(nsims, ndata)`.
            # No Jacobian here because I integrate over distance, not the
            # distance modulus.
            pnorm = simpson(pnorm, x=self.r_xrange, axis=-1)

            # Jacobian |dr / dmu|_(mu_true), shape is `(n_data)`.
            jac = jnp.abs(distmod2dist_gradient(mu_true, self.Omega_m))
            # Jacobian |dr / dmu|_(mu_true_h), shape is `(n_data)`.
            jac = jnp.abs(distmod2dist_gradient(mu_true_h, self.Omega_m))

            # Calculate unnormalized log p(mu). Shape is (nsims, ndata)
            ll = (
                + jnp.log(jac)[None, :]
                + (2 * jnp.log(r_true) - self.log_r2_xrange_mean)[None, :]
                + alpha * log_density
            )
            ll = 0.0
            if self.with_homogeneous_malmquist:
                ll += (+ jnp.log(jac)
                       + (2 * jnp.log(r_true) - self.log_r2_xrange_mean))
            if self.with_inhomogeneous_malmquist:
                ll += alpha * log_density

            # Subtract the normalization. Shape remains (nsims, ndata)
            ll -= jnp.log(pnorm)
@@ -871,13 +890,13 @@ class PV_LogLikelihood(BaseFlowValidationModel):
            zobs *= 1 + vrad / SPEED_OF_LIGHT
            zobs -= 1.

            # Add the log-likelihood of observed redshifts. Shape remains
            # `(nsims, ndata)`
            ll += log_likelihood_zobs(
                self.z_obs[None, :], zobs, e2_cz[None, :])

            if self.with_absolute_calibration:
                raise NotImplementedError(
                    "Absolute calibration not implemented for this model. "
                    "Use `PV_LogLikelihood_NoDistMarg` instead.")
            if field_calibration_params["sample_h"]:
                ll += ll_calibration[None, :]

            return ll0 + jnp.sum(logsumexp(ll, axis=0)) + self.norm
@@ -58,7 +58,8 @@ class DataLoader:
    def __init__(self, simname, ksim, catalogue, catalogue_fpath, paths,
                 ksmooth=None, store_full_velocity=False, verbose=True):
        fprint("reading the catalogue,", verbose)
        self._cat = self._read_catalogue(catalogue, catalogue_fpath)
        self._cat, self._absmag_calibration = self._read_catalogue(
            catalogue, catalogue_fpath)
        self._catname = catalogue

        fprint("reading the interpolated field.", verbose)
@@ -132,6 +133,15 @@ class DataLoader:
        """The distance indicators catalogue (structured array)."""
        return self._cat[self._mask]

    @property
    def absmag_calibration(self):
        """Returns the absolute magnitude calibration with masking applied."""
        if self._absmag_calibration is None:
            return None

        return {key: val[:, self._mask]
                for key, val in self._absmag_calibration.items()}

    @property
    def catname(self):
        """Catalogue name."""
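A rough usage sketch of the new property. The simulation name appears in the scripts further below, but the catalogue file path, the `paths` object and the remaining arguments are hypothetical:

# Hypothetical example: load a Carrick 2MTF mock and inspect its calibration.
loader = DataLoader("Carrick2015", 0, "Carrick2MTFmock_42",
                    "/path/to/Carrick2MTFmock_seed42.hdf5", paths)

calib = loader.absmag_calibration
if calib is not None:
    # Each entry has shape (ncalibrators, nobjects), masked consistently
    # with loader.cat.
    print(calib["mu_calibration"].shape)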
@@ -187,6 +197,8 @@ class DataLoader:
            fpath = paths.field_los(simname, "Pantheon+")
        elif "CF4_TFR" in catalogue:
            fpath = paths.field_los(simname, "CF4_TFR")
        elif "Carrick2MTFmock" in catalogue:
            fpath = paths.field_los(simname, "2MTF")
        else:
            fpath = paths.field_los(simname, catalogue)
@@ -217,6 +229,8 @@ class DataLoader:
        return rdist, los_density, los_velocity

    def _read_catalogue(self, catalogue, catalogue_fpath):
        absmag_calibration = None

        if catalogue == "A2":
            with File(catalogue_fpath, 'r') as f:
                dtype = [(key, np.float32) for key in f.keys()]
@@ -262,6 +276,20 @@ class DataLoader:
                arr = np.empty(len(mock_data["RA"]), dtype=dtype)
                for key in mock_data.keys():
                    arr[key] = mock_data[key]
        elif "Carrick2MTFmock" in catalogue:
            with File(catalogue_fpath, 'r') as f:
                keys_skip = ["mu_calibration", "e_mu_calibration"]
                dtype = [(key, np.float32) for key in f.keys()
                         if key not in keys_skip]
                arr = np.empty(len(f["RA"]), dtype=dtype)
                for key in f.keys():
                    if key not in keys_skip:
                        arr[key] = f[key][:]

                absmag_calibration = {
                    "mu_calibration": f["mu_calibration"][...],
                    "e_mu_calibration": f["e_mu_calibration"][...]}

        elif "UPGLADE" in catalogue:
            with File(catalogue_fpath, 'r') as f:
                dtype = [(key, np.float32) for key in f.keys()]
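For orientation, a minimal sketch (not from the diff) of an HDF5 file this branch can read. The key names follow the reader above and `get_model` below; the file name and values are placeholders:

import numpy as np
from h5py import File

ngal, ncalib = 5, 2
with File("Carrick2MTFmock_seedX.hdf5", "w") as f:
    # Per-galaxy columns become fields of the structured catalogue array.
    for key in ["RA", "DEC", "z_CMB", "mag", "eta", "e_mag", "e_eta", "r"]:
        f.create_dataset(key, data=np.random.rand(ngal))
    # These two are skipped from the catalogue and returned separately,
    # with shape (ncalibrators, nobjects); NaN marks "no calibrator".
    f.create_dataset("mu_calibration", data=np.full((ncalib, ngal), np.nan))
    f.create_dataset("e_mu_calibration", data=np.full((ncalib, ngal), np.nan))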
@@ -286,7 +314,7 @@ class DataLoader:
        else:
            raise ValueError(f"Unknown catalogue: `{catalogue}`.")

        return arr
        return arr, absmag_calibration


###############################################################################
@@ -322,7 +350,7 @@ def radial_velocity_los(los_velocity, ra, dec):
def read_absolute_calibration(kind, data_length, calibration_fpath):
    """
    Read the absolute calibration for the CF4 TFR sample from LEDA but
    preprocessed by me.
    preprocessed by me. Missing values are replaced with NaN.

    Parameters
    ----------
@@ -335,13 +363,13 @@ def read_absolute_calibration(kind, data_length, calibration_fpath):

    Returns
    -------
    data : 3-dimensional array of shape (data_length, max_calib, 2)
    mu : 2-dimensional array of shape `(ncalib, ngalaxies)`
        Absolute calibration data.
    with_calibration : 1-dimensional array of shape (data_length)
        Whether the sample has a calibration.
    length_calibration : 1-dimensional array of shape (data_length)
        Number of calibration points per sample.
    e_mu : 2-dimensional array of shape `(ncalib, ngalaxies)`
        Uncertainties of the absolute calibration.
    """
    raise RuntimeError("The read-in functions are not guaranteed to work "
                       "properly.")
    data = {}
    with File(calibration_fpath, 'r') as f:
        for key in f[kind].keys():
@@ -355,14 +383,14 @@ def read_absolute_calibration(kind, data_length, calibration_fpath):
    max_calib = max(len(val) for val in data.values())

    out = np.full((data_length, max_calib, 2), np.nan)
    with_calibration = np.full(data_length, False)
    length_calibration = np.full(data_length, 0)
    for i in data.keys():
        out[int(i), :len(data[i]), :] = data[i]
        with_calibration[int(i)] = True
        length_calibration[int(i)] = len(data[i])

    return out, with_calibration, length_calibration
    # Unpack from this the distance modulus and its uncertainty.
    mu = out[:, :, 0].T
    e_mu = out[:, :, 1].T

    return mu, e_mu


def mask_fields(density, velocity, mask, return_none):
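A toy shape check of the unpacking above (numbers invented):

import numpy as np

data_length, max_calib = 4, 3
out = np.full((data_length, max_calib, 2), np.nan)
out[0, :2, :] = [[31.2, 0.05], [31.3, 0.06]]   # two calibrators for galaxy 0

mu, e_mu = out[:, :, 0].T, out[:, :, 1].T
print(mu.shape, e_mu.shape)   # (3, 4) each: (ncalib, ngalaxies), NaN where missing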
@@ -422,8 +450,9 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,

    loader._field_rdist = rdist

    if absolute_calibration is not None and "CF4_TFR_" not in kind:
        raise ValueError("Absolute calibration supported only for the CF4 TFR sample.")  # noqa
    if absolute_calibration is not None and not ("CF4_TFR_" in kind or "Carrick2MTFmock" in kind):  # noqa
        raise ValueError("Absolute calibration supported only for either "
                         "the CF4 TFR sample or Carrick 2MTF mocks.")

    if kind in ["LOSS", "Foundation"]:
        keys = ["RA", "DEC", "z_CMB", "mB", "x1", "c", "e_mB", "e_x1", "e_c"]
@@ -442,7 +471,7 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
        model = PV_LogLikelihood(
            los_overdensity, los_velocity,
            RA[mask], dec[mask], zCMB[mask], e_zCMB, calibration_params,
            None, mag_selection, loader.rdist, loader._Omega_m, "SN",
            mag_selection, loader.rdist, loader._Omega_m, "SN",
            name=kind, void_kwargs=void_kwargs,
            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
    elif "Pantheon+" in kind:
@@ -476,26 +505,62 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
        model = PV_LogLikelihood(
            los_overdensity, los_velocity,
            RA[mask], dec[mask], zCMB[mask], e_zCMB[mask], calibration_params,
            None, mag_selection, loader.rdist, loader._Omega_m, "SN",
            mag_selection, loader.rdist, loader._Omega_m, "SN",
            name=kind, void_kwargs=void_kwargs,
            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
    elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"] or "IndranilVoidTFRMock" in kind:  # noqa
    elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"] or "IndranilVoidTFRMock" in kind or "Carrick2MTFmock" in kind:  # noqa
        keys = ["RA", "DEC", "z_CMB", "mag", "eta", "e_mag", "e_eta"]
        RA, dec, zCMB, mag, eta, e_mag, e_eta = (loader.cat[k] for k in keys)

        mask = (zCMB < zcmb_max) & (zCMB > zcmb_min)
        if "Carrick2MTFmock" in kind:
            # For the mock we only want to select objects within the '2M++'
            # volume.
            mask &= loader.cat["r"] < 130
            # The mocks are generated without Malmquist.
            fprint("disabling homogeneous and inhomogeneous Malmquist bias for the mock.")  # noqa
            with_homogeneous_malmquist = False
            with_inhomogeneous_malmquist = False
        else:
            with_homogeneous_malmquist = True
            with_inhomogeneous_malmquist = True

        calibration_params = {"mag": mag[mask], "eta": eta[mask],
                              "e_mag": e_mag[mask], "e_eta": e_eta[mask]}

        # Append the calibration data
        if "Carrick2MTFmock" in kind:
            absmag_calibration = loader.absmag_calibration

            # The shape of these is `(ncalibrators, nobjects)`.
            mu_calibration = absmag_calibration["mu_calibration"][:, mask]
            e_mu_calibration = absmag_calibration["e_mu_calibration"][:, mask]
            # Auxiliary parameters.
            m = np.isfinite(mu_calibration)

            # NumPyro refuses to start if any inputs are not finite, so we
            # replace with some fictitious mean and very large standard
            # deviation.
            mu_calibration[~m] = 0.0
            e_mu_calibration[~m] = 1000.0

            calibration_params["mu_calibration"] = mu_calibration
            calibration_params["e_mu_calibration"] = e_mu_calibration
            calibration_params["is_finite_calibrator"] = m
            calibration_params["counts_calibrators"] = np.sum(m, axis=0)
            calibration_params["any_calibrator"] = np.any(m, axis=0)

        los_overdensity, los_velocity = mask_fields(
            los_overdensity, los_velocity, mask, void_kwargs is not None)

        model = PV_LogLikelihood(
            los_overdensity, los_velocity,
            RA[mask], dec[mask], zCMB[mask], None, calibration_params, None,
            RA[mask], dec[mask], zCMB[mask], None, calibration_params,
            mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind,
            void_kwargs=void_kwargs,
            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
            wo_num_dist_marginalisation=wo_num_dist_marginalisation,
            with_homogeneous_malmquist=with_homogeneous_malmquist,
            with_inhomogeneous_malmquist=with_inhomogeneous_malmquist)
    elif "CF4_TFR_" in kind:
        # The full name can be e.g. "CF4_TFR_not2MTForSFI_i" or "CF4_TFR_i".
        band = kind.split("_")[-1]
@@ -532,38 +597,39 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
        else:
            mask &= Qs == 5

        # Read the absolute calibration
        if absolute_calibration is not None:
            CF4_length = len(RA)
            distmod, with_calibration, length_calibration = read_absolute_calibration(  # noqa
                "Cepheids", CF4_length, calibration_fpath)

            distmod = distmod[mask]
            with_calibration = with_calibration[mask]
            length_calibration = length_calibration[mask]
            fprint(f"found {np.sum(with_calibration)} galaxies with absolute calibration.")  # noqa

            distmod = distmod[with_calibration]
            length_calibration = length_calibration[with_calibration]

            abs_calibration_params = {
                "calibration_distmod": distmod,
                "data_with_calibration": with_calibration,
                "length_calibration": length_calibration}
        else:
            abs_calibration_params = None

        calibration_params = {"mag": mag[mask], "eta": eta[mask],
                              "e_mag": e_mag[mask], "e_eta": e_eta[mask]}

        # Read the absolute calibration
        mu_calibration, e_mu_calibration = read_absolute_calibration(
            absolute_calibration, len(RA), calibration_fpath)

        # The shape of these is `(ncalibrators, nobjects)`.
        mu_calibration = mu_calibration[:, mask]
        e_mu_calibration = e_mu_calibration[:, mask]
        # Auxiliary parameters.
        m = np.isfinite(mu_calibration)

        # NumPyro refuses to start if any inputs are not finite, so we
        # replace with some fictitious mean and very large standard
        # deviation.
        mu_calibration[~m] = 0.0
        e_mu_calibration[~m] = 1000.0

        calibration_params["mu_calibration"] = mu_calibration
        calibration_params["e_mu_calibration"] = e_mu_calibration
        calibration_params["is_finite_calibrator"] = m
        calibration_params["counts_calibrators"] = np.sum(m, axis=0)
        calibration_params["any_calibrator"] = np.any(m, axis=0)

        los_overdensity, los_velocity = mask_fields(
            los_overdensity, los_velocity, mask, void_kwargs is not None)

        model = PV_LogLikelihood(
            los_overdensity, los_velocity,
            RA[mask], dec[mask], z_obs[mask], None, calibration_params,
            abs_calibration_params, mag_selection, loader.rdist,
            loader._Omega_m, "TFR", name=kind, void_kwargs=void_kwargs,
            mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind,
            void_kwargs=void_kwargs,
            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
    elif kind in ["CF4_GroupAll"]:
        # Note, this for some reason works terribly.
@@ -583,7 +649,7 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,

        model = PV_LogLikelihood(
            los_overdensity, los_velocity,
            RA[mask], dec[mask], zCMB[mask], None, calibration_params, None,
            RA[mask], dec[mask], zCMB[mask], None, calibration_params,
            mag_selection, loader.rdist, loader._Omega_m, "simple",
            name=kind, void_kwargs=void_kwargs,
            wo_num_dist_marginalisation=wo_num_dist_marginalisation)
|
150
csiborgtools/flow/mocks.py
Normal file
150
csiborgtools/flow/mocks.py
Normal file
|
@@ -0,0 +1,150 @@
# Copyright (C) 2024 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Mock data generators."""
import numpy as np

from ..field.interp import evaluate_cartesian_regular
from ..params import SPEED_OF_LIGHT
from ..utils import cartesian_to_radec, radec_to_cartesian, radec_to_galactic
from .cosmography import distmod2dist, distmod2redshift

###############################################################################
#                          Mock Quijote observations                          #
###############################################################################


def mock_Carrick2MTF(velocity_field, boxsize, RA_2MTF, DEC_2MTF,
                     a_TF=-22.8, b_TF=-7.2, sigma_TF=0.25, sigma_v=100.,
                     Vext=[150.0, 10.0, -100.0], h=1.0, beta=0.4,
                     mean_eta=0.069, std_eta=0.078, mean_e_eta=0.012,
                     mean_mag=10.31, std_mag=0.83, mean_e_mag=0.044,
                     sigma_calibration=0.05, calibration_max_percentile=10,
                     calibration_rand_fraction=0.5, nrepeat_calibration=1,
                     seed=42, Om0=0.3, verbose=True, **kwargs):
    """
    Mock TFR catalogue built against the Carrick velocity field and the
    2MTF sky distribution to avoid recomputing the LOS velocities.
    """
    nsamples = len(RA_2MTF)

    # Convert Vext from ICRS to Galactic coordinates.
    Vext = np.asarray(Vext).reshape(1, 3)
    Vext_mag, Vext_RA, Vext_DEC = cartesian_to_radec(Vext).reshape(-1, )
    Vext_l, Vext_b = radec_to_galactic(Vext_RA, Vext_DEC)
    Vext_galactic = np.asanyarray([Vext_mag, Vext_l, Vext_b]).reshape(1, 3)
    Vext = radec_to_cartesian(Vext_galactic).reshape(-1, )

    truths = {"a": a_TF, "b": b_TF, "e_mu": sigma_TF, "sigma_v": sigma_v,
              "Vext": Vext,
              "mean_eta": mean_eta, "std_eta": std_eta,
              "mean_mag": mean_mag, "std_mag": std_mag,
              }

    gen = np.random.default_rng(seed)

    # The Carrick box is in Galactic coordinates.
    l, b = radec_to_galactic(RA_2MTF, DEC_2MTF)
    gal_phi = np.deg2rad(l)
    gal_theta = np.pi / 2 - np.deg2rad(b)

    # Sample the linewidth of each galaxy from a Gaussian distribution to
    # mimic the MNR procedure.
    eta_true = gen.normal(mean_eta, std_eta, nsamples)
    eta_obs = gen.normal(eta_true, mean_e_eta)

    # Subtract the mean of the observed linewidths, so that they are
    # centered around zero. For consistency subtract from both observed
    # and true values.
    eta_mean_sampled = np.mean(eta_obs)
    eta_true -= eta_mean_sampled
    eta_obs -= eta_mean_sampled

    # Sample the magnitude from some Gaussian distribution to replicate MNR.
    mag_true = gen.normal(mean_mag, std_mag, nsamples)
    mag_obs = gen.normal(mag_true, mean_e_mag)

    # Calculate the 'true' distance modulus and redshift from the TFR
    # distance. If h != 1, then these distance moduli are in physical units.
    mu_TFR = mag_true - (a_TF + b_TF * eta_true)
    mu_true = gen.normal(mu_TFR, sigma_TF)
    # This is the distance modulus in units of little h.
    mu_true_h = mu_true + 5 * np.log10(h)

    # Select a fraction of nearby galaxies.
    mu_max = np.percentile(mu_true, calibration_max_percentile)
    ks = np.where(mu_true < mu_max)[0]
    nsel = int(calibration_rand_fraction * len(ks))
    if verbose:
        print(f"Assigning calibration to {nsel}/{nsamples} galaxies.")

    mu_calibration = np.full((nrepeat_calibration, nsamples), np.nan)
    e_mu_calibration = np.full((nrepeat_calibration, nsamples), np.nan)

    for n in range(nrepeat_calibration):
        ks_n = gen.choice(ks, nsel, replace=False)

        mu_calibration[n, ks_n] = gen.normal(mu_true[ks_n], sigma_calibration)
        e_mu_calibration[n, ks_n] = np.ones(len(ks_n)) * sigma_calibration

    # Convert the true distance modulus to true distance and cosmological
    # redshift. The distance is in Mpc/h because the box is in Mpc/h.
    r = distmod2dist(mu_true_h, Om0)
    zcosmo = distmod2redshift(mu_true_h, Om0)

    # Calculate the Cartesian coordinates of each galaxy. This is initially
    # centered at (0, 0, 0).
    pos = r * np.asarray([
        np.sin(gal_theta) * np.cos(gal_phi),
        np.sin(gal_theta) * np.sin(gal_phi),
        np.cos(gal_theta)])
    pos = pos.T
    pos_box = pos / boxsize + 0.5

    vel = evaluate_cartesian_regular(
        velocity_field[0], velocity_field[1], velocity_field[2],
        pos=pos_box, smooth_scales=None, method="cubic")
    vel = beta * np.vstack(vel).T

    for i in range(3):
        vel[:, i] += Vext[i]

    # Compute the radial velocity.
    Vr = np.sum(vel * pos, axis=1) / np.linalg.norm(pos, axis=1)

    # The true redshift of the source.
    zCMB_true = (1 + zcosmo) * (1 + Vr / SPEED_OF_LIGHT) - 1
    zCMB_obs = gen.normal(zCMB_true, sigma_v / SPEED_OF_LIGHT)

    # These galaxies will be masked out when the LOS is read in because they
    # are too far away.
    distance_mask = r < 125
    truths["distance_mask"] = distance_mask

    sample = {"RA": RA_2MTF,
              "DEC": DEC_2MTF,
              "z_CMB": zCMB_obs,
              "eta": eta_obs,
              "mag": mag_obs,
              "e_eta": np.ones(nsamples) * mean_e_eta,
              "e_mag": np.ones(nsamples) * mean_e_mag,
              "mu_true": mu_true,
              "mu_TFR": mu_TFR,
              "mu_calibration": mu_calibration,
              "e_mu_calibration": e_mu_calibration,
              "r": r,
              }

    return sample, truths
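A hedged sketch of how this generator might be driven and its output written in the layout the `DataLoader` expects. The velocity-field file, box size and 2MTF coordinate files below are placeholders; only `mock_Carrick2MTF`, its defaults, and the `sample` keys come from the diff:

import numpy as np
from h5py import File

from csiborgtools.flow import mock_Carrick2MTF

# Placeholders: a (3, ngrid, ngrid, ngrid) Carrick velocity field and its
# box size in Mpc/h, plus the observed 2MTF sky positions.
velocity_field = np.load("carrick_velocity_field.npy")      # hypothetical path
boxsize = 400.0                                              # assumed box size
RA_2MTF = np.load("2MTF_RA.npy")
DEC_2MTF = np.load("2MTF_DEC.npy")

sample, truths = mock_Carrick2MTF(
    velocity_field, boxsize, RA_2MTF, DEC_2MTF, seed=42)

# Write the mock with one dataset per key, as the reader above expects.
with File("Carrick2MTFmock_seed42.hdf5", "w") as f:
    for key, val in sample.items():
        f.create_dataset(key, data=val)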
notebooks/flow/mock_carrick.ipynb (new file, 530 lines added; file diff suppressed because one or more lines are too long)
@@ -116,6 +116,9 @@ def get_models(ksim, get_model_kwargs, mag_selection, void_kwargs,
                     "Pantheon+_groups", "Pantheon+_groups_zSN",
                     "Pantheon+_zSN"]:
        fpath = join(folder, "PV_compilation.hdf5")
    elif "Carrick2MTFmock" in cat:
        ki = cat.split("_")[-1]
        fpath = f"/mnt/extraspace/rstiskalek/csiborg_postprocessing/flow_mock/Carrick2MTFmock_seed{ki}.hdf5"  # noqa
    elif "CF4_TFR" in cat:
        fpath = join(folder, "PV/CF4/CF4_TF-distances.hdf5")
    elif cat in ["CF4_GroupAll"]:
@@ -186,7 +189,7 @@ def run_model(model, nsteps, nburn, model_kwargs, out_folder,
        neg_ln_evidence_err = (jax.numpy.nan, jax.numpy.nan)

    fname = join(out_folder, fname)
    print(f"Saving results to `{fname}`.")
    print(f"Saving results: `{fname}`.")
    with File(fname, "w") as f:
        # Write samples
        grp = f.create_group("samples")
@@ -203,15 +206,22 @@ def run_model(model, nsteps, nburn, model_kwargs, out_folder,
        grp.create_dataset("neg_lnZ_harmonic", data=neg_ln_evidence)
        grp.create_dataset("neg_lnZ_harmonic_err", data=neg_ln_evidence_err)

    fname_summary = fname.replace(".hdf5", ".txt")
    print(f"Saving summary to `{fname_summary}`.")
    with open(fname_summary, 'w') as f:
    fname_config = fname.replace(".hdf5", "_config.txt")
    print(f"Saving configuration: `{fname_config}`.")
    with open(fname_config, 'w') as f:
        original_stdout = sys.stdout
        sys.stdout = f

        print("User parameters:")
        for kwargs in kwargs_print:
            print_variables(kwargs.keys(), kwargs.values())
        sys.stdout = original_stdout

    fname_summary = fname.replace(".hdf5", "_summary.txt")
    print(f"Saving summary: `{fname_summary}`.")
    with open(fname_summary, 'w') as f:
        original_stdout = sys.stdout
        sys.stdout = f

        print("HMC summary:")
        print(f"{'BIC':<20} {BIC}")
@@ -238,7 +248,7 @@ def get_distmod_hyperparams(catalogue, sample_alpha, sample_mag_dipole):
                "alpha_min": alpha_min, "alpha_max": alpha_max,
                "sample_alpha": sample_alpha
                }
    elif catalogue in ["SFI_gals", "2MTF"] or "CF4_TFR" in catalogue or "IndranilVoidTFRMock" in catalogue:  # noqa
    elif catalogue in ["SFI_gals", "2MTF"] or "CF4_TFR" in catalogue or "IndranilVoidTFRMock" in catalogue or "Carrick2MTFmock" in catalogue:  # noqa
        return {"e_mu_min": 0.001, "e_mu_max": 1.0,
                "a_mean": -21., "a_std": 5.0,
                "b_mean": -5.95, "b_std": 4.0,
@@ -247,6 +257,7 @@ def get_distmod_hyperparams(catalogue, sample_alpha, sample_mag_dipole):
                "sample_a_dipole": sample_mag_dipole,
                "alpha_min": alpha_min, "alpha_max": alpha_max,
                "sample_alpha": sample_alpha,
                "sample_curvature": False if "Carrick2MTFmock" in catalogue else True,  # noqa
                }
    elif catalogue in ["CF4_GroupAll"]:
        return {"e_mu_min": 0.001, "e_mu_max": 1.0,
@@ -294,8 +305,8 @@ if __name__ == "__main__":
    ###########################################################################

    # `None` means default behaviour
    nsteps = 2_000
    nburn = 2_000
    nsteps = 1_500
    nburn = 1_500
    zcmb_min = None
    zcmb_max = 0.05
    nchains_harmonic = 10
@@ -310,9 +321,16 @@ if __name__ == "__main__":
    sample_mag_dipole = False
    wo_num_dist_marginalisation = False
    absolute_calibration = None
    calculate_harmonic = (False if inference_method == "bayes" else True) and (not wo_num_dist_marginalisation)  # noqa
    calculate_harmonic = (False if (inference_method == "bayes") else True) and (not wo_num_dist_marginalisation)  # noqa
    sample_h = True if absolute_calibration is not None else False

    # These mocks are generated without a density field, so there is no
    # inhomogeneous Malmquist and we also do not need evidences.
    for catalogue in ARGS.catalogue:
        if "Carrick2MTFmock" in catalogue:
            sample_alpha = False
            calculate_harmonic = False

    fname_kwargs = {"inference_method": inference_method,
                    "smooth": ARGS.ksmooth,
                    "nsim": ARGS.ksim,
@@ -375,7 +393,7 @@ if __name__ == "__main__":
                    "Vmono_min": -1000, "Vmono_max": 1000,
                    "beta_min": -10.0, "beta_max": 10.0,
                    "sigma_v_min": 1.0, "sigma_v_max": 1000 if "IndranilVoid_" in ARGS.simname else 750.,  # noqa
                    "h_min": 0.01, "h_max": 5.0,
                    "h_min": 0.25, "h_max": 1.,
                    "no_Vext": False if no_Vext is None else no_Vext,  # noqa
                    "sample_Vmag_vax": sample_Vmag_vax,
                    "sample_Vmono": sample_Vmono,
@@ -38,10 +38,10 @@ fi

# for simname in "IndranilVoid_exp" "IndranilVoid_gauss" "IndranilVoid_mb"; do
for simname in "IndranilVoid_exp"; do
for simname in "Carrick2015"; do
# for simname in "Carrick2015" "Lilow2024" "csiborg2_main" "csiborg2X" "manticore_2MPP_N128_DES_V1" "CF4" "CLONES"; do
    # for catalogue in "LOSS" "Foundation" "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do
    for catalogue in "2MTF"; do
    for catalogue in "CF4_TFR_w1"; do
    # for catalogue in "CF4_TFR_i" "CF4_TFR_w1"; do
    # for catalogue in "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do
        for ksim in "none"; do