Mocks with absolute calibration (#156)

* Rename nb

* Add Carrick 2MTF mocks

* Add more 2MTF mock support

* Add mocks generator

* Add mock gen nb

* Control over MAlmquist

* Control over Malmquist

* Update imports

* Update script

* Add H0 to TFR mocks

* Clear up saving

* Add h sampling

* Update saving

* Update mocks

* Add calibration to mocks

* More support to read in absmag

* Add absmag

* Update script
This commit is contained in:
Richard Stiskalek 2024-10-09 20:41:36 +02:00 committed by GitHub
parent 76a7609f7f
commit 4167ba4fb1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 902 additions and 118 deletions

View file

@ -17,6 +17,7 @@ from .io import (DataLoader, get_model, read_absolute_calibration,
from .flow_model import (PV_LogLikelihood, PV_validation_model, dist2redshift, # noqa
Observed2CosmologicalRedshift, predict_zobs, # noqa
project_Vext, stack_pzosmo_over_realizations) # noqa
from .mocks import mock_Carrick2MTF # noqa
from .selection import ToyMagnitudeSelection # noqa
from .void_model import (load_void_data, interpolate_void, select_void_h, # noqa
mock_void) # noqa

View file

@ -170,28 +170,6 @@ class BaseFlowValidationModel(ABC):
self._setattr_as_jax(names, values)
def _set_abs_calibration_params(self, abs_calibration_params):
self.with_absolute_calibration = abs_calibration_params is not None
if abs_calibration_params is None:
self.with_absolute_calibration = False
return
self.calibration_distmod = jnp.asarray(
abs_calibration_params["calibration_distmod"][..., 0])
self.calibration_edistmod = jnp.asarray(
abs_calibration_params["calibration_distmod"][..., 1])
self.data_with_calibration = jnp.asarray(
abs_calibration_params["data_with_calibration"])
self.data_wo_calibration = ~self.data_with_calibration
# Calculate the log of the number of calibrators. Where there is no
# calibrator set the number of calibrators to 1 to avoid log(0) and
# this way only zeros are being added.
length_calibration = abs_calibration_params["length_calibration"]
length_calibration[length_calibration == 0] = 1
self.log_length_calibration = jnp.log(length_calibration)
def _set_radial_spacing(self, r_xrange, Omega_m):
cosmo = FlatLambdaCDM(H0=H0, Om0=Omega_m)
@ -356,7 +334,8 @@ def e2_distmod_TFR(e2_mag, e2_eta, eta, b, c, e_mu_intrinsic):
def sample_TFR(e_mu_min, e_mu_max, a_mean, a_std, b_mean, b_std,
c_mean, c_std, alpha_min, alpha_max, sample_alpha,
a_dipole_mean, a_dipole_std, sample_a_dipole, name):
a_dipole_mean, a_dipole_std, sample_a_dipole,
sample_curvature, name):
"""Sample Tully-Fisher calibration parameters."""
e_mu = sample(f"e_mu_{name}", Uniform(e_mu_min, e_mu_max))
a = sample(f"a_{name}", Normal(a_mean, a_std))
@ -367,7 +346,11 @@ def sample_TFR(e_mu_min, e_mu_max, a_mean, a_std, b_mean, b_std,
ax, ay, az = 0.0, 0.0, 0.0
b = sample(f"b_{name}", Normal(b_mean, b_std))
c = sample(f"c_{name}", Normal(c_mean, c_std))
if sample_curvature:
c = sample(f"c_{name}", Normal(c_mean, c_std))
else:
c = 0.
alpha = sample_alpha_bias(name, alpha_min, alpha_max, sample_alpha)
@ -495,8 +478,6 @@ class PV_LogLikelihood(BaseFlowValidationModel):
Errors on the observed redshifts.
calibration_params : dict
Calibration parameters of each object.
abs_calibration_params : dict
Absolute calibration parameters.
mag_selection : dict
Magnitude selection parameters, optional.
r_xrange : 1-dimensional array
@ -513,12 +494,17 @@ class PV_LogLikelihood(BaseFlowValidationModel):
Whether to directly sample the distance without numerical
marginalisation. in which case the tracers can be coupled by a
covariance matrix. By default `False`.
with_homogeneous_malmquist : bool, optional
Whether to include the homogeneous Malmquist bias. By default `True`.
with_inhomogeneous_malmquist : bool, optional
Whether to include the inhomogeneous Malmquist bias. By default `True`.
"""
def __init__(self, los_density, los_velocity, RA, dec, z_obs, e_zobs,
calibration_params, abs_calibration_params, mag_selection,
r_xrange, Omega_m, kind, name, void_kwargs=None,
wo_num_dist_marginalisation=False):
calibration_params, mag_selection, r_xrange, Omega_m, kind,
name, void_kwargs=None, wo_num_dist_marginalisation=False,
with_homogeneous_malmquist=True,
with_inhomogeneous_malmquist=True):
if e_zobs is not None:
e2_cz_obs = jnp.asarray((SPEED_OF_LIGHT * e_zobs)**2)
else:
@ -550,13 +536,14 @@ class PV_LogLikelihood(BaseFlowValidationModel):
self._setattr_as_jax(names, values)
self._set_calibration_params(calibration_params)
self._set_abs_calibration_params(abs_calibration_params)
self._set_radial_spacing(r_xrange, Omega_m)
self.kind = kind
self.name = name
self.Omega_m = Omega_m
self.wo_num_dist_marginalisation = wo_num_dist_marginalisation
self.with_homogeneous_malmquist = with_homogeneous_malmquist
self.with_inhomogeneous_malmquist = with_inhomogeneous_malmquist
self.norm = - self.ndata * jnp.log(self.num_sims)
if mag_selection is not None:
@ -771,9 +758,14 @@ class PV_LogLikelihood(BaseFlowValidationModel):
"marginalising the true distance.")
# Calculate p(r) (Malmquist bias). Shape is (ndata, nxrange)
log_ptilde = log_ptilde_wo_bias(
self.mu_xrange[None, :], mu[:, None], e2_mu[:, None],
self.log_r2_xrange[None, :])
if self.with_homogeneous_malmquist:
log_ptilde = log_ptilde_wo_bias(
self.mu_xrange[None, :], mu[:, None], e2_mu[:, None],
self.log_r2_xrange[None, :])
else:
log_ptilde = log_ptilde_wo_bias(
self.mu_xrange[None, :], mu[:, None], e2_mu[:, None],
0.)
if self.is_void_data:
rLG = field_calibration_params["rLG"]
@ -785,7 +777,9 @@ class PV_LogLikelihood(BaseFlowValidationModel):
# Inhomogeneous Malmquist bias. Shape: (nsims, ndata, nxrange)
alpha = distmod_params["alpha"]
log_ptilde = log_ptilde[None, ...] + alpha * log_los_density
log_ptilde = log_ptilde[None, ...]
if self.with_inhomogeneous_malmquist:
log_ptilde += alpha * log_los_density
ptilde = jnp.exp(log_ptilde)
# Normalization of p(r). Shape: (nsims, ndata)
@ -802,29 +796,51 @@ class PV_LogLikelihood(BaseFlowValidationModel):
ptilde *= likelihood_zobs(
self.z_obs[None, :, None], zobs, e2_cz[None, :, None])
if self.with_absolute_calibration:
raise NotImplementedError(
"Absolute calibration not implemented for this model. "
"Use `PV_LogLikelihood_NoDistMarg` instead.")
# Integrate over the radial distance. Shape: (nsims, ndata)
ll = jnp.log(simpson(ptilde, x=self.r_xrange, axis=-1))
ll -= jnp.log(pnorm)
return ll0 + jnp.sum(logsumexp(ll, axis=0)) + self.norm
else:
if field_calibration_params["sample_h"]:
raise NotImplementedError(
"Sampling of h is not yet implemented.")
e_mu = jnp.sqrt(e2_mu)
# True distance modulus, shape is `(n_data)``
# True distance modulus, shape is `(n_data)`. If we have absolute
# calibration, then this distance modulus assumes a particular h.
with plate("plate_mu", self.ndata):
mu_true = sample("mu", Normal(mu, e_mu))
# True distance and redshift, shape is `(n_data)`.
r_true = distmod2dist(mu_true, self.Omega_m)
z_true = distmod2redshift(mu_true, self.Omega_m)
# Likelihood of the true distance modulii given the calibration.
if field_calibration_params["sample_h"]:
raise RuntimeError(
"Sampling of 'h' has not yet been thoroughly tested.")
h = field_calibration_params["h"]
# Now, the rest of the code except the calibration likelihood
# uses the distance modulus in units of h
mu_true_h = mu_true + 5 * jnp.log10(h)
# Calculate the log-likelihood of the calibration, but the
# shape is `(n_calibrators, n_data)`. Where there is no data
# we set the likelihood to 0 (or the log-likelihood to -inf)
ll_calibration = jnp.where(
self.is_finite_calibrator,
normal_logpdf(self.mu_calibration, mu_true[None, :],
self.e_mu_calibration),
-jnp.inf)
# Now average out over the calibrators, however only if the
# there is at least one calibrator. If there isn't, then we
# just assing a log-likelihood of 0.
ll_calibration = jnp.where(
self.any_calibrator,
logsumexp(ll_calibration, axis=0) - jnp.log(self.counts_calibrators), # noqa
0.)
else:
mu_true_h = mu_true
# True distance and redshift, shape is `(n_data)`. The distance
# here is in units of `Mpc / h``.
r_true = distmod2dist(mu_true_h, self.Omega_m)
z_true = distmod2redshift(mu_true_h, self.Omega_m)
if self.is_void_data:
raise NotImplementedError(
@ -840,26 +856,29 @@ class PV_LogLikelihood(BaseFlowValidationModel):
alpha = distmod_params["alpha"]
# Normalisation of p(mu), shape is `(n_sims, n_data, n_rad)`
pnorm = (
+ self.log_r2_xrange[None, None, :]
+ alpha * log_los_density_grid
+ normal_logpdf(
self.mu_xrange[None, :], mu[:, None], e_mu[:, None])[None, ...]) # noqa
pnorm = normal_logpdf(
self.mu_xrange[None, :], mu[:, None], e_mu[:, None])[None, ...]
if self.with_homogeneous_malmquist:
pnorm += self.log_r2_xrange[None, None, :]
if self.with_inhomogeneous_malmquist:
pnorm += alpha * log_los_density_grid
pnorm = jnp.exp(pnorm)
# Now integrate over the radial steps. Shape is `(nsims, ndata)`.
# No Jacobian here because I integrate over distance, not the
# distance modulus.
pnorm = simpson(pnorm, x=self.r_xrange, axis=-1)
# Jacobian |dr / dmu|_(mu_true), shape is `(n_data)`.
jac = jnp.abs(distmod2dist_gradient(mu_true, self.Omega_m))
# Jacobian |dr / dmu|_(mu_true_h), shape is `(n_data)`.
jac = jnp.abs(distmod2dist_gradient(mu_true_h, self.Omega_m))
# Calculate unnormalized log p(mu). Shape is (nsims, ndata)
ll = (
+ jnp.log(jac)[None, :]
+ (2 * jnp.log(r_true) - self.log_r2_xrange_mean)[None, :]
+ alpha * log_density
)
ll = 0.0
if self.with_homogeneous_malmquist:
ll += (+ jnp.log(jac)
+ (2 * jnp.log(r_true) - self.log_r2_xrange_mean))
if self.with_inhomogeneous_malmquist:
ll += alpha * log_density
# Subtract the normalization. Shape remains (nsims, ndata)
ll -= jnp.log(pnorm)
@ -871,13 +890,13 @@ class PV_LogLikelihood(BaseFlowValidationModel):
zobs *= 1 + vrad / SPEED_OF_LIGHT
zobs -= 1.
# Add the log-likelihood of observed redshifts. Shape remains
# `(nsims, ndata)`
ll += log_likelihood_zobs(
self.z_obs[None, :], zobs, e2_cz[None, :])
if self.with_absolute_calibration:
raise NotImplementedError(
"Absolute calibration not implemented for this model. "
"Use `PV_LogLikelihood_NoDistMarg` instead.")
if field_calibration_params["sample_h"]:
ll += ll_calibration[None, :]
return ll0 + jnp.sum(logsumexp(ll, axis=0)) + self.norm

View file

@ -58,7 +58,8 @@ class DataLoader:
def __init__(self, simname, ksim, catalogue, catalogue_fpath, paths,
ksmooth=None, store_full_velocity=False, verbose=True):
fprint("reading the catalogue,", verbose)
self._cat = self._read_catalogue(catalogue, catalogue_fpath)
self._cat, self._absmag_calibration = self._read_catalogue(
catalogue, catalogue_fpath)
self._catname = catalogue
fprint("reading the interpolated field.", verbose)
@ -132,6 +133,15 @@ class DataLoader:
"""The distance indicators catalogue (structured array)."""
return self._cat[self._mask]
@property
def absmag_calibration(self):
"""Returns the absolute magnitude calibration with masking applied."""
if self._absmag_calibration is None:
return None
return {key: val[:, self._mask]
for key, val in self._absmag_calibration.items()}
@property
def catname(self):
"""Catalogue name."""
@ -187,6 +197,8 @@ class DataLoader:
fpath = paths.field_los(simname, "Pantheon+")
elif "CF4_TFR" in catalogue:
fpath = paths.field_los(simname, "CF4_TFR")
elif "Carrick2MTFmock" in catalogue:
fpath = paths.field_los(simname, "2MTF")
else:
fpath = paths.field_los(simname, catalogue)
@ -217,6 +229,8 @@ class DataLoader:
return rdist, los_density, los_velocity
def _read_catalogue(self, catalogue, catalogue_fpath):
absmag_calibration = None
if catalogue == "A2":
with File(catalogue_fpath, 'r') as f:
dtype = [(key, np.float32) for key in f.keys()]
@ -262,6 +276,20 @@ class DataLoader:
arr = np.empty(len(mock_data["RA"]), dtype=dtype)
for key in mock_data.keys():
arr[key] = mock_data[key]
elif "Carrick2MTFmock" in catalogue:
with File(catalogue_fpath, 'r') as f:
keys_skip = ["mu_calibration", "e_mu_calibration"]
dtype = [(key, np.float32) for key in f.keys()
if key not in keys_skip]
arr = np.empty(len(f["RA"]), dtype=dtype)
for key in f.keys():
if key not in keys_skip:
arr[key] = f[key][:]
absmag_calibration = {
"mu_calibration": f["mu_calibration"][...],
"e_mu_calibration": f["e_mu_calibration"][...]}
elif "UPGLADE" in catalogue:
with File(catalogue_fpath, 'r') as f:
dtype = [(key, np.float32) for key in f.keys()]
@ -286,7 +314,7 @@ class DataLoader:
else:
raise ValueError(f"Unknown catalogue: `{catalogue}`.")
return arr
return arr, absmag_calibration
###############################################################################
@ -322,7 +350,7 @@ def radial_velocity_los(los_velocity, ra, dec):
def read_absolute_calibration(kind, data_length, calibration_fpath):
"""
Read the absolute calibration for the CF4 TFR sample from LEDA but
preprocessed by me.
preprocessed by me. Missing values are replaced with NaN.
Parameters
----------
@ -335,13 +363,13 @@ def read_absolute_calibration(kind, data_length, calibration_fpath):
Returns
-------
data : 3-dimensional array of shape (data_length, max_calib, 2)
mu : 2-dimensional array of shape `(ncalib, ngalaxies)`
Absolute calibration data.
with_calibration : 1-dimensional array of shape (data_length)
Whether the sample has a calibration.
length_calibration : 1-dimensional array of shape (data_length)
Number of calibration points per sample.
e_mu : 2-dimensional array of shape `(ncalib, ngalaxies)`
Uncertainties of the absolute calibration.
"""
raise RuntimeError("The read-in functions are not guaranteed to work "
"properly.")
data = {}
with File(calibration_fpath, 'r') as f:
for key in f[kind].keys():
@ -355,14 +383,14 @@ def read_absolute_calibration(kind, data_length, calibration_fpath):
max_calib = max(len(val) for val in data.values())
out = np.full((data_length, max_calib, 2), np.nan)
with_calibration = np.full(data_length, False)
length_calibration = np.full(data_length, 0)
for i in data.keys():
out[int(i), :len(data[i]), :] = data[i]
with_calibration[int(i)] = True
length_calibration[int(i)] = len(data[i])
return out, with_calibration, length_calibration
# Unpack from this the distsance modulus and its uncertainty.
mu = out[:, :, 0].T
e_mu = out[:, :, 1].T
return mu, e_mu
def mask_fields(density, velocity, mask, return_none):
@ -422,8 +450,9 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
loader._field_rdist = rdist
if absolute_calibration is not None and "CF4_TFR_" not in kind:
raise ValueError("Absolute calibration supported only for the CF4 TFR sample.") # noqa
if absolute_calibration is not None and not ("CF4_TFR_" in kind or "Carrick2MTFmock" in kind): # noqa
raise ValueError("Absolute calibration supported only for either "
"the CF4 TFR sample or Carrick 2MTF mocks.")
if kind in ["LOSS", "Foundation"]:
keys = ["RA", "DEC", "z_CMB", "mB", "x1", "c", "e_mB", "e_x1", "e_c"]
@ -442,7 +471,7 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
model = PV_LogLikelihood(
los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], e_zCMB, calibration_params,
None, mag_selection, loader.rdist, loader._Omega_m, "SN",
mag_selection, loader.rdist, loader._Omega_m, "SN",
name=kind, void_kwargs=void_kwargs,
wo_num_dist_marginalisation=wo_num_dist_marginalisation)
elif "Pantheon+" in kind:
@ -476,26 +505,62 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
model = PV_LogLikelihood(
los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], e_zCMB[mask], calibration_params,
None, mag_selection, loader.rdist, loader._Omega_m, "SN",
mag_selection, loader.rdist, loader._Omega_m, "SN",
name=kind, void_kwargs=void_kwargs,
wo_num_dist_marginalisation=wo_num_dist_marginalisation)
elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"] or "IndranilVoidTFRMock" in kind: # noqa
elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"] or "IndranilVoidTFRMock" in kind or "Carrick2MTFmock" in kind: # noqa
keys = ["RA", "DEC", "z_CMB", "mag", "eta", "e_mag", "e_eta"]
RA, dec, zCMB, mag, eta, e_mag, e_eta = (loader.cat[k] for k in keys)
mask = (zCMB < zcmb_max) & (zCMB > zcmb_min)
if "Carrick2MTFmock" in kind:
# For the mock we only want to select objects with the '2M++'
# volume.
mask &= loader.cat["r"] < 130
# The mocks are generated without Malmquist.
fprint("disabling homogeneous and inhomogeneous Malmquist bias for the mock.") # noqa
with_homogeneous_malmquist = False
with_inhomogeneous_malmquist = False
else:
with_homogeneous_malmquist = True
with_inhomogeneous_malmquist = True
calibration_params = {"mag": mag[mask], "eta": eta[mask],
"e_mag": e_mag[mask], "e_eta": e_eta[mask]}
# Append the calibration data
if "Carrick2MTFmock" in kind:
absmag_calibration = loader.absmag_calibration
# The shape of these is (`ncalibrators, nobjects`).
mu_calibration = absmag_calibration["mu_calibration"][:, mask]
e_mu_calibration = absmag_calibration["e_mu_calibration"][:, mask]
# Auxiliary parameters.
m = np.isfinite(mu_calibration)
# NumPyro refuses to start if any inputs are not finite, so we
# replace with some ficutive mean and very large standard
# deviation.
mu_calibration[~m] = 0.0
e_mu_calibration[~m] = 1000.0
calibration_params["mu_calibration"] = mu_calibration
calibration_params["e_mu_calibration"] = e_mu_calibration
calibration_params["is_finite_calibrator"] = m
calibration_params["counts_calibrators"] = np.sum(m, axis=0)
calibration_params["any_calibrator"] = np.any(m, axis=0)
los_overdensity, los_velocity = mask_fields(
los_overdensity, los_velocity, mask, void_kwargs is not None)
model = PV_LogLikelihood(
los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], None, calibration_params, None,
RA[mask], dec[mask], zCMB[mask], None, calibration_params,
mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind,
void_kwargs=void_kwargs,
wo_num_dist_marginalisation=wo_num_dist_marginalisation)
wo_num_dist_marginalisation=wo_num_dist_marginalisation,
with_homogeneous_malmquist=with_homogeneous_malmquist,
with_inhomogeneous_malmquist=with_inhomogeneous_malmquist)
elif "CF4_TFR_" in kind:
# The full name can be e.g. "CF4_TFR_not2MTForSFI_i" or "CF4_TFR_i".
band = kind.split("_")[-1]
@ -532,38 +597,39 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
else:
mask &= Qs == 5
# Read the absolute calibration
if absolute_calibration is not None:
CF4_length = len(RA)
distmod, with_calibration, length_calibration = read_absolute_calibration( # noqa
"Cepheids", CF4_length, calibration_fpath)
distmod = distmod[mask]
with_calibration = with_calibration[mask]
length_calibration = length_calibration[mask]
fprint(f"found {np.sum(with_calibration)} galaxies with absolute calibration.") # noqa
distmod = distmod[with_calibration]
length_calibration = length_calibration[with_calibration]
abs_calibration_params = {
"calibration_distmod": distmod,
"data_with_calibration": with_calibration,
"length_calibration": length_calibration}
else:
abs_calibration_params = None
calibration_params = {"mag": mag[mask], "eta": eta[mask],
"e_mag": e_mag[mask], "e_eta": e_eta[mask]}
# Read the absolute calibration
mu_calibration, e_mu_calibration = read_absolute_calibration(
absolute_calibration, len(RA), calibration_fpath)
# The shape of these is (`ncalibrators, nobjects`).
mu_calibration = mu_calibration[:, mask]
e_mu_calibration = e_mu_calibration[:, mask]
# Auxiliary parameters.
m = np.isfinite(mu_calibration)
# NumPyro refuses to start if any inputs are not finite, so we
# replace with some ficutive mean and very large standard
# deviation.
mu_calibration[~m] = 0.0
e_mu_calibration[~m] = 1000.0
calibration_params["mu_calibration"] = mu_calibration
calibration_params["e_mu_calibration"] = e_mu_calibration
calibration_params["is_finite_calibrator"] = m
calibration_params["counts_calibrators"] = np.sum(m, axis=0)
calibration_params["any_calibrator"] = np.any(m, axis=0)
los_overdensity, los_velocity = mask_fields(
los_overdensity, los_velocity, mask, void_kwargs is not None)
model = PV_LogLikelihood(
los_overdensity, los_velocity,
RA[mask], dec[mask], z_obs[mask], None, calibration_params,
abs_calibration_params, mag_selection, loader.rdist,
loader._Omega_m, "TFR", name=kind, void_kwargs=void_kwargs,
mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind,
void_kwargs=void_kwargs,
wo_num_dist_marginalisation=wo_num_dist_marginalisation)
elif kind in ["CF4_GroupAll"]:
# Note, this for some reason works terribly.
@ -583,7 +649,7 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
model = PV_LogLikelihood(
los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], None, calibration_params, None,
RA[mask], dec[mask], zCMB[mask], None, calibration_params,
mag_selection, loader.rdist, loader._Omega_m, "simple",
name=kind, void_kwargs=void_kwargs,
wo_num_dist_marginalisation=wo_num_dist_marginalisation)

150
csiborgtools/flow/mocks.py Normal file
View file

@ -0,0 +1,150 @@
# Copyright (C) 2024 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Mock data generators."""
import numpy as np
from ..field.interp import evaluate_cartesian_regular
from ..params import SPEED_OF_LIGHT
from ..utils import cartesian_to_radec, radec_to_cartesian, radec_to_galactic
from .cosmography import distmod2dist, distmod2redshift
###############################################################################
# Mock Quijote observations #
###############################################################################
def mock_Carrick2MTF(velocity_field, boxsize, RA_2MTF, DEC_2MTF,
a_TF=-22.8, b_TF=-7.2, sigma_TF=0.25, sigma_v=100.,
Vext=[150.0, 10.0, -100.0], h=1.0, beta=0.4,
mean_eta=0.069, std_eta=0.078, mean_e_eta=0.012,
mean_mag=10.31, std_mag=0.83, mean_e_mag=0.044,
sigma_calibration=0.05, calibration_max_percentile=10,
calibration_rand_fraction=0.5, nrepeat_calibration=1,
seed=42, Om0=0.3, verbose=True, **kwargs):
"""
Mock TFR catalogue build against the Carrick velocity field and the
2MTF sky distribution to avoid recomputing the LOS velocities.
"""
nsamples = len(RA_2MTF)
# Convert Vext from ICRS to Galactic coordinates.
Vext = np.asarray(Vext).reshape(1, 3)
Vext_mag, Vext_RA, Vext_DEC = cartesian_to_radec(Vext).reshape(-1, )
Vext_l, Vext_b = radec_to_galactic(Vext_RA, Vext_DEC)
Vext_galactic = np.asanyarray([Vext_mag, Vext_l, Vext_b]).reshape(1, 3)
Vext = radec_to_cartesian(Vext_galactic).reshape(-1, )
truths = {"a": a_TF, "b": b_TF, "e_mu": sigma_TF, "sigma_v": sigma_v,
"Vext": Vext,
"mean_eta": mean_eta, "std_eta": std_eta,
"mean_mag": mean_mag, "std_mag": std_mag,
}
gen = np.random.default_rng(seed)
# The Carrick box is in the Galactic coordinates.
l, b = radec_to_galactic(RA_2MTF, DEC_2MTF)
gal_phi = np.deg2rad(l)
gal_theta = np.pi / 2 - np.deg2rad(b)
# Sample the linewidth of each galaxy from a Gaussian distribution to mimic
# the MNR procedure.
eta_true = gen.normal(mean_eta, std_eta, nsamples)
eta_obs = gen.normal(eta_true, mean_e_eta)
# Subtract the mean of the observed linewidths, so that they are
# centered around zero. For consistency subtract from both observed
# and true values.
eta_mean_sampled = np.mean(eta_obs)
eta_true -= eta_mean_sampled
eta_obs -= eta_mean_sampled
# Sample the magnitude from some Gaussian distribution to replicate MNR.
mag_true = gen.normal(mean_mag, std_mag, nsamples)
mag_obs = gen.normal(mag_true, mean_e_mag)
# Calculate the 'true' distance modulus and redshift from the TFR distance.
# If h != 1, then these distance modulii are in physical units.
mu_TFR = mag_true - (a_TF + b_TF * eta_true)
mu_true = gen.normal(mu_TFR, sigma_TF)
# This is the distance modulus in units of little h.
mu_true_h = mu_true + 5 * np.log10(h)
# Select a fraction of nearby galaxies.
mu_max = np.percentile(mu_true, calibration_max_percentile)
ks = np.where(mu_true < mu_max)[0]
nsel = int(calibration_rand_fraction * len(ks))
if verbose:
print(f"Assigning calibration to {nsel}/{nsamples} galaxies.")
mu_calibration = np.full((nrepeat_calibration, nsamples), np.nan)
e_mu_calibration = np.full((nrepeat_calibration, nsamples), np.nan)
for n in range(nrepeat_calibration):
ks_n = gen.choice(ks, nsel, replace=False)
mu_calibration[n, ks_n] = gen.normal(mu_true[ks_n], sigma_calibration)
e_mu_calibration[n, ks_n] = np.ones(len(ks_n)) * sigma_calibration
# Convert the true distance modulus to true distance and cosmological
# redshift. The distance is in Mpc/h because the box is in Mpc / h.
r = distmod2dist(mu_true_h, Om0)
zcosmo = distmod2redshift(mu_true_h, Om0)
# Calculate the Cartesian coordinates of each galaxy. This is initially
# centered at (0, 0, 0).
pos = r * np.asarray([
np.sin(gal_theta) * np.cos(gal_phi),
np.sin(gal_theta) * np.sin(gal_phi),
np.cos(gal_theta)])
pos = pos.T
pos_box = pos / boxsize + 0.5
vel = evaluate_cartesian_regular(
velocity_field[0], velocity_field[1], velocity_field[2],
pos=pos_box, smooth_scales=None, method="cubic")
vel = beta * np.vstack(vel).T
for i in range(3):
vel[:, i] += Vext[i]
# Compute the radial velocity.
Vr = np.sum(vel * pos, axis=1) / np.linalg.norm(pos, axis=1)
# The true redshift of the source.
zCMB_true = (1 + zcosmo) * (1 + Vr / SPEED_OF_LIGHT) - 1
zCMB_obs = gen.normal(zCMB_true, sigma_v / SPEED_OF_LIGHT)
# These galaxies will be masked out when LOS is read it because they are
# too far away.
distance_mask = r < 125
truths["distance_mask"] = distance_mask
sample = {"RA": RA_2MTF,
"DEC": DEC_2MTF,
"z_CMB": zCMB_obs,
"eta": eta_obs,
"mag": mag_obs,
"e_eta": np.ones(nsamples) * mean_e_eta,
"e_mag": np.ones(nsamples) * mean_e_mag,
"mu_true": mu_true,
"mu_TFR": mu_TFR,
"mu_calibration": mu_calibration,
"e_mu_calibration": e_mu_calibration,
"r": r,
}
return sample, truths

File diff suppressed because one or more lines are too long

View file

@ -116,6 +116,9 @@ def get_models(ksim, get_model_kwargs, mag_selection, void_kwargs,
"Pantheon+_groups", "Pantheon+_groups_zSN",
"Pantheon+_zSN"]:
fpath = join(folder, "PV_compilation.hdf5")
elif "Carrick2MTFmock" in cat:
ki = cat.split("_")[-1]
fpath =f"/mnt/extraspace/rstiskalek/csiborg_postprocessing/flow_mock/Carrick2MTFmock_seed{ki}.hdf5" # noqa
elif "CF4_TFR" in cat:
fpath = join(folder, "PV/CF4/CF4_TF-distances.hdf5")
elif cat in ["CF4_GroupAll"]:
@ -186,7 +189,7 @@ def run_model(model, nsteps, nburn, model_kwargs, out_folder,
neg_ln_evidence_err = (jax.numpy.nan, jax.numpy.nan)
fname = join(out_folder, fname)
print(f"Saving results to `{fname}`.")
print(f"Saving results: `{fname}`.")
with File(fname, "w") as f:
# Write samples
grp = f.create_group("samples")
@ -203,15 +206,22 @@ def run_model(model, nsteps, nburn, model_kwargs, out_folder,
grp.create_dataset("neg_lnZ_harmonic", data=neg_ln_evidence)
grp.create_dataset("neg_lnZ_harmonic_err", data=neg_ln_evidence_err)
fname_summary = fname.replace(".hdf5", ".txt")
print(f"Saving summary to `{fname_summary}`.")
with open(fname_summary, 'w') as f:
fname_config = fname.replace(".hdf5", "_config.txt")
print(f"Saving configuration: `{fname_config}`.")
with open(fname_config, 'w') as f:
original_stdout = sys.stdout
sys.stdout = f
print("User parameters:")
for kwargs in kwargs_print:
print_variables(kwargs.keys(), kwargs.values())
sys.stdout = original_stdout
fname_summary = fname.replace(".hdf5", "_summary.txt")
print(f"Saving summary: `{fname_summary}`.")
with open(fname_summary, 'w') as f:
original_stdout = sys.stdout
sys.stdout = f
print("HMC summary:")
print(f"{'BIC':<20} {BIC}")
@ -238,7 +248,7 @@ def get_distmod_hyperparams(catalogue, sample_alpha, sample_mag_dipole):
"alpha_min": alpha_min, "alpha_max": alpha_max,
"sample_alpha": sample_alpha
}
elif catalogue in ["SFI_gals", "2MTF"] or "CF4_TFR" in catalogue or "IndranilVoidTFRMock" in catalogue: # noqa
elif catalogue in ["SFI_gals", "2MTF"] or "CF4_TFR" in catalogue or "IndranilVoidTFRMock" in catalogue or "Carrick2MTFmock" in catalogue: # noqa
return {"e_mu_min": 0.001, "e_mu_max": 1.0,
"a_mean": -21., "a_std": 5.0,
"b_mean": -5.95, "b_std": 4.0,
@ -247,6 +257,7 @@ def get_distmod_hyperparams(catalogue, sample_alpha, sample_mag_dipole):
"sample_a_dipole": sample_mag_dipole,
"alpha_min": alpha_min, "alpha_max": alpha_max,
"sample_alpha": sample_alpha,
"sample_curvature": False if "Carrick2MTFmock" in catalogue else True, # noqa
}
elif catalogue in ["CF4_GroupAll"]:
return {"e_mu_min": 0.001, "e_mu_max": 1.0,
@ -294,8 +305,8 @@ if __name__ == "__main__":
###########################################################################
# `None` means default behaviour
nsteps = 2_000
nburn = 2_000
nsteps = 1_500
nburn = 1_500
zcmb_min = None
zcmb_max = 0.05
nchains_harmonic = 10
@ -310,9 +321,16 @@ if __name__ == "__main__":
sample_mag_dipole = False
wo_num_dist_marginalisation = False
absolute_calibration = None
calculate_harmonic = (False if inference_method == "bayes" else True) and (not wo_num_dist_marginalisation) # noqa
calculate_harmonic = (False if (inference_method == "bayes") else True) and (not wo_num_dist_marginalisation) # noqa
sample_h = True if absolute_calibration is not None else False
# These mocks are generated without a density field, so there is no
# inhomogeneous Malmquist and we also do not need evidences.
for catalogue in ARGS.catalogue:
if "Carrick2MTFmock" in catalogue:
sample_alpha = False
calculate_harmonic = False
fname_kwargs = {"inference_method": inference_method,
"smooth": ARGS.ksmooth,
"nsim": ARGS.ksim,
@ -375,7 +393,7 @@ if __name__ == "__main__":
"Vmono_min": -1000, "Vmono_max": 1000,
"beta_min": -10.0, "beta_max": 10.0,
"sigma_v_min": 1.0, "sigma_v_max": 1000 if "IndranilVoid_" in ARGS.simname else 750., # noqa
"h_min": 0.01, "h_max": 5.0,
"h_min": 0.25, "h_max": 1.,
"no_Vext": False if no_Vext is None else no_Vext, # noqa
"sample_Vmag_vax": sample_Vmag_vax,
"sample_Vmono": sample_Vmono,

View file

@ -38,10 +38,10 @@ fi
# for simname in "IndranilVoid_exp" "IndranilVoid_gauss" "IndranilVoid_mb"; do
for simname in "IndranilVoid_exp"; do
for simname in "Carrick2015"; do
# for simname in "Carrick2015" "Lilow2024" "csiborg2_main" "csiborg2X" "manticore_2MPP_N128_DES_V1" "CF4" "CLONES"; do
# for catalogue in "LOSS" "Foundation" "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do
for catalogue in "2MTF"; do
for catalogue in "CF4_TFR_w1"; do
# for catalogue in "CF4_TFR_i" "CF4_TFR_w1"; do
# for catalogue in "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do
for ksim in "none"; do