Add void interpolation (#150)

* Update imports

* Remove multiple ICs for void

* Update submit

* Add void support

* All the void support

* Little update

* Update nb

* Update script
This commit is contained in:
Richard Stiskalek 2024-09-21 18:12:15 +02:00 committed by GitHub
parent 5336c0296c
commit 0bec52dd36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 229 additions and 58 deletions

View file

@ -18,3 +18,4 @@ from .flow_model import (PV_LogLikelihood, PV_validation_model, dist2redshift,
Observed2CosmologicalRedshift, predict_zobs, # noqa
project_Vext, stack_pzosmo_over_realizations) # noqa
from .selection import ToyMagnitudeSelection # noqa
from .void_model import load_void_data, interpolate_void # noqa

View file

@ -25,6 +25,7 @@ from abc import ABC, abstractmethod
import numpy as np
from astropy import units as u
from astropy.coordinates import SkyCoord, angular_separation
from astropy.cosmology import FlatLambdaCDM, z_at_value
from jax import jit
from jax import numpy as jnp
@ -37,6 +38,7 @@ from tqdm import trange
from ..params import SPEED_OF_LIGHT
from ..utils import fprint
from .selection import toy_log_magnitude_selection
from .void_model import interpolate_void, load_void_data
H0 = 100 # km / s / Mpc
@ -193,6 +195,33 @@ class BaseFlowValidationModel(ABC):
self.z_xrange = jnp.asarray(z_xrange)
self.mu_xrange = jnp.asarray(mu_xrange)
def _set_void_data(self, RA, dec, kind, h, order):
"""Create the void interpolator."""
# h is the MOND model value of local H0 to convert the radial grid to
# Mpc / h
rLG_grid, void_grid = load_void_data(kind)
void_grid = jnp.asarray(void_grid, dtype=jnp.float32)
rLG_grid = jnp.asarray(rLG_grid, dtype=jnp.float32)
rLG_grid *= h
rLG_min, rLG_max = rLG_grid.min(), rLG_grid.max()
rgrid_min, rgrid_max = 0, 250
fprint(f"setting radial grid from {rLG_min} to {rLG_max} Mpc.")
rgrid_max *= h
# Get angular separation (in degrees) of each object from the model
# axis.
model_axis = SkyCoord(l=117, b=4, frame='galactic', unit='deg').icrs
coords = SkyCoord(ra=RA, dec=dec, unit='deg').icrs
phi = angular_separation(coords.ra.rad, coords.dec.rad,
model_axis.ra.rad, model_axis.dec.rad)
phi = jnp.asarray(phi * 180 / np.pi, dtype=jnp.float32)
self.void_interpolator = lambda rLG: interpolate_void(
rLG, self.r_xrange, phi, void_grid, rgrid_min, rgrid_max,
rLG_min, rLG_max, order)
@property
def ndata(self):
"""Number of PV objects in the catalogue."""
@ -201,7 +230,24 @@ class BaseFlowValidationModel(ABC):
@property
def num_sims(self):
"""Number of simulations."""
return len(self.log_los_density)
if self.is_void_data:
return 1.
return len(self.log_los_density())
def log_los_density(self, **kwargs):
if self.is_void_data:
# Currently we have no densities for the void.
return jnp.zeros((1, self.ndata, len(self.r_xrange)))
return self._log_los_density
def los_velocity(self, **kwargs):
if self.is_void_data:
# We want the shape to be `(1, n_objects, n_radial_steps)``.
return self.void_interpolator(kwargs["rLG"])[None, ...]
return self._los_velocity
@abstractmethod
def __call__(self, **kwargs):
@ -331,7 +377,8 @@ def sample_simple(e_mu_min, e_mu_max, dmu_min, dmu_max, alpha_min, alpha_max,
def sample_calibration(Vext_min, Vext_max, Vmono_min, Vmono_max, beta_min,
beta_max, sigma_v_min, sigma_v_max, h_min, h_max,
no_Vext, sample_Vmono, sample_beta, sample_h):
rLG_min, rLG_max, no_Vext, sample_Vmono, sample_beta,
sample_h, sample_rLG):
"""Sample the flow calibration."""
sigma_v = sample("sigma_v", Uniform(sigma_v_min, sigma_v_max))
@ -357,12 +404,18 @@ def sample_calibration(Vext_min, Vext_max, Vmono_min, Vmono_max, beta_min,
else:
h = 1.0
if sample_rLG:
rLG = sample("rLG", Uniform(rLG_min, rLG_max))
else:
rLG = None
return {"Vext": Vext,
"Vmono": Vmono,
"sigma_v": sigma_v,
"beta": beta,
"h": h,
"sample_h": sample_h,
"rLG": rLG,
}
@ -386,9 +439,9 @@ class PV_LogLikelihood(BaseFlowValidationModel):
Parameters
----------
los_density : 3-dimensional array of shape (n_sims, n_objects, n_steps)
LOS density field.
LOS density field. Set to `None` if the data is void data.
los_velocity : 3-dimensional array of shape (n_sims, n_objects, n_steps)
LOS radial velocity field.
LOS radial velocity field. Set to `None` if the data is void data.
RA, dec : 1-dimensional arrays of shape (n_objects)
Right ascension and declination in degrees.
z_obs : 1-dimensional array of shape (n_objects)
@ -409,6 +462,8 @@ class PV_LogLikelihood(BaseFlowValidationModel):
Catalogue kind, either "TFR", "SN", or "simple".
name : str
Name of the catalogue.
void_kwargs : dict, optional
Void data parameters. If `None` the data is not void data.
with_num_dist_marginalisation : bool, optional
Whether to use numerical distance marginalisation, in which case
the tracers cannot be coupled by a covariance matrix. By default
@ -417,19 +472,31 @@ class PV_LogLikelihood(BaseFlowValidationModel):
def __init__(self, los_density, los_velocity, RA, dec, z_obs, e_zobs,
calibration_params, abs_calibration_params, mag_selection,
r_xrange, Omega_m, kind, name, with_num_dist_marginalisation):
r_xrange, Omega_m, kind, name, void_kwargs=None,
with_num_dist_marginalisation=True):
if e_zobs is not None:
e2_cz_obs = jnp.asarray((SPEED_OF_LIGHT * e_zobs)**2)
else:
e2_cz_obs = jnp.zeros_like(z_obs)
self.is_void_data = void_kwargs is not None
# This must be done before we convert to radians.
if void_kwargs is not None:
self._set_void_data(RA=RA, dec=dec, **void_kwargs)
# Convert RA/dec to radians.
RA, dec = np.deg2rad(RA), np.deg2rad(dec)
names = ["log_los_density", "los_velocity", "RA", "dec", "z_obs",
"e2_cz_obs"]
values = [jnp.log(los_density), los_velocity, RA, dec, z_obs,
e2_cz_obs]
names = ["RA", "dec", "z_obs", "e2_cz_obs"]
values = [RA, dec, z_obs, e2_cz_obs]
if not self.is_void_data:
names += ["_log_los_density", "_los_velocity"]
values += [jnp.log(los_density), los_velocity]
# Set the void data
self._setattr_as_jax(names, values)
self._set_calibration_params(calibration_params)
self._set_abs_calibration_params(abs_calibration_params)
@ -660,17 +727,24 @@ class PV_LogLikelihood(BaseFlowValidationModel):
mu_xrange[None, :], mu[:, None], e2_mu[:, None],
self.log_r2_xrange[None, :])
if self.is_void_data:
rLG = field_calibration_params["rLG"]
log_los_density = self.log_los_density(rLG=rLG)
los_velocity = self.los_velocity(rLG=rLG)
else:
log_los_density = self.log_los_density()
los_velocity = self.los_velocity()
# Inhomogeneous Malmquist bias. Shape: (nsims, ndata, nxrange)
alpha = distmod_params["alpha"]
log_ptilde = log_ptilde[None, ...] + alpha * self.log_los_density
log_ptilde = log_ptilde[None, ...] + alpha * log_los_density
ptilde = jnp.exp(log_ptilde)
# Normalization of p(r). Shape: (nsims, ndata)
pnorm = simpson(ptilde, x=self.r_xrange, axis=-1)
# Calculate z_obs at each distance. Shape: (nsims, ndata, nxrange)
vrad = field_calibration_params["beta"] * self.los_velocity
vrad = field_calibration_params["beta"] * los_velocity
vrad += (Vext_rad[None, :, None] + Vmono)
zobs = 1 + self.z_xrange[None, None, :]
zobs *= 1 + vrad / SPEED_OF_LIGHT

View file

@ -64,13 +64,16 @@ class DataLoader:
self._field_rdist, self._los_density, self._los_velocity = self._read_field( # noqa
simname, ksim, catalogue, ksmooth, paths)
if "IndranilVoid" not in simname:
if len(self._cat) != self._los_density.shape[1]:
raise ValueError("The number of objects in the catalogue does not "
"match the number of objects in the field.")
raise ValueError(
"The number of objects in the catalogue does not match "
"the number of objects in the field.")
fprint("calculating the radial velocity.", verbose)
nobject = self._los_density.shape[1]
dtype = self._los_density.dtype
num_sims = len(self._los_density)
if simname in ["Carrick2015", "Lilow2024"]:
# Carrick+2015 and Lilow+2024 are in galactic coordinates
@ -81,9 +84,8 @@ class DataLoader:
else:
d1, d2 = self._cat["RA"], self._cat["DEC"]
num_sims = len(self._los_density)
if "IndranilVoid" in simname:
self._los_radial_velocity = self._los_velocity
self._los_radial_velocity = None
self._los_velocity = None
else:
radvel = np.empty(
@ -165,6 +167,9 @@ class DataLoader:
return self._los_radial_velocity[:, self._mask, ...]
def _read_field(self, simname, ksims, catalogue, ksmooth, paths):
if "IndranilVoid" in simname:
return None, None, None
nsims = paths.get_ics(simname)
if isinstance(ksims, int):
ksims = [ksims]
@ -340,8 +345,17 @@ def read_absolute_calibration(kind, data_length, calibration_fpath):
return out, with_calibration, length_calibration
def mask_fields(density, velocity, mask, return_none):
"""Shortcut to mask fields, unless they are `None`"""
if return_none:
return None, None
return density[:, mask], velocity[:, mask]
def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
absolute_calibration=None, calibration_fpath=None):
absolute_calibration=None, calibration_fpath=None,
void_kwargs=None):
"""
Get a model and extract the relevant data from the loader.
@ -366,10 +380,23 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
zcmb_min = 0.0 if zcmb_min is None else zcmb_min
zcmb_max = np.infty if zcmb_max is None else zcmb_max
if void_kwargs is None:
los_overdensity = loader.los_density
los_velocity = loader.los_radial_velocity
else:
los_overdensity = None
los_velocity = None
kind = loader._catname
if void_kwargs is not None:
rdist = void_kwargs.pop("rdist", None)
if rdist is None:
raise ValueError(
"The radial distances must be provided for the void.")
loader._field_rdist = rdist
if absolute_calibration is not None and "CF4_TFR_" not in kind:
raise ValueError("Absolute calibration supported only for the CF4 TFR sample.") # noqa
@ -384,11 +411,14 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
"e_mag": e_mag[mask], "e_x1": e_x1[mask],
"e_c": e_c[mask]}
los_overdensity, los_velocity = mask_fields(
los_overdensity, los_velocity, mask, void_kwargs is not None)
model = PV_LogLikelihood(
los_overdensity[:, mask], los_velocity[:, mask],
los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], e_zCMB, calibration_params,
None, mag_selection, loader.rdist, loader._Omega_m, "SN",
name=kind)
name=kind, void_kwargs=void_kwargs)
elif "Pantheon+" in kind:
keys = ["RA", "DEC", "zCMB", "mB", "x1", "c", "biasCor_m_b", "mBERR",
"x1ERR", "cERR", "biasCorErr_m_b", "zCMB_SN", "zCMB_Group",
@ -413,11 +443,15 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
calibration_params = {"mag": mB[mask], "x1": x1[mask], "c": c[mask],
"e_mag": e_mB[mask], "e_x1": e_x1[mask],
"e_c": e_c[mask]}
los_overdensity, los_velocity = mask_fields(
los_overdensity, los_velocity, mask, void_kwargs is not None)
model = PV_LogLikelihood(
los_overdensity[:, mask], los_velocity[:, mask],
los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], e_zCMB[mask], calibration_params,
None, mag_selection, loader.rdist, loader._Omega_m, "SN",
name=kind)
name=kind, void_kwargs=void_kwargs)
elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"]:
keys = ["RA", "DEC", "z_CMB", "mag", "eta", "e_mag", "e_eta"]
RA, dec, zCMB, mag, eta, e_mag, e_eta = (loader.cat[k] for k in keys)
@ -425,10 +459,15 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
mask = (zCMB < zcmb_max) & (zCMB > zcmb_min)
calibration_params = {"mag": mag[mask], "eta": eta[mask],
"e_mag": e_mag[mask], "e_eta": e_eta[mask]}
los_overdensity, los_velocity = mask_fields(
los_overdensity, los_velocity, mask, void_kwargs is not None)
model = PV_LogLikelihood(
los_overdensity[:, mask], los_velocity[:, mask],
los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], None, calibration_params, None,
mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind)
mag_selection, loader.rdist, loader._Omega_m, "TFR", name=kind,
void_kwargs=void_kwargs)
elif "CF4_TFR_" in kind:
# The full name can be e.g. "CF4_TFR_not2MTForSFI_i" or "CF4_TFR_i".
band = kind.split("_")[-1]
@ -488,11 +527,15 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
calibration_params = {"mag": mag[mask], "eta": eta[mask],
"e_mag": e_mag[mask], "e_eta": e_eta[mask]}
los_overdensity, los_velocity = mask_fields(
los_overdensity, los_velocity, mask, void_kwargs is not None)
model = PV_LogLikelihood(
los_overdensity[:, mask], los_velocity[:, mask],
los_overdensity, los_velocity,
RA[mask], dec[mask], z_obs[mask], None, calibration_params,
abs_calibration_params, mag_selection, loader.rdist,
loader._Omega_m, "TFR", name=kind)
loader._Omega_m, "TFR", name=kind, void_kwargs=void_kwargs)
elif kind in ["CF4_GroupAll"]:
# Note, this for some reason works terribly.
keys = ["RA", "DE", "Vcmb", "DMzp", "eDM"]
@ -505,11 +548,15 @@ def get_model(loader, zcmb_min=None, zcmb_max=None, mag_selection=None,
mu += 5 * np.log10(0.75)
calibration_params = {"mu": mu[mask], "e_mu": e_mu[mask]}
los_overdensity, los_velocity = mask_fields(
los_overdensity, los_velocity, mask, void_kwargs is not None)
model = PV_LogLikelihood(
los_overdensity[:, mask], los_velocity[:, mask],
los_overdensity, los_velocity,
RA[mask], dec[mask], zCMB[mask], None, calibration_params, None,
mag_selection, loader.rdist, loader._Omega_m, "simple",
name=kind)
name=kind, void_kwargs=void_kwargs)
else:
raise ValueError(f"Catalogue `{kind}` not recognized.")

View file

@ -56,7 +56,7 @@ def load_void_data(kind):
for f in files]
rLG = np.sort(rLG)
for i, ri in enumerate(tqdm(rLG, desc="Loading observer data")):
for i, ri in enumerate(tqdm(rLG, desc="Loading void observer data")):
f = join(fdir, f"v_pec_{kind}profile_rLG_{ri}.dat")
data_i = np.genfromtxt(f).T

View file

@ -135,23 +135,10 @@ class Paths:
files = [search(r'realization(\d+)_delta\.fits', file).group(1)
for file in files if search(r'realization(\d+)_delta\.fits', file)] # noqa
files = [int(file) for file in files]
# Downsample to only 20 realisations
# files = files[::5]
elif simname in ["Carrick2015", "Lilow2024", "no_field", "CLONES"]:
files = [0]
elif "IndranilVoid" in simname:
kind = simname.split("_")[-1]
if kind not in ["exp", "gauss", "mb"]:
raise ValueError(f"Unknown void kind `{simname}`.")
kind = kind.upper()
fdir = join(self.aux_cat_dir, "IndranilVoid", f"{kind}profile")
files = glob(join(fdir, "v_pec_*.dat"))
files = [
search(rf'v_pec_{kind}profile_rLG_(\d+)\.dat', file).group(1)
for file in files]
files = [int(file) for file in files]
files = [0]
else:
raise ValueError(f"Unknown simulation name `{simname}`.")

View file

@ -48,6 +48,47 @@
"## Quick checks"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File: /mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity/samples_Carrick2015_2MTF_mike_zcmb_max_0.05_sample_alpha.hdf5\n",
"Last modified: 30/08/2024 15:27:56\n"
]
},
{
"data": {
"text/plain": [
"9715.0205"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fname = paths.flow_validation(\n",
" fdir, \"Carrick2015\", \"2MTF\", inference_method=\"mike\",\n",
" sample_alpha=True, sample_beta=None,\n",
" zcmb_max=0.05)\n",
"\n",
"\n",
"get_gof(\"neg_lnZ_harmonic\", fname)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,

View file

@ -74,8 +74,9 @@ import sys
from os.path import join # noqa
import csiborgtools # noqa
from csiborgtools import fprint # noqa
import jax # noqa
import numpy as np # noqa
from csiborgtools import fprint # noqa
from h5py import File # noqa
from numpyro.infer import MCMC, NUTS, init_to_median # noqa
@ -86,7 +87,8 @@ def print_variables(names, variables):
print(flush=True)
def get_models(ksim, get_model_kwargs, mag_selection, verbose=True):
def get_models(ksim, get_model_kwargs, mag_selection, void_kwargs,
verbose=True):
"""Load the data and create the NumPyro models."""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
folder = "/mnt/extraspace/rstiskalek/catalogs/"
@ -125,12 +127,21 @@ def get_models(ksim, get_model_kwargs, mag_selection, verbose=True):
cat, fpath, paths,
ksmooth=ARGS.ksmooth)
models[i] = csiborgtools.flow.get_model(
loader, mag_selection=mag_selection[i], **get_model_kwargs)
loader, mag_selection=mag_selection[i], void_kwargs=void_kwargs,
**get_model_kwargs)
fprint(f"num. radial steps is {len(loader.rdist)}")
return models
def select_void_h(kind):
hs = {"mb": 0.7615, "gauss": 0.7724, "exp": 0.7725}
try:
return hs[kind]
except KeyError:
raise ValueError(f"Unknown void kind: `{kind}`.")
def get_harmonic_evidence(samples, log_posterior, nchains_harmonic, epoch_num):
"""Compute evidence using the `harmonic` package."""
data, names = csiborgtools.dict_samples_to_array(samples)
@ -339,8 +350,18 @@ if __name__ == "__main__":
if mag_selection and inference_method != "bayes":
raise ValueError("Magnitude selection is only supported with `bayes` inference.") # noqa
if "IndranilVoid" in ARGS.simname and ARGS.ksim is None:
raise ValueError("`IndranilVoid` must be run only per specific realization.") # noqa
if "IndranilVoid" in ARGS.simname:
if ARGS.ksim is not None:
raise ValueError(
"`IndranilVoid` does not have multiple realisations.")
kind = ARGS.simname.split("_")[-1]
h = select_void_h(kind)
rdist = np.arange(0, 165, 0.5)
void_kwargs = {"kind": kind, "h": h, "order": 1, "rdist": rdist}
else:
void_kwargs = None
h = 1.
if inference_method != "bayes":
mag_selection = [None] * len(ARGS.catalogue)
@ -360,6 +381,8 @@ if __name__ == "__main__":
"sample_Vmono": sample_Vmono,
"sample_beta": sample_beta,
"sample_h": sample_h,
"sample_rLG": "IndranilVoid" in ARGS.simname,
"rLG_min": 0.0, "rLG_max": 500 * h,
}
print_variables(
calibration_hyperparams.keys(), calibration_hyperparams.values())
@ -394,7 +417,7 @@ if __name__ == "__main__":
print(f"{'Current simulation:':<20} {i + 1} ({ksim}) out of {len(ksim_iterator)}.") # noqa
fname_kwargs["nsim"] = ksim
models = get_models(ksim, get_model_kwargs, mag_selection)
models = get_models(ksim, get_model_kwargs, mag_selection, void_kwargs)
model_kwargs = {
"models": models,
"field_calibration_hyperparams": calibration_hyperparams,

View file

@ -37,11 +37,9 @@ else
fi
# for simname in "IndranilVoid_exp" "IndranilVoid_gauss" "IndranilVoid_mb"; do
for simname in "Carrick2015"; do
for simname in "IndranilVoid_exp" "IndranilVoid_gauss" "IndranilVoid_mb"; do
# for simname in "no_field"; do
# for catalogue in "LOSS" "Foundation" "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do
for catalogue in "LOSS"; do
for catalogue in "LOSS" "Foundation" "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do
# for catalogue in "CF4_TFR_i" "CF4_TFR_w1"; do
# for catalogue in "2MTF" "SFI_gals" "CF4_TFR_i" "CF4_TFR_w1"; do
for ksim in "none"; do