mirror of
https://github.com/Richard-Sti/csiborgtools_public.git
synced 2025-05-13 06:01:13 +00:00
More flow (#118)
* Add GoF calculation * Add import * Add base flow * Add reading of ndata * Update nb * Update plotting * Update script * Update plots * Updaet plo * Add script * Update nb * Update nb * Update script * Update script * Update nb * Remove imports * Improve labelling * Improve flow calibration * Add bulk flow plots * Update flow * Update scrit * Calculate more radial steps * Update bulk * Update script * Update nb
This commit is contained in:
parent
a9cb8943d6
commit
f7285b2600
12 changed files with 1144 additions and 1023 deletions
|
@ -19,7 +19,7 @@ from .utils import (center_of_mass, delta2ncells, number_counts,
|
|||
binned_statistic, cosine_similarity, fprint, # noqa
|
||||
hms_to_degrees, dms_to_degrees, great_circle_distance, # noqa
|
||||
radec_to_cartesian, cartesian_to_radec, # noqa
|
||||
thin_samples_by_acl) # noqa
|
||||
thin_samples_by_acl, numpyro_gof) # noqa
|
||||
from .params import paths_glamdring, simname2boxsize, simname2Omega_m # noqa
|
||||
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ References
|
|||
----------
|
||||
[1] https://arxiv.org/abs/1912.09383.
|
||||
"""
|
||||
from abc import ABC
|
||||
from datetime import datetime
|
||||
from warnings import catch_warnings, simplefilter, warn
|
||||
|
||||
|
@ -37,7 +38,7 @@ from jax.random import PRNGKey
|
|||
from numpyro.infer import Predictive, util
|
||||
from scipy.optimize import fmin_powell
|
||||
from sklearn.model_selection import KFold
|
||||
from tqdm import tqdm, trange
|
||||
from tqdm import trange
|
||||
from numdifftools import Hessian
|
||||
|
||||
from ..params import simname2Omega_m
|
||||
|
@ -82,6 +83,8 @@ class DataLoader:
|
|||
----------
|
||||
simname : str
|
||||
Simulation name.
|
||||
ksims : int
|
||||
Index of the simulation to read in (not the IC index).
|
||||
catalogue : str
|
||||
Name of the catalogue with LOS objects.
|
||||
catalogue_fpath : str
|
||||
|
@ -94,7 +97,7 @@ class DataLoader:
|
|||
Whether to store the full 3D velocity field. Otherwise stores only
|
||||
the radial velocity.
|
||||
"""
|
||||
def __init__(self, simname, catalogue, catalogue_fpath, paths,
|
||||
def __init__(self, simname, ksim, catalogue, catalogue_fpath, paths,
|
||||
ksmooth=None, store_full_velocity=False):
|
||||
print(f"{t()}: reading the catalogue.")
|
||||
self._cat = self._read_catalogue(catalogue, catalogue_fpath)
|
||||
|
@ -102,7 +105,7 @@ class DataLoader:
|
|||
|
||||
print(f"{t()}: reading the interpolated field.")
|
||||
self._field_rdist, self._los_density, self._los_velocity = self._read_field( # noqa
|
||||
simname, catalogue, ksmooth, paths)
|
||||
simname, ksim, catalogue, ksmooth, paths)
|
||||
|
||||
if len(self._field_rdist) % 2 == 0:
|
||||
warn(f"The number of radial steps is even. Skipping the first "
|
||||
|
@ -117,7 +120,8 @@ class DataLoader:
|
|||
"match the number of objects in the field.")
|
||||
|
||||
print(f"{t()}: calculating the radial velocity.")
|
||||
nobject, nsim = self._los_density.shape[:2]
|
||||
nobject = len(self._los_density)
|
||||
dtype = self._los_density.dtype
|
||||
|
||||
# In case of Carrick 2015 the box is in galactic coordinates..
|
||||
if simname == "Carrick2015":
|
||||
|
@ -125,12 +129,10 @@ class DataLoader:
|
|||
else:
|
||||
d1, d2 = self._cat["RA"], self._cat["DEC"]
|
||||
|
||||
radvel = np.empty((nobject, nsim, len(self._field_rdist)),
|
||||
self._los_velocity.dtype)
|
||||
for i in trange(nobject):
|
||||
for j in range(nsim):
|
||||
radvel[i, j, :] = radial_velocity_los(
|
||||
self._los_velocity[:, i, j, ...], d1[i], d2[i])
|
||||
radvel = np.empty((nobject, len(self._field_rdist)), dtype)
|
||||
for i in range(nobject):
|
||||
radvel[i, :] = radial_velocity_los(self._los_velocity[:, i, ...],
|
||||
d1[i], d2[i])
|
||||
self._los_radial_velocity = radvel
|
||||
|
||||
if not store_full_velocity:
|
||||
|
@ -192,7 +194,7 @@ class DataLoader:
|
|||
|
||||
Returns
|
||||
----------
|
||||
3-dimensional array of shape (n_objects, n_simulations, n_steps)
|
||||
2-dimensional array of shape (n_objects, n_steps)
|
||||
"""
|
||||
return self._los_density[self._mask]
|
||||
|
||||
|
@ -203,7 +205,7 @@ class DataLoader:
|
|||
|
||||
Returns
|
||||
-------
|
||||
4-dimensional array of shape (n_objects, n_simulations, 3, n_steps)
|
||||
3-dimensional array of shape (3, n_objects, n_steps)
|
||||
"""
|
||||
if self._los_velocity is None:
|
||||
raise ValueError("The 3D velocities were not stored.")
|
||||
|
@ -216,38 +218,29 @@ class DataLoader:
|
|||
|
||||
Returns
|
||||
-------
|
||||
3-dimensional array of shape (n_objects, n_simulations, n_steps)
|
||||
2-dimensional array of shape (n_objects, n_steps)
|
||||
"""
|
||||
return self._los_radial_velocity[self._mask]
|
||||
|
||||
def _read_field(self, simname, catalogue, k, paths):
|
||||
def _read_field(self, simname, ksim, catalogue, ksmooth, paths):
|
||||
"""Read in the interpolated field."""
|
||||
out_density = None
|
||||
out_velocity = None
|
||||
has_smoothed = False
|
||||
|
||||
nsims = paths.get_ics(simname)
|
||||
if not (0 <= ksim < len(nsims)):
|
||||
raise ValueError("Invalid simulation index.")
|
||||
nsim = nsims[ksim]
|
||||
|
||||
with File(paths.field_los(simname, catalogue), 'r') as f:
|
||||
has_smoothed = True if f[f"density_{nsims[0]}"].ndim > 2 else False
|
||||
if has_smoothed and (k is None or not isinstance(k, int)):
|
||||
has_smoothed = True if f[f"density_{nsim}"].ndim > 2 else False
|
||||
if has_smoothed and (ksmooth is None or not isinstance(ksmooth, int)): # noqa
|
||||
raise ValueError("The output contains smoothed field but "
|
||||
"`ksmooth` is None. It must be provided.")
|
||||
|
||||
for i, nsim in enumerate(tqdm(nsims)):
|
||||
if out_density is None:
|
||||
nobject, nstep = f[f"density_{nsim}"].shape[:2]
|
||||
out_density = np.empty(
|
||||
(nobject, len(nsims), nstep), dtype=np.float32)
|
||||
out_velocity = np.empty(
|
||||
(3, nobject, len(nsims), nstep), dtype=np.float32)
|
||||
|
||||
indx = (..., k) if has_smoothed else (...)
|
||||
out_density[:, i, :] = f[f"density_{nsim}"][indx]
|
||||
out_velocity[:, :, i, :] = f[f"velocity_{nsim}"][indx]
|
||||
|
||||
indx = (..., ksmooth) if has_smoothed else (...)
|
||||
los_density = f[f"density_{nsim}"][indx]
|
||||
los_velocity = f[f"velocity_{nsim}"][indx]
|
||||
rdist = f[f"rdist_{nsims[0]}"][:]
|
||||
|
||||
return rdist, out_density, out_velocity
|
||||
return rdist, los_density, los_velocity
|
||||
|
||||
def _read_catalogue(self, catalogue, catalogue_fpath):
|
||||
"""
|
||||
|
@ -556,7 +549,17 @@ def calculate_ll_zobs(zobs, zobs_pred, sigma_v):
|
|||
return jnp.exp(-0.5 * (dcz / sigma_v)**2) / jnp.sqrt(2 * np.pi) / sigma_v
|
||||
|
||||
|
||||
class SD_PV_validation_model:
|
||||
class BaseFlowValidationModel(ABC):
|
||||
"""
|
||||
Base class for the flow validation models.
|
||||
"""
|
||||
|
||||
@property
|
||||
def ndata(self):
|
||||
return len(self._RA)
|
||||
|
||||
|
||||
class SD_PV_validation_model(BaseFlowValidationModel):
|
||||
"""
|
||||
Simple distance peculiar velocity (PV) validation model, assuming that
|
||||
we already have a calibrated estimate of the comoving distance to the
|
||||
|
@ -657,7 +660,7 @@ class SD_PV_validation_model:
|
|||
numpyro.factor("ll", ll)
|
||||
|
||||
|
||||
class SN_PV_validation_model:
|
||||
class SN_PV_validation_model(BaseFlowValidationModel):
|
||||
"""
|
||||
Supernova peculiar velocity (PV) validation model that includes the
|
||||
calibration of the SALT2 light curve parameters.
|
||||
|
@ -793,11 +796,11 @@ class SN_PV_validation_model:
|
|||
return ll + jnp.log(self._f_simps(ptilde) / pnorm), None
|
||||
|
||||
ll = 0.
|
||||
ll, __ = scan(scan_body, ll, jnp.arange(len(self._RA)))
|
||||
ll, __ = scan(scan_body, ll, jnp.arange(self.ndata))
|
||||
numpyro.factor("ll", ll)
|
||||
|
||||
|
||||
class TF_PV_validation_model:
|
||||
class TF_PV_validation_model(BaseFlowValidationModel):
|
||||
"""
|
||||
Tully-Fisher peculiar velocity (PV) validation model that includes the
|
||||
calibration of the Tully-Fisher distance `mu = m - (a + b * eta)`.
|
||||
|
@ -909,7 +912,7 @@ class TF_PV_validation_model:
|
|||
return ll + jnp.log(self._f_simps(ptilde) / pnorm), None
|
||||
|
||||
ll = 0.
|
||||
ll, __ = scan(scan_body, ll, jnp.arange(len(self._RA)))
|
||||
ll, __ = scan(scan_body, ll, jnp.arange(self.ndata))
|
||||
|
||||
numpyro.factor("ll", ll)
|
||||
|
||||
|
@ -919,7 +922,7 @@ class TF_PV_validation_model:
|
|||
###############################################################################
|
||||
|
||||
|
||||
def get_model(loader, k, zcmb_max=None, verbose=True):
|
||||
def get_model(loader, zcmb_max=None, verbose=True):
|
||||
"""
|
||||
Get a model and extract the relevant data from the loader.
|
||||
|
||||
|
@ -927,8 +930,6 @@ def get_model(loader, k, zcmb_max=None, verbose=True):
|
|||
----------
|
||||
loader : DataLoader
|
||||
DataLoader instance.
|
||||
k : int
|
||||
Simulation index.
|
||||
zcmb_max : float, optional
|
||||
Maximum observed redshift in the CMB frame to include.
|
||||
verbose : bool, optional
|
||||
|
@ -940,11 +941,8 @@ def get_model(loader, k, zcmb_max=None, verbose=True):
|
|||
"""
|
||||
zcmb_max = np.infty if zcmb_max is None else zcmb_max
|
||||
|
||||
if k > loader.los_density.shape[1]:
|
||||
raise ValueError(f"Simulation index `{k}` out of range.")
|
||||
|
||||
los_overdensity = loader.los_density[:, k, :]
|
||||
los_velocity = loader.los_radial_velocity[:, k, :]
|
||||
los_overdensity = loader.los_density
|
||||
los_velocity = loader.los_radial_velocity
|
||||
kind = loader._catname
|
||||
|
||||
if kind in ["LOSS", "Foundation"]:
|
||||
|
@ -1160,4 +1158,5 @@ def optimize_model_with_jackknife(loader, k, n_splits=5, sample_alpha=True,
|
|||
for key in keys]
|
||||
stats = {key: (mean[i], std[i]) for i, key in enumerate(keys)}
|
||||
|
||||
loader.reset_mask()
|
||||
return samples, stats, fmin, logz, bic
|
||||
|
|
|
@ -16,10 +16,12 @@
|
|||
Collection of stand-off utility functions used in the scripts.
|
||||
"""
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
from numba import jit
|
||||
from datetime import datetime
|
||||
from numpyro.infer import util
|
||||
from scipy.stats import multivariate_normal
|
||||
|
||||
###############################################################################
|
||||
# Positions #
|
||||
|
@ -428,3 +430,57 @@ def thin_samples_by_acl(samples):
|
|||
thinned_samples[key] = np.hstack(key_samples)
|
||||
|
||||
return thinned_samples
|
||||
|
||||
|
||||
def numpyro_gof(model, mcmc, model_kwargs={}):
|
||||
"""
|
||||
Get the goodness-of-fit statistics for a sampled Numpyro model. Calculates
|
||||
the BIC and AIC using the maximum likelihood sampled point and the log
|
||||
evidence using the Laplace approximation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
model : numpyro model
|
||||
The model to evaluate.
|
||||
mcmc : numpyro MCMC
|
||||
The MCMC object containing the samples.
|
||||
ndata : int
|
||||
The number of data points.
|
||||
model_kwargs : dict, optional
|
||||
Additional keyword arguments to pass to the model.
|
||||
|
||||
Returns
|
||||
-------
|
||||
gof : dict
|
||||
Dictionary containing the BIC, AIC and logZ.
|
||||
"""
|
||||
samples = mcmc.get_samples(group_by_chain=False)
|
||||
log_likelihood = util.log_likelihood(model, samples, **model_kwargs)["ll"]
|
||||
|
||||
# Calculate the BIC using the maximum likelihood sampled point.
|
||||
kmax = np.argmax(log_likelihood)
|
||||
nparam = len(samples)
|
||||
try:
|
||||
ndata = model.ndata
|
||||
except AttributeError as e:
|
||||
raise AttributeError("The model must have an attribute `ndata` "
|
||||
"indicating the number of data points.") from e
|
||||
BIC = -2 * log_likelihood[kmax] + nparam * np.log(ndata)
|
||||
|
||||
# Calculate AIC
|
||||
AIC = 2 * nparam - 2 * log_likelihood[kmax]
|
||||
|
||||
# Calculate log(Z) using Laplace approximation.
|
||||
X = np.vstack([samples[key] for key in samples.keys()]).T
|
||||
mu, cov = multivariate_normal.fit(X)
|
||||
test_sample = {key: mu[i] for i, key in enumerate(samples.keys())}
|
||||
|
||||
ll_mu = util.log_likelihood(model, test_sample, **model_kwargs)["ll"]
|
||||
cov_det = np.linalg.det(cov)
|
||||
D = len(mu)
|
||||
logZ = ll_mu + 0.5 * np.log(cov_det) + D / 2 * np.log(2 * np.pi)
|
||||
|
||||
# Convert to float
|
||||
out = {"BIC": BIC, "AIC": AIC, "logZ": logZ}
|
||||
out = {key: float(val) for key, val in out.items()}
|
||||
return out
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue