Add marginalization over boxes (#131)

* Parallelize over simulations

* Update docs

* Update dependency

* Update imports

* Add additional dependencies

* Update .gitignore

* Update README

* Simplify numpyro GOF

* Speed up GOF

* Deepcopy samples

* Update scripts

* Add GPU acceleration

* Select boxes

* Update script

* Optionally sample beta

* Fix old code

* Simplify code

* Start saving log posterior

* Start popping log_likelihood

* Add imports

* Add converting samples

* Fix script name

* Add evidence with harmonic

* Remove comment

* Update imports

* Update imports so that pylians not required

* Stop requiring Pylians to be installed

* Update submission scripts for loops

* Update nb

* Update nb

* Add Manticore boxes

* Add verbosity flag

* Add bulk flow

* Update script

* Update nb

* Update normalization

* Update submit

* Update nb
This commit is contained in:
Richard Stiskalek 2024-06-26 10:43:26 +01:00 committed by GitHub
parent ffaf92cd4b
commit ce55a2b47e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 1436 additions and 1290 deletions

View file

@ -20,8 +20,9 @@ from .utils import (center_of_mass, delta2ncells, number_counts,
binned_statistic, cosine_similarity, fprint, # noqa
hms_to_degrees, dms_to_degrees, great_circle_distance, # noqa
radec_to_cartesian, cartesian_to_radec, # noqa
thin_samples_by_acl, numpyro_gof, radec_to_galactic, # noqa
heliocentric_to_cmb, calculate_acl) # noqa
thin_samples_by_acl, BIC_AIC, radec_to_galactic, # noqa
heliocentric_to_cmb, calculate_acl, harmonic_evidence, # noqa
dict_samples_to_array) # noqa
from .params import (paths_glamdring, simname2boxsize, simname2Omega_m, # noqa
snap2redshift) # noqa

View file

@ -12,16 +12,15 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from warnings import warn
from warnings import warn # noqa
from csiborgtools.clustering.knn import kNN_1DCDF # noqa
from csiborgtools.clustering.utils import (BaseRVS, RVSinbox, # noqa
RVSinsphere, RVSonsphere,
normalised_marks)
from csiborgtools.clustering.knn import kNN_1DCDF # noqa
from csiborgtools.clustering.utils import ( # noqa
BaseRVS, RVSinbox, RVSinsphere, RVSonsphere, normalised_marks) # noqa
try:
import Corrfunc # noqa
from .tpcf import Mock2PCF # noqa
import Corrfunc # noqa
from .tpcf import Mock2PCF # noqa
except ImportError:
warn("`Corrfunc` not installed. 2PCF modules will not be available .") # noqa
warn("`Corrfunc` not installed. 2PCF modules will not be available.",
UserWarning) # noqa

View file

@ -12,15 +12,25 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .density import (DensityField, PotentialField, TidalTensorField, # noqa
VelocityField, radial_velocity, power_spectrum, # noqa
overdensity_field) # noqa
try:
import MAS_library as MASL # noqa
import Pk_library as PKL # noqa
from .density import (DensityField, PotentialField, TidalTensorField, # noqa
VelocityField, radial_velocity, power_spectrum, # noqa
overdensity_field) # noqa
from .interp import (evaluate_cartesian_cic, evaluate_sky, evaluate_los, # noqa
field2rsp, fill_outside, make_sky, # noqa
observer_peculiar_velocity, smoothen_field, # noqa
field_at_distance) # noqa
except ImportError:
from warnings import warn
warn("`MAS_library` and `Pk_library` not installed. `density` and "
"`interp` related modules are not available. "
"Please install `Pylians`.", UserWarning)
from .enclosed_mass import (particles_enclosed_mass, # noqa
particles_enclosed_momentum, field_enclosed_mass, # noqa
bulkflow_peery2018) # noqa
from .interp import (evaluate_cartesian_cic, evaluate_sky, evaluate_los, # noqa
field2rsp, fill_outside, make_sky, # noqa
observer_peculiar_velocity, smoothen_field, # noqa
field_at_distance) # noqa
from .corr import bayesian_bootstrap_correlation # noqa
from .utils import nside2radec # noqa

View file

@ -102,7 +102,7 @@ def _field_enclosed_mass(field, rmax, boxsize):
return mass * cell_volume, volume * cell_volume
def field_enclosed_mass(field, distances, boxsize):
def field_enclosed_mass(field, distances, boxsize, verbose=True):
"""
Calculate the approximate enclosed mass within a given radius from a
density field, counts the mass in cells and volume of cells whose
@ -116,6 +116,8 @@ def field_enclosed_mass(field, distances, boxsize):
Radii to calculate the enclosed mass at in `Mpc / h`.
boxsize : float
Box size in `Mpc / h`.
verbose : bool
Verbosity flag.
Returns
-------
@ -127,7 +129,7 @@ def field_enclosed_mass(field, distances, boxsize):
enclosed_mass = np.zeros_like(distances)
enclosed_volume = np.zeros_like(distances)
for i, dist in enumerate(tqdm(distances)):
for i, dist in enumerate(tqdm(distances, disable=not verbose)):
enclosed_mass[i], enclosed_volume[i] = _field_enclosed_mass(
field, dist, boxsize)

View file

@ -14,9 +14,6 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .flow_model import (DataLoader, radial_velocity_los, dist2redshift, # noqa
dist2distmodulus, predict_zobs, project_Vext, # noqa
SD_PV_validation_model, SN_PV_validation_model, # noqa
TF_PV_validation_model, radec_to_galactic, # noqa
sample_prior, make_loss, get_model, # noqa
optimize_model_with_jackknife, distmodulus2dist, # noqa
PV_validation_model, get_model, distmodulus2dist, # noqa
Observed2CosmologicalRedshift, # noqa
stack_pzosmo_over_realizations) # noqa

File diff suppressed because it is too large Load diff

View file

@ -26,8 +26,6 @@ import numpy as np
from astropy import units as u
from astropy.coordinates import SkyCoord
from numba import jit
from numpyro.infer import util
from scipy.stats import multivariate_normal
###############################################################################
# Positions #
@ -429,55 +427,127 @@ def thin_samples_by_acl(samples):
return thinned_samples
###############################################################################
#                            Model comparison                                 #
###############################################################################


def BIC_AIC(samples, log_likelihood, ndata):
    """
    Get the BIC/AIC of HMC samples from a Numpyro model.

    Both criteria are evaluated at the sampled point with the highest
    log-likelihood:

        BIC = nparam * ln(ndata) - 2 * max(log_likelihood)
        AIC = 2 * nparam - 2 * max(log_likelihood)

    Parameters
    ----------
    samples: dict
        Dictionary of samples from the Numpyro MCMC object. A 1-dimensional
        entry counts as one parameter, a 2-dimensional entry counts as
        `shape[-1]` parameters.
    log_likelihood: numpy array
        Log likelihood values of the samples. May have any shape (e.g.
        flattened or grouped by chain), only its maximum is used.
    ndata: int
        Number of data points.

    Returns
    -------
    BIC, AIC: floats

    Raises
    ------
    ValueError
        If an entry of `samples` is neither 1- nor 2-dimensional.
    """
    # Take the maximum directly rather than indexing with `np.argmax`: the
    # latter returns an index into the *flattened* array, which is not a valid
    # index if `log_likelihood` has more than one dimension.
    max_log_likelihood = np.max(log_likelihood)

    # How many parameters?
    nparam = 0
    for val in samples.values():
        if val.ndim == 1:
            nparam += 1
        elif val.ndim == 2:
            nparam += val.shape[-1]
        else:
            raise ValueError("Invalid dimensionality of samples to count the number of parameters.")  # noqa

    BIC = nparam * np.log(ndata) - 2 * max_log_likelihood
    AIC = 2 * nparam - 2 * max_log_likelihood

    return float(BIC), float(AIC)
def dict_samples_to_array(samples):
    """
    Flatten a dictionary of samples into a single 2-dimensional array.

    Every 1-dimensional entry becomes one column labelled by its key. Every
    2-dimensional entry is split along its last axis into columns labelled
    `{key}_{i}`.

    Parameters
    ----------
    samples: dict
        Dictionary of 1- or 2-dimensional sample arrays.

    Returns
    -------
    data: 2-dimensional array
        Stacked samples, one column per label.
    names: list of str
        Column labels, in the order of the columns of `data`.

    Raises
    ------
    ValueError
        If an entry is neither 1- nor 2-dimensional.
    """
    columns, labels = [], []

    for name, values in samples.items():
        if values.ndim not in (1, 2):
            raise ValueError("Invalid dimensionality of samples to stack.")

        if values.ndim == 1:
            columns.append(values)
            labels.append(name)
            continue

        # Two-dimensional entry: one column per trailing-axis component.
        for j in range(values.shape[-1]):
            columns.append(values[:, j])
            labels.append(f"{name}_{j}")

    stacked = np.vstack(columns)
    return stacked.T, labels
def harmonic_evidence(samples, log_posterior, temperature=0.8, epochs_num=20,
                      return_flow_samples=True, verbose=True):
    """
    Calculate the evidence using the `harmonic` package. The model has a few
    more hyperparameters that are set to defaults now.

    Half of the chains are used to train a `RQSplineModel` flow and the other
    half to infer the evidence.

    Parameters
    ----------
    samples: 3-dimensional array
        MCMC samples of shape `(nchains, nsamples, ndim)`.
    log_posterior: 2-dimensional array
        Log posterior values of shape `(nchains, nsamples)`. Its shape must
        match the leading two dimensions of `samples`.
    temperature: float, optional
        Temperature of the `harmonic` model.
    epochs_num: int, optional
        Number of epochs for training the model.
    return_flow_samples: bool, optional
        Whether to return the flow samples.
    verbose: bool, optional
        Whether to print progress.

    Returns
    -------
    ln_inv_evidence, err_ln_inv_evidence: float and tuple of floats
        The log inverse evidence and its error.
    flow_samples: 2-dimensional array, optional
        Samples drawn from the trained flow, as many as there are input
        samples in total (`nchains * nsamples`). To check their agreement
        with the input samples. Only returned if `return_flow_samples`.

    Raises
    ------
    ValueError
        If `samples` is not 3-dimensional, or if `log_posterior` is not
        2-dimensional or does not match `samples.shape[:2]`.
    ImportError
        If the `harmonic` package is not installed.
    """
    # Do the cheap input checks first, so that malformed input is reported
    # before the optional dependency is imported.
    if samples.ndim != 3:
        raise ValueError("The samples must be a 3-dimensional array of shape `(nchains, nsamples, ndim)`.")  # noqa

    # Either condition alone makes the input invalid, hence `or`.
    if log_posterior.ndim != 2 or log_posterior.shape[:2] != samples.shape[:2]:  # noqa
        raise ValueError("The log posterior must be a 2-dimensional array of shape `(nchains, nsamples)`.")  # noqa

    try:
        import harmonic as hm
    except ImportError:
        raise ImportError("The `harmonic` package is required to calculate the evidence.") from None  # noqa

    ndim = samples.shape[-1]
    chains = hm.Chains(ndim)
    chains.add_chains_3d(samples, log_posterior)
    chains_train, chains_infer = hm.utils.split_data(
        chains, training_proportion=0.5)

    # This has a few more hyperparameters that are set to defaults now.
    model = hm.model.RQSplineModel(
        ndim, standardize=True, temperature=temperature)
    model.fit(chains_train.samples, epochs=epochs_num, verbose=verbose)

    ev = hm.Evidence(chains_infer.nchains, model)
    ev.add_chains(chains_infer)
    ln_inv_evidence = ev.ln_evidence_inv
    err_ln_inv_evidence = ev.compute_ln_inv_evidence_errors()

    if return_flow_samples:
        # Draw as many flow samples as there are input samples in total.
        samp_num = samples.shape[0] * samples.shape[1]
        flow_samples = model.sample(samp_num)
        return ln_inv_evidence, err_ln_inv_evidence, flow_samples

    return ln_inv_evidence, err_ln_inv_evidence