mirror of
https://github.com/Richard-Sti/csiborgtools_public.git
synced 2025-06-08 09:51:12 +00:00
Add marginalization over boxes (#131)
* Parallelize over simulations * Update docs * Update dependency * Update imports * Add additional dependencies * Update .gitignore * Update README * Simplify numpyro GOF * Speed up GOF * Deepcopy samples * Update scripts * Add GPU acceleration * Select boxes * Update script * Optionally sample beta * Fix old code * Simplify code * Start saving log posterior * Start popping log_likelihood * Add imports * Add converting samples * Fix script name * Add evidence with harmonic * Remove comment * Update imports * Update imports so that pylians not required * Stop requiring Pylians to be installed * Update submission scripts for loops * Update nb * Update nb * Add Manticore boxes * Add verbosity flag * Add bulk flow * Update script * Update nb * Update normalization * Update submit * Update nb
This commit is contained in:
parent
ffaf92cd4b
commit
ce55a2b47e
16 changed files with 1436 additions and 1290 deletions
|
@ -20,8 +20,9 @@ from .utils import (center_of_mass, delta2ncells, number_counts,
|
|||
binned_statistic, cosine_similarity, fprint, # noqa
|
||||
hms_to_degrees, dms_to_degrees, great_circle_distance, # noqa
|
||||
radec_to_cartesian, cartesian_to_radec, # noqa
|
||||
thin_samples_by_acl, numpyro_gof, radec_to_galactic, # noqa
|
||||
heliocentric_to_cmb, calculate_acl) # noqa
|
||||
thin_samples_by_acl, BIC_AIC, radec_to_galactic, # noqa
|
||||
heliocentric_to_cmb, calculate_acl, harmonic_evidence, # noqa
|
||||
dict_samples_to_array) # noqa
|
||||
from .params import (paths_glamdring, simname2boxsize, simname2Omega_m, # noqa
|
||||
snap2redshift) # noqa
|
||||
|
||||
|
|
|
@ -12,16 +12,15 @@
|
|||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
from warnings import warn
|
||||
from warnings import warn # noqa
|
||||
|
||||
from csiborgtools.clustering.knn import kNN_1DCDF # noqa
|
||||
from csiborgtools.clustering.utils import (BaseRVS, RVSinbox, # noqa
|
||||
RVSinsphere, RVSonsphere,
|
||||
normalised_marks)
|
||||
from csiborgtools.clustering.knn import kNN_1DCDF # noqa
|
||||
from csiborgtools.clustering.utils import ( # noqa
|
||||
BaseRVS, RVSinbox, RVSinsphere, RVSonsphere, normalised_marks) # noqa
|
||||
|
||||
try:
|
||||
import Corrfunc # noqa
|
||||
|
||||
from .tpcf import Mock2PCF # noqa
|
||||
import Corrfunc # noqa
|
||||
from .tpcf import Mock2PCF # noqa
|
||||
except ImportError:
|
||||
warn("`Corrfunc` not installed. 2PCF modules will not be available .") # noqa
|
||||
warn("`Corrfunc` not installed. 2PCF modules will not be available.",
|
||||
UserWarning) # noqa
|
||||
|
|
|
@ -12,15 +12,25 @@
|
|||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
from .density import (DensityField, PotentialField, TidalTensorField, # noqa
|
||||
VelocityField, radial_velocity, power_spectrum, # noqa
|
||||
overdensity_field) # noqa
|
||||
try:
|
||||
import MAS_library as MASL # noqa
|
||||
import Pk_library as PKL # noqa
|
||||
|
||||
from .density import (DensityField, PotentialField, TidalTensorField, # noqa
|
||||
VelocityField, radial_velocity, power_spectrum, # noqa
|
||||
overdensity_field) # noqa
|
||||
from .interp import (evaluate_cartesian_cic, evaluate_sky, evaluate_los, # noqa
|
||||
field2rsp, fill_outside, make_sky, # noqa
|
||||
observer_peculiar_velocity, smoothen_field, # noqa
|
||||
field_at_distance) # noqa
|
||||
except ImportError:
|
||||
from warnings import warn
|
||||
warn("`MAS_library` and `Pk_library` not installed. `density` and "
|
||||
"`interp` related modules are not available. "
|
||||
"Please install `Pylians`.", UserWarning)
|
||||
|
||||
from .enclosed_mass import (particles_enclosed_mass, # noqa
|
||||
particles_enclosed_momentum, field_enclosed_mass, # noqa
|
||||
bulkflow_peery2018) # noqa
|
||||
from .interp import (evaluate_cartesian_cic, evaluate_sky, evaluate_los, # noqa
|
||||
field2rsp, fill_outside, make_sky, # noqa
|
||||
observer_peculiar_velocity, smoothen_field, # noqa
|
||||
field_at_distance) # noqa
|
||||
from .corr import bayesian_bootstrap_correlation # noqa
|
||||
from .utils import nside2radec # noqa
|
||||
|
|
|
@ -102,7 +102,7 @@ def _field_enclosed_mass(field, rmax, boxsize):
|
|||
return mass * cell_volume, volume * cell_volume
|
||||
|
||||
|
||||
def field_enclosed_mass(field, distances, boxsize):
|
||||
def field_enclosed_mass(field, distances, boxsize, verbose=True):
|
||||
"""
|
||||
Calculate the approximate enclosed mass within a given radius from a
|
||||
density field, counts the mass in cells and volume of cells whose
|
||||
|
@ -116,6 +116,8 @@ def field_enclosed_mass(field, distances, boxsize):
|
|||
Radii to calculate the enclosed mass at in `Mpc / h`.
|
||||
boxsize : float
|
||||
Box size in `Mpc / h`.
|
||||
verbose : bool
|
||||
Verbosity flag.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
@ -127,7 +129,7 @@ def field_enclosed_mass(field, distances, boxsize):
|
|||
enclosed_mass = np.zeros_like(distances)
|
||||
enclosed_volume = np.zeros_like(distances)
|
||||
|
||||
for i, dist in enumerate(tqdm(distances)):
|
||||
for i, dist in enumerate(tqdm(distances, disable=not verbose)):
|
||||
enclosed_mass[i], enclosed_volume[i] = _field_enclosed_mass(
|
||||
field, dist, boxsize)
|
||||
|
||||
|
|
|
@ -14,9 +14,6 @@
|
|||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
from .flow_model import (DataLoader, radial_velocity_los, dist2redshift, # noqa
|
||||
dist2distmodulus, predict_zobs, project_Vext, # noqa
|
||||
SD_PV_validation_model, SN_PV_validation_model, # noqa
|
||||
TF_PV_validation_model, radec_to_galactic, # noqa
|
||||
sample_prior, make_loss, get_model, # noqa
|
||||
optimize_model_with_jackknife, distmodulus2dist, # noqa
|
||||
PV_validation_model, get_model, distmodulus2dist, # noqa
|
||||
Observed2CosmologicalRedshift, # noqa
|
||||
stack_pzosmo_over_realizations) # noqa
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -26,8 +26,6 @@ import numpy as np
|
|||
from astropy import units as u
|
||||
from astropy.coordinates import SkyCoord
|
||||
from numba import jit
|
||||
from numpyro.infer import util
|
||||
from scipy.stats import multivariate_normal
|
||||
|
||||
###############################################################################
|
||||
# Positions #
|
||||
|
@ -429,55 +427,127 @@ def thin_samples_by_acl(samples):
|
|||
return thinned_samples
|
||||
|
||||
|
||||
def numpyro_gof(model, mcmc, model_kwargs={}):
|
||||
###############################################################################
|
||||
# Model comparison #
|
||||
###############################################################################
|
||||
|
||||
|
||||
def BIC_AIC(samples, log_likelihood, ndata):
|
||||
"""
|
||||
Get the goodness-of-fit statistics for a sampled Numpyro model. Calculates
|
||||
the BIC and AIC using the maximum likelihood sampled point and the log
|
||||
evidence using the Laplace approximation.
|
||||
Get the BIC/AIC of HMC samples from a Numpyro model.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
model : numpyro model
|
||||
The model to evaluate.
|
||||
mcmc : numpyro MCMC
|
||||
The MCMC object containing the samples.
|
||||
ndata : int
|
||||
The number of data points.
|
||||
model_kwargs : dict, optional
|
||||
Additional keyword arguments to pass to the model.
|
||||
samples: dict
|
||||
Dictionary of samples from the Numpyro MCMC object.
|
||||
log_likelihood: numpy array
|
||||
Log likelihood values of the samples.
|
||||
ndata: int
|
||||
Number of data points.
|
||||
|
||||
Returns
|
||||
-------
|
||||
gof : dict
|
||||
Dictionary containing the BIC, AIC and logZ.
|
||||
BIC, AIC: floats
|
||||
"""
|
||||
samples = mcmc.get_samples(group_by_chain=False)
|
||||
log_likelihood = util.log_likelihood(model, samples, **model_kwargs)["ll"]
|
||||
|
||||
# Calculate the BIC using the maximum likelihood sampled point.
|
||||
kmax = np.argmax(log_likelihood)
|
||||
nparam = len(samples)
|
||||
try:
|
||||
ndata = model.ndata
|
||||
except AttributeError as e:
|
||||
raise AttributeError("The model must have an attribute `ndata` "
|
||||
"indicating the number of data points.") from e
|
||||
BIC = -2 * log_likelihood[kmax] + nparam * np.log(ndata)
|
||||
|
||||
# Calculate AIC
|
||||
# How many parameters?
|
||||
nparam = 0
|
||||
for val in samples.values():
|
||||
if val.ndim == 1:
|
||||
nparam += 1
|
||||
elif val.ndim == 2:
|
||||
nparam += val.shape[-1]
|
||||
else:
|
||||
raise ValueError("Invalid dimensionality of samples to count the number of parameters.") # noqa
|
||||
|
||||
BIC = nparam * np.log(ndata) - 2 * log_likelihood[kmax]
|
||||
AIC = 2 * nparam - 2 * log_likelihood[kmax]
|
||||
|
||||
# Calculate log(Z) using Laplace approximation.
|
||||
X = np.vstack([samples[key] for key in samples.keys()]).T
|
||||
mu, cov = multivariate_normal.fit(X)
|
||||
test_sample = {key: mu[i] for i, key in enumerate(samples.keys())}
|
||||
return float(BIC), float(AIC)
|
||||
|
||||
ll_mu = util.log_likelihood(model, test_sample, **model_kwargs)["ll"]
|
||||
cov_det = np.linalg.det(cov)
|
||||
D = len(mu)
|
||||
logZ = ll_mu + 0.5 * np.log(cov_det) + D / 2 * np.log(2 * np.pi)
|
||||
|
||||
# Convert to float
|
||||
out = {"BIC": BIC, "AIC": AIC, "logZ": logZ}
|
||||
out = {key: float(val) for key, val in out.items()}
|
||||
return out
|
||||
def dict_samples_to_array(samples):
    """
    Flatten a dictionary of samples into a single 2-dimensional array.

    Parameters
    ----------
    samples: dict
        Dictionary mapping a parameter name to its array of samples. Each
        array must be either 1-dimensional or 2-dimensional; in the latter
        case every column along the last axis is treated as a separate
        parameter.

    Returns
    -------
    data: 2-dimensional array
        Stacked samples with one column per (expanded) parameter.
    names: list of str
        Column labels. 1-dimensional entries keep their key, columns of
        2-dimensional entries are labelled `{key}_{i}`.

    Raises
    ------
    ValueError
        If any entry is neither 1- nor 2-dimensional.
    """
    columns, labels = [], []

    for name, chain in samples.items():
        ndim = chain.ndim
        if ndim not in (1, 2):
            raise ValueError("Invalid dimensionality of samples to stack.")

        if ndim == 1:
            columns.append(chain)
            labels.append(name)
            continue

        # Expand a vector-valued parameter into one column per component.
        for k in range(chain.shape[-1]):
            columns.append(chain[:, k])
            labels.append(f"{name}_{k}")

    return np.vstack(columns).T, labels
|
||||
|
||||
|
||||
def harmonic_evidence(samples, log_posterior, temperature=0.8, epochs_num=20,
                      return_flow_samples=True, verbose=True):
    """
    Calculate the evidence using the `harmonic` package. The model has a few
    more hyperparameters that are set to defaults now.

    Parameters
    ----------
    samples: 3-dimensional array
        MCMC samples of shape `(nchains, nsamples, ndim)`.
    log_posterior: 2-dimensional array
        Log posterior values of shape `(nchains, nsamples)`.
    temperature: float, optional
        Temperature of the `harmonic` model.
    epochs_num: int, optional
        Number of epochs for training the model.
    return_flow_samples: bool, optional
        Whether to return the flow samples.
    verbose: bool, optional
        Whether to print progress.

    Returns
    -------
    ln_inv_evidence, err_ln_inv_evidence: float and tuple of floats
        The log inverse evidence and its error.
    flow_samples: 2-dimensional array, optional
        Flow samples of shape `(nsamples, ndim)`. To check their agreement
        with the input samples. Only returned if `return_flow_samples` is
        `True`; as many samples are drawn as there are input samples
        (`nchains * nsamples`).

    Raises
    ------
    ValueError
        If `samples` is not 3-dimensional, or if `log_posterior` is not
        2-dimensional or its shape does not match the leading
        `(nchains, nsamples)` dimensions of `samples`.
    ImportError
        If the `harmonic` package is not installed.
    """
    # Validate the inputs before importing the optional dependency so that
    # malformed arrays are reported immediately.
    if samples.ndim != 3:
        raise ValueError("The samples must be a 3-dimensional array of shape `(nchains, nsamples, ndim)`.")  # noqa

    # Either condition on its own makes the input invalid, hence `or`: a wrong
    # rank or a shape that disagrees with the samples must both be rejected.
    if log_posterior.ndim != 2 or log_posterior.shape != samples.shape[:2]:
        raise ValueError("The log posterior must be a 2-dimensional array of shape `(nchains, nsamples)`.")  # noqa

    try:
        import harmonic as hm
    except ImportError:
        raise ImportError("The `harmonic` package is required to calculate the evidence.") from None  # noqa

    nchains, nsamples, ndim = samples.shape

    # Half of the chains train the model, the other half is used for the
    # evidence estimate.
    chains = hm.Chains(ndim)
    chains.add_chains_3d(samples, log_posterior)
    chains_train, chains_infer = hm.utils.split_data(
        chains, training_proportion=0.5)

    # This has a few more hyperparameters that are set to defaults now.
    model = hm.model.RQSplineModel(
        ndim, standardize=True, temperature=temperature)
    model.fit(chains_train.samples, epochs=epochs_num, verbose=verbose)

    ev = hm.Evidence(chains_infer.nchains, model)
    ev.add_chains(chains_infer)
    ln_inv_evidence = ev.ln_evidence_inv
    err_ln_inv_evidence = ev.compute_ln_inv_evidence_errors()

    if not return_flow_samples:
        return ln_inv_evidence, err_ln_inv_evidence

    # Draw as many flow samples as there are (flattened) input samples.
    flow_samples = model.sample(nchains * nsamples)
    return ln_inv_evidence, err_ln_inv_evidence, flow_samples
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue