* Switch to CB2

* Update for extrapolation

* Add 'nan' extrapolation

* Update nb

* Update submits

* Add Rmax to the models

* Update nb

* Add print statement

* Update script settings

* Update flow model to new method

* Update printing

* Update path

* Update so that it works

* Update nb

* Update submit

* Add Rmin for hollow bulk flows

* Update script

* Update script

* Update scripts back

* Update scripts back

* Fix normalization bug

* Update script

* pep8
This commit is contained in:
Richard Stiskalek 2024-07-25 11:48:37 +01:00 committed by GitHub
parent 73ffffb826
commit 8d49aa071b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 379 additions and 198 deletions

View File

@ -16,16 +16,15 @@
Tools for interpolating 3D fields at arbitrary positions.
"""
import MAS_library as MASL
import numpy
import numpy as np
import smoothing_library as SL
from scipy.interpolate import RegularGridInterpolator
from numba import jit
from scipy.interpolate import RegularGridInterpolator
from tqdm import tqdm, trange
from ..utils import periodic_wrap_grid, radec_to_cartesian
from .utils import divide_nonzero, force_single_precision, nside2radec
###############################################################################
# Cartesian interpolation #
###############################################################################
@ -61,7 +60,7 @@ def evaluate_cartesian_cic(*fields, pos, smooth_scales=None, verbose=False):
else:
shape = (pos.shape[0], len(smooth_scales))
interp_fields = [numpy.full(shape, numpy.nan, dtype=numpy.float32)
interp_fields = [np.full(shape, np.nan, dtype=np.float32)
for __ in range(len(fields))]
for i, field in enumerate(fields):
@ -73,7 +72,7 @@ def evaluate_cartesian_cic(*fields, pos, smooth_scales=None, verbose=False):
for j, scale in enumerate(iterator):
if not scale > 0:
fsmooth = numpy.copy(field)
fsmooth = np.copy(field)
else:
fsmooth = smoothen_field(field, scale, 1., make_copy=True)
MASL.CIC_interp(fsmooth, 1., pos, interp_fields[i][:, j])
@ -121,15 +120,15 @@ def evaluate_cartesian_regular(*fields, pos, smooth_scales=None,
ngrid = fields[0].shape[0]
cellsize = 1. / ngrid
X = numpy.linspace(0.5 * cellsize, 1 - 0.5 * cellsize, ngrid)
Y, Z = numpy.copy(X), numpy.copy(X)
X = np.linspace(0.5 * cellsize, 1 - 0.5 * cellsize, ngrid)
Y, Z = np.copy(X), np.copy(X)
interp_fields = [numpy.full(shape, numpy.nan, dtype=numpy.float32)
interp_fields = [np.full(shape, np.nan, dtype=np.float32)
for __ in range(len(fields))]
for i, field in enumerate(fields):
if smooth_scales is None:
field_interp = RegularGridInterpolator(
(X, Y, Z), field, fill_value=None, bounds_error=False,
(X, Y, Z), field, fill_value=np.nan, bounds_error=False,
method=method)
interp_fields[i] = field_interp(pos)
else:
@ -138,12 +137,12 @@ def evaluate_cartesian_regular(*fields, pos, smooth_scales=None,
for j, scale in enumerate(iterator):
if not scale > 0:
fsmooth = numpy.copy(field)
fsmooth = np.copy(field)
else:
fsmooth = smoothen_field(field, scale, 1., make_copy=True)
field_interp = RegularGridInterpolator(
(X, Y, Z), fsmooth, fill_value=None, bounds_error=False,
(X, Y, Z), fsmooth, fill_value=np.nan, bounds_error=False,
method=method)
interp_fields[i][:, j] = field_interp(pos)
@ -175,9 +174,9 @@ def observer_peculiar_velocity(velocity_field, smooth_scales=None,
vpec : 1-dimensional array of shape `(3,)` or `(len(smooth_scales), 3)`
"""
if observer is None:
pos = numpy.asanyarray([0.5, 0.5, 0.5]).reshape(1, 3)
pos = np.asanyarray([0.5, 0.5, 0.5]).reshape(1, 3)
else:
pos = numpy.asanyarray(observer).reshape(1, 3)
pos = np.asanyarray(observer).reshape(1, 3)
vx, vy, vz = evaluate_cartesian_cic(
*velocity_field, pos=pos, smooth_scales=smooth_scales, verbose=verbose)
@ -188,9 +187,9 @@ def observer_peculiar_velocity(velocity_field, smooth_scales=None,
vz = vz.reshape(-1, )
if smooth_scales is None:
return numpy.array([vx[0], vy[0], vz[0]])
return np.array([vx[0], vy[0], vz[0]])
return numpy.vstack([vx, vy, vz]).T
return np.vstack([vx, vy, vz]).T
###############################################################################
# Evaluating the fields along a LOS #
@ -233,19 +232,19 @@ def evaluate_los(*fields, sky_pos, boxsize, rmax, dr, smooth_scales=None,
"""
mpc2box = 1. / boxsize
if not isinstance(sky_pos, numpy.ndarray) and sky_pos.ndim != 2:
if not isinstance(sky_pos, np.ndarray) and sky_pos.ndim != 2:
raise ValueError("`sky_pos` must be a 2D array.")
nsamples = len(sky_pos)
if rmax > 0.5 * boxsize:
if interpolation_method == "cic" and rmax > 0.5 * boxsize:
raise ValueError("The maximum radius must be within the box.")
# Radial positions to evaluate for each line of sight.
rdist = numpy.arange(0, rmax, dr, dtype=fields[0].dtype)
rdist = np.arange(0, rmax, dr, dtype=fields[0].dtype)
# Create an array of radial positions and sky coordinates of each line of
# sight.
pos = numpy.empty((nsamples * len(rdist), 3), dtype=fields[0].dtype)
pos = np.empty((nsamples * len(rdist), 3), dtype=fields[0].dtype)
for i in range(nsamples):
start, end = i * len(rdist), (i + 1) * len(rdist)
pos[start:end, 0] = rdist * mpc2box
@ -261,7 +260,7 @@ def evaluate_los(*fields, sky_pos, boxsize, rmax, dr, smooth_scales=None,
smooth_scales = [smooth_scales]
if isinstance(smooth_scales, list):
smooth_scales = numpy.array(smooth_scales, dtype=numpy.float32)
smooth_scales = np.array(smooth_scales, dtype=np.float32)
smooth_scales *= mpc2box
@ -286,8 +285,7 @@ def evaluate_los(*fields, sky_pos, boxsize, rmax, dr, smooth_scales=None,
field_interp_reshaped = [None] * len(fields)
for i in range(len(fields)):
samples = numpy.full(shape_single, numpy.nan,
dtype=field_interp[i].dtype)
samples = np.full(shape_single, np.nan, dtype=field_interp[i].dtype)
for j in range(nsamples):
start, end = j * len(rdist), (j + 1) * len(rdist)
samples[j] = field_interp[i][start:end, ...]
@ -328,7 +326,7 @@ def evaluate_sky(*fields, pos, mpc2box, smooth_scales=None, verbose=False):
(list of) 1-dimensional array of shape `(n_samples, len(smooth_scales))`
"""
# Make a copy of the positions to avoid modifying the input.
pos = numpy.copy(pos)
pos = np.copy(pos)
pos = force_single_precision(pos)
pos[:, 0] *= mpc2box
@ -340,7 +338,7 @@ def evaluate_sky(*fields, pos, mpc2box, smooth_scales=None, verbose=False):
smooth_scales = [smooth_scales]
if isinstance(smooth_scales, list):
smooth_scales = numpy.array(smooth_scales, dtype=numpy.float32)
smooth_scales = np.array(smooth_scales, dtype=np.float32)
smooth_scales *= mpc2box
@ -374,26 +372,26 @@ def make_sky(field, angpos, dist, boxsize, verbose=True):
interp_field : 1-dimensional array of shape `(n_pos, )`.
"""
dx = dist[1] - dist[0]
assert numpy.allclose(dist[1:] - dist[:-1], dx)
assert np.allclose(dist[1:] - dist[:-1], dx)
assert angpos.ndim == 2 and dist.ndim == 1
# We loop over the angular directions, at each step evaluating a vector
# of distances. We pre-allocate arrays for speed.
dir_loop = numpy.full((dist.size, 3), numpy.nan, dtype=numpy.float32)
dir_loop = np.full((dist.size, 3), np.nan, dtype=np.float32)
ndir = angpos.shape[0]
out = numpy.full(ndir, numpy.nan, dtype=numpy.float32)
out = np.full(ndir, np.nan, dtype=np.float32)
for i in trange(ndir) if verbose else range(ndir):
dir_loop[:, 0] = dist
dir_loop[:, 1] = angpos[i, 0]
dir_loop[:, 2] = angpos[i, 1]
out[i] = numpy.sum(
out[i] = np.sum(
dist**2 * evaluate_sky(field, pos=dir_loop, mpc2box=1 / boxsize))
# Assuming the field is in h^2 Msun / kpc**3, we need to convert Mpc / h
# to kpc / h and multiply by the pixel area.
out *= dx * 1e9 * 4 * numpy.pi / len(angpos)
out *= dx * 1e9 * 4 * np.pi / len(angpos)
return out
@ -432,8 +430,7 @@ def field_at_distance(field, distance, boxsize, smooth_scales=None, nside=128,
# box Cartesian coordinates. We take HEALPix pixels because they are
# uniformly distributed on the sky.
angpos = nside2radec(nside)
X = numpy.hstack([numpy.ones(len(angpos)).reshape(-1, 1) * distance,
angpos])
X = np.hstack([np.ones(len(angpos)).reshape(-1, 1) * distance, angpos])
X = radec_to_cartesian(X) / boxsize + 0.5
return evaluate_cartesian_cic(field, pos=X, smooth_scales=smooth_scales,
@ -448,8 +445,8 @@ def field_at_distance(field, distance, boxsize, smooth_scales=None, nside=128,
@jit(nopython=True)
def make_gridpos(grid_size):
"""Make a regular grid of positions and distances from the center."""
grid_pos = numpy.full((grid_size**3, 3), numpy.nan, dtype=numpy.float32)
grid_dist = numpy.full(grid_size**3, numpy.nan, dtype=numpy.float32)
grid_pos = np.full((grid_size**3, 3), np.nan, dtype=np.float32)
grid_dist = np.full(grid_size**3, np.nan, dtype=np.float32)
n = 0
for i in range(grid_size):
@ -507,8 +504,8 @@ def field2rsp(field, radvel_field, box, MAS, init_value=0.):
grid_pos += 0.5
grid_pos = periodic_wrap_grid(grid_pos)
rsp_field = numpy.full(field.shape, init_value, dtype=numpy.float32)
cell_counts = numpy.zeros(rsp_field.shape, dtype=numpy.float32)
rsp_field = np.full(field.shape, init_value, dtype=np.float32)
cell_counts = np.zeros(rsp_field.shape, dtype=np.float32)
# Interpolate the field to the grid positions.
MASL.MA(grid_pos, rsp_field, 1., MAS, W=field.reshape(-1,))
@ -554,6 +551,6 @@ def smoothen_field(field, smooth_scale, boxsize, threads=1, make_copy=False):
threads)
if make_copy:
field = numpy.copy(field)
field = np.copy(field)
return SL.field_smoothing(field, W_k, threads)

View File

@ -82,8 +82,8 @@ class DataLoader:
self._cat = self._read_catalogue(catalogue, catalogue_fpath)
self._catname = catalogue
fprint("reading the interpolated field,", verbose)
self._field_rdist, self._los_density, self._los_velocity = self._read_field( # noqa
fprint("reading the interpolated field.", verbose)
self._field_rdist, self._los_density, self._los_velocity, self._rmax = self._read_field( # noqa
simname, ksim, catalogue, ksmooth, paths)
if len(self._field_rdist) % 2 == 0:
@ -183,6 +183,14 @@ class DataLoader:
return self._los_velocity[:, :, self._mask, ...]
@property
def rmax(self):
"""
Radial distance above which the underlying reconstruction is
extrapolated `(n_sims, n_objects)`.
"""
return self._rmax[:, self._mask]
@property
def los_radial_velocity(self):
"""
@ -205,6 +213,7 @@ class DataLoader:
los_density = [None] * len(ksims)
los_velocity = [None] * len(ksims)
rmax = [None] * len(ksims)
for n, ksim in enumerate(ksims):
nsim = nsims[ksim]
@ -219,11 +228,13 @@ class DataLoader:
los_density[n] = f[f"density_{nsim}"][indx]
los_velocity[n] = f[f"velocity_{nsim}"][indx]
rdist = f[f"rdist_{nsim}"][...]
rmax[n] = f[f"rmax_{nsim}"][indx]
los_density = np.stack(los_density)
los_velocity = np.stack(los_velocity)
rmax = np.stack(rmax)
return rdist, los_density, los_velocity
return rdist, los_density, los_velocity, rmax
def _read_catalogue(self, catalogue, catalogue_fpath):
if catalogue == "A2":
@ -456,7 +467,6 @@ def predict_zobs(dist, beta, Vext_radial, vpec_radial, Omega_m):
velocity field.
"""
zcosmo = dist2redshift(dist, Omega_m)
vrad = beta * vpec_radial + Vext_radial
return (1 + zcosmo) * (1 + vrad / SPEED_OF_LIGHT) - 1
@ -473,14 +483,13 @@ def calculate_ptilde_wo_bias(xrange, mu, err_squared, r_squared_xrange):
return ptilde
def calculate_likelihood_zobs(zobs, zobs_pred, sigma_v):
def calculate_likelihood_zobs(zobs, zobs_pred, e2_cz):
"""
Calculate the likelihood of the observed redshift given the predicted
redshift.
"""
dcz = SPEED_OF_LIGHT * (zobs[:, None] - zobs_pred)
sigma_v = sigma_v[:, None]
return jnp.exp(-0.5 * (dcz / sigma_v)**2) / jnp.sqrt(2 * np.pi) / sigma_v
return jnp.exp(-0.5 * dcz**2 / e2_cz) / jnp.sqrt(2 * np.pi * e2_cz)
###############################################################################
# Base flow validation #
@ -598,27 +607,18 @@ def sample_TFR(e_mu_min, e_mu_max, a_mean, a_std, b_mean, b_std):
def sample_calibration(Vext_min, Vext_max, Vmono_min, Vmono_max,
alpha_min, alpha_max, beta_min, beta_max, sigma_v_min,
sigma_v_max, sample_Vmono, sample_alpha, sample_beta):
sigma_v_max, sample_Vmono, sample_alpha, sample_beta,
sample_sigma_v_ext):
"""Sample the flow calibration."""
Vext = sample("Vext", Uniform(Vext_min, Vext_max).expand([3]))
sigma_v = sample("sigma_v", Uniform(sigma_v_min, sigma_v_max))
if sample_Vmono:
Vmono = sample("Vmono", Uniform(Vmono_min, Vmono_max))
else:
Vmono = 0.0
alpha = sample("alpha", Uniform(alpha_min, alpha_max)) if sample_alpha else 1.0 # noqa
beta = sample("beta", Uniform(beta_min, beta_max)) if sample_beta else 1.0 # noqa
Vmono = sample("Vmono", Uniform(Vmono_min, Vmono_max)) if sample_Vmono else 0.0 # noqa
sigma_v_ext = sample("sigma_v_ext", Uniform(sigma_v_min, sigma_v_max)) if sample_sigma_v_ext else sigma_v # noqa
if sample_alpha:
alpha = sample("alpha", Uniform(alpha_min, alpha_max))
else:
alpha = 1.0
if sample_beta:
beta = sample("beta", Uniform(beta_min, beta_max))
else:
beta = 1.0
return Vext, Vmono, sigma_v, alpha, beta
return Vext, Vmono, sigma_v, sigma_v_ext, alpha, beta
###############################################################################
@ -626,6 +626,23 @@ def sample_calibration(Vext_min, Vext_max, Vmono_min, Vmono_max,
###############################################################################
def find_extrap_mask(rmax, rdist):
"""
Make a mask of shape `(nsim, ngal, nrdist)` of which velocity field values
are extrapolated. above which the
"""
nsim, ngal = rmax.shape
extrap_mask = np.zeros((nsim, ngal, len(rdist)), dtype=bool)
extrap_weights = np.ones((nsim, ngal, len(rdist)))
for i in range(nsim):
for j in range(ngal):
k = np.searchsorted(rdist, rmax[i, j])
extrap_mask[i, j, k:] = True
extrap_weights[i, j, k:] = rmax[i, j] / rdist[k:]
return extrap_mask, extrap_weights
class PV_validation_model(BaseFlowValidationModel):
"""
Peculiar velocity validation model.
@ -636,6 +653,9 @@ class PV_validation_model(BaseFlowValidationModel):
LOS density field.
los_velocity : 3-dimensional array of shape (n_sims, n_objects, n_steps)
LOS radial velocity field.
rmax : 1-dimensional array of shape (n_sims, n_objects)
Radial distance above which the underlying reconstruction is
extrapolated.
RA, dec : 1-dimensional arrays of shape (n_objects)
Right ascension and declination in degrees.
z_obs : 1-dimensional array of shape (n_objects)
@ -650,7 +670,7 @@ class PV_validation_model(BaseFlowValidationModel):
Matter density parameter.
"""
def __init__(self, los_density, los_velocity, RA, dec, z_obs,
def __init__(self, los_density, los_velocity, rmax, RA, dec, z_obs,
e_zobs, calibration_params, r_xrange, Omega_m, kind):
if e_zobs is not None:
e2_cz_obs = jnp.asarray((SPEED_OF_LIGHT * e_zobs)**2)
@ -661,9 +681,9 @@ class PV_validation_model(BaseFlowValidationModel):
RA = np.deg2rad(RA)
dec = np.deg2rad(dec)
names = ["los_density", "los_velocity", "RA", "dec", "z_obs",
names = ["los_density", "los_velocity", "rmax", "RA", "dec", "z_obs",
"e2_cz_obs"]
values = [los_density, los_velocity, RA, dec, z_obs, e2_cz_obs]
values = [los_density, los_velocity, rmax, RA, dec, z_obs, e2_cz_obs]
self._setattr_as_jax(names, values)
self._set_calibration_params(calibration_params)
self._set_radial_spacing(r_xrange, Omega_m)
@ -672,11 +692,23 @@ class PV_validation_model(BaseFlowValidationModel):
self.Omega_m = Omega_m
self.norm = - self.ndata * jnp.log(self.num_sims)
extrap_mask, extrap_weights = find_extrap_mask(rmax, r_xrange)
self.extrap_mask = jnp.asarray(extrap_mask)
self.extrap_weights = jnp.asarray(extrap_weights)
def __call__(self, calibration_hyperparams, distmod_hyperparams,
store_ll_all=False):
"""NumPyro PV validation model."""
Vext, Vmono, sigma_v, alpha, beta = sample_calibration(**calibration_hyperparams) # noqa
cz_err = jnp.sqrt(sigma_v**2 + self.e2_cz_obs)
Vext, Vmono, sigma_v, sigma_v_ext, alpha, beta = sample_calibration(**calibration_hyperparams) # noqa
# Turn e2_cz to be of shape (nsims, ndata, nxrange) and apply
# sigma_v_ext where applicable
e2_cz = jnp.full_like(self.extrap_mask, sigma_v**2, dtype=jnp.float32)
if calibration_hyperparams["sample_sigma_v_ext"]:
e2_cz = e2_cz.at[self.extrap_mask].set(sigma_v_ext**2)
# Now add the observational errors
e2_cz += self.e2_cz_obs[None, :, None]
Vext_rad = project_Vext(Vext[0], Vext[1], Vext[2], self.RA, self.dec)
if self.kind == "SN":
@ -700,10 +732,13 @@ class PV_validation_model(BaseFlowValidationModel):
pnorm = simpson(ptilde, dx=self.dr, axis=-1)
# Calculate z_obs at each distance. Shape is (n_sims, ndata, nxrange)
vrad = beta * self.los_velocity + Vext_rad[None, :, None] + Vmono
# The weights are related to the extrapolation of the velocity field.
vrad = beta * self.los_velocity
vrad += (Vext_rad[None, :, None] + Vmono) * self.extrap_weights
zobs = (1 + self.z_xrange[None, None, :]) * (1 + vrad / SPEED_OF_LIGHT) - 1 # noqa
ptilde *= calculate_likelihood_zobs(self.z_obs, zobs, cz_err)
ptilde *= calculate_likelihood_zobs(self.z_obs, zobs, e2_cz)
# ptilde *= calculate_likelihood_zobs(self.z_obs, zobs, sigma_v)
ll = jnp.log(simpson(ptilde, dx=self.dr, axis=-1)) - jnp.log(pnorm)
if store_ll_all:
@ -740,6 +775,7 @@ def get_model(loader, zcmb_max=None, verbose=True):
los_overdensity = loader.los_density
los_velocity = loader.los_radial_velocity
rmax = loader.rmax
kind = loader._catname
if kind in ["LOSS", "Foundation"]:
@ -753,10 +789,9 @@ def get_model(loader, zcmb_max=None, verbose=True):
"e_c": e_c[mask]}
model = PV_validation_model(
los_overdensity[:, mask], los_velocity[:, mask], RA[mask],
dec[mask], zCMB[mask], e_zCMB, calibration_params,
los_overdensity[:, mask], los_velocity[:, mask], rmax[:, mask],
RA[mask], dec[mask], zCMB[mask], e_zCMB, calibration_params,
loader.rdist, loader._Omega_m, "SN")
# return model_old, model
elif "Pantheon+" in kind:
keys = ["RA", "DEC", "zCMB", "mB", "x1", "c", "biasCor_m_b", "mBERR",
"x1ERR", "cERR", "biasCorErr_m_b", "zCMB_SN", "zCMB_Group",
@ -766,7 +801,7 @@ def get_model(loader, zcmb_max=None, verbose=True):
mB -= bias_corr_mB
e_mB = np.sqrt(e_mB**2 + e_bias_corr_mB**2)
mask = (zCMB < zcmb_max)
mask = zCMB < zcmb_max
if kind == "Pantheon+_groups":
mask &= np.isfinite(zCMB_Group)
@ -782,8 +817,8 @@ def get_model(loader, zcmb_max=None, verbose=True):
"e_mB": e_mB[mask], "e_x1": e_x1[mask],
"e_c": e_c[mask]}
model = PV_validation_model(
los_overdensity[:, mask], los_velocity[:, mask], RA[mask],
dec[mask], zCMB[mask], e_zCMB[mask], calibration_params,
los_overdensity[:, mask], los_velocity[:, mask], rmax[:, mask],
RA[mask], dec[mask], zCMB[mask], e_zCMB[mask], calibration_params,
loader.rdist, loader._Omega_m, "SN")
elif kind in ["SFI_gals", "2MTF", "SFI_gals_masked"]:
keys = ["RA", "DEC", "z_CMB", "mag", "eta", "e_mag", "e_eta"]
@ -798,14 +833,13 @@ def get_model(loader, zcmb_max=None, verbose=True):
calibration_params = {"mag": mag[mask], "eta": eta[mask],
"e_mag": e_mag[mask], "e_eta": e_eta[mask]}
model = PV_validation_model(
los_overdensity[:, mask], los_velocity[:, mask], RA[mask],
dec[mask], zCMB[mask], None, calibration_params, loader.rdist,
loader._Omega_m, "TFR")
los_overdensity[:, mask], los_velocity[:, mask], rmax[:, mask],
RA[mask], dec[mask], zCMB[mask], None, calibration_params,
loader.rdist, loader._Omega_m, "TFR")
else:
raise ValueError(f"Catalogue `{kind}` not recognized.")
if verbose:
print(f"Selected {np.sum(mask)}/{len(mask)} galaxies.", flush=True)
fprint(f"selected {np.sum(mask)}/{len(mask)} galaxies.")
return model
@ -822,9 +856,13 @@ def _posterior_element(r, beta, Vext_radial, los_velocity, Omega_m, zobs,
`Observed2CosmologicalRedshift`.
"""
zobs_pred = predict_zobs(r, beta, Vext_radial, los_velocity, Omega_m)
likelihood = calculate_likelihood_zobs(zobs, zobs_pred, sigma_v)
prior = dVdOmega * los_density**alpha
return likelihood * prior
# Likelihood term
dcz = SPEED_OF_LIGHT * (zobs - zobs_pred)
posterior = jnp.exp(-0.5 * dcz**2 / sigma_v**2) / jnp.sqrt(2 * jnp.pi * sigma_v**2) # noqa
# Prior term
posterior *= dVdOmega * los_density**alpha
return posterior
class BaseObserved2CosmologicalRedshift(ABC):
@ -834,10 +872,10 @@ class BaseObserved2CosmologicalRedshift(ABC):
for i, key in enumerate(calibration_samples.keys()):
x = calibration_samples[key]
if not isinstance(x, (np.ndarray, jnp.ndarray)):
raise ValueError(f"Calibration sample {x} must be an array.")
raise ValueError(f"Calibration sample `{key}` must be an array.") # noqa
if x.ndim != 1:
raise ValueError(f"Calibration samples {x} must be 1D.")
if x.ndim != 1 and key != "Vext":
raise ValueError(f"Calibration samples `{key}` must be 1D.")
if i == 0:
ncalibratrion = len(x)
@ -863,12 +901,7 @@ class BaseObserved2CosmologicalRedshift(ABC):
self._ncalibration_samples = ncalibratrion
# It is best to JIT compile the functions right here.
self._vmap_simps = jit(vmap(lambda y: simpson(y, dx=dr)))
axs = (0, None, None, 0, None, None, None, None, 0, 0)
self._vmap_posterior_element = vmap(_posterior_element, in_axes=axs)
self._vmap_posterior_element = jit(self._vmap_posterior_element)
self._simps = jit(lambda y: simpson(y, dx=dr))
self._jit_posterior_element = jit(_posterior_element)
def get_calibration_samples(self, key):
"""Get calibration samples for a given key."""
@ -896,8 +929,8 @@ class Observed2CosmologicalRedshift(BaseObserved2CosmologicalRedshift):
Parameters
----------
calibration_samples : dict
Dictionary of flow calibration samples (`alpha`, `beta`, `Vext_x`,
`Vext_y`, `Vext_z`, `sigma_v`, ...).
Dictionary of flow calibration samples (`alpha`, `beta`, `Vext`,
`sigma_v`, ...).
r_xrange : 1-dimensional array
Radial comoving distances where the fields are interpolated for each
object.
@ -919,11 +952,9 @@ class Observed2CosmologicalRedshift(BaseObserved2CosmologicalRedshift):
def posterior_mean_std(self, x, px):
"""
Calculate the mean and standard deviation of a 1-dimensional PDF.
Assumes that the PDF is already normalized and that the spacing is that
of `r_xrange` which is inferred when initializing this class.
"""
mu = self._simps(x * px)
std = (self._simps(x**2 * px) - mu**2)**0.5
mu = simpson(x * px, x=x)
std = (simpson(x**2 * px, x=x) - mu**2)**0.5
return mu, std
def posterior_zcosmo(self, zobs, RA, dec, los_density, los_velocity,
@ -953,11 +984,8 @@ class Observed2CosmologicalRedshift(BaseObserved2CosmologicalRedshift):
posterior : 1-dimensional array
Posterior PDF.
"""
Vext_radial = project_Vext(
self.get_calibration_samples("Vext_x"),
self.get_calibration_samples("Vext_y"),
self.get_calibration_samples("Vext_z"),
RA, dec)
Vext = self.get_calibration_samples("Vext")
Vext_radial = project_Vext(*[Vext[:, i] for i in range(3)], RA, dec)
alpha = self.get_calibration_samples("alpha")
beta = self.get_calibration_samples("beta")
@ -970,13 +998,13 @@ class Observed2CosmologicalRedshift(BaseObserved2CosmologicalRedshift):
dtype=np.float32)
for i in trange(self.ncalibration_samples, desc="Marginalizing",
disable=not verbose):
posterior[i] = self._vmap_posterior_element(
posterior[i] = self._jit_posterior_element(
self._r_xrange, beta[i], Vext_radial[i], los_velocity,
self._Omega_m, zobs, sigma_v[i], alpha[i], self._dVdOmega,
los_density)
# Normalize the posterior for each flow sample and then stack them.
posterior /= self._vmap_simps(posterior).reshape(-1, 1)
# # Normalize the posterior for each flow sample and then stack them.
posterior /= simpson(posterior, x=self._zcos_xrange, axis=-1)[:, None]
posterior = jnp.nanmean(posterior, axis=0)
return self._zcos_xrange, posterior

View File

@ -352,7 +352,8 @@ def binned_statistic(x, y, left_edges, bin_width, statistic):
def fprint(msg, verbose=True):
"""Print and flush a message with a timestamp."""
if verbose:
print(f"{datetime.now()}: {msg}", flush=True)
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"{timestamp} {msg}", flush=True)
###############################################################################

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -26,10 +26,11 @@ import csiborgtools
import numpy as np
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.io import fits
from h5py import File
from mpi4py import MPI
from numba import jit
from taskmaster import work_delegation # noqa
from astropy.io import fits
from utils import get_nsims
@ -79,7 +80,7 @@ def get_los(catalogue_name, simname, comm):
RA = f["RA"][:]
dec = f["DEC"][:]
elif catalogue_name == "UPGLADE":
fname = "/mnt/users/rstiskalek/csiborgtools/data/upglade_z_0p05_all_PROCESSED.h5" # noqa
fname = "/mnt/users/rstiskalek/csiborgtools/data/upglade_all_z0p05_new_PROCESSED.h5" # noqa
with File(fname, 'r') as f:
RA = f["RA"][:]
dec = f["DEC"][:]
@ -242,6 +243,7 @@ def combine_from_simulations(catalogue_name, simname, nsims, outfolder,
f_out.create_dataset(f"rdist_{nsim}", data=f["rdist"][:])
f_out.create_dataset(f"density_{nsim}", data=f["density"][:])
f_out.create_dataset(f"velocity_{nsim}", data=f["velocity"][:])
f_out.create_dataset(f"rmax_{nsim}", data=f["rmax"][:])
# Remove the temporary file.
remove(fname)
@ -256,6 +258,30 @@ def combine_from_simulations(catalogue_name, simname, nsims, outfolder,
# Main interpolating function #
###############################################################################
@jit(nopython=True)
def find_index_of_first_nan(y):
for n in range(1, len(y)):
if np.isnan(y[n]):
return n
return None
def replace_nan_with_last_finite(x, y, apply_decay):
n = find_index_of_first_nan(y)
if n is None:
return y, x[-1]
y[n:] = y[n-1]
rmax = x[n-1]
if apply_decay:
# Optionally aply 1 / r decay
y[n:] *= rmax / x[n:]
return y, rmax
def interpolate_field(pos, simname, nsim, MAS, grid, dump_folder, rmax,
dr, smooth_scales, verbose=False):
@ -300,6 +326,14 @@ def interpolate_field(pos, simname, nsim, MAS, grid, dump_folder, rmax,
smooth_scales=smooth_scales, verbose=verbose,
interpolation_method="linear")
rmax_density = np.full((len(pos), len(smooth_scales)), np.nan)
for i in range(len(pos)):
for j in range(len(smooth_scales)):
y, current_rmax = replace_nan_with_last_finite(rdist, finterp[i, :, j], False) # noqa
finterp[i, :, j] = y
if current_rmax is not None:
rmax_density[i, j] = current_rmax
print(f"Writing temporary file `{fname_out}`.")
with File(fname_out, 'w') as f:
f.create_dataset("rdist", data=rdist)
@ -318,8 +352,20 @@ def interpolate_field(pos, simname, nsim, MAS, grid, dump_folder, rmax,
smooth_scales=smooth_scales, verbose=verbose,
interpolation_method="linear")
rmax_velocity = np.full((3, len(pos), len(smooth_scales)), np.nan)
for k in range(3):
for i in range(len(pos)):
for j in range(len(smooth_scales)):
y, current_rmax = replace_nan_with_last_finite(rdist, finterp[k][i, :, j], True) # noqa
finterp[k][i, :, j] = y
if current_rmax is not None:
rmax_velocity[k, i, j] = current_rmax
rmax_velocity = np.min(rmax_velocity, axis=0)
rmax = np.minimum(rmax_density, rmax_velocity)
with File(fname_out, 'a') as f:
f.create_dataset("velocity", data=finterp)
f.create_dataset("rmax", data=rmax)
###############################################################################
@ -339,8 +385,8 @@ if __name__ == "__main__":
parser.add_argument("--grid", type=int, help="Grid resolution.")
args = parser.parse_args()
rmax = 200
dr = 0.25
rmax = 300
dr = 0.5
smooth_scales = [0]
comm = MPI.COMM_WORLD

View File

@ -11,8 +11,7 @@ MAS="SPH"
grid=1024
# for catalogue in "LOSS" "Foundation" "Pantheon+" "2MTF" "SFI_gals"; do
for catalogue in "LOSS"; do
for catalogue in "UPGLADE"; do
# for catalogue in "Foundation"; do
pythoncm="$env $file --catalogue $catalogue --nsims $nsims --simname $simname --MAS $MAS --grid $grid"
if [ $on_login -eq 1 ]; then

View File

@ -100,6 +100,8 @@ def get_model(paths, get_model_kwargs, verbose=True):
ARGS.catalogue, fpath, paths,
ksmooth=ARGS.ksmooth)
print(f"\n{'Num. radial steps':<20} {len(loader.rdist)}\n", flush=True)
return csiborgtools.flow.get_model(loader, **get_model_kwargs)
@ -227,8 +229,6 @@ if __name__ == "__main__":
nsteps = 5000
nburn = 1000
zcmb_max = 0.06
sample_alpha = True
sample_beta = True
calculate_evidence = False
nchains_harmonic = 10
num_epochs = 30
@ -237,7 +237,6 @@ if __name__ == "__main__":
raise ValueError("The number of steps must be divisible by the number of chains.") # noqa
main_params = {"nsteps": nsteps, "nburn": nburn, "zcmb_max": zcmb_max,
"sample_alpha": sample_alpha, "sample_beta": sample_beta,
"calculate_evidence": calculate_evidence,
"nchains_harmonic": nchains_harmonic,
"num_epochs": num_epochs}
@ -247,10 +246,11 @@ if __name__ == "__main__":
"Vmono_min": -1000, "Vmono_max": 1000,
"alpha_min": -1.0, "alpha_max": 3.0,
"beta_min": -1.0, "beta_max": 3.0,
"sigma_v_min": 5.0, "sigma_v_max": 750.,
"sample_Vmono": True,
"sample_alpha": sample_alpha,
"sample_beta": sample_beta,
"sigma_v_min": 1.0, "sigma_v_max": 750.,
"sample_Vmono": False,
"sample_alpha": False,
"sample_beta": True,
"sample_sigma_v_ext": False,
}
print_variables(
calibration_hyperparams.keys(), calibration_hyperparams.values())
@ -280,5 +280,6 @@ if __name__ == "__main__":
get_model_kwargs = {"zcmb_max": zcmb_max}
model = get_model(paths, get_model_kwargs, )
run_model(model, nsteps, nburn, model_kwargs, out_folder, sample_beta,
calculate_evidence, nchains_harmonic, num_epochs, kwargs_print)
run_model(model, nsteps, nburn, model_kwargs, out_folder,
calibration_hyperparams["sample_beta"], calculate_evidence,
nchains_harmonic, num_epochs, kwargs_print)

View File

@ -19,9 +19,9 @@ fi
# Submit a job for each combination of simname, catalogue, ksim
# for simname in "Lilow2024" "CF4" "CF4gp" "csiborg1" "csiborg2_main" "csiborg2X"; do
for simname in "csiborg2X"; do
for simname in "Carrick2015"; do
# for simname in "csiborg1" "csiborg2_main" "csiborg2X"; do
for catalogue in "Pantheon+"; do
for catalogue in "Pantheon+_zSN"; do
# for catalogue in "2MTF"; do
# for ksim in 0 1 2; do
# for ksim in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do

View File

@ -21,19 +21,21 @@ from tqdm import tqdm
if __name__ == "__main__":
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
simname = "csiborg1"
simname = "csiborg2_random"
nsims = paths.get_ics(simname)
print(f"Number of simulations: {nsims}.")
fname_out = f"/mnt/users/rstiskalek/csiborgtools/data/halos_{simname}.hdf5"
fname_out = f"/mnt/users/rstiskalek/csiborgtools/data/random_halos_{simname}.hdf5" # noqa
print(f"Writing to `{fname_out}`.")
with File(fname_out, 'w') as f:
for nsim in tqdm(nsims, desc="Simulations"):
grp = f.create_group(f"sim_{nsim}")
cat = csiborgtools.read.CSiBORG1Catalogue(nsim, paths)
# cat = csiborgtools.read.CSiBORG1Catalogue(nsim, paths)
cat = csiborgtools.read.CSiBORG2Catalogue(
nsim, 99, "random", paths, )
grp["pos"] = cat["cartesian_pos"]
grp["totmass"] = cat["totmass"]

View File

@ -23,6 +23,7 @@ from os.path import join
import csiborgtools
import numpy as np
from csiborgtools import fprint
from h5py import File
from mpi4py import MPI
from taskmaster import work_delegation # noqa
@ -35,27 +36,35 @@ def t():
return datetime.now().strftime("%H:%M:%S")
def load_calibration(catalogue, simname, nsim, ksmooth, verbose=False):
def load_calibration(catalogue, simname, ksmooth, sample_beta,
verbose=False):
"""Load the pre-computed calibration samples."""
fname = f"/mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity/flow_samples_{catalogue}_{simname}_smooth_{ksmooth}.hdf5" # noqa
keys = ["Vext_x", "Vext_y", "Vext_z", "alpha", "beta", "sigma_v"]
fname = f"/mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity/samples_{simname}_{catalogue}_ksmooth{ksmooth}.hdf5" # noqa
if sample_beta:
fname = fname.replace(".hdf5", "_sample_beta.hdf5")
keys = ["Vext", "sigma_v", "alpha", "beta"]
calibration_samples = {}
with File(fname, 'r') as f:
for key in keys:
for n, key in enumerate(keys):
# In case alpha wasn't sampled just set to 1
if key == "alpha" and "alpha" not in f["samples"].keys():
calibration_samples[key] = np.full_like(
calibration_samples["sigma_v"], 1.0)
continue
# NOTE: here the posterior samples are down-sampled
calibration_samples[key] = f[f"sim_{nsim}/{key}"][:][::10]
calibration_samples[key] = f[f"samples/{key}"][:][::10]
if verbose:
k = list(calibration_samples.keys())[0]
nsamples = len(calibration_samples[k])
print(f"{t()}: found {nsamples} calibration posterior samples.",
flush=True)
if n == 0:
num_samples_original = len(f[f"samples/{key}"])
num_samples_final = len(calibration_samples[key])
fprint(f"downsampling calibration samples from {num_samples_original} to {num_samples_final}.", verbose=verbose) # noqa
return calibration_samples
def main(loader, model, indxs, fdir, fname, num_split, verbose):
def main(loader, nsim, model, indxs, fdir, fname, num_split, verbose):
out = np.full(
len(indxs), np.nan,
dtype=[("mean_zcosmo", float), ("std_zcosmo", float)])
@ -65,7 +74,7 @@ def main(loader, model, indxs, fdir, fname, num_split, verbose):
disable=not verbose)):
x, y = model.posterior_zcosmo(
loader.cat["zcmb"][n], loader.cat["RA"][n], loader.cat["DEC"][n],
loader.los_density[n], loader.los_radial_velocity[n],
loader.los_density[nsim, n], loader.los_radial_velocity[nsim, n],
extra_sigma_v=loader.cat["e_zcmb"][n] * SPEED_OF_LIGHT,
verbose=False)
@ -98,7 +107,7 @@ if __name__ == "__main__":
# Galaxy sample parameters
catalogue = "UPGLADE"
fpath_data = "/mnt/users/rstiskalek/csiborgtools/data/upglade_z_0p05_all_PROCESSED.h5" # noqa
fpath_data = "/mnt/users/rstiskalek/csiborgtools/data/upglade_all_z0p05_new_PROCESSED.h5" # noqa
# Number of splits for MPI
nsplits = 1000
@ -112,12 +121,14 @@ if __name__ == "__main__":
simname, nsim, catalogue, fpath_data, paths, ksmooth=ksmooth,
verbose=rank == 0)
calibration_samples = load_calibration(
catalogue_calibration, simname, nsim, ksmooth, verbose=rank == 0)
catalogue_calibration, simname, ksmooth, sample_beta=True,
verbose=rank == 0)
model = csiborgtools.flow.Observed2CosmologicalRedshift(
calibration_samples, loader.rdist, loader._Omega_m)
if rank == 0:
print(f"{t()}: the catalogue size is {loader.cat['zcmb'].size}.")
print(f"{t()}: loaded calibration samples and model.", flush=True)
fprint(f"catalogue size is {loader.cat['zcmb'].size}.", verbose=rank == 0)
fprint("loaded calibration samples and model.", verbose=rank == 0)
# Decide how to split up the job
if rank == 0:
@ -131,7 +142,8 @@ if __name__ == "__main__":
# Process all splits with MPI, the rank 0 delegates the jobs.
def main_wrapper(n):
main(loader, model, split_indxs[n], fdir, fname, n, verbose=size == 1)
main(loader, nsim, model, split_indxs[n], fdir, fname, n,
verbose=size == 1)
comm.Barrier()
work_delegation(

View File

@ -1,6 +1,6 @@
nthreads=${1}
on_login=${2}
memory=4
memory=12
queue="redwood"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="post_upglade.py"

View File

@ -15,6 +15,9 @@
"""
A script to calculate the bulk flow in Quijote simulations from either
particles or FoF haloes and to also save the resulting smaller halo catalogues.
If `Rmin > 0` the bulk flows computed from projected radial velocities are
wrong, but the 3D volume average bulk flows are still correct.
"""
from datetime import datetime
from os.path import join
@ -70,7 +73,7 @@ def volume_bulk_flow(rdist, mass, vel, distances):
###############################################################################
def main(nsim, folder, fname_basis, Rmax, subtract_observer_velocity,
def main(nsim, folder, fname_basis, Rmin, Rmax, subtract_observer_velocity,
verbose=True):
boxsize = csiborgtools.simname2boxsize("quijote")
observers = csiborgtools.read.fiducial_observers(boxsize, Rmax)
@ -100,6 +103,11 @@ def main(nsim, folder, fname_basis, Rmax, subtract_observer_velocity,
return_distance=True, sort_results=True)
rdist_part, indxs = rdist_part[0], indxs[0]
# And only the ones that are above Rmin
mask = rdist_part > Rmin
rdist_part = rdist_part[mask]
indxs = indxs[mask]
part_pos_current = part_pos[indxs] - observers[i]
part_vel_current = part_vel[indxs]
# Quijote particle masses are all equal
@ -110,13 +118,16 @@ def main(nsim, folder, fname_basis, Rmax, subtract_observer_velocity,
np.asarray(observers[i]).reshape(1, -1), Rmax,
return_distance=True, sort_results=True)
rdist_halo, indxs = rdist_halo[0], indxs[0]
mask = rdist_halo > Rmin
rdist_halo = rdist_halo[mask]
indxs = indxs[mask]
halo_pos_current = halo_pos[indxs] - observers[i]
halo_vel_current = halo_vel[indxs]
halo_mass_current = halo_mass[indxs]
# Subtract the observer velocity
rscale = 0.5 # Mpc / h
rscale = 2.0 # Mpc / h
weights = np.exp(-0.5 * (rdist_part / rscale)**2)
obs_vel_x = np.average(part_vel_current[:, 0], weights=weights)
obs_vel_y = np.average(part_vel_current[:, 1], weights=weights)
@ -183,6 +194,7 @@ def main(nsim, folder, fname_basis, Rmax, subtract_observer_velocity,
if __name__ == "__main__":
Rmin = 0
Rmax = 150
subtract_observer_velocity = True
folder = "/mnt/extraspace/rstiskalek/quijote/BulkFlow_fiducial"
@ -195,7 +207,7 @@ if __name__ == "__main__":
nsims = list(paths.get_ics("quijote"))
def main_wrapper(nsim):
main(nsim, folder, fname_basis, Rmax, subtract_observer_velocity,
main(nsim, folder, fname_basis, Rmin, Rmax, subtract_observer_velocity,
verbose=rank == 0)
if rank == 0:

View File

@ -1,4 +1,4 @@
nthreads=20
nthreads=12
memory=24
on_login=0
queue="berg"