Add simple distance flow model (#114)

* Add imports

* Add field LOS paths

* Add basic flow model

* Edit script

* Add nb

* Add nb

* Update nb

* Add some docs

* Add RA reading

* Add imoprts

* Updates to the flow model

* Update script

* Bring back A2

* Update imports

* Update imports

* Add Carrick to ICs

* Add Carrick boxsize

* Add Carrick and fix minor bugs

* Add Carrick box

* Update script

* Edit imports

* Add fixed flow!

* Update omega_m and add it

* Update nb

* Update nb

* Update nb

* Remove old print statements

* Update params

* Add thinning of chains

* Add import

* Add flow validation script

* Add submit script

* Add ksmooth

* Update nb

* Update params

* Update script

* Update string

* Move where distributions are defined

* Add density bias parameter

* Add lognorm mean

* Update scripts

* Update script
This commit is contained in:
Richard Stiskalek 2024-03-08 10:44:19 +00:00 committed by GitHub
parent fb93f85543
commit a65e3cb15b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 1762 additions and 60 deletions

View file

@ -12,14 +12,15 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from csiborgtools import clustering, field, halo, match, read, summary # noqa
from csiborgtools import clustering, field, flow, halo, match, read, summary # noqa
from .utils import (center_of_mass, delta2ncells, number_counts, # noqa
periodic_distance, periodic_distance_two_points, # noqa
binned_statistic, cosine_similarity, fprint, # noqa
hms_to_degrees, dms_to_degrees, great_circle_distance, # noqa
radec_to_cartesian, cartesian_to_radec) # noqa
from .params import paths_glamdring, simname2boxsize # noqa
radec_to_cartesian, cartesian_to_radec, # noqa
thin_samples_by_acl) # noqa
from .params import paths_glamdring, simname2boxsize, simname2Omega_m # noqa
###############################################################################

View file

@ -0,0 +1,17 @@
# Copyright (C) 2024 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .flow_model import (DataLoader, radial_velocity_los, dist2redshift, # noqa
dist2distmodulus, predict_zobs, project_Vext, # noqa
SD_PV_validation_model) # noqa

View file

@ -0,0 +1,657 @@
# Copyright (C) 2024 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Validation of the CSiBORG velocity field against PV measurements. Based on [1].
References
----------
[1] https://arxiv.org/abs/1912.09383.
"""
from datetime import datetime
from warnings import warn
import numpy as np
import numpyro
import numpyro.distributions as dist
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.cosmology import FlatLambdaCDM
from h5py import File
from jax import numpy as jnp
from jax import vmap
from tqdm import tqdm, trange
from ..params import simname2Omega_m
from ..read import CSiBORG1Catalogue
SPEED_OF_LIGHT = 299792.458 # km / s
def t():
"""Shortcut to get the current time."""
return datetime.now().strftime("%H:%M:%S")
def radec_to_galactic(ra, dec):
"""
Convert right ascension and declination to galactic coordinates (all in
degrees.)
Parameters
----------
ra, dec : float or 1-dimensional array
Right ascension and declination in degrees.
Returns
-------
l, b : float or 1-dimensional array
"""
c = SkyCoord(ra=ra*u.degree, dec=dec*u.degree, frame='icrs')
return c.galactic.l.degree, c.galactic.b.degree
###############################################################################
# Data loader #
###############################################################################
class DataLoader:
"""
Data loader for the line of sight (LOS) interpolated fields and the
corresponding catalogues.
Parameters
----------
simname : str
Simulation name.
catalogue : str
Name of the catalogue with LOS objects.
catalogue_fpath : str
Path to the LOS catalogue file.
paths : csiborgtools.read.Paths
Paths object.
ksmooth : int, optional
Smoothing index.
store_full_velocity : bool, optional
Whether to store the full 3D velocity field. Otherwise stores only
the radial velocity.
"""
def __init__(self, simname, catalogue, catalogue_fpath, paths,
ksmooth=None, store_full_velocity=False):
print(f"{t()}: reading the catalogue.")
self._cat = self._read_catalogue(catalogue, catalogue_fpath)
self._catname = catalogue
print(f"{t()}: reading the interpolated field.")
self._field_rdist, self._los_density, self._los_velocity = self._read_field( # noqa
simname, catalogue, ksmooth, paths)
if len(self._field_rdist) % 2 == 0:
warn(f"The number of radial steps is even. Skipping the first "
f"step at {self._field_rdist[0]} because Simpson's rule "
"requires an odd number of steps.")
self._field_rdist = self._field_rdist[1:]
self._los_density = self._los_density[..., 1:]
self._los_velocity = self._los_velocity[..., 1:]
if len(self._cat) != len(self._los_density):
raise ValueError("The number of objects in the catalogue does not "
"match the number of objects in the field.")
print(f"{t()}: calculating the radial velocity.")
nobject, nsim = self._los_density.shape[:2]
# In case of Carrick 2015 the box is in galactic coordinates..
if simname == "Carrick2015":
d1, d2 = radec_to_galactic(self._cat["RA"], self._cat["DEC"])
else:
d1, d2 = self._cat["RA"], self._cat["DEC"]
radvel = np.empty((nobject, nsim, len(self._field_rdist)),
self._los_velocity.dtype)
for i in trange(nobject):
for j in range(nsim):
radvel[i, j, :] = radial_velocity_los(
self._los_velocity[:, i, j, ...], d1[i], d2[i])
self._los_radial_velocity = radvel
if not store_full_velocity:
self._los_velocity = None
Omega_m = simname2Omega_m(simname)
# Normalize the CSiBORG density by the mean matter density
if "csiborg" in simname:
cosmo = FlatLambdaCDM(H0=100, Om0=Omega_m)
mean_rho_matter = cosmo.critical_density0.to("Msun/kpc^3").value
mean_rho_matter *= Omega_m
self._los_density /= mean_rho_matter
# Since Carrick+2015 provide `rho / <rho> - 1`
if simname == "Carrick2015":
self._los_density += 1
@property
def cat(self):
"""
The distance indicators catalogue.
Returns
-------
structured array
"""
return self._cat
@property
def catname(self):
"""
Name of the catalogue.
Returns
-------
str
"""
return self._catname
@property
def rdist(self):
"""
Radial distances where the field was interpolated for each object.
Returns
-------
1-dimensional array
"""
return self._field_rdist
@property
def los_density(self):
"""
Density field along the line of sight.
Returns
----------
3-dimensional array of shape (n_objects, n_simulations, n_steps)
"""
return self._los_density
@property
def los_velocity(self):
"""
Velocity field along the line of sight.
Returns
-------
4-dimensional array of shape (n_objects, n_simulations, 3, n_steps)
"""
if self._los_velocity is None:
raise ValueError("The 3D velocities were not stored.")
return self._los_velocity
@property
def los_radial_velocity(self):
"""
Radial velocity along the line of sight.
Returns
-------
3-dimensional array of shape (n_objects, n_simulations, n_steps)
"""
return self._los_radial_velocity
def _read_field(self, simname, catalogue, k, paths):
"""Read in the interpolated field."""
out_density = None
out_velocity = None
has_smoothed = False
nsims = paths.get_ics(simname)
with File(paths.field_los(simname, catalogue), 'r') as f:
has_smoothed = True if f[f"density_{nsims[0]}"].ndim > 2 else False
if has_smoothed and (k is None or not isinstance(k, int)):
raise ValueError("The output contains smoothed field but "
"`ksmooth` is None. It must be provided.")
for i, nsim in enumerate(tqdm(nsims)):
if out_density is None:
nobject, nstep = f[f"density_{nsim}"].shape[:2]
out_density = np.empty(
(nobject, len(nsims), nstep), dtype=np.float32)
out_velocity = np.empty(
(3, nobject, len(nsims), nstep), dtype=np.float32)
indx = (..., k) if has_smoothed else (...)
out_density[:, i, :] = f[f"density_{nsim}"][indx]
out_velocity[:, :, i, :] = f[f"velocity_{nsim}"][indx]
rdist = f[f"rdist_{nsims[0]}"][:]
return rdist, out_density, out_velocity
def _read_catalogue(self, catalogue, catalogue_fpath):
"""
Read in the distance indicator catalogue.
"""
if catalogue == "A2":
with File(catalogue_fpath, 'r') as f:
dtype = [(key, np.float32) for key in f.keys()]
arr = np.empty(len(f["RA"]), dtype=dtype)
for key in f.keys():
arr[key] = f[key][:]
elif catalogue == "LOSS" or catalogue == "Foundation":
with File(catalogue_fpath, 'r') as f:
grp = f[catalogue]
dtype = [(key, np.float32) for key in grp.keys()]
arr = np.empty(len(grp["RA"]), dtype=dtype)
for key in grp.keys():
arr[key] = grp[key][:]
elif "csiborg1" in catalogue:
nsim = int(catalogue.split("_")[-1])
cat = CSiBORG1Catalogue(nsim, bounds={"totmass": (1e13, None)})
seed = 42
gen = np.random.default_rng(seed)
mask = gen.choice(len(cat), size=100, replace=False)
keys = ["r_hMpc", "RA", "DEC"]
dtype = [(key, np.float32) for key in keys]
arr = np.empty(len(mask), dtype=dtype)
sph_pos = cat["spherical_pos"]
arr["r_hMpc"] = sph_pos[mask, 0]
arr["RA"] = sph_pos[mask, 1]
arr["DEC"] = sph_pos[mask, 2]
# TODO: add peculiar velocit
else:
raise ValueError(f"Unknown catalogue: `{catalogue}`.")
return arr
###############################################################################
# Supplementary flow functions #
###############################################################################
def radial_velocity_los(los_velocity, ra, dec):
"""
Calculate the radial velocity along the line of sight.
Parameters
----------
los_velocity : 2-dimensional array of shape (3, n_steps)
Line of sight velocity field.
ra, dec : floats
Right ascension and declination of the line of sight.
is_degrees : bool, optional
Whether the angles are in degrees.
Returns
-------
1-dimensional array of shape (n_steps)
"""
types = (float, np.float32, np.float64)
if not isinstance(ra, types) and not isinstance(dec, types):
raise ValueError("RA and dec must be floats.")
if los_velocity.ndim != 2 and los_velocity.shape[0] != 3:
raise ValueError("The shape of `los_velocity` must be (3, n_steps).")
ra_rad, dec_rad = np.deg2rad(ra), np.deg2rad(dec)
vx, vy, vz = los_velocity
return (vx * np.cos(ra_rad) * np.cos(dec_rad)
+ vy * np.sin(ra_rad) * np.cos(dec_rad)
+ vz * np.sin(dec_rad))
###############################################################################
# JAX Flow model #
###############################################################################
def lognorm_mean_std_to_loc_scale(mu, std):
"""
Calculate the location and scale parameters for the log-normal distribution
from the mean and standard deviation.
Parameters
----------
mu, std : float
Mean and standard deviation.
Returns
-------
loc, scale : float
"""
loc = np.log(mu) - 0.5 * np.log(1 + (std / mu) ** 2)
scale = np.sqrt(np.log(1 + (std / mu) ** 2))
return loc, scale
def simps(y, dx):
"""
Simpson's rule 1D integration, assuming that the number of steps is even
and that the step size is constant.
Parameters
----------
y : 1-dimensional array
Function values.
dx : float
Step size.
Returns
-------
float
"""
if len(y) % 2 == 0:
raise ValueError("The number of steps must be odd.")
return dx / 3 * jnp.sum(y[0:-1:2] + 4 * y[1::2] + y[2::2])
def dist2redshift(dist, Omega_m):
"""
Convert comoving distance to cosmological redshift if the Universe is
flat and z << 1.
Parameters
----------
dist : float or 1-dimensional array
Comoving distance in `Mpc / h`.
Omega_m : float
Matter density parameter.
Returns
-------
float or 1-dimensional array
"""
H0 = 100
eta = 3 * Omega_m / 2
return 1 / eta * (1 - (1 - 2 * H0 * dist / SPEED_OF_LIGHT * eta)**0.5)
def dist2distmodulus(dist, Omega_m):
"""
Convert comoving distance to distance modulus, assuming z << 1.
Parameters
----------
dist : float or 1-dimensional array
Comoving distance in `Mpc / h`.
Omega_m : float
Matter density parameter.
Returns
-------
float or 1-dimensional array
"""
zcosmo = dist2redshift(dist, Omega_m)
luminosity_distance = dist * (1 + zcosmo)
return 5 * jnp.log10(luminosity_distance) + 25
def project_Vext(Vext_x, Vext_y, Vext_z, RA, dec):
"""
Project the external velocity onto the line of sight along direction
specified by RA/dec. Note that the angles must be in radians.
Parameters
----------
Vext_x, Vext_y, Vext_z : floats
Components of the external velocity.
RA, dec : floats
Right ascension and declination in radians
Returns
-------
float
"""
cos_dec = jnp.cos(dec)
return (Vext_x * jnp.cos(RA) * cos_dec
+ Vext_y * jnp.sin(RA) * cos_dec
+ Vext_z * jnp.sin(dec))
def predict_zobs(dist, beta, Vext_radial, vpec_radial, Omega_m):
"""
Predict the observed redshift at a given comoving distance given some
velocity field.
Parameters
----------
dist : float
Comoving distance in `Mpc / h`.
beta : float
Velocity bias parameter.
Vext_radial : float
Radial component of the external velocity along the LOS.
vpec_radial : float
Radial component of the peculiar velocity along the LOS.
Omega_m : float
Matter density parameter.
Returns
-------
float
"""
zcosmo = dist2redshift(dist, Omega_m)
vrad = beta * vpec_radial + Vext_radial
return (1 + zcosmo) * (1 + vrad / SPEED_OF_LIGHT) - 1
###############################################################################
# Flow validation models #
###############################################################################
def calculate_ptilde_wo_bias(xrange, mu, err, r_squared_xrange=None):
"""
Calculate `ptilde(r)` without any bias.
Parameters
----------
xrange : 1-dimensional array
Radial distances where the field was interpolated for each object.
mu : float
Comoving distance in `Mpc / h`.
err : float
Error on the comoving distance in `Mpc / h`.
r_squared_xrange : 1-dimensional array, optional
Radial distances squared where the field was interpolated for each
object. If not provided, the `r^2` correction is not applied.
Returns
-------
1-dimensional array
"""
ptilde = jnp.exp(-0.5 * ((xrange - mu) / err)**2)
if r_squared_xrange is not None:
ptilde *= r_squared_xrange
return ptilde
def calculate_ll_zobs(zobs, zobs_pred, sigma_v):
"""
Calculate the likelihood of the observed redshift given the predicted
redshift.
Parameters
----------
zobs : float
Observed redshift.
zobs_pred : float
Predicted redshift.
sigma_v : float
Velocity uncertainty.
Returns
-------
float
"""
dcz = SPEED_OF_LIGHT * (zobs - zobs_pred)
return jnp.exp(-0.5 * (dcz / sigma_v)**2) / jnp.sqrt(2 * np.pi) / sigma_v
class SD_PV_validation_model:
"""
Simple distance peculiar velocity (PV) validation model, assuming that
we already have a calibrated estimate of the comoving distance to the
objects.
Parameters
----------
los_density : 2-dimensional array of shape (n_objects, n_steps)
LOS density field.
los_velocity : 3-dimensional array of shape (n_objects, n_steps)
LOS radial velocity field.
RA, dec : 1-dimensional arrays of shape (n_objects)
Right ascension and declination in degrees.
z_obs : 1-dimensional array of shape (n_objects)
Observed redshifts.
r_hMpc : 1-dimensional array of shape (n_objects)
Estimated comoving distances in `h^-1 Mpc`.
e_r_hMpc : 1-dimensional array of shape (n_objects)
Errors on the estimated comoving distances in `h^-1 Mpc`.
r_xrange : 1-dimensional array
Radial distances where the field was interpolated for each object.
Omega_m : float
Matter density parameter.
"""
def __init__(self, los_density, los_velocity, RA, dec, z_obs,
r_hMpc, e_r_hMpc, r_xrange, Omega_m):
# Convert everything to JAX arrays.
dt = jnp.float32
self._los_density = jnp.asarray(los_density, dtype=dt)
self._los_velocity = jnp.asarray(los_velocity, dtype=dt)
self._RA = jnp.asarray(np.deg2rad(RA), dtype=dt)
self._dec = jnp.asarray(np.deg2rad(dec), dtype=dt)
self._z_obs = jnp.asarray(z_obs, dtype=dt)
self._r_hMpc = jnp.asarray(r_hMpc, dtype=dt)
self._e_rhMpc = jnp.asarray(e_r_hMpc, dtype=dt)
# Get radius squared
r2_xrange = r_xrange**2
r2_xrange /= r2_xrange.mean()
# Get the stepsize, we need it to be constant for Simpson's rule.
dr = np.diff(r_xrange)
if not np.all(np.isclose(dr, dr[0], atol=1e-5)):
raise ValueError("The radial step size must be constant.")
dr = dr[0]
# Get the various vmapped functions
self._vmap_ptilde_wo_bias = vmap(lambda mu, err: calculate_ptilde_wo_bias(r_xrange, mu, err, r2_xrange)) # noqa
self._vmap_simps = vmap(lambda y: simps(y, dr))
self._vmap_zobs = vmap(lambda beta, Vr, vpec_rad: predict_zobs(r_xrange, beta, Vr, vpec_rad, Omega_m), in_axes=(None, 0, 0)) # noqa
self._vmap_ll_zobs = vmap(lambda zobs, zobs_pred, sigma_v: calculate_ll_zobs(zobs, zobs_pred, sigma_v), in_axes=(0, 0, None)) # noqa
# Vext_x, Vext_y, Vext_z: external velocity components
self._dist_Vext = dist.Uniform(-1000, 1000)
# We want sigma_v to be 150 +- 100 km / s (lognormal)
self._dist_sigma_v = dist.LogNormal(
*lognorm_mean_std_to_loc_scale(150, 100))
# Density power-law bias
self._dist_alpha = dist.LogNormal(
*lognorm_mean_std_to_loc_scale(1.0, 0.5))
# Velocity bias
self._dist_beta = dist.Normal(1., 0.5)
def __call__(self):
"""
The simple distance NumPyro PV validation model. Samples the following
parameters:
- `Vext_x`, `Vext_y`, `Vext_z`: external velocity components
- `alpha`: density bias parameter
- `beta`: velocity bias parameter
- `sigma_v`: velocity uncertainty
"""
Vx = numpyro.sample("Vext_x", self._dist_Vext)
Vy = numpyro.sample("Vext_y", self._dist_Vext)
Vz = numpyro.sample("Vext_z", self._dist_Vext)
alpha = numpyro.sample("alpha", self._dist_alpha)
beta = numpyro.sample("beta", self._dist_beta)
sigma_v = numpyro.sample("sigma_v", self._dist_sigma_v)
Vext_rad = project_Vext(Vx, Vy, Vz, self._RA, self._dec)
# Calculate p(r) and multiply it by the galaxy bias
ptilde = self._vmap_ptilde_wo_bias(self._r_hMpc, self._e_rhMpc)
ptilde *= self._los_density**alpha
# Normalization of p(r)
pnorm = self._vmap_simps(ptilde)
# Calculate p(z_obs) and multiply it by p(r)
zobs_pred = self._vmap_zobs(beta, Vext_rad, self._los_velocity)
ptilde *= self._vmap_ll_zobs(self._z_obs, zobs_pred, sigma_v)
ll = jnp.sum(jnp.log(self._vmap_simps(ptilde) / pnorm))
numpyro.factor("ll", ll)
# def SN_PV_wcal_validation_model(los_overdensity=None, los_velocity=None,
# RA=None, dec=None, z_CMB=None,
# mB=None, x1=None, c=None,
# e_mB=None, e_x1=None, e_c=None,
# mu_xrange=None, r_xrange=None,
# norm_r2_xrange=None, Omega_m=None, dr=None):
# """
# Pass
# """
# Vx = numpyro.sample("Vext_x", dist.Uniform(-1000, 1000))
# Vy = numpyro.sample("Vext_y", dist.Uniform(-1000, 1000))
# Vz = numpyro.sample("Vext_z", dist.Uniform(-1000, 1000))
# beta = numpyro.sample("beta", dist.Uniform(-10, 10))
#
# # TODO: Later sample these as well.
# e_mu_intrinsic = 0.064
# alpha_cal = 0.135
# beta_cal = 2.9
# mag_cal = -18.555
# sigma_v = 112
#
# # TODO: Check these for fiducial values.
# mu = mB - mag_cal + alpha_cal * x1 - beta_cal * c
# squared_e_mu = e_mB**2 + alpha_cal**2 * e_x1**2 + beta_cal**2 * e_c**2
#
# squared_e_mu += e_mu_intrinsic**2
# ll = 0.
# for i in range(len(los_overdensity)):
# # Project the external velocity for this galaxy.
# Vext_rad = project_Vext(Vx, Vy, Vz, RA[i], dec[i])
#
# dmu = mu_xrange - mu[i]
# ptilde = norm_r2_xrange * jnp.exp(-0.5 * dmu**2 / squared_e_mu[i])
# # TODO: Add some bias
# ptilde *= (1 + los_overdensity[i])
#
# zobs_pred = predict_zobs(r_xrange, beta, Vext_rad, los_velocity[i],
# Omega_m)
#
# dczobs = SPEED_OF_LIGHT * (z_CMB[i] - zobs_pred)
#
# ll_zobs = jnp.exp(-0.5 * (dczobs / sigma_v)**2) / sigma_v
#
# ll += jnp.log(simps(ptilde * ll_zobs, dr))
# ll -= jnp.log(simps(ptilde, dr))
#
# numpyro.factor("ll", ll)

View file

@ -13,7 +13,7 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Various user parameters for csiborgtools.
Various user parameters for CSiBORGTools.
"""
@ -37,7 +37,8 @@ def simname2boxsize(simname):
"borg1": 677.7,
"borg2": 676.6,
"quijote": 1000.,
"TNG300-1": 205.
"TNG300-1": 205.,
"Carrick2015": 400.,
}
boxsize = d.get(simname, None)
@ -48,6 +49,32 @@ def simname2boxsize(simname):
return boxsize
def simname2Omega_m(simname):
"""
Return Omega_m for a given simname.
Parameters
----------
simname : str
Simulation name.
Returns
-------
Omega_m: float
"""
d = {"csiborg1": 0.307,
"borg1": 0.307,
"Carrick2015": 0.3,
}
omega_m = d.get(simname, None)
if omega_m is None:
raise ValueError("Unknown simname: {}".format(simname))
return omega_m
paths_glamdring = {
"csiborg1_srcdir": "/mnt/extraspace/rstiskalek/csiborg1",
"csiborg2_main_srcdir": "/mnt/extraspace/rstiskalek/csiborg2_main",

View file

@ -111,6 +111,8 @@ class Paths:
files = glob(join(self.quijote_dir, "fiducial_processed",
"chain_*"))
files = [int(search(r'chain_(\d+)', f).group(1)) for f in files]
elif simname == "Carrick2015":
return [0]
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
@ -653,3 +655,22 @@ class Paths:
str
"""
return self.tng300_1_dir
def field_los(self, simnname, catalogue):
"""
Path to the files containing the line-of-sight fields.
Parameters
----------
simname : str
Simulation name.
catalogue : str
Catalogue name.
Returns
-------
str
"""
fdir = join(self.postdir, "field_los")
try_create_directory(fdir)
return join(fdir, f"los_{catalogue}_{simnname}.hdf5")

View file

@ -15,7 +15,9 @@
"""
Collection of stand-off utility functions used in the scripts.
"""
import numpy
from copy import deepcopy
import numpy as np
from numba import jit
from datetime import datetime
@ -30,17 +32,17 @@ def center_of_mass(particle_positions, particles_mass, boxsize):
Calculate the center of mass of a halo while assuming periodic boundary
conditions of a cubical box.
"""
cm = numpy.zeros(3, dtype=particle_positions.dtype)
cm = np.zeros(3, dtype=particle_positions.dtype)
totmass = sum(particles_mass)
# Convert positions to unit circle coordinates in the complex plane,
# calculate the weighted average and convert it back to box coordinates.
for i in range(3):
cm_i = sum(particles_mass * numpy.exp(
2j * numpy.pi * particle_positions[:, i] / boxsize))
cm_i = sum(particles_mass * np.exp(
2j * np.pi * particle_positions[:, i] / boxsize))
cm_i /= totmass
cm_i = numpy.arctan2(cm_i.imag, cm_i.real) * boxsize / (2 * numpy.pi)
cm_i = np.arctan2(cm_i.imag, cm_i.real) * boxsize / (2 * np.pi)
if cm_i < 0:
cm_i += boxsize
@ -58,7 +60,7 @@ def periodic_distance(points, reference_point, boxsize):
npoints = len(points)
half_box = boxsize / 2
dist = numpy.zeros(npoints, dtype=points.dtype)
dist = np.zeros(npoints, dtype=points.dtype)
for i in range(npoints):
for j in range(3):
dist_1d = abs(points[i, j] - reference_point[j])
@ -124,15 +126,15 @@ def cartesian_to_radec(X):
"""
x, y, z = X[:, 0], X[:, 1], X[:, 2]
dist = numpy.linalg.norm(X, axis=1)
dec = numpy.arcsin(z / dist)
ra = numpy.arctan2(y, x)
ra[ra < 0] += 2 * numpy.pi
dist = np.linalg.norm(X, axis=1)
dec = np.arcsin(z / dist)
ra = np.arctan2(y, x)
ra[ra < 0] += 2 * np.pi
ra *= 180 / numpy.pi
dec *= 180 / numpy.pi
ra *= 180 / np.pi
dec *= 180 / np.pi
return numpy.vstack([dist, ra, dec]).T
return np.vstack([dist, ra, dec]).T
def radec_to_cartesian(X):
@ -142,11 +144,11 @@ def radec_to_cartesian(X):
"""
dist, ra, dec = X[:, 0], X[:, 1], X[:, 2]
cdec = numpy.cos(dec * numpy.pi / 180)
return numpy.vstack([
dist * cdec * numpy.cos(ra * numpy.pi / 180),
dist * cdec * numpy.sin(ra * numpy.pi / 180),
dist * numpy.sin(dec * numpy.pi / 180)
cdec = np.cos(dec * np.pi / 180)
return np.vstack([
dist * cdec * np.cos(ra * np.pi / 180),
dist * cdec * np.sin(ra * np.pi / 180),
dist * np.sin(dec * np.pi / 180)
]).T
@ -159,14 +161,14 @@ def great_circle_distance(x1, x2):
ra1, dec1 = x1
ra2, dec2 = x2
ra1 *= numpy.pi / 180
dec1 *= numpy.pi / 180
ra2 *= numpy.pi / 180
dec2 *= numpy.pi / 180
ra1 *= np.pi / 180
dec1 *= np.pi / 180
ra2 *= np.pi / 180
dec2 *= np.pi / 180
return 180 / numpy.pi * numpy.arccos(
numpy.sin(dec1) * numpy.sin(dec2)
+ numpy.cos(dec1) * numpy.cos(dec2) * numpy.cos(ra1 - ra2)
return 180 / np.pi * np.arccos(
np.sin(dec1) * np.sin(dec2)
+ np.cos(dec1) * np.cos(dec2) * np.cos(ra1 - ra2)
)
@ -193,8 +195,8 @@ def cosine_similarity(x, y):
if y.ndim == 1:
y = y.reshape(1, -1)
out = numpy.sum(x * y, axis=1)
out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
out = np.sum(x * y, axis=1)
out /= np.linalg.norm(x) * np.linalg.norm(y, axis=1)
return out[0] if out.size == 1 else out
@ -258,8 +260,8 @@ def real2redshift(pos, vel, observer_location, observer_velocity, boxsize,
Redshift-space Cartesian position in `Mpc / h`.
"""
if make_copy:
pos = numpy.copy(pos)
vel = numpy.copy(vel)
pos = np.copy(pos)
vel = np.copy(vel)
H0_inv = 1. / 100
@ -267,8 +269,8 @@ def real2redshift(pos, vel, observer_location, observer_velocity, boxsize,
pos -= observer_location
vel -= observer_velocity
vr_dot = numpy.einsum('ij,ij->i', pos, vel)
norm2 = numpy.einsum('ij,ij->i', pos, pos)
vr_dot = np.einsum('ij,ij->i', pos, vel)
norm2 = np.einsum('ij,ij->i', pos, pos)
pos *= (1 + H0_inv * vr_dot / norm2).reshape(-1, 1)
@ -293,9 +295,9 @@ def number_counts(x, bin_edges):
"""
Calculate counts of samples in bins.
"""
out = numpy.full(bin_edges.size - 1, numpy.nan, dtype=numpy.float32)
out = np.full(bin_edges.size - 1, np.nan, dtype=np.float32)
for i in range(bin_edges.size - 1):
out[i] = numpy.sum((x >= bin_edges[i]) & (x < bin_edges[i + 1]))
out[i] = np.sum((x >= bin_edges[i]) & (x < bin_edges[i + 1]))
return out
@ -303,12 +305,12 @@ def binned_statistic(x, y, left_edges, bin_width, statistic):
"""
Calculate a binned statistic.
"""
out = numpy.full(left_edges.size, numpy.nan, dtype=x.dtype)
out = np.full(left_edges.size, np.nan, dtype=x.dtype)
for i in range(left_edges.size):
mask = (x >= left_edges[i]) & (x < left_edges[i] + bin_width)
if numpy.any(mask):
if np.any(mask):
out[i] = statistic(y[mask])
return out
@ -317,3 +319,112 @@ def fprint(msg, verbose=True):
"""Print and flush a message with a timestamp."""
if verbose:
print(f"{datetime.now()}: {msg}", flush=True)
###############################################################################
# ACL of MCMC chains #
###############################################################################
def calculate_acf(data):
"""
Calculates the autocorrelation of some data. Taken from `epsie` package
written by Collin Capano.
Parameters
----------
data : 1-dimensional array
The data to calculate the autocorrelation of.
Returns
-------
acf : 1-dimensional array
"""
# zero the mean
data = data - data.mean()
# zero-pad to 2 * nearest power of 2
newlen = int(2**(1 + np.ceil(np.log2(len(data)))))
x = np.zeros(newlen)
x[:len(data)] = data[:]
# correlate
acf = np.correlate(x, x, mode='full')
# drop corrupted region
acf = acf[len(acf)//2:]
# normalize
acf /= acf[0]
return acf
def calculate_acl(data):
"""
Calculate the autocorrelation length of some data. Taken from `epsie`
package written by Collin Capano. Algorithm used is from:
N. Madras and A.D. Sokal, J. Stat. Phys. 50, 109 (1988).
Parameters
----------
data : 1-dimensional array
The data to calculate the autocorrelation length of.
Returns
-------
acl : int
"""
# calculate the acf
acf = calculate_acf(data)
# now the ACL: Following from Sokal, this is estimated
# as the first point where M*tau[k] <= k, where
# tau = 2*cumsum(acf) - 1, and M is a tuneable parameter,
# generally chosen to be = 5 (which we use here)
m = 5
cacf = 2. * np.cumsum(acf) - 1.
win = m * cacf <= np.arange(len(cacf))
if win.any():
acl = int(np.ceil(cacf[np.where(win)[0][0]]))
else:
# data is too short to estimate the ACL, just choose
# the length of the data
acl = len(data)
return acl
def thin_samples_by_acl(samples):
"""
Thin MCMC samples by the autocorrelation length of each chain.
Parameters
----------
samples : dict
Dictionary of samples. Each value is a 2-dimensional array of shape
`(nchains, nsamples)`.
Returns
-------
thinned_samples : dict
Dictionary of thinned samples. Each value is a 1-dimensional array of
shape `(n_thinned_samples)`, where the samples are concatenated across
the chain.
"""
keys = list(samples.keys())
nchains = 1 if samples[keys[0]].ndim == 1 else samples[keys[0]].shape[0]
samples = deepcopy(samples)
if nchains == 1:
for key in keys:
samples[key] = samples[key].reshape(1, -1)
# Calculate the ACL of each chain.
acl = np.zeros(nchains, dtype=int)
for i in range(nchains):
acl[i] = max(calculate_acl(samples[key][i]) for key in keys)
thinned_samples = {}
for key in keys:
key_samples = []
for i in range(nchains):
key_samples.append(samples[key][i, ::acl[i]])
thinned_samples[key] = np.hstack(key_samples)
return thinned_samples

File diff suppressed because one or more lines are too long

View file

@ -20,22 +20,24 @@ from datetime import datetime
from gc import collect
from os import makedirs, remove, rmdir
from os.path import exists, join
from warnings import warn
import csiborgtools
import numpy as np
from astropy import units as u
from astropy.coordinates import SkyCoord
from h5py import File
from mpi4py import MPI
from taskmaster import work_delegation
from utils import get_nsims
###############################################################################
# I/O functions #
###############################################################################
def get_los(catalogue_name, comm):
def get_los(catalogue_name, simname, comm):
"""
Get the line of sight RA/dec coordinates for the given catalogue.
@ -43,6 +45,10 @@ def get_los(catalogue_name, comm):
----------
catalogue_name : str
Catalogue name.
simname : str
Simulation name.
comm : mpi4py.MPI.Comm
MPI communicator.
Returns
-------
@ -50,16 +56,42 @@ def get_los(catalogue_name, comm):
RA/dec coordinates of the line of sight.
"""
if comm.Get_rank() == 0:
pv_supranta_folder = "/mnt/extraspace/rstiskalek/catalogs/PV_Supranta"
folder = "/mnt/extraspace/rstiskalek/catalogs"
if catalogue_name == "A2":
with File(join(pv_supranta_folder, "A2.h5"), 'r') as f:
if catalogue_name == "LOSS" or catalogue_name == "Foundation":
fpath = join(folder, "PV_compilation_Supranta2019.hdf5")
with File(fpath, 'r') as f:
grp = f[catalogue_name]
RA = grp["RA"][:]
dec = grp["DEC"][:]
elif catalogue_name == "A2":
fpath = join(folder, "A2.h5")
with File(fpath, 'r') as f:
RA = f["RA"][:]
dec = f["DEC"][:]
elif "csiborg1" in catalogue_name:
nsim = int(catalogue_name.split("_")[-1])
cat = csiborgtools.read.CSiBORG1Catalogue(
nsim, bounds={"totmass": (1e13, None)})
seed = 42
gen = np.random.default_rng(seed)
mask = gen.choice(len(cat), size=100, replace=False)
sph_pos = cat["spherical_pos"]
RA = sph_pos[mask, 1]
dec = sph_pos[mask, 2]
else:
raise ValueError(f"Unknown field name: `{catalogue_name}`.")
# The Carrick+2015 is in galactic coordinates, so we need to convert
# the RA/dec to galactic coordinates.
if simname == "Carrick2015":
c = SkyCoord(ra=RA*u.degree, dec=dec*u.degree, frame='icrs')
pos = np.vstack((c.galactic.l, c.galactic.b)).T
else:
pos = np.vstack((RA, dec)).T
else:
pos = None
@ -90,6 +122,17 @@ def get_field(simname, nsim, kind, MAS, grid):
# Open the field reader.
if simname == "csiborg1":
field_reader = csiborgtools.read.CSiBORG1Field(nsim)
elif simname == "Carrick2015":
folder = "/mnt/extraspace/rstiskalek/catalogs"
warn(f"Using local paths from `{folder}`.", RuntimeWarning)
if kind == "density":
fpath = join(folder, "twompp_density_carrick2015.npy")
return np.load(fpath).astype(np.float32)
elif kind == "velocity":
fpath = join(folder, "twompp_velocity_carrick2015.npy")
return np.load(fpath).astype(np.float32)
else:
raise ValueError(f"Unknown field kind: `{kind}`.")
else:
raise ValueError(f"Unknown simulation name: `{simname}`.")
@ -230,8 +273,8 @@ if __name__ == "__main__":
args = parser.parse_args()
rmax = 200
dr = 0.1
smooth_scales = None
dr = 0.5
smooth_scales = [0, 2, 4, 6]
comm = MPI.COMM_WORLD
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
@ -248,8 +291,8 @@ if __name__ == "__main__":
dump_folder = None
dump_folder = comm.bcast(dump_folder, root=0)
# Get the line of sight RA/dec coordinates.
pos = get_los(args.catalogue, comm)
# Get the line of sight sky coordinates.
pos = get_los(args.catalogue, args.simname, comm)
def main(nsim):
interpolate_field(pos, args.simname, nsim, args.MAS, args.grid,

View file

@ -1,4 +1,4 @@
nthreads=1
nthreads=11
memory=64
on_login=${1}
queue="berg"
@ -6,7 +6,8 @@ env="/mnt/users/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="field_los.py"
catalogue="A2"
nsims="7444"
# catalogue="csiborg1_9844"
nsims="-1"
simname="csiborg1"
MAS="SPH"
grid=1024

View file

@ -134,8 +134,6 @@ def open_galaxy_positions(survey_name, comm, scatter=None):
if scatter < 0:
raise ValueError("Scatter must be positive.")
if scatter > 0:
print(f"Adding scatter of {scatter} Mpc / h.",
flush=True)
pos = scatter_along_radial_direction(pos, scatter,
boxsize)
@ -186,7 +184,6 @@ def evaluate_field(field, pos, boxsize, smooth_scales, verbose=True):
field, scale * mpc2box, boxsize=1, make_copy=True)
else:
field_smoothed = numpy.copy(field)
print("Going to evaluate the field....")
val[:, i] = csiborgtools.field.evaluate_sky(
field_smoothed, pos=pos, mpc2box=mpc2box)

View file

@ -1,5 +1,5 @@
nthreads=1
memory=32
nthreads=11
memory=64
on_login=${1}
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
@ -7,11 +7,11 @@ file="field_sample.py"
nsims="-1"
simname="TNG300-1"
survey="TNG300-1"
simname="csiborg1"
survey="SDSS"
smooth_scales="0 2 4 8 16"
kind="density"
MAS="PCS"
MAS="SPH"
grid=1024
scatter=0

211
scripts/flow_validation.py Normal file
View file

@ -0,0 +1,211 @@
# Copyright (C) 2024 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to run the PV validation model on various catalogues and simulations.
The script is MPI parallelized over the IC realizations.
"""
from argparse import ArgumentParser
from datetime import datetime
from os import makedirs, remove, rmdir
from os.path import exists, join
import csiborgtools
import jax
import numpy as np
from h5py import File
from mpi4py import MPI
from numpyro.infer import MCMC, NUTS
from taskmaster import work_delegation # noqa
def get_model(args, nsim):
"""
Load the data and create the NumPyro model.
Parameters
----------
args : argparse.Namespace
Command line arguments.
nsim : int
Simulation index.
Returns
-------
numpyro.Primitive
"""
folder = "/mnt/extraspace/rstiskalek/catalogs/"
if args.catalogue == "A2":
fpath = join(folder, "A2.h5")
elif args.catalogue == "LOSS" or args.catalogue == "Foundation":
raise NotImplementedError("To be implemented..")
else:
raise ValueError(f"Unknown catalogue: `{args.catalogue}`.")
loader = csiborgtools.flow.DataLoader(args.simname, args.catalogue, fpath,
paths, ksmooth=args.ksmooth)
Omega_m = csiborgtools.simname2Omega_m(args.simname)
# Read in the data from the loader.
los_overdensity = loader.los_density[:, nsim, :]
los_velocity = loader.los_radial_velocity[:, nsim, :]
RA = loader.cat["RA"]
dec = loader.cat["DEC"]
z_obs = loader.cat["z_obs"]
r_hMpc = loader.cat["r_hMpc"]
e_r_hMpc = loader.cat["e_rhMpc"]
return csiborgtools.flow.SD_PV_validation_model(
los_overdensity, los_velocity, RA, dec, z_obs, r_hMpc, e_r_hMpc,
loader.rdist, Omega_m)
def run_model(model, nsteps, nchains, nsim, dump_folder, show_progress=True):
"""
Run the NumPyro model and save the thinned samples to a temporary file.
Parameters
----------
model : jax.numpyro.Primitive
Model to be run.
nsteps : int
Number of steps.
nchains : int
Number of chains.
nsim : int
Simulation index.
dump_folder : str
Folder where the temporary files are stored.
show_progress : bool
Whether to show the progress bar.
Returns
-------
None
"""
nuts_kernel = NUTS(model)
mcmc = MCMC(nuts_kernel, num_warmup=nsteps // 2, num_samples=nsteps // 2,
chain_method="sequential", num_chains=nchains,
progress_bar=show_progress)
rng_key = jax.random.PRNGKey(42)
mcmc.run(rng_key)
if show_progress:
print(f"Summary of the MCMC run of simulation indexed {nsim}:")
mcmc.print_summary()
samples = mcmc.get_samples()
thinned_samples = csiborgtools.thin_samples_by_acl(samples)
# Save the samples to the temporary folder.
fname = join(dump_folder, f"samples_{nsim}.npz")
np.savez(fname, **thinned_samples)
def combine_from_simulations(catalogue_name, simname, nsims, outfolder,
dumpfolder, ksmooth):
"""
Combine the results from individual simulations into a single file.
Parameters
----------
catalogue_name : str
Catalogue name.
simname : str
Simulation name.
nsims : list
List of IC realisations.
outfolder : str
Output folder.
dumpfolder : str
Dumping folder where the temporary files are stored.
ksmooth : int
Smoothing index.
Returns
-------
None
"""
fname_out = join(
outfolder,
f"flow_samples_{catalogue_name}_{simname}_smooth_{ksmooth}.hdf5")
print(f"Combining results from invidivual simulations to `{fname_out}`.")
if exists(fname_out):
remove(fname_out)
for nsim in nsims:
fname = join(dumpfolder, f"samples_{nsim}.npz")
data = np.load(fname)
with File(fname_out, 'a') as f:
grp = f.create_group(f"sim_{nsim}")
for key in data.files:
grp.create_dataset(key, data=data[key])
# Remove the temporary file.
remove(fname)
# Remove the dumping folder.
rmdir(dumpfolder)
print("Finished combining results.")
###############################################################################
# Command line interface #
###############################################################################
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, required=True,
help="Simulation name.")
parser.add_argument("--catalogue", type=str, required=True,
help="PV catalogue.")
parser.add_argument("--ksmooth", type=int, required=True,
help="Smoothing index.")
args = parser.parse_args()
comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()
out_folder = "/mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity" # noqa
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = paths.get_ics(args.simname)
nsteps = 5000
nchains = 1
# Create the dumping folder.
if comm.Get_rank() == 0:
dump_folder = join(out_folder,
f"temp_{str(datetime.now())}".replace(" ", "_"))
print(f"Creating folder `{dump_folder}`.")
makedirs(dump_folder)
else:
dump_folder = None
dump_folder = comm.bcast(dump_folder, root=0)
def main(nsim):
model = get_model(args, nsim)
run_model(model, nsteps, nchains, nsim, dump_folder,
show_progress=size == 1)
work_delegation(main, nsims, comm, master_verbose=True)
comm.Barrier()
if rank == 0:
combine_from_simulations(args.catalogue, args.simname, nsims,
out_folder, dump_folder, args.ksmooth)

23
scripts/flow_validation.sh Executable file
View file

@ -0,0 +1,23 @@
memory=4
on_login=${1}
nthreads=${2}
queue="berg"
env="/mnt/users/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="flow_validation.py"
catalogue="A2"
simname="Carrick2015"
ksmooth=2
pythoncm="$env $file --catalogue $catalogue --simname $simname --ksmooth $ksmooth"
if [ $on_login -eq 1 ]; then
echo $pythoncm
$pythoncm
else
cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
echo "Submitting:"
echo $cm
echo
eval $cm
fi

View file

@ -0,0 +1,150 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Copyright (C) 2024 Richard Stiskalek\n",
"# This program is free software; you can redistribute it and/or modify it\n",
"# under the terms of the GNU General Public License as published by the\n",
"# Free Software Foundation; either version 3 of the License, or (at your\n",
"# option) any later version.\n",
"# This program is distributed in the hope that it will be useful, but\n",
"# WITHOUT ANY WARRANTY; without even the implied warranty of\n",
"# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General\n",
"# Public License for more details.\n",
"#\n",
"# You should have received a copy of the GNU General Public License along\n",
"# with this program; if not, write to the Free Software Foundation, Inc.,\n",
"# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\n",
"from os.path import join\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from h5py import File\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Supernovae data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"a2dir = \"/Users/richard/Data/PV/A2_paper_data/A2\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### LOSS data set"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"names = [\"z_CMB\", \"mB\", \"x1\", \"c\", \"e_mB\", \"e_x1\", \"e_c\", \"RA\", \"DEC\"]\n",
"dtype = [(n, np.float32) for n in names]\n",
"data = np.genfromtxt(join(a2dir, \"loss.csv\"), delimiter=\",\", skip_header=1,\n",
" usecols=[5 + n for n in range(len(names))])\n",
"\n",
"loss_data = np.empty(len(data), dtype=dtype)\n",
"for i, n in enumerate(names):\n",
" loss_data[n] = data[:, i]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Foundation data set "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"names = [\"z_CMB\", \"RA\", \"DEC\", \"x1\", \"mB\", \"c\", \"peak\", \"e_peak\", \"e_x1\", \"e_mB\", \"e_c\"]\n",
"dtype = [(n, np.float32) for n in names]\n",
"data = np.genfromtxt(join(a2dir, \"foundation.csv\"), delimiter=\",\", skip_header=1,\n",
" usecols=[3 + n for n in range(len(names))])\n",
"\n",
"foundation_data = np.empty(len(data), dtype=dtype)\n",
"for i, n in enumerate(names):\n",
" foundation_data[n] = data[:, i]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Write output as HDF5 file"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"outdir = \"/Users/richard/Downloads\"\n",
"fname = \"PV_compilation_Supranta2019.hdf5\"\n",
"\n",
"with File(join(outdir, fname), 'w') as f:\n",
" # Write LOSS\n",
" grp = f.create_group(\"LOSS\")\n",
" for name in loss_data.dtype.names:\n",
" grp.create_dataset(name, data=loss_data[name])\n",
"\n",
" # Write Foundation\n",
" grp = f.create_group(\"Foundation\")\n",
" for name in foundation_data.dtype.names:\n",
" grp.create_dataset(name, data=foundation_data[name])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}