Rename repo

This commit is contained in:
rstiskalek 2022-10-20 23:34:14 +01:00
parent 942c36b142
commit a9e98f5a2d
13 changed files with 0 additions and 0 deletions

View file

@ -0,0 +1,20 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .readsim import (get_csiborg_ids, get_sim_path, get_snapshot_path, # noqa
read_info, # noqa
open_particle, open_unbinding, read_particle, # noqa
read_clumpid, read_clumps, read_mmain) # noqa
from .readobs import (read_planck2015, read_2mpp) # noqa

106
csiborgtools/io/readobs.py Normal file
View file

@ -0,0 +1,106 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Scripts to read in observations.
"""
import numpy
from astropy.io import fits
from ..utils import (add_columns, cols_to_structured)
def read_planck2015(fpath, dist_cosmo, max_comdist=None):
    r"""
    Read the Planck 2nd Sunyaev-Zeldovich source catalogue [1]. The following
    is performed:
        - removes clusters without a redshift estimate,
        - calculates the comoving distance with the provided cosmology,
        - converts `MSZ` columns from units of :math:`10^{14} M_\odot`
          to :math:`M_\odot`.

    Parameters
    ----------
    fpath : str
        Path to the source catalogue.
    dist_cosmo : `astropy.cosmology` object
        The cosmology to calculate cluster comoving distance from redshift.
    max_comdist : float, optional
        Maximum comoving distance threshold in units of :math:`\mathrm{Mpc}`.
        By default `None` and no threshold is applied.

    Returns
    -------
    out : structured array
        The catalogue structured array.

    References
    ----------
    [1] https://heasarc.gsfc.nasa.gov/W3Browse/all/plancksz2.html
    """
    # Context manager ensures the FITS file handle is closed even on error.
    with fits.open(fpath) as hdulist:
        data = hdulist[1].data
        # Copy the FITS table into a plain structured array
        out = numpy.full(data.size, numpy.nan, dtype=data.dtype.descr)
        for name in out.dtype.names:
            out[name] = data[name]
    # Take only clusters with a redshift estimate
    out = out[out["REDSHIFT"] >= 0]
    # Add the comoving distance calculated from the redshift
    dist = dist_cosmo.comoving_distance(out["REDSHIFT"]).value
    out = add_columns(out, dist, "COMDIST")
    # Convert masses from 1e14 Msun to Msun
    for par in ("MSZ", "MSZ_ERR_UP", "MSZ_ERR_LOW"):
        out[par] *= 1e14
    # Optional distance threshold
    if max_comdist is not None:
        out = out[out["COMDIST"] < max_comdist]
    return out
def read_2mpp(fpath, dist_cosmo):
    """
    Read in the 2M++ galaxy redshift catalogue [1], with the catalogue at [2].
    Removes fake galaxies used to fill the zone of avoidance.

    Parameters
    ----------
    fpath : str
        File path to the catalogue.
    dist_cosmo : `astropy.cosmology` object
        The cosmology used to calculate the galaxy comoving distance from
        the CMB-frame redshift.

    Returns
    -------
    out : structured array
        The catalogue.

    References
    ----------
    [1] The 2M++ galaxy redshift catalogue; Lavaux, Guilhem, Hudson, Michael J.
    [2] https://cdsarc.cds.unistra.fr/viz-bin/cat/J/MNRAS/416/2840#/article
    """
    from scipy.constants import c
    # Read the catalogue and keep only non-fake galaxies (flag column 12 == 0)
    cat = numpy.genfromtxt(fpath, delimiter="|")
    cat = cat[cat[:, 12] == 0, :]
    F64 = numpy.float64
    cols = [("RA", F64), ("DEC", F64), ("Ksmag", F64), ("ZCMB", F64),
            ("CDIST_CMB", F64)]
    out = cols_to_structured(cat.shape[0], cols)
    out["RA"] = cat[:, 1]
    out["DEC"] = cat[:, 2]
    out["Ksmag"] = cat[:, 5]
    # Column 7 is a velocity in km/s; divide by c converted to km/s
    out["ZCMB"] = cat[:, 7] / (c * 1e-3)
    out["CDIST_CMB"] = dist_cosmo.comoving_distance(out["ZCMB"]).value
    return out

431
csiborgtools/io/readsim.py Normal file
View file

@ -0,0 +1,431 @@
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Functions to read in the particle and clump files."""
import numpy
from scipy.io import FortranFile
from os import listdir
from os.path import (join, isfile)
from glob import glob
from tqdm import tqdm
from ..utils import cols_to_structured
F16 = numpy.float16
F32 = numpy.float32
F64 = numpy.float64
I32 = numpy.int32
I64 = numpy.int64
def get_csiborg_ids(srcdir):
    """
    List the CSiBORG simulation IDs found in `srcdir`.

    Folders are assumed to be named `ramses_out_X`; the integer `X` is
    extracted. Inverted-ICs folders (`_inv`), old folders (`OLD`) and the
    ID `5511` are excluded.

    Parameters
    ----------
    srcdir : string
        The folder where CSiBORG simulations are stored.

    Returns
    -------
    ids : 1-dimensional array
        Sorted array of CSiBORG simulation IDs.
    """
    # Keep only the folder names, dropping the leading path
    names = (path.split("/")[-1] for path in glob(join(srcdir, "ramses_out*")))
    # Drop inverted-ICs and old runs, then extract the trailing integer
    ids = [int(name.split("_")[-1]) for name in names
           if "_inv" not in name and "OLD" not in name]
    # 5511 is excluded by convention
    if 5511 in ids:
        ids.remove(5511)
    return numpy.sort(ids)
def get_sim_path(n, fname="ramses_out_{}", srcdir="/mnt/extraspace/hdesmond"):
    """
    Construct the path to a CSiBORG simulation.

    Parameters
    ----------
    n : int
        The index of the initial conditions (IC) realisation.
    fname : str, optional
        The file name. By default `ramses_out_{}`, where `n` is the IC index.
    srcdir : str, optional
        The file path to the folder where realisations of the ICs are stored.

    Returns
    -------
    path : str
        Path to the `n`th CSiBORG simulation.
    """
    simname = fname.format(n)
    return join(srcdir, simname)
def get_snapshot_path(Nsnap, simpath):
    """
    Construct the path to a CSiBORG IC realisation snapshot.

    Parameters
    ----------
    Nsnap : int
        Snapshot index.
    simpath : str
        Path to the CSiBORG IC realisation.

    Returns
    -------
    snappath : str
        Path to the CSiBORG IC realisation snapshot.
    """
    # Snapshot folders are zero-padded to five digits
    nout = str(Nsnap).zfill(5)
    return join(simpath, "output_{}".format(nout))
def read_info(Nsnap, simpath):
    """
    Read CSiBORG simulation snapshot info.

    Parameters
    ----------
    Nsnap : int
        Snapshot index.
    simpath : str
        Path to the CSiBORG IC realisation.

    Returns
    -------
    info : dict
        Dictionary of info parameters. Note that both keys and values are
        strings.
    """
    snappath = get_snapshot_path(Nsnap, simpath)
    fname = join(snappath, "info_{}.txt".format(str(Nsnap).zfill(5)))
    with open(fname, "r") as f:
        tokens = f.read().split()
    # Discard everything from the "ordering" line onwards
    tokens = numpy.asarray(tokens[:tokens.index("ordering")])
    # Tokens adjacent to each "=" are the key (before) and value (after)
    eq = numpy.asarray([i for i, tok in enumerate(tokens) if tok == "="])
    return dict(zip(tokens[eq - 1], tokens[eq + 1]))
def open_particle(n, simpath, verbose=True):
    """
    Open particle files to a given CSiBORG simulation.

    Reads the per-CPU particle counts from each part file header and leaves
    the files open, positioned just past the header, for the caller to
    continue reading (the caller is responsible for closing them).

    Parameters
    ----------
    n : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
    verbose : bool, optional
        Verbosity flag.

    Returns
    -------
    nparts : 1-dimensional array
        Number of parts assosiated with each CPU.
    partfiles : list of `scipy.io.FortranFile`
        Opened part files.

    Raises
    ------
    FileNotFoundError
        If the unbinding file of the first CPU is missing from the snapshot.
    ValueError
        If the `ncpu` stored in a part file disagrees with the info file.
    """
    # Zeros filled snapshot number and the snapshot path
    nout = str(n).zfill(5)
    snappath = get_snapshot_path(n, simpath)
    ncpu = int(read_info(n, simpath)["ncpu"])
    if verbose:
        print("Reading in output `{}` with ncpu = `{}`.".format(nout, ncpu))
    # Check whether the unbinding file exists (only CPU 1 is checked).
    snapdirlist = listdir(snappath)
    unbinding_file = "unbinding_{}.out00001".format(nout)
    if unbinding_file not in snapdirlist:
        raise FileNotFoundError(
            "Couldn't find `{}` in `{}`. Use mergertreeplot.py -h or --help "
            "to print help message.".format(unbinding_file, snappath))
    # First read the headers. Reallocate arrays and fill them.
    nparts = numpy.zeros(ncpu, dtype=int)
    partfiles = [None] * ncpu
    for cpu in range(ncpu):
        cpu_str = str(cpu + 1).zfill(5)
        fpath = join(snappath, "part_{}.out{}".format(nout, cpu_str))
        f = FortranFile(fpath)
        # Header records MUST be read in exactly this order; each read
        # advances the Fortran record position.
        ncpuloc = f.read_ints()
        # NOTE(review): `ncpuloc` is a length-1 array compared to an int;
        # works element-wise here but relies on the single-element case.
        if ncpuloc != ncpu:
            infopath = join(snappath, "info_{}.txt".format(nout))
            raise ValueError("`ncpu = {}` of `{}` disagrees with `ncpu = {}` "
                             "of `{}`.".format(ncpu, infopath, ncpuloc, fpath))
        ndim = f.read_ints()
        nparts[cpu] = f.read_ints()
        localseed = f.read_ints()
        nstar_tot = f.read_ints()
        mstar_tot = f.read_reals('d')
        mstar_lost = f.read_reals('d')
        nsink = f.read_ints()
        # Keep the file open for the caller to read the particle records
        partfiles[cpu] = f
        del ndim, localseed, nstar_tot, mstar_tot, mstar_lost, nsink
    return nparts, partfiles
def read_sp(dtype, partfile):
    """
    Read a single Fortran record from an opened particle file, dispatching
    on the requested dtype.

    Parameters
    ----------
    dtype : dtype
        The dtype of the part file record to be read now.
    partfile : `scipy.io.FortranFile`
        Part file to read from.

    Returns
    -------
    out : 1-dimensional array
        The data read from the part file.

    Raises
    ------
    TypeError
        If `dtype` is not one of the supported float or integer types.
    """
    # Floats are stored as double-precision records regardless of the
    # target dtype; integers as default-width integer records.
    if dtype in (numpy.float16, numpy.float32, numpy.float64):
        return partfile.read_reals('d')
    if dtype in (numpy.int32,):
        return partfile.read_ints()
    raise TypeError("Unexpected dtype `{}`.".format(dtype))
def nparts_to_start_ind(nparts):
    """
    Convert `nparts` array to starting indices in a pre-allocated array for
    looping over the CPU number.

    Parameters
    ----------
    nparts : 1-dimensional array
        Number of parts assosiated with each CPU.

    Returns
    -------
    start_ind : 1-dimensional array
        The starting indices calculated as a cumulative sum starting at 0.
    """
    # Offset of CPU `i` is the total particle count of CPUs 0..i-1
    return numpy.concatenate(([0], numpy.cumsum(nparts[:-1])))
def read_particle(pars_extract, n, simpath, verbose=True):
    """
    Read particle files of a simulation at a given snapshot and return
    values of `pars_extract`.

    Parameters
    ----------
    pars_extract : list of str
        Parameters to be extracted.
    n : int
        The index of the redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
    verbose : bool, optional
        Verbosity flag while for reading the CPU outputs.

    Returns
    -------
    out : structured array
        The data read from the particle file.

    Raises
    ------
    ValueError
        If `pars_extract` contains an unknown parameter name.
    """
    # Open the particle files. Forward `verbose` so that `open_particle`
    # stays silent when verbosity is off (previously it always printed).
    nparts, partfiles = open_particle(n, simpath, verbose)
    if verbose:
        print("Opened {} particle files.".format(nparts.size))
    ncpu = nparts.size
    # Order in which the particles are written in the FortranFile
    forder = [("x", F16), ("y", F16), ("z", F16),
              ("vx", F16), ("vy", F16), ("vz", F16),
              ("M", F32), ("ID", I32), ("level", I32)]
    fnames = [fp[0] for fp in forder]
    fdtypes = [fp[1] for fp in forder]
    # Check there are no strange parameters
    for p in pars_extract:
        if p not in fnames:
            raise ValueError("Undefined parameter `{}`. Must be one of `{}`."
                             .format(p, fnames))
    npart_tot = numpy.sum(nparts)
    # A dummy array is necessary for reading the fortran files.
    dum = numpy.full(npart_tot, numpy.nan, dtype=F16)
    # These are the data we read along with types
    dtype = {"names": pars_extract,
             "formats": [forder[fnames.index(p)][1] for p in pars_extract]}
    # Allocate the output structured array
    out = numpy.full(npart_tot, numpy.nan, dtype)
    start_ind = nparts_to_start_ind(nparts)
    iters = tqdm(range(ncpu)) if verbose else range(ncpu)
    for cpu in iters:
        i = start_ind[cpu]
        j = nparts[cpu]
        # Records must be consumed in `forder` order; unwanted records are
        # read into the dummy array to keep the file position in sync.
        for (fname, fdtype) in zip(fnames, fdtypes):
            if fname in pars_extract:
                out[fname][i:i + j] = read_sp(fdtype, partfiles[cpu])
            else:
                dum[i:i + j] = read_sp(fdtype, partfiles[cpu])
    return out
def open_unbinding(cpu, n, simpath):
    """
    Open the unbinding file of a given CSiBORG snapshot. Note that to be
    consistent CPU is incremented by 1.

    Parameters
    ----------
    cpu : int
        The CPU index.
    n : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.

    Returns
    -------
    unbinding : `scipy.io.FortranFile`
        The opened unbinding FortranFile.
    """
    # Both the snapshot number and the (1-indexed) CPU are zero-padded
    nout = str(n).zfill(5)
    cpu_str = str(cpu + 1).zfill(5)
    fname = join(simpath, "output_{}".format(nout),
                 "unbinding_{}.out{}".format(nout, cpu_str))
    return FortranFile(fname)
def read_clumpid(n, simpath, verbose=True):
    """
    Read clump IDs of particles from the unbinding files.

    Parameters
    ----------
    n : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
    verbose : bool, optional
        Verbosity flag while for reading the CPU outputs.

    Returns
    -------
    clumpid : 1-dimensional array
        The array of clump IDs.
    """
    nparts, __ = open_particle(n, simpath, verbose)
    start = nparts_to_start_ind(nparts)
    ncpu = nparts.size
    out = numpy.full(numpy.sum(nparts), numpy.nan)
    cpus = tqdm(range(ncpu)) if verbose else range(ncpu)
    for cpu in cpus:
        # Fill the slice of the output belonging to this CPU
        sel = slice(start[cpu], start[cpu] + nparts[cpu])
        out[sel] = open_unbinding(cpu, n, simpath).read_ints()
    return out
def read_clumps(n, simpath):
    """
    Read in a precomputed clump file `clump_N.dat`.

    Parameters
    ----------
    n : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.

    Returns
    -------
    out : structured array
        Structured array of the clumps.

    Raises
    ------
    FileNotFoundError
        If the clump file does not exist.
    """
    n = str(n).zfill(5)
    fname = join(simpath, "output_{}".format(n), "clump_{}.dat".format(n))
    # A missing file is a not-found condition: raise `FileNotFoundError`
    # (previously `FileExistsError`, which signals the opposite situation),
    # matching the behaviour of `open_particle`.
    if not isfile(fname):
        raise FileNotFoundError("Clump file `{}` does not exist.".format(fname))
    # Read in the clump array. This is how the columns must be written!
    arr = numpy.genfromtxt(fname)
    cols = [("index", I64), ("level", I64), ("parent", I64), ("ncell", F64),
            ("peak_x", F64), ("peak_y", F64), ("peak_z", F64),
            ("rho-", F64), ("rho+", F64), ("rho_av", F64),
            ("mass_cl", F64), ("relevance", F64)]
    out = cols_to_structured(arr.shape[0], cols)
    for i, name in enumerate(out.dtype.names):
        out[name] = arr[:, i]
    return out
def read_mmain(n, srcdir, fname="Mmain_{}.npy"):
    """
    Load the `mmain` numpy array of central haloes whose mass contains their
    substructure contribution.

    Parameters
    ----------
    n : int
        The index of the initial conditions (IC) realisation.
    srcdir : str
        The path to the folder containing the files.
    fname : str, optional
        The file name convention. By default `Mmain_{}.npy`, where the
        substituted value is `n`.

    Returns
    -------
    out : structured array
        Array with the central halo information.
    """
    arr = numpy.load(join(srcdir, fname.format(n)))
    # Column order of the saved array; must match how it was written
    cols = [("index", I64), ("peak_x", F64), ("peak_y", F64),
            ("peak_z", F64), ("mass_cl", F64), ("sub_frac", F64)]
    out = cols_to_structured(arr.shape[0], cols)
    for i, col in enumerate(out.dtype.names):
        out[col] = arr[:, i]
    return out