Add density field plot and start preparing CSiBORG2 (#94)

* Add RAMSES2HDF5 conversion

* Upload changes

* Clean up

* More clean up

* updates

* Little change

* pep8

* Add basic SPH calculation for a snapshot

* Add submit script

* Remove echo

* Little changes

* Send off changes

* Little formatting

* Little updates

* Add nthreads argument

* Upload changes

* Add nthreads arguments

* Some local changes..

* Update scripts

* Add submission script

* Update script

* Update params

* Rename CSiBORGBox to CSiBORG1box

* Rename CSiBORG1 reader

* Move files

* Rename folder again

* Add basic plotting here

* Add new skeletons

* Move def

* Update nbs

* Edit directories

* Rename files

* Add units to converted snapshots

* Fix empty dataset bug

* Delete file

* Edits to submission scripts

* Edit paths

* Update .gitignore

* Fix attrs

* Update weighting

* Fix RA/dec bug

* Add FORNAX cluster

* Little edit

* Remove boxes since will no longer need

* Move func back

* Edit to include sort by membership

* Edit paths

* Purge basic things

* Start removing

* Bring file back

* Scratch

* Update the rest

* Improve the entire file

* Remove old things

* Remove old

* Delete old things

* Fully updates

* Rename file

* Edit submit script

* Little things

* Add print statement

* Add here cols_to_structured

* Edit halo cat

* Remove old import

* Add comment

* Update paths manager

* Move file

* Remove file

* Add chains
Richard Stiskalek committed on 2023-12-13 16:08:25 +00:00 (via GitHub)
parent 6042a87111, commit aaa14fc880
30 changed files with 1682 additions and 1728 deletions

.gitignore (3 changes)
View file

@@ -26,3 +26,6 @@ scripts_plots/*.sh
 notebooks/test.ipynb
 scripts/mgtree.py
 scripts/makemerger.py
+*.out
+*/python.sh

View file

@@ -20,10 +20,13 @@ from .utils import (center_of_mass, delta2ncells, number_counts,
                     hms_to_degrees, dms_to_degrees, great_circle_distance)  # noqa

 # Arguments to csiborgtools.read.Paths.
-paths_glamdring = {"srcdir": "/mnt/extraspace/hdesmond/",
-                   "postdir": "/mnt/extraspace/rstiskalek/CSiBORG/",
-                   "borg_dir": "/users/hdesmond/BORG_final/",
-                   "quijote_dir": "/mnt/extraspace/rstiskalek/Quijote",
+paths_glamdring = {
+    "csiborg1_srcdir": "/mnt/extraspace/rstiskalek/csiborg1",
+    "csiborg2_main_srcdir": "/mnt/extraspace/rstiskalek/csiborg2_main",
+    "csiborg2_varysmall_srcdir": "/mnt/extraspace/rstiskalek/csiborg2_varysmall",  # noqa
+    "csiborg2_random_srcdir": "/mnt/extraspace/rstiskalek/csiborg2_random",  # noqa
+    "postdir": "/mnt/extraspace/rstiskalek/csiborg_postprocessing/",
+    "quijote_dir": "/mnt/extraspace/rstiskalek/quijote",
                    }

@@ -76,4 +79,8 @@ clusters = {"Virgo": read.ObservedCluster(RA=hms_to_degrees(12, 27),
                                           dec=dms_to_degrees(12, 43),
                                           dist=16.5 * 0.7,
                                           name="Virgo"),
+            "Fornax": read.ObservedCluster(RA=hms_to_degrees(3, 38),
+                                           dec=dms_to_degrees(-35, 27),
+                                           dist=19 * 0.7,
+                                           name="Fornax"),
            }
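For reference, the new Fornax entry corresponds to roughly RA 54.5 deg and dec -35.45 deg. A minimal sanity-check sketch, assuming the usual hours and degrees-arcminutes conventions for the helpers (the real `hms_to_degrees` and `dms_to_degrees` live in `csiborgtools.utils` and may differ in signature):

import numpy

# Stand-in helpers, not the csiborgtools implementations.
def hms_to_degrees(hours, minutes=0.0, seconds=0.0):
    return 15.0 * (hours + minutes / 60.0 + seconds / 3600.0)

def dms_to_degrees(degrees, arcmin=0.0, arcsec=0.0):
    sign = -1.0 if degrees < 0 else 1.0
    return sign * (abs(degrees) + arcmin / 60.0 + arcsec / 3600.0)

print(hms_to_degrees(3, 38))    # RA  ~ 54.5 deg
print(dms_to_degrees(-35, 27))  # dec ~ -35.45 deg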

View file

@@ -69,7 +69,7 @@ class DensityField(BaseField):
     Parameters
     ----------
-    box : :py:class:`csiborgtools.read.CSiBORGBox`
+    box : :py:class:`csiborgtools.read.CSiBORG1Box`
         The simulation box information and transformations.
     MAS : str
         Mass assignment scheme. Options are Options are: 'NGP' (nearest grid

@@ -167,7 +167,7 @@ class DensityField(BaseField):
     #
     # Parameters
     # ----------
-    # box : :py:class:`csiborgtools.read.CSiBORGBox`
+    # box : :py:class:`csiborgtools.read.CSiBORG1Box`
     #     The simulation box information and transformations.
     # MAS : str
     #     Mass assignment scheme. Options are Options are: 'NGP' (nearest grid

@@ -269,7 +269,7 @@ class VelocityField(BaseField):
     Parameters
     ----------
-    box : :py:class:`csiborgtools.read.CSiBORGBox`
+    box : :py:class:`csiborgtools.read.CSiBORG1Box`
         The simulation box information and transformations.
     MAS : str
         Mass assignment scheme. Options are Options are: 'NGP' (nearest grid

@@ -405,7 +405,7 @@ class PotentialField(BaseField):
     Parameters
     ----------
-    box : :py:class:`csiborgtools.read.CSiBORGBox`
+    box : :py:class:`csiborgtools.read.CSiBORG1Box`
         The simulation box information and transformations.
     MAS : str
         Mass assignment scheme. Options are Options are: 'NGP' (nearest grid

@@ -444,7 +444,7 @@ class TidalTensorField(BaseField):
     Parameters
     ----------
-    box : :py:class:`csiborgtools.read.CSiBORGBox`
+    box : :py:class:`csiborgtools.read.CSiBORG1Box`
         The simulation box information and transformations.
     MAS : str
         Mass assignment scheme used to calculate the density field. Options

View file

@@ -139,7 +139,7 @@ def observer_vobs(velocity_field):
     return vobs


-def make_sky(field, angpos, dist, box, volume_weight=True, verbose=True):
+def make_sky(field, angpos, dist, boxsize, volume_weight=True, verbose=True):
     r"""
     Make a sky map of a scalar field. The observer is in the centre of the
     box the field is evaluated along directions `angpos` (RA [0, 360) deg,

@@ -153,9 +153,9 @@ def make_sky(field, angpos, dist, box, volume_weight=True, verbose=True):
     angpos : 2-dimensional arrays of shape `(ndir, 2)`
         Directions to evaluate the field.
     dist : 1-dimensional array
-        Uniformly spaced radial distances to evaluate the field.
+        Uniformly spaced radial distances to evaluate the field in `Mpc / h`.
-    box : :py:class:`csiborgtools.read.CSiBORGBox`
-        The simulation box information and transformations.
+    boxsize : float
+        Box size in `Mpc / h`.
     volume_weight : bool, optional
         Whether to weight the field by the volume of the pixel.
     verbose : bool, optional

@@ -168,11 +168,11 @@ def make_sky(field, angpos, dist, box, volume_weight=True, verbose=True):
     dx = dist[1] - dist[0]
     assert numpy.allclose(dist[1:] - dist[:-1], dx)
     assert angpos.ndim == 2 and dist.ndim == 1
     # We loop over the angular directions, at each step evaluating a vector
     # of distances. We pre-allocate arrays for speed.
     dir_loop = numpy.full((dist.size, 3), numpy.nan, dtype=numpy.float32)
-    boxdist = box.mpc2box(dist)
-    boxsize = box.box2mpc(1.)
     ndir = angpos.shape[0]
     out = numpy.full(ndir, numpy.nan, dtype=numpy.float32)
     for i in trange(ndir) if verbose else range(ndir):

@@ -181,7 +181,7 @@ def make_sky(field, angpos, dist, box, volume_weight=True, verbose=True):
         dir_loop[:, 2] = angpos[i, 1]
         if volume_weight:
             out[i] = numpy.sum(
-                boxdist**2
+                dist**2
                 * evaluate_sky(field, pos=dir_loop, mpc2box=1 / boxsize))
         else:
             out[i] = numpy.sum(

@@ -244,7 +244,7 @@ def field2rsp(field, radvel_field, box, MAS, init_value=0.):
     radvel_field : 3-dimensional array of shape `(grid, grid, grid)`
         Radial velocity field in `km / s`. Expected to account for the observer
         velocity.
-    box : :py:class:`csiborgtools.read.CSiBORGBox`
+    box : :py:class:`csiborgtools.read.CSiBORG1Box`
         The simulation box information and transformations.
     MAS : str
         Mass assignment. Must be one of `NGP`, `CIC`, `TSC` or `PCS`.
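With `box` replaced by `boxsize`, callers of `make_sky` now pass the box size in Mpc/h directly. A minimal usage sketch, assuming `make_sky` is reachable under `csiborgtools.field`; the grid size, the 677.7 Mpc/h value and the random field are illustrative only:

import numpy
import csiborgtools

boxsize = 677.7                                                       # Mpc / h, CSiBORG-like box
field = numpy.random.rand(128, 128, 128).astype(numpy.float32)        # placeholder scalar field
angpos = numpy.array([[180., 0.], [90., 45.]], dtype=numpy.float32)   # (RA, dec) in degrees
dist = numpy.linspace(1., 150., 100, dtype=numpy.float32)             # uniform spacing in Mpc / h

sky = csiborgtools.field.make_sky(field, angpos, dist, boxsize, volume_weight=True)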

View file

@@ -49,5 +49,8 @@ def nside2radec(nside):
     """
     pixs = numpy.arange(healpy.nside2npix(nside))
     theta, phi = healpy.pix2ang(nside, pixs)
-    theta -= numpy.pi / 2
-    return 180 / numpy.pi * numpy.vstack([phi, theta]).T
+
+    ra = 180 / numpy.pi * phi
+    dec = 90 - 180 / numpy.pi * theta
+
+    return numpy.vstack([ra, dec]).T
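The fix reflects that `healpy.pix2ang` returns the colatitude theta in [0, pi], so declination is 90 deg minus theta in degrees; the old expression returned the declination with a flipped sign. A quick sanity check, assuming healpy is installed:

import numpy
import healpy

nside = 4
pixs = numpy.arange(healpy.nside2npix(nside))
theta, phi = healpy.pix2ang(nside, pixs)   # theta = colatitude in [0, pi]
ra = 180 / numpy.pi * phi                  # [0, 360) degrees
dec = 90 - 180 / numpy.pi * theta          # [-90, 90] degrees
print(dec.min(), dec.max())                # spans both hemispheres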

View file

@ -12,12 +12,8 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .box_units import CSiBORGBox, QuijoteBox # noqa
from .halo_cat import (CSiBORGCatalogue, QuijoteCatalogue, # noqa from .halo_cat import (CSiBORGCatalogue, QuijoteCatalogue, # noqa
CSiBORGPHEWCatalogue, fiducial_observers) # noqa fiducial_observers) # noqa
from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups, ObservedCluster, match_array_to_no_masking) # noqa TwoMPPGroups, ObservedCluster, match_array_to_no_masking) # noqa
from .paths import Paths # noqa from .paths import Paths # noqa
from .readsim import (CSiBORGReader, QuijoteReader, load_halo_particles, # noqa
make_halomap_dict) # noqa
from .utils import cols_to_structured, read_h5 # noqa

View file

@@ -1,276 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek, Deaglan Bartlett
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Simulation box unit transformations.
"""
from abc import ABC, abstractmethod, abstractproperty
import numpy
from astropy import constants, units
from astropy.cosmology import LambdaCDM
from .readsim import CSiBORGReader, QuijoteReader
###############################################################################
# Base box #
###############################################################################
class BaseBox(ABC):
_name = "box_units"
_cosmo = None
@property
def cosmo(self):
if self._cosmo is None:
raise ValueError("Cosmology not set.")
return self._cosmo
@property
def H0(self):
r"""Present Hubble parameter in :math:`\mathrm{km} \mathrm{s}^{-1}`"""
return self.cosmo.H0.value
@property
def rho_crit0(self):
"""Present-day critical density in M_sun h^2 / cMpc^3."""
rho_crit0 = self.cosmo.critical_density0
return rho_crit0.to_value(units.solMass / units.Mpc**3)
@property
def h(self):
"""The little 'h' parameter at the time of the snapshot."""
return self._h
@property
def Om0(self):
"""The present time matter density parameter."""
return self.cosmo.Om0
@abstractproperty
def boxsize(self):
"""Box size in cMpc."""
pass
@abstractmethod
def mpc2box(self, length):
r"""
Convert length from :math:`\mathrm{cMpc} / h` to box units.
Parameters
----------
length : float
Length in :math:`\mathrm{cMpc}`
Returns
-------
float
"""
pass
@abstractmethod
def box2mpc(self, length):
r"""
Convert length from box units to :math:`\mathrm{cMpc} / h`.
Parameters
----------
length : float
Length in box units.
Returns
-------
float
"""
pass
@abstractmethod
def solarmass2box(self, mass):
r"""
Convert mass from :math:`M_\odot / h` to box units.
Parameters
----------
mass : float
Mass in :math:`M_\odot / h`.
Returns
-------
float
"""
pass
@abstractmethod
def box2solarmass(self, mass):
r"""
Convert mass from box units to :math:`M_\odot / h`.
Parameters
----------
mass : float
Mass in box units.
Returns
-------
float
"""
pass
@abstractmethod
def m200c_to_r200c(self, m200c):
"""
Convert M200c to R200c in units of cMpc / h.
Parameters
----------
m200c : float
M200c in units of M_sun / h.
Returns
-------
float
"""
pass
###############################################################################
# CSiBORG box #
###############################################################################
class CSiBORGBox(BaseBox):
r"""
CSiBORG box units class for converting between box and physical units.
Paramaters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.Paths`
CSiBORG paths object.
"""
def __init__(self, nsnap, nsim, paths):
"""
Read in the snapshot info file and set the units from it.
"""
partreader = CSiBORGReader(paths)
info = partreader.read_info(nsnap, nsim)
pars = ["boxlen", "time", "aexp", "H0", "omega_m", "omega_l",
"omega_k", "omega_b", "unit_l", "unit_d", "unit_t"]
for par in pars:
setattr(self, "_" + par, info[par])
self._h = self._H0 / 100
self._cosmo = LambdaCDM(H0=100, Om0=self._omega_m,
Ode0=self._omega_l, Tcmb0=2.725 * units.K,
Ob0=self._omega_b)
self._Msuncgs = constants.M_sun.cgs.value # Solar mass in grams
def mpc2box(self, length):
conv = (self._unit_l / units.kpc.to(units.cm) / self._aexp) * 1e-3
conv *= self._h
return length / conv
def box2mpc(self, length):
conv = (self._unit_l / units.kpc.to(units.cm) / self._aexp) * 1e-3
conv *= self._h
return length * conv
def solarmass2box(self, mass):
conv = (self._unit_d * self._unit_l**3) / self._Msuncgs
conv *= self.h
return mass / conv
def box2solarmass(self, mass):
conv = (self._unit_d * self._unit_l**3) / self._Msuncgs
conv *= self.h
return mass * conv
def box2vel(self, vel):
r"""
Convert velocity from box units to :math:`\mathrm{km} \mathrm{s}^{-1}`.
Parameters
----------
vel : float
Velocity in box units.
Returns
-------
vel : float
Velocity in :math:`\mathrm{km} \mathrm{s}^{-1}`.
"""
return vel * (1e-2 * self._unit_l / self._unit_t / self._aexp) * 1e-3
@property
def boxsize(self):
return self.box2mpc(1.)
def m200c_to_r200c(self, m200c):
rho_crit = self.cosmo.critical_density(1 / self._aexp - 1)
rho_crit = rho_crit.to_value(units.solMass / units.Mpc**3)
r200c = (3 * m200c / (4 * numpy.pi * 200 * rho_crit))**(1 / 3)
return r200c / self._aexp
###############################################################################
# Quijote fiducial cosmology box #
###############################################################################
class QuijoteBox(BaseBox):
"""
Quijote cosmology box.
Parameters
----------
nsnap : int
Snapshot number.
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.Paths`
Paths manager
"""
def __init__(self, nsnap, nsim, paths):
zdict = {4: 0.0, 3: 0.5, 2: 1.0, 1: 2.0, 0: 3.0}
assert nsnap in zdict.keys(), f"`nsnap` must be in {zdict.keys()}."
info = QuijoteReader(paths).read_info(nsnap, nsim)
self._aexp = 1 / (1 + zdict[nsnap])
self._h = info["h"]
self._cosmo = LambdaCDM(H0=100, Om0=info["Omega_m"],
Ode0=info["Omega_l"], Tcmb0=2.725 * units.K)
self._info = info
@property
def boxsize(self):
return self._info["BoxSize"]
def box2mpc(self, length):
return length * self.boxsize
def mpc2box(self, length):
return length / self.boxsize
def solarmass2box(self, mass):
return mass / self._info["TotMass"]
def box2solarmass(self, mass):
return mass * self._info["TotMass"]
def m200c_to_r200c(self, m200c):
raise ValueError("Not implemented for Quijote boxes.")

View file

@@ -28,9 +28,9 @@ from sklearn.neighbors import NearestNeighbors
 from ..utils import (cartesian_to_radec, fprint, great_circle_distance,
                      number_counts, periodic_distance_two_points,
                      real2redshift)
-from .box_units import CSiBORGBox, QuijoteBox
+# TODO: removing these
+from .box_units import CSiBORG1Box, QuijoteBox
 from .paths import Paths
-from .readsim import load_halo_particles, make_halomap_dict

 ###############################################################################
 # Base catalogue #

@@ -622,75 +622,7 @@
             "csiborg", nsim, max(paths.get_snapshots(nsim, "csiborg")),
             halo_finder, catalogue_name, paths, mass_key, bounds,
             [338.85, 338.85, 338.85], observer_velocity, cache_maxsize)
-        self.box = CSiBORGBox(self.nsnap, self.nsim, self.paths)
+        self.box = CSiBORG1Box(self.nsnap, self.nsim, self.paths)
###############################################################################
# Quijote PHEW without snapshot catalogue #
###############################################################################
class CSiBORGPHEWCatalogue(BaseCatalogue):
r"""
CSiBORG PHEW halo catalogue without snapshot. Units typically used are:
- Length: :math:`cMpc / h`
- Mass: :math:`M_\odot / h`
Note that the PHEW catalogue is not very reliable.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.Paths`
Paths object.
mass_key : str, optional
Mass key of the catalogue.
bounds : dict, optional
Parameter bounds; keys as parameter names, values as (min, max) or
a boolean.
cache_maxsize : int, optional
Maximum number of cached arrays.
"""
def __init__(self, nsnap, nsim, paths, mass_key=None, bounds=None,
cache_maxsize=64):
super().__init__()
self.simname = "csiborg"
self.nsnap = nsnap
self.nsim = nsim
self.paths = paths
self.mass_key = mass_key
self.observer_location = [338.85, 338.85, 338.85]
fname = paths.processed_phew(nsim)
self._data = File(fname, "r")
if str(nsnap) not in self._data.keys():
raise ValueError(f"Snapshot {nsnap} not in the catalogue. "
f"Options are {self.get_snapshots(nsim, paths)}")
self.catalogue_name = str(nsnap)
self._is_closed = False
self.cache_maxsize = cache_maxsize
if bounds is not None:
self._make_mask(bounds)
self._derived_properties = ["cartesian_pos", "spherical_pos", "dist"]
self.box = CSiBORGBox(self.nsnap, self.nsim, self.paths)
self.clear_cache()
@staticmethod
def get_snapshots(nsim, paths):
"""List of snapshots available for this simulation."""
fname = paths.processed_phew(nsim)
with File(fname, "r") as f:
snaps = [int(key) for key in f.keys() if key != "info"]
f.close()
return numpy.sort(snaps)
###############################################################################

@@ -850,3 +782,41 @@ def find_boxed(pos, center, subbox_size, boxsize):
            start_index = i + 1

    return indxs
###############################################################################
# Supplementary functions #
###############################################################################
def make_halomap_dict(halomap):
"""
Make a dictionary mapping halo IDs to their start and end indices in the
snapshot particle array.
"""
return {hid: (int(start), int(end)) for hid, start, end in halomap}
def load_halo_particles(hid, particles, hid2map):
"""
Load a halo's particles from a particle array. If it is not there, i.e
halo has no associated particles, return `None`.
Parameters
----------
hid : int
Halo ID.
particles : 2-dimensional array
Array of particles.
hid2map : dict
Dictionary mapping halo IDs to `halo_map` array positions.
Returns
-------
parts : 1- or 2-dimensional array
"""
try:
k0, kf = hid2map[hid]
return particles[k0:kf + 1]
except KeyError:
return None
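A short usage sketch of the two helpers now kept in `halo_cat`; the toy `halomap` rows and particle array are made up, and the import path assumes the module layout shown in this diff:

import numpy
from csiborgtools.read.halo_cat import make_halomap_dict, load_halo_particles

# Toy inputs: rows of (halo ID, start index, end index) plus a matching
# particle array; in the pipeline these come from the processed snapshot files.
halomap = [(1, 0, 4), (2, 5, 9)]
particles = numpy.arange(30, dtype=numpy.float32).reshape(10, 3)

hid2map = make_halomap_dict(halomap)
parts = load_halo_particles(2, particles, hid2map)     # rows 5..9 of `particles`
missing = load_halo_particles(7, particles, hid2map)   # halo not present -> None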

View file

@@ -26,7 +26,6 @@ from astropy.io import fits
 from astropy.cosmology import FlatLambdaCDM
 from scipy import constants

-from .utils import cols_to_structured

 ###############################################################################
 # Text survey base class #

@@ -830,3 +829,17 @@ def match_array_to_no_masking(arr, surv):
             out[indx] = arr[i]

     return out
def cols_to_structured(N, cols):
"""
Allocate a structured array from `cols`, a list of (name, dtype) tuples.
"""
if not (isinstance(cols, list)
and all(isinstance(c, tuple) and len(c) == 2 for c in cols)):
raise TypeError("`cols` must be a list of (name, dtype) tuples.")
names, formats = zip(*cols)
dtype = {"names": names, "formats": formats}
return numpy.full(N, numpy.nan, dtype=dtype)
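A brief example of the `cols_to_structured` helper now defined locally in `obs.py`, using the definition shown above with a made-up column list:

import numpy

cols = [("RA", numpy.float32), ("dec", numpy.float32), ("zobs", numpy.float32)]
out = cols_to_structured(5, cols)     # NaN-filled structured array of length 5
out["RA"] = numpy.linspace(0., 90., 5)
print(out.dtype.names)                # ('RA', 'dec', 'zobs')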

View file

@@ -15,9 +15,9 @@
 """
 CSiBORG paths manager.
 """
-from glob import glob, iglob
+from glob import glob
 from os import makedirs
-from os.path import isdir, join
+from os.path import basename, isdir, join
 from warnings import warn

 import numpy

@@ -40,6 +40,7 @@ class Paths:
     Parameters
     ----------
+    # HERE EDIT EVERYTHING
     srcdir : str, optional
         Path to the folder where the RAMSES outputs are stored.
     postdir: str, optional
@@ -49,73 +50,158 @@
     quiote_dir : str, optional
         Path to the folder where Quijote simulations are stored.
     """
-    _srcdir = None
-    _postdir = None
-    _borg_dir = None
-    _quijote_dir = None
-
-    def __init__(self, srcdir=None, postdir=None, borg_dir=None,
-                 quijote_dir=None):
-        self.srcdir = srcdir
-        self.postdir = postdir
-        self.borg_dir = borg_dir
-        self.quijote_dir = quijote_dir
-
-    @property
-    def srcdir(self):
-        """Path to the folder where CSiBORG simulations are stored."""
-        if self._srcdir is None:
-            raise ValueError("`srcdir` is not set!")
-        return self._srcdir
-
-    @srcdir.setter
-    def srcdir(self, path):
-        if path is None:
-            return
-        check_directory(path)
-        self._srcdir = path
-
-    @property
-    def borg_dir(self):
-        """Path to the folder where BORG MCMC chains are stored."""
-        if self._borg_dir is None:
-            raise ValueError("`borg_dir` is not set!")
-        return self._borg_dir
-
-    @borg_dir.setter
-    def borg_dir(self, path):
-        if path is None:
-            return
-        check_directory(path)
-        self._borg_dir = path
-
-    @property
-    def quijote_dir(self):
-        """Path to the folder where Quijote simulations are stored."""
-        if self._quijote_dir is None:
-            raise ValueError("`quijote_dir` is not set!")
-        return self._quijote_dir
-
-    @quijote_dir.setter
-    def quijote_dir(self, path):
-        if path is None:
-            return
-        check_directory(path)
-        self._quijote_dir = path
-
-    @property
-    def postdir(self):
-        """Path to the folder where post-processed files are stored."""
-        if self._postdir is None:
-            raise ValueError("`postdir` is not set!")
-        return self._postdir
-
-    @postdir.setter
-    def postdir(self, path):
-        if path is None:
-            return
-        check_directory(path)
-        self._postdir = path
+    def __init__(self,
+                 csiborg1_srcdir=None,
+                 csiborg2_main_srcdir=None,
+                 csiborg2_random_srcdir=None,
+                 csiborg2_varysmall_srcdir=None,
+                 postdir=None,
+                 quijote_dir=None
+                 ):
+        self.csiborg1_srcdir = csiborg1_srcdir
+        self.csiborg2_main_srcdir = csiborg2_main_srcdir
+        self.csiborg2_random_srcdir = csiborg2_random_srcdir
+        self.csiborg2_varysmall_srcdir = csiborg2_varysmall_srcdir
+        self.quijote_dir = quijote_dir
+        self.postdir = postdir
+
+    def get_ics(self, simname, from_quijote_backup=False):
+        """
+        Get available IC realisation IDs for a given simulation.
+
+        Parameters
+        ----------
+        simname : str
+            Simulation name.
+        from_quijote_backup : bool, optional
+            Whether to return the ICs from the Quijote backup.
+
+        Returns
+        -------
+        ids : 1-dimensional array
+        """
+        if simname == "csiborg":
+            files = glob(join(self.csiborg1_srcdir, "chain_*"))
+            files = [int(basename(f).replace("chain_", "")) for f in files]
+        elif simname == "csiborg2_main":
+            files = glob(join(self.csiborg2_main_srcdir, "chain_*"))
+            files = [int(basename(f).replace("chain_", "")) for f in files]
+        elif simname == "csiborg2_random":
+            raise NotImplementedError("TODO")
+        elif simname == "csiborg2_varysmall":
+            raise NotImplementedError("TODO")
+        elif simname == "quijote" or simname == "quijote_full":
+            if from_quijote_backup:
+                files = glob(join(self.quijote_dir, "halos_backup", "*"))
+            else:
+                warn(("Taking only the snapshots that also have "
+                      "a FoF catalogue!"))
+                files = glob(join(self.quijote_dir, "Snapshots_fiducial", "*"))
+            files = [int(f.split("/")[-1]) for f in files]
+            files = [f for f in files if f < 100]
+        else:
+            raise ValueError(f"Unknown simulation name `{simname}`.")
+
+        return numpy.sort(files)
+
+    def snapshots(self, nsim, simname, tonew=False):
+        if simname == "csiborg":
+            fname = "ramses_out_{}"
+            if tonew:
+                fname += "_new"
+                return join(self.postdir, "output", fname.format(nsim))
+            return join(self.csiborg1_srcdir, fname.format(nsim))
+        elif simname == "csiborg2_main":
+            return join(self.csiborg2_main_srcdir, f"chain_{nsim}", "output")
+        elif simname == "csiborg2_random":
+            raise NotImplementedError("TODO")
+        elif simname == "csiborg2_varysmall":
+            raise NotImplementedError("TODO")
+        elif simname == "quijote":
+            return join(self.quijote_dir, "Snapshots_fiducial", str(nsim))
+        else:
+            raise ValueError(f"Unknown simulation name `{simname}`.")
+
+    def snapshot(self, nsnap, nsim, simname):
+        """
+        Path to an IC realisation snapshot.
+
+        Parameters
+        ----------
+        nsnap : int
+            Snapshot index. For Quijote, `-1` indicates the IC snapshot.
+        nsim : int
+            IC realisation index.
+        simname : str
+            Simulation name.
+
+        Returns
+        -------
+        str
+        """
+        if simname == "csiborg":
+            return join(self.csiborg1_srcdir, f"chain_{nsim}",
+                        f"snapshot_{str(nsnap).zfill(5)}")
+        elif simname == "csiborg2_main":
+            return join(self.csiborg1_srcdir, f"chain_{nsim}",
+                        f"snapshot_{str(nsnap).zfill(5)}")
+        elif simname == "csiborg2_random":
+            raise NotImplementedError("TODO")
+        elif simname == "csiborg2_varysmall":
+            raise NotImplementedError("TODO")
+        elif simname == "quijote":
+            raise NotImplementedError("TODO")
+        else:
+            raise ValueError(f"Unknown simulation name `{simname}`.")
+
+        # simpath = self.snapshots(nsim, simname, tonew=nsnap == 1)
+        # if simname == "csiborg":
+        #     return join(simpath, f"output_{str(nsnap).zfill(5)}")
+        # else:
+        #     if nsnap == -1:
+        #         return join(simpath, "ICs", "ics")
+        #     nsnap = str(nsnap).zfill(3)
+        #     return join(simpath, f"snapdir_{nsnap}", f"snap_{nsnap}")
+
+    def get_snapshots(self, nsim, simname):
+        """
+        List of available snapshots of simulation.
+
+        Parameters
+        ----------
+        nsim : int
+            IC realisation index.
+        simname : str
+            Simulation name.
+
+        Returns
+        -------
+        snapshots : 1-dimensional array
+        """
+        simpath = self.snapshots(nsim, simname, tonew=False)
+        if simname == "csiborg":
+            # Get all files in simpath that start with output_
+            snaps = glob(join(simpath, "output_*"))
+            # Take just the last _00XXXX from each file and strip zeros
+            snaps = [int(snap.split("_")[-1].lstrip("0")) for snap in snaps]
+        elif simname == "csiborg2_main":
+            snaps = glob(join(simpath, "snapshot_*"))
+            snaps = [basename(snap) for snap in snaps]
+            snaps = [int(snap.split("_")[1]) for snap in snaps]
+        elif simname == "csiborg2_random":
+            raise NotImplementedError("TODO")
+        elif simname == "csiborg2_varysmall":
+            raise NotImplementedError("TODO")
+        elif simname == "quijote":
+            snaps = glob(join(simpath, "snapdir_*"))
+            snaps = [int(snap.split("/")[-1].split("snapdir_")[-1])
+                     for snap in snaps]
+        else:
+            raise ValueError(f"Unknown simulation name `{simname}`.")
+        return numpy.sort(snaps)

     @staticmethod
     def quijote_fiducial_nsim(nsim, nobs=None):

@@ -140,268 +226,6 @@
             return nsim
         return f"{str(nobs).zfill(2)}{str(nsim).zfill(3)}"
def borg_mcmc(self, nsim):
"""
Path to the BORG MCMC chain file.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
str
"""
return join(self.borg_dir, "mcmc", f"mcmc_{nsim}.h5")
def fof_cat(self, nsnap, nsim, simname, from_quijote_backup=False):
r"""
Path to the :math:`z = 0` FoF halo catalogue.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
simname : str
Simulation name. Must be one of `csiborg` or `quijote`.
from_quijote_backup : bool, optional
Whether to return the path to the Quijote FoF catalogue from the
backup.
Returns
-------
str
"""
if simname == "csiborg":
fdir = join(self.postdir, "halo_maker", f"ramses_{nsim}",
f"output_{str(nsnap).zfill(5)}", "FOF")
try_create_directory(fdir)
return join(fdir, "fort.132")
elif simname == "quijote":
if from_quijote_backup:
return join(self.quijote_dir, "halos_backup", str(nsim))
else:
return join(self.quijote_dir, "Halos_fiducial", str(nsim))
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
def get_ics(self, simname, from_quijote_backup=False):
"""
Get available IC realisation IDs for either the CSiBORG or Quijote
simulation suite.
Parameters
----------
simname : str
Simulation name. Must be `csiborg` or `quijote`.
from_quijote_backup : bool, optional
Whether to return the ICs from the Quijote backup.
Returns
-------
ids : 1-dimensional array
"""
if simname == "csiborg":
files = glob(join(self.srcdir, "ramses_out*"))
files = [f.split("/")[-1] for f in files] # Only file names
files = [f for f in files if "_inv" not in f] # Remove inv. ICs
files = [f for f in files if "_new" not in f] # Remove _new
files = [f for f in files if "OLD" not in f] # Remove _old
files = [int(f.split("_")[-1]) for f in files]
try:
files.remove(5511)
except ValueError:
pass
elif simname == "quijote" or simname == "quijote_full":
if from_quijote_backup:
files = glob(join(self.quijote_dir, "halos_backup", "*"))
else:
warn(("Taking only the snapshots that also have "
"a FoF catalogue!"))
files = glob(join(self.quijote_dir, "Snapshots_fiducial", "*"))
files = [int(f.split("/")[-1]) for f in files]
files = [f for f in files if f < 100]
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
return numpy.sort(files)
def snapshots(self, nsim, simname, tonew=False):
"""
Path to an IC snapshots folder.
Parameters
----------
nsim : int
IC realisation index.
simname : str
Simulation name. Must be one of `csiborg` or `quijote`.
tonew : bool, optional
Whether to return the path to the '_new' IC realisation of
CSiBORG. Ignored for Quijote.
Returns
-------
str
"""
if simname == "csiborg":
fname = "ramses_out_{}"
if tonew:
fname += "_new"
return join(self.postdir, "output", fname.format(nsim))
return join(self.srcdir, fname.format(nsim))
elif simname == "quijote":
return join(self.quijote_dir, "Snapshots_fiducial", str(nsim))
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
def get_snapshots(self, nsim, simname):
"""
List of available snapshots of simulation.
Parameters
----------
nsim : int
IC realisation index.
simname : str
Simulation name. Must be one of `csiborg` or `quijote`.
Returns
-------
snapshots : 1-dimensional array
"""
simpath = self.snapshots(nsim, simname, tonew=False)
if simname == "csiborg":
# Get all files in simpath that start with output_
snaps = glob(join(simpath, "output_*"))
# Take just the last _00XXXX from each file and strip zeros
snaps = [int(snap.split("_")[-1].lstrip("0")) for snap in snaps]
elif simname == "quijote":
snaps = glob(join(simpath, "snapdir_*"))
snaps = [int(snap.split("/")[-1].split("snapdir_")[-1])
for snap in snaps]
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
return numpy.sort(snaps)
def snapshot(self, nsnap, nsim, simname):
"""
Path to an IC realisation snapshot.
Parameters
----------
nsnap : int
Snapshot index. For Quijote, `-1` indicates the IC snapshot.
nsim : int
IC realisation index.
simname : str
Simulation name. Must be one of `csiborg` or `quijote`.
Returns
-------
str
"""
simpath = self.snapshots(nsim, simname, tonew=nsnap == 1)
if simname == "csiborg":
return join(simpath, f"output_{str(nsnap).zfill(5)}")
else:
if nsnap == -1:
return join(simpath, "ICs", "ics")
nsnap = str(nsnap).zfill(3)
return join(simpath, f"snapdir_{nsnap}", f"snap_{nsnap}")
def processed_output(self, nsim, simname, halo_finder):
"""
Path to the files containing all particles of a CSiBORG realisation at
:math:`z = 0`.
Parameters
----------
nsim : int
IC realisation index.
simname : str
Simulation name. Must be one of `csiborg` or `quijote`.
halo_finder : str
Halo finder name.
Returns
-------
str
"""
if simname == "csiborg":
fdir = join(self.postdir, "processed_output")
elif simname == "quijote":
fdir = join(self.quijote_dir, "Particles_fiducial")
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
try_create_directory(fdir)
fname = f"parts_{halo_finder}_{str(nsim).zfill(5)}.hdf5"
return join(fdir, fname)
def processed_phew(self, nsim):
"""
Path to the files containing PHEW CSiBORG catalogues.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
str
"""
fdir = join(self.postdir, "processed_output")
try_create_directory(fdir)
return join(fdir, f"phew_{str(nsim).zfill(5)}.hdf5")
def halomaker_particle_membership(self, nsnap, nsim, halo_finder):
"""
Path to the HaloMaker particle membership file (CSiBORG only).
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
halo_finder : str
Halo finder name.
Returns
-------
str
"""
fdir = join(self.postdir, "halo_maker", f"ramses_{nsim}",
f"output_{str(nsnap).zfill(5)}", halo_finder)
fpath = join(fdir, "*particle_membership*")
return next(iglob(fpath, recursive=True), None)
def ascii_positions(self, nsim, kind):
"""
Path to ASCII files containing the positions of particles or halos.
Parameters
----------
nsim : int
IC realisation index.
kind : str
Kind of data to extract. Must be one of `particles`,
`particles_rsp`, `halos`, `halos_rsp`.
"""
assert kind in ["particles", "particles_rsp", "halos", "halos_rsp"]
fdir = join(self.postdir, "ascii_positions")
try_create_directory(fdir)
fname = f"pos_{kind}_{str(nsim).zfill(5)}.txt"
return join(fdir, fname)
    def overlap(self, simname, nsim0, nsimx, min_logmass, smoothed):
        """
        Path to the overlap files between two CSiBORG simulations.

@@ -569,29 +393,6 @@
        return join(fdir, fname)
def observer_peculiar_velocity(self, MAS, grid, nsim):
"""
Path to the files containing the observer peculiar velocity.
Parameters
----------
MAS : str
Mass-assignment scheme.
grid : int
Grid size.
nsim : int
IC realisation index.
Returns
-------
str
"""
fdir = join(self.postdir, "environment")
try_create_directory(fdir)
fname = f"obs_vp_{MAS}_{str(nsim).zfill(5)}_{grid}.npz"
return join(fdir, fname)
    def cross_nearest(self, simname, run, kind, nsim=None, nobs=None):
        """
        Path to the files containing distance from a halo in a reference
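A usage sketch of the reorganised paths manager, mirroring how the scripts below construct it; it requires csiborgtools and the glamdring filesystem, and the chain and snapshot indices are simply whatever `get_ics` and `get_snapshots` report:

import csiborgtools

# Build the manager from the glamdring defaults defined in csiborgtools/__init__.py.
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)

ics = paths.get_ics("csiborg2_main")                       # available chain indices
nsnap = max(paths.get_snapshots(ics[0], "csiborg2_main"))  # latest snapshot of the first chain
print(paths.snapshot(nsnap, ics[0], "csiborg2_main"))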

View file

@@ -1,631 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Functions to read in the particle and clump files.
"""
from abc import ABC, abstractmethod
from gc import collect
from os.path import getsize, isfile, join
from warnings import warn
import numpy
import pynbody
from scipy.io import FortranFile
from tqdm import tqdm
try:
import readgadget
from readfof import FoF_catalog
except ImportError:
warn("Could not import `readgadget` and `readfof`. Related routines will not be available", ImportWarning) # noqa
from tqdm import trange
from ..utils import fprint
from .paths import Paths
from .utils import add_columns, cols_to_structured, flip_cols
class BaseReader(ABC):
"""
Base class for all readers.
"""
_paths = None
@property
def paths(self):
"""Paths manager."""
return self._paths
@paths.setter
def paths(self, paths):
assert isinstance(paths, Paths)
self._paths = paths
@abstractmethod
def read_info(self, nsnap, nsim):
"""
Read simulation snapshot info.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
info : dict
Dictionary of information paramaters.
"""
pass
@abstractmethod
def read_snapshot(self, nsnap, nsim, kind, sort_like_final=False):
"""
Read snapshot.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
kind : str
Information to read. Can be `pid`, `pos`, `vel`, or `mass`.
sort_like_final : bool, optional
Whether to sort the particles like the final snapshot.
Returns
-------
n-dimensional array
"""
@abstractmethod
def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
"""
Read the (sub) halo membership of particles.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
halo_finder : str
Halo finder used when running the catalogue.
Returns
-------
out : 1-dimensional array of shape `(nparticles, )`
"""
def read_catalogue(self, nsnap, nsim, halo_finder):
"""
Read in the halo catalogue.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
halo_finder : str
Halo finder used when running the catalogue.
Returns
-------
structured array
"""
###############################################################################
# CSiBORG particle reader #
###############################################################################
class CSiBORGReader(BaseReader):
"""
Object to read in CSiBORG snapshots from the binary files and halo
catalogues.
Parameters
----------
paths : py:class`csiborgtools.read.Paths`
"""
def __init__(self, paths):
self.paths = paths
def read_info(self, nsnap, nsim):
snappath = self.paths.snapshot(nsnap, nsim, "csiborg")
filename = join(snappath, "info_{}.txt".format(str(nsnap).zfill(5)))
with open(filename, "r") as f:
info = f.read().split()
# Throw anything below ordering line out
info = numpy.asarray(info[:info.index("ordering")])
# Get indexes of lines with `=`. Indxs before/after be keys/vals
eqs = numpy.asarray([i for i in range(info.size) if info[i] == '='])
keys = info[eqs - 1]
vals = info[eqs + 1]
return {key: convert_str_to_num(val) for key, val in zip(keys, vals)}
def read_snapshot(self, nsnap, nsim, kind):
sim = pynbody.load(self.paths.snapshot(nsnap, nsim, "csiborg"))
if kind == "pid":
x = numpy.array(sim["iord"], dtype=numpy.uint64)
elif kind in ["pos", "vel", "mass"]:
x = numpy.array(sim[kind], dtype=numpy.float32)
else:
raise ValueError(f"Unknown kind `{kind}`.")
# Because of a RAMSES bug x and z are flipped.
if kind in ["pos", "vel"]:
x[:, [0, 2]] = x[:, [2, 0]]
del sim
collect()
return x
def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
if halo_finder == "PHEW":
ids = self.read_phew_id(nsnap, nsim, verbose)
elif halo_finder in ["FOF"]:
ids = self.read_halomaker_id(nsnap, nsim, halo_finder, verbose)
else:
raise ValueError(f"Unknown halo finder `{halo_finder}`.")
return ids
def open_particle(self, nsnap, nsim, verbose=True):
"""Open particle files to a given CSiBORG simulation."""
snappath = self.paths.snapshot(nsnap, nsim, "csiborg")
ncpu = int(self.read_info(nsnap, nsim)["ncpu"])
nsnap = str(nsnap).zfill(5)
fprint(f"reading in output `{nsnap}` with ncpu = `{ncpu}`.", verbose)
# First read the headers. Reallocate arrays and fill them.
nparts = numpy.zeros(ncpu, dtype=int)
partfiles = [None] * ncpu
for cpu in range(ncpu):
cpu_str = str(cpu + 1).zfill(5)
fpath = join(snappath, "part_{}.out{}".format(nsnap, cpu_str))
f = FortranFile(fpath)
# Read in this order
ncpuloc = f.read_ints()
if ncpuloc != ncpu:
infopath = join(snappath, f"info_{nsnap}.txt")
raise ValueError(
"`ncpu = {}` of `{}` disagrees with `ncpu = {}` "
"of `{}`.".format(ncpu, infopath, ncpuloc, fpath))
ndim = f.read_ints()
nparts[cpu] = f.read_ints()
localseed = f.read_ints()
nstar_tot = f.read_ints()
mstar_tot = f.read_reals('d')
mstar_lost = f.read_reals('d')
nsink = f.read_ints()
partfiles[cpu] = f
del ndim, localseed, nstar_tot, mstar_tot, mstar_lost, nsink
return nparts, partfiles
def open_unbinding(self, nsnap, nsim, cpu):
"""Open PHEW unbinding files."""
nsnap = str(nsnap).zfill(5)
cpu = str(cpu + 1).zfill(5)
fpath = join(self.paths.snapshots(nsim, "csiborg", tonew=False),
f"output_{nsnap}", f"unbinding_{nsnap}.out{cpu}")
return FortranFile(fpath)
def read_phew_id(self, nsnap, nsim, verbose):
nparts, __ = self.open_particle(nsnap, nsim)
start_ind = numpy.hstack([[0], numpy.cumsum(nparts[:-1])])
ncpu = nparts.size
clumpid = numpy.full(numpy.sum(nparts), numpy.nan, dtype=numpy.int32)
for cpu in trange(ncpu, disable=not verbose, desc="CPU"):
i = start_ind[cpu]
j = nparts[cpu]
ff = self.open_unbinding(nsnap, nsim, cpu)
clumpid[i:i + j] = ff.read_ints()
ff.close()
return clumpid
def read_halomaker_id(self, nsnap, nsim, halo_finder, verbose):
fpath = self.paths.halomaker_particle_membership(
nsnap, nsim, halo_finder)
fprint("loading particle IDs from the snapshot.", verbose)
pids = self.read_snapshot(nsnap, nsim, "pid")
fprint("mapping particle IDs to their indices.", verbose)
pids_idx = {pid: i for i, pid in enumerate(pids)}
# Unassigned particle IDs are assigned a halo ID of 0.
fprint("mapping HIDs to their array indices.", verbose)
hids = numpy.zeros(pids.size, dtype=numpy.int32)
# Read lin-by-line to avoid loading the whole file into memory.
with open(fpath, 'r') as file:
for line in tqdm(file, disable=not verbose,
desc="Processing membership"):
hid, pid = map(int, line.split())
hids[pids_idx[pid]] = hid
del pids_idx
collect()
return hids
def read_catalogue(self, nsnap, nsim, halo_finder):
if halo_finder == "PHEW":
return self.read_phew_clumps(nsnap, nsim)
elif halo_finder == "FOF":
return self.read_fof_halos(nsnap, nsim)
else:
raise ValueError(f"Unknown halo finder `{halo_finder}`.")
def read_fof_halos(self, nsnap, nsim):
"""
Read in the FoF halo catalogue.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
structured array
"""
info = self.read_info(nsnap, nsim)
h = info["H0"] / 100
fpath = self.paths.fof_cat(nsnap, nsim, "csiborg")
hid = numpy.genfromtxt(fpath, usecols=0, dtype=numpy.int32)
pos = numpy.genfromtxt(fpath, usecols=(1, 2, 3), dtype=numpy.float32)
totmass = numpy.genfromtxt(fpath, usecols=4, dtype=numpy.float32)
m200c = numpy.genfromtxt(fpath, usecols=5, dtype=numpy.float32)
dtype = {"names": ["index", "x", "y", "z", "totpartmass", "m200c"],
"formats": [numpy.int32] + [numpy.float32] * 5}
out = numpy.full(hid.size, numpy.nan, dtype=dtype)
out["index"] = hid
out["x"] = pos[:, 0] * h + 677.7 / 2
out["y"] = pos[:, 1] * h + 677.7 / 2
out["z"] = pos[:, 2] * h + 677.7 / 2
# Because of a RAMSES bug x and z are flipped.
flip_cols(out, "x", "z")
out["totpartmass"] = totmass * 1e11 * h
out["m200c"] = m200c * 1e11 * h
return out
def read_phew_clumps(self, nsnap, nsim, verbose=True):
"""
Read in a PHEW clump file `clump_XXXXX.dat`.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
Returns
-------
structured array
"""
nsnap = str(nsnap).zfill(5)
fname = join(self.paths.snapshots(nsim, "csiborg", tonew=False),
"output_{}".format(nsnap),
"clump_{}.dat".format(nsnap))
if not isfile(fname) or getsize(fname) == 0:
raise FileExistsError(f"Clump file `{fname}` does not exist.")
data = numpy.genfromtxt(fname)
if data.ndim == 1:
raise FileExistsError(f"Invalid clump file `{fname}`.")
# How the data is stored in the clump file.
clump_cols = {"index": (0, numpy.int32),
"level": (1, numpy.int32),
"parent": (2, numpy.int32),
"ncell": (3, numpy.float32),
"x": (4, numpy.float32),
"y": (5, numpy.float32),
"z": (6, numpy.float32),
"rho-": (7, numpy.float32),
"rho+": (8, numpy.float32),
"rho_av": (9, numpy.float32),
"mass_cl": (10, numpy.float32),
"relevance": (11, numpy.float32),
}
cols = list(clump_cols.keys())
dtype = [(col, clump_cols[col][1]) for col in cols]
out = cols_to_structured(data.shape[0], dtype)
for col in cols:
out[col] = data[:, clump_cols[col][0]]
# Convert to cMpc / h and Msun / h
out['x'] *= 677.7
out['y'] *= 677.7
out['z'] *= 677.7
# Because of a RAMSES bug x and z are flipped.
flip_cols(out, "x", "z")
out["mass_cl"] *= 2.6543271649678946e+19
ultimate_parent, summed_mass = self.find_parents(out)
out = add_columns(out, [ultimate_parent, summed_mass],
["ultimate_parent", "summed_mass"])
return out
def find_parents(self, clumparr):
"""
Find ultimate parent haloes for every PHEW clump.
Parameters
----------
clumparr : structured array
Clump array. Must contain `index` and `parent` columns.
Returns
-------
parent_arr : 1-dimensional array of shape `(nclumps, )`
The ultimate parent halo index of every clump.
parent_mass : 1-dimensional array of shape `(nclumps, )`
The summed substructure mass of ultimate parent clumps.
"""
clindex = clumparr["index"]
parindex = clumparr["parent"]
clmass = clumparr["mass_cl"]
clindex_to_array_index = {clindex[i]: i for i in range(clindex.size)}
parent_arr = numpy.copy(parindex)
for i in range(clindex.size):
cl = clindex[i]
par = parindex[i]
while cl != par:
element = clindex_to_array_index[par]
cl = clindex[element]
par = parindex[element]
parent_arr[i] = cl
parent_mass = numpy.full(clindex.size, 0, dtype=numpy.float32)
# Assign the clump masses to the ultimate parent haloes. For each clump
# find its ultimate parent and add its mass to the parent mass.
for i in range(clindex.size):
element = clindex_to_array_index[parent_arr[i]]
parent_mass[element] += clmass[i]
# Set this to NaN for the clumps that are not ultimate parents.
parent_mass[clindex != parindex] = numpy.nan
return parent_arr, parent_mass
###############################################################################
# Quijote particle reader #
###############################################################################
class QuijoteReader(BaseReader):
"""
Object to read in Quijote snapshots from the binary files.
Parameters
----------
paths : py:class`csiborgtools.read.Paths`
"""
def __init__(self, paths):
self.paths = paths
def read_info(self, nsnap, nsim):
snapshot = self.paths.snapshot(nsnap, nsim, "quijote")
header = readgadget.header(snapshot)
out = {"BoxSize": header.boxsize / 1e3, # Mpc/h
"Nall": header.nall[1], # Tot num of particles
"PartMass": header.massarr[1] * 1e10, # Part mass in Msun/h
"Omega_m": header.omega_m,
"Omega_l": header.omega_l,
"h": header.hubble,
"redshift": header.redshift,
}
out["TotMass"] = out["Nall"] * out["PartMass"]
out["Hubble"] = (100.0 * numpy.sqrt(
header.omega_m * (1.0 + header.redshift)**3 + header.omega_l))
return out
def read_snapshot(self, nsnap, nsim, kind):
snapshot = self.paths.snapshot(nsnap, nsim, "quijote")
info = self.read_info(nsnap, nsim)
ptype = [1] # DM in Gadget speech
if kind == "pid":
return readgadget.read_block(snapshot, "ID ", ptype)
elif kind == "pos":
pos = readgadget.read_block(snapshot, "POS ", ptype) / 1e3 # Mpc/h
pos = pos.astype(numpy.float32)
pos /= info["BoxSize"] # Box units
return pos
elif kind == "vel":
vel = readgadget.read_block(snapshot, "VEL ", ptype)
vel = vel.astype(numpy.float32)
vel *= (1 + info["redshift"]) # km / s
return vel
elif kind == "mass":
return numpy.full(info["Nall"], info["PartMass"],
dtype=numpy.float32)
else:
raise ValueError(f"Unsupported kind `{kind}`.")
def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
if halo_finder == "FOF":
path = self.paths.fof_cat(nsnap, nsim, "quijote")
cat = FoF_catalog(path, nsnap)
pids = self.read_snapshot(nsnap, nsim, kind="pid")
# Read the FoF particle membership.
fprint("reading the FoF particle membership.")
group_pids = cat.GroupIDs
group_len = cat.GroupLen
# Create a mapping from particle ID to FoF group ID.
fprint("creating the particle to FoF ID to map.")
ks = numpy.insert(numpy.cumsum(group_len), 0, 0)
pid2hid = numpy.full(
(group_pids.size, 2), numpy.nan, dtype=numpy.uint64)
for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])):
pid2hid[k0:kf, 0] = i + 1
pid2hid[k0:kf, 1] = group_pids[k0:kf]
pid2hid = {pid: hid for hid, pid in pid2hid}
# Create the final array of hids matchign the snapshot array.
# Unassigned particles have hid 0.
fprint("creating the final hid array.")
hids = numpy.full(pids.size, 0, dtype=numpy.uint64)
for i in trange(pids.size, disable=not verbose):
hids[i] = pid2hid.get(pids[i], 0)
return hids
else:
raise ValueError(f"Unknown halo finder `{halo_finder}`.")
def read_catalogue(self, nsnap, nsim, halo_finder):
if halo_finder == "FOF":
return self.read_fof_halos(nsnap, nsim)
else:
raise ValueError(f"Unknown halo finder `{halo_finder}`.")
def read_fof_halos(self, nsnap, nsim):
"""
Read in the FoF halo catalogue.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
structured array
"""
fpath = self.paths.fof_cat(nsnap, nsim, "quijote", False)
fof = FoF_catalog(fpath, nsnap, long_ids=False, swap=False,
SFR=False, read_IDs=False)
cols = [("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("vx", numpy.float32),
("vy", numpy.float32),
("vz", numpy.float32),
("group_mass", numpy.float32),
("npart", numpy.int32),
("index", numpy.int32)
]
data = cols_to_structured(fof.GroupLen.size, cols)
pos = fof.GroupPos / 1e3
vel = fof.GroupVel * (1 + self.read_info(nsnap, nsim)["redshift"])
for i, p in enumerate(["x", "y", "z"]):
data[p] = pos[:, i]
data[f"v{p}"] = vel[:, i]
data["group_mass"] = fof.GroupMass * 1e10
data["npart"] = fof.GroupLen
# We want to start indexing from 1. Index 0 is reserved for
# particles unassigned to any FoF group.
data["index"] = 1 + numpy.arange(data.size, dtype=numpy.int32)
return data
###############################################################################
# Supplementary functions #
###############################################################################
def make_halomap_dict(halomap):
"""
Make a dictionary mapping halo IDs to their start and end indices in the
snapshot particle array.
"""
return {hid: (int(start), int(end)) for hid, start, end in halomap}
def load_halo_particles(hid, particles, hid2map):
"""
Load a halo's particles from a particle array. If it is not there, i.e
halo has no associated particles, return `None`.
Parameters
----------
hid : int
Halo ID.
particles : 2-dimensional array
Array of particles.
hid2map : dict
Dictionary mapping halo IDs to `halo_map` array positions.
Returns
-------
parts : 1- or 2-dimensional array
"""
try:
k0, kf = hid2map[hid]
return particles[k0:kf + 1]
except KeyError:
return None
def convert_str_to_num(s):
"""
Convert a string representation of a number to its appropriate numeric type
(int or float).
Parameters
----------
s : str
The string representation of the number.
Returns
-------
num : int or float
"""
try:
return int(s)
except ValueError:
try:
return float(s)
except ValueError:
warn(f"Cannot convert string '{s}' to number", UserWarning)
return s

View file

@@ -1,126 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from os.path import isfile
import numpy
from h5py import File
###############################################################################
# Array manipulation #
###############################################################################
def cols_to_structured(N, cols):
"""
Allocate a structured array from `cols`, a list of (name, dtype) tuples.
"""
if not (isinstance(cols, list)
and all(isinstance(c, tuple) and len(c) == 2 for c in cols)):
raise TypeError("`cols` must be a list of (name, dtype) tuples.")
names, formats = zip(*cols)
dtype = {"names": names, "formats": formats}
return numpy.full(N, numpy.nan, dtype=dtype)
def add_columns(arr, X, cols):
"""
Add new columns `X` to a record array `arr`. Creates a new array.
"""
cols = [cols] if isinstance(cols, str) else cols
# Convert X to a list of 1D arrays for consistency
if isinstance(X, numpy.ndarray) and X.ndim == 1:
X = [X]
elif isinstance(X, numpy.ndarray):
raise ValueError("`X` should be a 1D array or a list of 1D arrays.")
if len(X) != len(cols):
raise ValueError("Mismatch between `X` and `cols` lengths.")
if not all(isinstance(x, numpy.ndarray) and x.ndim == 1 for x in X):
raise ValueError("All elements of `X` should be 1D arrays.")
if not all(x.size == arr.size for x in X):
raise ValueError("All arrays in `X` must have the same size as `arr`.")
# Define new dtype
dtype = list(arr.dtype.descr) + [(col, x.dtype) for col, x in zip(cols, X)]
# Create a new array and fill in values
out = numpy.empty(arr.size, dtype=dtype)
for col in arr.dtype.names:
out[col] = arr[col]
for col, x in zip(cols, X):
out[col] = x
return out
def rm_columns(arr, cols):
"""
Remove columns `cols` from a structured array `arr`. Allocates a new array.
"""
# Ensure cols is a list
cols = [cols] if isinstance(cols, str) else cols
# Check columns we wish to delete are in the array
missing_cols = [col for col in cols if col not in arr.dtype.names]
if missing_cols:
raise ValueError(f"Columns `{missing_cols}` not in `arr`.")
# Define new dtype without the cols to be deleted
new_dtype = [(n, dt) for n, dt in arr.dtype.descr if n not in cols]
# Allocate a new array and fill in values
out = numpy.empty(arr.size, dtype=new_dtype)
for name in out.dtype.names:
out[name] = arr[name]
return out
def flip_cols(arr, col1, col2):
"""
Flip values in columns `col1` and `col2` of a structured array `arr`.
"""
if col1 not in arr.dtype.names or col2 not in arr.dtype.names:
raise ValueError(f"Both `{col1}` and `{col2}` must exist in `arr`.")
arr[col1], arr[col2] = numpy.copy(arr[col2]), numpy.copy(arr[col1])
###############################################################################
# h5py functions #
###############################################################################
def read_h5(path):
"""
Return and return and open `h5py.File` object.
Parameters
----------
path : str
Path to the file.
Returns
-------
file : `h5py.File`
"""
if not isfile(path):
raise IOError(f"File `{path}` does not exist!")
return File(path, "r")
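For reference, a small example of the removed `add_columns` and `flip_cols` helpers, using the definitions shown above; the toy catalogue is made up:

import numpy

arr = numpy.zeros(3, dtype=[("x", numpy.float32), ("z", numpy.float32)])
arr["x"] = [1., 2., 3.]

arr = add_columns(arr, numpy.array([0.1, 0.2, 0.3], dtype=numpy.float32), "mass")
flip_cols(arr, "x", "z")    # swap the x and z columns in place
print(arr.dtype.names)      # ('x', 'z', 'mass')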

View file

@@ -244,7 +244,7 @@ def real2redshift(pos, vel, observer_location, observer_velocity, box,
         Observer location in `Mpc / h`.
     observer_velocity: 1-dimensional array `(3,)`
         Observer velocity in `km / s`.
-    box : py:class:`csiborg.read.CSiBORGBox`
+    box : py:class:`csiborg.read.CSiBORG1Box`
         Box units.
     periodic_wrap : bool, optional
         Whether to wrap around the box, particles may be outside the default

View file

@ -567,7 +567,7 @@
"metadata": {},
"outputs": [],
"source": [
-"box = csiborgtools.read.CSiBORGBox(951, 7444, paths)\n",
+"box = csiborgtools.read.CSiBORG1Box(951, 7444, paths)\n",
"\n",
"field_generator = csiborgtools.field.DensityField(box, \"PCS\")"
]

View file

@ -50,8 +50,8 @@ nproc = comm.Get_size()
MAS = "CIC"  # mass asignment scheme
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-box = csiborgtools.read.CSiBORGBox(paths)
-reader = csiborgtools.read.CSiBORGReader(paths)
+box = csiborgtools.read.CSiBORG1Box(paths)
+reader = csiborgtools.read.CSiBORG1Reader(paths)
ics = paths.get_ics("csiborg")
nsims = len(ics)

View file

@ -59,7 +59,7 @@ for i, nsim in enumerate(nsims):
now = datetime.now()
print(f"{now}: calculating {i}th simulation `{nsim}`.", flush=True)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
-box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
+box = csiborgtools.read.CSiBORG1Box(nsnap, nsim, paths)
f = csiborgtools.read.read_h5(paths.particles(nsim, "csiborg"))
particles = f["particles"]

View file

@ -96,7 +96,7 @@ def sort_fofid(nsim, verbose=True):
# Columns are halo ID, particle ID.
fof = numpy.load(fpath)
-reader = csiborgtools.read.CSiBORGReader(paths)
+reader = csiborgtools.read.CSiBORG1Reader(paths)
pars_extract = ["x"]  # Dummy variable
__, pids = reader.read_snapshot(nsnap, nsim, pars_extract,
return_structured=False, verbose=verbose)

View file

@ -79,7 +79,7 @@ def main(nsim, simname, verbose):
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if simname == "csiborg":
-partreader = csiborgtools.read.CSiBORGReader(paths)
+partreader = csiborgtools.read.CSiBORG1Reader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
@ -114,7 +114,7 @@ def main(nsim, simname, verbose):
# In case of CSiBORG, we need to convert the mass and velocities from
# box units.
if simname == "csiborg":
-box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
+box = csiborgtools.read.CSiBORG1Box(nsnap, nsim, paths)
parts[:, [3, 4, 5]] = box.box2vel(parts[:, [3, 4, 5]])
parts[:, 6] = box.box2solarmass(parts[:, 6])

View file

@ -1,116 +0,0 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Convert the HDF5 CSiBORG particle file to an ASCII file."""
from argparse import ArgumentParser
import h5py
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange
import csiborgtools
from utils import get_nsims
def positions_to_ascii(positions, output_filename, boxsize=None,
chunk_size=50_000, verbose=True):
"""
Convert array of positions to an ASCII file. If `boxsize` is given,
multiples the positions by it.
"""
total_size = len(positions)
if verbose:
print(f"Number of rows to write: {total_size}")
with open(output_filename, 'w') as out_file:
# Write the header
out_file.write("#px py pz\n")
# Loop through data in chunks
for i in trange(0, total_size, chunk_size,
desc=f"Writing to ... `{output_filename}`",
disable=not verbose):
end = i + chunk_size
if end > total_size:
end = total_size
data_chunk = positions[i:end]
# Convert to positions Mpc / h
data_chunk = data_chunk[:, :3]
if boxsize is not None:
data_chunk *= boxsize
chunk_str = "\n".join([f"{x:.4f} {y:.4f} {z:.4f}"
for x, y, z in data_chunk])
out_file.write(chunk_str + "\n")
def extract_positions(nsim, simname, paths, kind):
"""
Extract either the particle or halo positions.
"""
if kind == "particles":
fname = paths.processed_output(nsim, simname, "FOF")
return h5py.File(fname, 'r')["snapshot_final/pos"][:]
if kind == "particles_rsp":
raise NotImplementedError("RSP of particles is not implemented yet.")
fpath = paths.observer_peculiar_velocity("PCS", 512, nsim)
vpec_observer = numpy.load(fpath)["observer_vp"][0, :]
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, "halo_catalogue", "FOF", bounds={"dist": (0, 155.5)},
observer_velocity=vpec_observer)
if kind == "halos":
return cat["cartesian_pos"]
if kind == "halos_rsp":
return cat["cartesian_redshift_pos"]
raise ValueError(f"Unknown kind `{kind}`. Allowed values are: "
"`particles`, `particles_rsp`, `halos`, `halos_rsp`.")
def main(args, paths):
boxsize = 677.7 if "particles" in args.kind else None
pos = extract_positions(args.nsim, args.simname, paths, args.kind)
output_filename = paths.ascii_positions(args.nsim, args.kind)
positions_to_ascii(pos, output_filename, boxsize=boxsize)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--kind", type=str, required=True,
choices=["particles", "particles_rsp", "halos", "halos_rsp"], # noqa
help="Kind of data to extract.")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg"],
help="Simulation name")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def _main(nsim):
main(nsim, paths, args.kind)
work_delegation(_main, nsims, MPI.COMM_WORLD)

View file

@ -49,10 +49,11 @@ def density_field(nsim, parser_args, to_save=True):
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
-box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
+box = csiborgtools.read.CSiBORG1Box(nsnap, nsim, paths)
fname = paths.processed_output(nsim, "csiborg", "halo_catalogue")
if not parser_args.in_rsp:
+# TODO I removed this function
snap = csiborgtools.read.read_h5(fname)["snapshot_final"]
pos = snap["pos"]
mass = snap["mass"]
@ -94,7 +95,7 @@ def velocity_field(nsim, parser_args, to_save=True):
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
-box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
+box = csiborgtools.read.CSiBORG1Box(nsnap, nsim, paths)
fname = paths.processed_output(nsim, "csiborg", "halo_catalogue")
snap = csiborgtools.read.read_h5(fname)["snapshot_final"]
@ -127,7 +128,7 @@ def radvel_field(nsim, parser_args, to_save=True):
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
-box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
+box = csiborgtools.read.CSiBORG1Box(nsnap, nsim, paths)
vel = numpy.load(paths.field("velocity", parser_args.MAS, parser_args.grid,
nsim, parser_args.in_rsp))
@ -154,7 +155,7 @@ def potential_field(nsim, parser_args, to_save=True):
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
-box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
+box = csiborgtools.read.CSiBORG1Box(nsnap, nsim, paths)
if not parser_args.in_rsp:
rho = numpy.load(paths.field(
@ -192,7 +193,7 @@ def environment_field(nsim, parser_args, to_save=True):
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
-box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
+box = csiborgtools.read.CSiBORG1Box(nsnap, nsim, paths)
rho = numpy.load(paths.field(
"density", parser_args.MAS, parser_args.grid, nsim, in_rsp=False))

View file

@ -0,0 +1,201 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to construct the density and velocity fields for a simulation snapshot.
The SPH filter is implemented in the cosmotool package.
"""
from argparse import ArgumentParser
from os import environ, remove
from os.path import join, exists
import subprocess
from datetime import datetime
import hdf5plugin # noqa
import numpy as np
from h5py import File
def now():
return datetime.now()
def generate_unique_id(file_path):
"""
Generate a unique ID for a file path.
"""
return file_path.replace('/', '_').replace(':', '_')
def prepare_random(temporary_output_path, npart=100, dtype=np.float32):
"""
Prepare a random dataset for the SPH filter.
"""
print("Preparing random dataset.", flush=True)
arr = np.full((npart, 7), np.nan, dtype=dtype)
arr[:, :3] = np.random.uniform(0, 1, (npart, 3))
arr[:, 3:6] = np.random.normal(0, 1, (npart, 3))
arr[:, 6] = np.ones(npart, dtype=dtype)
with File(temporary_output_path, 'w') as target:
target.create_dataset("particles", data=arr, dtype=dtype)
return 1.
def prepare_gadget(snapshot_path, temporary_output_path):
"""
Prepare a GADGET snapshot for the SPH filter. Assumes there is only a
single file per snapshot.
"""
with File(snapshot_path, 'r') as source, File(temporary_output_path, 'w') as target: # noqa
boxsize = source["Header"].attrs["BoxSize"]
npart = sum(source["Header"].attrs["NumPart_Total"])
nhighres = source["Header"].attrs["NumPart_Total"][1]
dset = target.create_dataset("particles", (npart, 7), dtype=np.float32)
# Copy to this dataset the high-resolution particles.
dset[:nhighres, :3] = source["PartType1/Coordinates"][:]
dset[:nhighres, 3:6] = source["PartType1/Velocities"][:]
dset[:nhighres, 6] = np.ones(nhighres, dtype=np.float32) * source["Header"].attrs["MassTable"][1] # noqa
# Now copy the low-resolution particles.
dset[nhighres:, :3] = source["PartType5/Coordinates"][:]
dset[nhighres:, 3:6] = source["PartType5/Velocities"][:]
dset[nhighres:, 6] = source["PartType5/Masses"][:]
return boxsize
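# Both `prepare_random` and `prepare_gadget` write a single `particles`
# dataset of shape (npart, 7): columns 0-2 are positions, 3-5 velocities and
# column 6 the particle mass. This is the layout handed to the SPH executable
# below.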
def run_sph_filter(particles_path, output_path, boxsize, resolution,
SPH_executable):
"""
Run the SPH filter on a snapshot.
"""
if not exists(particles_path):
raise RuntimeError(f"Particles file `{particles_path}` does not exist.") # noqa
if not isinstance(boxsize, (int, float)):
raise TypeError("`boxsize` must be a number.")
if not isinstance(resolution, int):
raise TypeError("`resolution` must be an integer.")
if not exists(SPH_executable):
raise RuntimeError(f"SPH executable `{SPH_executable}` does not exist.") # noqa
command = [SPH_executable, particles_path, str(1e14), str(boxsize),
str(resolution), str(0), str(0), str(0), output_path, "1"]
print(f"{now()}: executing `simple3DFilter`.", flush=True)
start_time = now()
process = subprocess.Popen(
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
universal_newlines=True)
for line in iter(process.stdout.readline, ""):
print(line, end="", flush=True)
process.wait()
if process.returncode != 0:
raise RuntimeError("`simple3DFilter` failed.")
else:
dt = now() - start_time
print(f"{now()}: `simple3DFilter`completed successfully in {dt}.",
flush=True)
def main(snapshot_path, output_path, resolution, scratch_space, SPH_executable,
snapshot_kind):
"""
Construct the density and velocity fields for a simulation snapshot using
`cosmotool` [1].
Parameters
----------
snapshot_path : str
Path to the simulation snapshot.
output_path : str
Path to the output HDF5 file.
resolution : int
Resolution of the density field.
scratch_space : str
Path to a folder where temporary files can be stored.
SPH_executable : str
Path to the `simple3DFilter` executable [1].
snapshot_kind : str
Kind of the simulation snapshot. Currently only `gadget4` is supported.
Returns
-------
None
References
----------
[1] https://bitbucket.org/glavaux/cosmotool/src/master/sample/simple3DFilter.cpp # noqa
"""
if snapshot_kind != "gadget4":
raise NotImplementedError("Only GADGET HDF5 snapshots are supported.")
print("---------- SPH Density & Velocity Field Job Information ----------")
print(f"Snapshot path: {snapshot_path}")
print(f"Output path: {output_path}")
print(f"Resolution: {resolution}")
print(f"Scratch space: {scratch_space}")
print(f"SPH executable: {SPH_executable}")
print(f"Snapshot kind: {snapshot_kind}")
print("------------------------------------------------------------------")
print(flush=True)
temporary_output_path = join(
scratch_space, generate_unique_id(snapshot_path))
if not temporary_output_path.endswith(".hdf5"):
raise RuntimeError("Temporary output path must end with `.hdf5`.")
print(f"{now()}: preparing snapshot...", flush=True)
boxsize = prepare_gadget(snapshot_path, temporary_output_path)
print(f"{now()}: wrote temporary data to {temporary_output_path}.",
flush=True)
run_sph_filter(temporary_output_path, output_path, boxsize, resolution,
SPH_executable)
print(f"{now()}: removing the temporary snapshot file.", flush=True)
try:
remove(temporary_output_path)
except FileNotFoundError:
pass
if __name__ == "__main__":
parser = ArgumentParser(description="Generate SPH density and velocity field.") # noqa
parser.add_argument("--snapshot_path", type=str, required=True,
help="Path to the simulation snapshot.")
parser.add_argument("--output_path", type=str, required=True,
help="Path to the output HDF5 file.")
parser.add_argument("--resolution", type=int, required=True,
help="Resolution of the density and velocity field.")
parser.add_argument("--scratch_space", type=str, required=True,
help="Path to a folder where temporary files can be stored.") # noqa
parser.add_argument("--SPH_executable", type=str, required=True,
help="Path to the `simple3DFilter` executable.")
parser.add_argument("--snapshot_kind", type=str, required=True,
choices=["gadget4"],
help="Kind of the simulation snapshot.")
args = parser.parse_args()
main(args.snapshot_path, args.output_path, args.resolution,
args.scratch_space, args.SPH_executable, args.snapshot_kind)

View file

@ -0,0 +1,40 @@
#!/bin/sh
#SBATCH --ntasks-per-node=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=16
#SBATCH --mem-per-cpu=7000
#SBATCH -J SPH
#SBATCH -o output_%J.out
#SBATCH -e error_%J.err
#SBATCH -p cosma8-serial
#SBATCH -A dp016
#SBATCH -t 04:00:00
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --mail-user=richard.stiskalek@physics.ox.ac.uk
module purge
module load intel_comp/2019
module load intel_mpi/2019
module load hdf5
module load fftw
module load gsl
module load cmake
module load python/3.10.12
module list
source /cosma/home/dp016/dc-stis1/csiborgtools/venv_csiborgtools/bin/activate
export OMP_NUM_THREADS=16
export OMP_NESTED=true
# ADD CHAINS HERE
snapshot_path="/cosma8/data/dp016/dc-stis1/csiborg2_main/chain_15517/output/snapshot_099_full.hdf5"
output_path="/cosma8/data/dp016/dc-stis1/csiborg2_main/field/chain_15517.hdf5"
resolution=256
scratch_space="/cosma8/data/dp016/dc-stis1/csiborg2_main/field"
SPH_executable="/cosma8/data/dp016/dc-stis1/cosmotool/bld2/sample/simple3DFilter"
snapshot_kind="gadget4"
python3 field_sph.py --snapshot_path $snapshot_path --output_path $output_path --resolution $resolution --scratch_space $scratch_space --SPH_executable $SPH_executable --snapshot_kind $snapshot_kind

View file

@ -0,0 +1,737 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to process simulation snapshots into sorted HDF5 files. Note that
reading the resulting HDF5 files may require the `hdf5plugin` package to be
installed. The snapshot particles are sorted by their halo ID, so that the
particles of a halo can be accessed by slicing the array.
The CSiBORG1 reader will complain unless it can find the HaloMaker FOF files
where it expects them:
fdir = f"/mnt/extraspace/rstiskalek/csiborg1/chain_{self.nsim}/FOF"
"""
from abc import ABC, abstractmethod
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from glob import glob, iglob
from os import makedirs
from os.path import basename, dirname, exists, join
from warnings import catch_warnings, filterwarnings, warn
import hdf5plugin
import numpy
import pynbody
import readgadget
from astropy import constants, units
from h5py import File
from numba import jit
from readfof import FoF_catalog
from tqdm import tqdm, trange
MSUNCGS = constants.M_sun.cgs.value
BLOSC_KWARGS = {"cname": "blosclz",
"clevel": 9,
"shuffle": hdf5plugin.Blosc.SHUFFLE,
}
###############################################################################
# Utility functions #
###############################################################################
def now():
"""
Return current time.
"""
return datetime.now()
def flip_cols(arr, col1, col2):
"""
Flip values in columns `col1` and `col2` of a structured array `arr`.
"""
if col1 not in arr.dtype.names or col2 not in arr.dtype.names:
raise ValueError(f"Both `{col1}` and `{col2}` must exist in `arr`.")
arr[col1], arr[col2] = numpy.copy(arr[col2]), numpy.copy(arr[col1])
def convert_str_to_num(s):
"""
Convert a string representation of a number to its appropriate numeric type
(int or float).
Parameters
----------
s : str
The string representation of the number.
Returns
-------
num : int or float
"""
try:
return int(s)
except ValueError:
try:
return float(s)
except ValueError:
warn(f"Cannot convert string '{s}' to number", UserWarning)
return s
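# A few illustrative inputs/outputs for the helper above:
#     convert_str_to_num("128")     -> 128 (int)
#     convert_str_to_num("0.6777")  -> 0.6777 (float)
#     convert_str_to_num("hilbert") -> warns and returns "hilbert" unchanged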
def cols_to_structured(N, cols):
"""
Allocate a structured array from `cols`, a list of (name, dtype) tuples.
"""
if not (isinstance(cols, list)
and all(isinstance(c, tuple) and len(c) == 2 for c in cols)):
raise TypeError("`cols` must be a list of (name, dtype) tuples.")
names, formats = zip(*cols)
dtype = {"names": names, "formats": formats}
return numpy.full(N, numpy.nan, dtype=dtype)
###############################################################################
# Base reader of snapshots #
###############################################################################
class BaseReader(ABC):
"""Base reader layout that every subsequent reader should follow."""
@abstractmethod
def read_info(self):
pass
@abstractmethod
def read_snapshot(self, kind):
pass
@abstractmethod
def read_halo_id(self, pids):
pass
@abstractmethod
def read_halos(self):
pass
###############################################################################
# CSiBORG particle reader #
###############################################################################
class CSiBORG1Reader:
"""
Object to read in CSiBORG snapshots from the binary files and halo
catalogues.
Parameters
----------
nsim : int
IC realisation index.
which_snapshot : str
Which snapshot to read. Options are `initial` or `final`.
"""
def __init__(self, nsim, which_snapshot):
self.nsim = nsim
base_dir = "/mnt/extraspace/hdesmond/"
if which_snapshot == "initial":
self.nsnap = 1
raise RuntimeError("TODO not implemented")
self.source_dir = None
elif which_snapshot == "final":
sourcedir = join(base_dir, f"ramses_out_{nsim}")
self.nsnap = max([int(basename(f).replace("output_", ""))
for f in glob(join(sourcedir, "output_*"))])
self.source_dir = join(sourcedir,
f"output_{str(self.nsnap).zfill(5)}")
else:
raise ValueError(f"Unknown snapshot option `{which_snapshot}`.")
self.output_dir = f"/mnt/extraspace/rstiskalek/csiborg1/chain_{self.nsim}" # noqa
self.output_snap = join(self.output_dir,
f"snapshot_{str(self.nsnap).zfill(5)}.hdf5")
self.output_cat = join(self.output_dir,
f"fof_{str(self.nsnap).zfill(5)}.hdf5")
self.halomaker_dir = join(self.output_dir, "FOF")
def read_info(self):
filename = glob(join(self.source_dir, "info_*"))
if len(filename) > 1:
raise ValueError("Found too many `info` files.")
filename = filename[0]
with open(filename, "r") as f:
info = f.read().split()
# Throw anything below ordering line out
info = numpy.asarray(info[:info.index("ordering")])
# Get indices of entries with `=`. Entries before/after are keys/values.
eqs = numpy.asarray([i for i in range(info.size) if info[i] == '='])
keys = info[eqs - 1]
vals = info[eqs + 1]
return {key: convert_str_to_num(val) for key, val in zip(keys, vals)}
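# Note: `read_info` assumes the usual RAMSES `info_XXXXX.txt` layout of
# whitespace-separated `key = value` lines followed by the `ordering` block,
# e.g. (illustrative values only):
#
#     H0          =  0.677700000000000E+02
#     aexp        =  0.100000000000000E+01
#     unit_l      =  0.227550000000000E+28
#     unit_d      =  0.264088000000000E-29
#     unit_t      =  0.455724000000000E+18
#     ordering type = hilbert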
def read_snapshot(self, kind):
with catch_warnings():
filterwarnings("ignore", category=UserWarning)
sim = pynbody.load(self.source_dir)
info = self.read_info()
if kind == "pid":
x = numpy.array(sim["iord"], dtype=numpy.uint32)
elif kind == "pos":
x = numpy.array(sim[kind], dtype=numpy.float32)
# Convert box units to Mpc / h
box2mpc = (info["unit_l"] / units.kpc.to(units.cm) / info["aexp"]
* 1e-3 * info["H0"] / 100)
x *= box2mpc
elif kind == "mass":
x = numpy.array(sim[kind], dtype=numpy.float32)
# Convert box units to Msun / h
box2msun = (info["unit_d"] * info["unit_l"]**3 / MSUNCGS
* info["H0"] / 100)
x *= box2msun
elif kind == "vel":
x = numpy.array(sim[kind], dtype=numpy.float16)
# Convert box units to km / s
box2kms = (1e-2 * info["unit_l"] / info["unit_t"] / info["aexp"]
* 1e-3)
x *= box2kms
else:
raise ValueError(f"Unknown kind `{kind}`. "
"Options are: `pid`, `pos`, `vel` or `mass`.")
# Because of a RAMSES bug x and z are flipped.
if kind in ["pos", "vel"]:
print(f"For kind `{kind}` flipping x and z.")
x[:, [0, 2]] = x[:, [2, 0]]
del sim
collect()
return x
def read_halo_id(self, pids):
fpath = join(self.halomaker_dir, "*particle_membership*")
fpath = next(iglob(fpath, recursive=True), None)
if fpath is None:
raise FileNotFoundError(f"Found no Halomaker files in `{self.halomaker_dir}`.") # noqa
print(f"{now()}: mapping particle IDs to their indices.")
pids_idx = {pid: i for i, pid in enumerate(pids)}
# Unassigned particle IDs are assigned a halo ID of 0.
print(f"{now()}: mapping HIDs to their array indices.")
hids = numpy.zeros(pids.size, dtype=numpy.int32)
# Read line-by-line to avoid loading the whole file into memory.
with open(fpath, 'r') as file:
for line in tqdm(file, desc="Reading membership"):
hid, pid = map(int, line.split())
hids[pids_idx[pid]] = hid
del pids_idx
collect()
return hids
def read_halos(self):
info = self.read_info()
h = info["H0"] / 100
fpath = join(self.halomaker_dir, "fort.132")
hid = numpy.genfromtxt(fpath, usecols=0, dtype=numpy.int32)
pos = numpy.genfromtxt(fpath, usecols=(1, 2, 3), dtype=numpy.float32)
totmass = numpy.genfromtxt(fpath, usecols=4, dtype=numpy.float32)
m200c = numpy.genfromtxt(fpath, usecols=5, dtype=numpy.float32)
dtype = {"names": ["index", "x", "y", "z", "totpartmass", "m200c"],
"formats": [numpy.int32] + [numpy.float32] * 5}
out = numpy.full(hid.size, numpy.nan, dtype=dtype)
out["index"] = hid
out["x"] = pos[:, 0] * h + 677.7 / 2
out["y"] = pos[:, 1] * h + 677.7 / 2
out["z"] = pos[:, 2] * h + 677.7 / 2
# Because of a RAMSES bug x and z are flipped.
flip_cols(out, "x", "z")
out["totpartmass"] = totmass * 1e11 * h
out["m200c"] = m200c * 1e11 * h
return out
###############################################################################
# CSiBORG2 particle reader #
###############################################################################
class CSiBORG2Reader(BaseReader):
"""
Object to read in CSiBORG2 snapshots. Because these are GADGET-4 outputs,
the final snapshot is already sorted; the initial snapshot, however, still
has to be sorted.
Parameters
----------
nsim : int
IC realisation index.
which_snapshot : str
Which snapshot to read. Options are `initial` or `final`.
kind : str
CSiBORG2 suite kind. Options are `main`, `random` or `varysmall`.
"""
def __init__(self, nsim, which_snapshot, kind):
self.nsim = nsim
if kind not in ["main", "random", "varysmall"]:
raise ValueError(f"Unknown kind `{kind}`.")
base_dir = f"/mnt/extraspace/rstiskalek/csiborg2_{kind}"
if which_snapshot == "initial":
self.nsnap = 0
elif which_snapshot == "final":
self.nsnap = 99
else:
raise ValueError(f"Unknown snapshot option `{which_snapshot}`.")
self.source_dir = join(
base_dir, f"chain_{nsim}", "output",
f"snapshot_{str(self.nsnap).zfill(3)}_full.hdf5")
self.output_dir = join(base_dir, f"chain_{nsim}", "output")
self.output_snap = join(
self.output_dir,
f"snapshot_{str(self.nsnap).zfill(3)}_sorted.hdf5")
self.output_cat = None
def read_info(self):
fpath = join(dirname(self.source_dir), "snapshot_099_full.hdf5")
with File(fpath, 'r') as f:
header = f["Header"]
params = f["Parameters"]
out = {"BoxSize": header.attrs["BoxSize"],
"MassTable": header.attrs["MassTable"],
"NumPart_Total": header.attrs["NumPart_Total"],
"Omega_m": params.attrs["Omega0"],
"Omega_l": params.attrs["OmegaLambda"],
"Omega_b": params.attrs["OmegaBaryon"],
"h": params.attrs["HubbleParam"],
"redshift": header.attrs["Redshift"],
}
return out
def read_snapshot(self, kind):
raise RuntimeError("TODO Not implemented.")
def read_halo_id(self, pids):
raise RuntimeError("TODO Not implemented.")
def read_halos(self):
raise RuntimeError("TODO Not implemented.")
###############################################################################
# Quijote particle reader #
###############################################################################
class QuijoteReader:
"""
Object to read in Quijote snapshots from the binary files.
Parameters
----------
nsim : int
IC realisation index.
which_snapshot : str
Which snapshot to read. Options are `initial` or `final`.
"""
def __init__(self, nsim, which_snapshot):
self.nsim = nsim
quijote_dir = "/mnt/extraspace/rstiskalek/quijote"
if which_snapshot == "initial":
self.nsnap = -1
snap_str = "ICs"
self.source_dir = join(quijote_dir, "Snapshots_fiducial",
str(nsim), "ICs", "ics")
elif which_snapshot == "final":
self.nsnap = 4
snap_str = str(self.nsnap).zfill(3)
self.source_dir = join(
quijote_dir, "Snapshots_fiducial",
str(nsim), f"snapdir_{snap_str}", f"snap_{snap_str}")
else:
raise ValueError(f"Unknown snapshot option `{which_snapshot}`.")
self.fof_dir = join(quijote_dir, "Halos_fiducial", str(nsim))
self.output_dir = f"/mnt/extraspace/rstiskalek/quijote/fiducial_processed/chain_{self.nsim}" # noqa
self.output_snap = join(self.output_dir, f"snapshot_{snap_str}.hdf5")
self.output_cat = join(self.output_dir, f"fof_{snap_str}.hdf5")
def read_info(self):
header = readgadget.header(self.source_dir)
out = {"BoxSize": header.boxsize / 1e3, # Mpc/h
"Nall": header.nall[1], # Tot num of particles
"PartMass": header.massarr[1] * 1e10, # Part mass in Msun/h
"Omega_m": header.omega_m,
"Omega_l": header.omega_l,
"h": header.hubble,
"redshift": header.redshift,
}
out["TotMass"] = out["Nall"] * out["PartMass"]
out["Hubble"] = (100.0 * numpy.sqrt(
header.omega_m * (1.0 + header.redshift)**3 + header.omega_l))
return out
def read_snapshot(self, kind):
info = self.read_info()
ptype = [1] # DM
if kind == "pid":
return readgadget.read_block(self.source_dir, "ID ", ptype)
elif kind == "pos":
pos = readgadget.read_block(self.source_dir, "POS ", ptype) / 1e3
return pos.astype(numpy.float32)
elif kind == "vel":
vel = readgadget.read_block(self.source_dir, "VEL ", ptype)
vel = vel.astype(numpy.float16)
vel *= (1 + info["redshift"]) # km / s
return vel
elif kind == "mass":
return numpy.full(info["Nall"], info["PartMass"],
dtype=numpy.float32)
else:
raise ValueError(f"Unknown kind `{kind}`. "
"Options are: `pid`, `pos`, `vel` or `mass`.")
def read_halo_id(self, pids):
cat = FoF_catalog(self.fof_dir, self.nsnap)
group_pids = cat.GroupIDs
group_len = cat.GroupLen
# Create a mapping from particle ID to FoF group ID.
print(f"{now()}: mapping particle IDs to their indices.")
ks = numpy.insert(numpy.cumsum(group_len), 0, 0)
with catch_warnings():
# Ignore because we are casting NaN as integer.
filterwarnings("ignore", category=RuntimeWarning)
pid2hid = numpy.full((group_pids.size, 2), numpy.nan,
dtype=numpy.uint64)
for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])):
pid2hid[k0:kf, 0] = i + 1
pid2hid[k0:kf, 1] = group_pids[k0:kf]
pid2hid = {pid: hid for hid, pid in pid2hid}
# Create the final array of hids matching the snapshot array.
# Unassigned particles have hid 0.
print(f"{now()}: mapping HIDs to their array indices.")
hids = numpy.full(pids.size, 0, dtype=numpy.uint32)
for i in trange(pids.size):
hids[i] = pid2hid.get(pids[i], 0)
return hids
def read_halos(self):
fof = FoF_catalog(self.fof_dir, self.nsnap, long_ids=False, swap=False,
SFR=False, read_IDs=False)
cols = [("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("vx", numpy.float32),
("vy", numpy.float32),
("vz", numpy.float32),
("GroupMass", numpy.float32),
("npart", numpy.int32),
("index", numpy.int32)
]
data = cols_to_structured(fof.GroupLen.size, cols)
pos = fof.GroupPos / 1e3
vel = fof.GroupVel * (1 + self.read_info()["redshift"])
for i, p in enumerate(["x", "y", "z"]):
data[p] = pos[:, i]
data[f"v{p}"] = vel[:, i]
data["GroupMass"] = fof.GroupMass * 1e10
data["npart"] = fof.GroupLen
# We want to start indexing from 1. Index 0 is reserved for
# particles unassigned to any FoF group.
data["index"] = 1 + numpy.arange(data.size, dtype=numpy.uint32)
return data
###############################################################################
# Group Offsets #
###############################################################################
@jit(nopython=True, boundscheck=False)
def minmax_halo(hid, halo_ids, start_loop=0):
"""
Find the start and end index of a halo in a sorted array of halo IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, halo_ids.size):
n = halo_ids[i]
if n == hid:
if start is None:
start = i
end = i
elif n > hid:
break
return start, end
def make_offset_map(part_hids):
"""
Make group offsets for a list of particles' halo IDs. This is a
2-dimensional array, where the first column is the halo ID, the second
column is the start index of the halo in the particle list, and the third
column is the end index of the halo in the particle list. Both the start
and end indices are inclusive.
"""
unique_halo_ids = numpy.unique(part_hids)
unique_halo_ids = unique_halo_ids[unique_halo_ids != 0]
with catch_warnings():
filterwarnings("ignore", category=RuntimeWarning)
halo_map = numpy.full((unique_halo_ids.size, 3), numpy.nan,
dtype=numpy.uint32)
start_loop, niters = 0, unique_halo_ids.size
for i in trange(niters):
hid = unique_halo_ids[i]
k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
halo_map[i, :] = hid, k0, kf
start_loop = kf
return halo_map, unique_halo_ids
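# Sketch of how the offsets are meant to be consumed downstream (illustrative
# only; `reader` stands for one of the readers above, and `hdf5plugin` must be
# imported before h5py can decompress the Blosc-compressed datasets):
#
#     import hdf5plugin  # noqa
#     from h5py import File
#
#     with File(reader.output_cat, "r") as f:
#         hid, k0, kf = f["GroupOffset"][0]      # first halo in the catalogue
#     with File(reader.output_snap, "r") as f:
#         pos = f["Coordinates"][k0:kf + 1]      # particles of that halo only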
###############################################################################
# Process the final snapshot and sort it by groups #
###############################################################################
def process_final_snapshot(nsim, simname):
"""
Read in the snapshot particles, sort them by their halo ID and dump
into an HDF5 file. Stores the first and last index of each halo in the
particle array for fast slicing of the array to access particles of a single
halo.
"""
if simname == "csiborg1":
reader = CSiBORG1Reader(nsim, "final")
elif simname == "quijote":
reader = QuijoteReader(nsim, "final")
else:
raise RuntimeError(f"Simulation `{simname}` is not supported.")
if not exists(reader.output_dir):
makedirs(reader.output_dir)
print("---- Processing Final Snapshot Information ----")
print(f"Simulation index: {nsim}")
print(f"Simulation name: {simname}")
print(f"Output snapshot: {reader.output_snap}")
print(f"Output catalogue: {reader.output_cat}")
print("-----------------------------------------------")
print(flush=True)
# First off load the particle IDs from the raw data.
pids = reader.read_snapshot("pid")
# Then, load the halo ids and make sure their ordering is the same as the
# particle IDs ordering.
print(f"{now()}: loading HIDs.")
halo_ids = reader.read_halo_id(pids)
print(f"{now()}: sorting HIDs.")
# Get the permutation that sorts the halo IDs and then write the
# information to the data files sorted by it.
sort_indxs = numpy.argsort(halo_ids)
halo_ids = halo_ids[sort_indxs]
with File(reader.output_snap, 'w') as f:
print(f"{now()}: creating dataset `ParticleIDs`...",
flush=True)
f.create_dataset("ParticleIDs", data=pids[sort_indxs],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
del pids
collect()
print(f"{now()}: creating dataset `Coordinates`...",
flush=True)
f.create_dataset(
"Coordinates", data=reader.read_snapshot("pos")[sort_indxs],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
print(f"{now()}: creating dataset `Velocities`...",
flush=True)
f.create_dataset(
"Velocities", data=reader.read_snapshot("vel")[sort_indxs],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
print(f"{now()}: creating dataset `Masses`...",
flush=True)
f.create_dataset(
"Masses", data=reader.read_snapshot("mass")[sort_indxs],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
if simname == "csiborg1":
header = f.create_dataset("Header", (0,))
header.attrs["BoxSize"] = 677.7 # Mpc/h
header.attrs["Omega0"] = 0.307
header.attrs["OmegaBaryon"] = 0.0
header.attrs["OmegaLambda"] = 0.693
header.attrs["HubleParam"] = 0.6777
header.attrs["Redshift"] = 0.0
elif simname == "quijote":
info = reader.read_info()
header = f.create_dataset("Header", (0,))
header.attrs["BoxSize"] = info["BoxSize"]
header.attrs["Omega0"] = info["Omega_m"]
header.attrs["OmegaLambda"] = info["Omega_l"]
header.attrs["OmegaBaryon"] = 0.0
header.attrs["HubleParam"] = info["h"]
header.attrs["Redshift"] = info["redshift"]
else:
raise ValueError(f"Unknown simname `{simname}`.")
print(f"{now()}: done with `{reader.output_snap}`.",
flush=True)
# Lastly, create the halo mapping and default catalogue.
print(f"{datetime.now()}: creating `GroupOffset`...")
halo_map, unique_halo_ids = make_offset_map(halo_ids)
# Dump the halo mapping.
with File(reader.output_cat, "w") as f:
f.create_dataset("GroupOffset", data=halo_map)
# Add the halo finder catalogue
print(f"{now()}: adding the halo finder catalogue.")
with File(reader.output_cat, "r+") as f:
cat = reader.read_halos()
hid2pos = {hid: i for i, hid in enumerate(unique_halo_ids)}
for key in cat.dtype.names:
x = numpy.full(unique_halo_ids.size, numpy.nan,
dtype=cat[key].dtype)
for i in range(len(cat)):
j = hid2pos[cat["index"][i]]
x[j] = cat[key][i]
f.create_dataset(key, data=x)
def process_initial_snapshot(nsim, simname):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
"""
if simname == "csiborg1":
reader = CSiBORG1Reader(nsim, "initial")
output_snap_final = CSiBORG1Reader(nsim, "final").output_snap
elif simname == "quijote":
reader = QuijoteReader(nsim, "initial")
output_snap_final = QuijoteReader(nsim, "final").output_snap
elif "csiborg2" in simname:
reader = CSiBORG2Reader(nsim, "initial", simname.split("_")[1])
output_snap_final = CSiBORG2Reader(nsim, "final", simname.split("_")[1]).output_snap # noqa
raise RuntimeError("TODO Not implemented.")
else:
raise RuntimeError(f"Simulation `{simname}` is not supported.")
print("---- Processing Initial Snapshot Information ----")
print(f"Simulation index: {nsim}")
print(f"Simulation name: {simname}")
print(f"Output snapshot: {reader.output_snap}")
print(f"Output catalogue: {reader.output_cat}")
print("-----------------------------------------------")
print(flush=True)
print(f"{now()}: loading and sorting the initial PID.")
sort_indxs = numpy.argsort(reader.read_snapshot("pid"))
print(f"{now()}: loading the final particles.")
with File(output_snap_final, "r") as f:
sort_indxs_final = f["ParticleIDs"][:]
f.close()
print(f"{now()}: sorting the particles according to the final snapshot.")
sort_indxs_final = numpy.argsort(numpy.argsort(sort_indxs_final))
sort_indxs = sort_indxs[sort_indxs_final]
del sort_indxs_final
collect()
print(f"{now()}: loading and sorting the initial particle position.")
pos = reader.read_snapshot("pos")[sort_indxs]
del sort_indxs
collect()
# In Quijote some particles are positioned precisely at the edge of the
# box. Move them to be just inside.
if simname == "quijote":
boxsize = reader.read_info()["BoxSize"]
mask = pos >= boxsize
if numpy.any(mask):
spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-3
pos[mask] -= spacing
print(f"{now()}: dumping particles `{reader.output_snap}`.")
with File(reader.output_snap, 'w') as f:
f.create_dataset("Coordinates", data=pos,
**hdf5plugin.Blosc(**BLOSC_KWARGS))
###############################################################################
# Process the initial snapshot and sort it like the final snapshot #
###############################################################################
if __name__ == "__main__":
parser = ArgumentParser(description="Tool to manage the `raw` simulation data.") # noqa
parser.add_argument("--nsim", type=int, required=True,
help="Simulation index.")
parser.add_argument("--simname", type=str, required=True,
choices=["csiborg1", "quijote"],
help="Simulation name.")
parser.add_argument("--mode", type=int, required=True, choices=[0, 1, 2],
help="0: process final snapshot, 1: process initial snapshot, 2: process both.") # noqa
args = parser.parse_args()
if args.mode == 0:
process_final_snapshot(args.nsim, args.simname)
elif args.mode == 1:
process_initial_snapshot(args.nsim, args.simname)
else:
process_final_snapshot(args.nsim, args.simname)
process_initial_snapshot(args.nsim, args.simname)
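# Example invocation (sketch), processing both snapshots of one realisation:
#     python3 process_snapshot.py --nsim 7444 --simname csiborg1 --mode 2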

View file

@ -0,0 +1,89 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to write the SLURM submission script and submit it to the queue to
calculate the SPH density & velocity field.
"""
from os import system
def write_submit(chain_index, kind, resolution, nthreads):
if kind not in ["main", "random", "varysmall"]:
raise RuntimeError(f"Unknown kind `{kind}`.")
txt = f"""#!/bin/sh
#SBATCH --ntasks-per-node=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task={nthreads}
#SBATCH --mem-per-cpu=7000
#SBATCH -J SPH_{chain_index}
#SBATCH -o output_{chain_index}_%J.out
#SBATCH -e error_{chain_index}_%J.err
#SBATCH -p cosma8-serial
#SBATCH -A dp016
#SBATCH -t 16:00:00
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --mail-user=richard.stiskalek@physics.ox.ac.uk
module purge
module load intel_comp/2019
module load intel_mpi/2019
module load hdf5
module load fftw
module load gsl
module load cmake
module load python/3.10.12
module list
source /cosma/home/dp016/dc-stis1/csiborgtools/venv_csiborgtools/bin/activate
export OMP_NUM_THREADS={nthreads}
export OMP_NESTED=true
snapshot_path="/cosma8/data/dp016/dc-stis1/csiborg2_{kind}/chain_{chain_index}/output/snapshot_099_full.hdf5"
output_path="/cosma8/data/dp016/dc-stis1/csiborg2_{kind}/field/chain_{chain_index}_{resolution}.hdf5"
resolution={resolution}
scratch_space="/snap8/scratch/dp016/dc-stis1/"
SPH_executable="/cosma8/data/dp016/dc-stis1/cosmotool/bld2/sample/simple3DFilter"
snapshot_kind="gadget4"
python3 field_sph.py --snapshot_path $snapshot_path --output_path $output_path --resolution $resolution --scratch_space $scratch_space --SPH_executable $SPH_executable --snapshot_kind $snapshot_kind
"""
fname = f"submit_SPH_{kind}_{chain_index}.sh"
print(f"Writing file: `{fname}`.")
with open(fname, "w") as txtfile:
txtfile.write(txt)
# Make the file executable
system(f"chmod +x {fname}")
return fname
if __name__ == "__main__":
# kind = "main"
# chains = [15617, 15717, 15817, 15917, 16017, 16117, 16217, 16317, 16417, 16517, 16617, 16717, 16817, 16917, 17017, 17117, 17217, 17317, 17417]
# kind = "varysmall"
# chains = ["16417_001", "16417_025", "16417_050", "16417_075", "16417_100", "16417_125", "16417_150", "16417_175", "16417_200", "16417_225", "16417_250", "16417_275", "16417_300", "16417_325", "16417_350", "16417_375", "16417_400", "16417_425", "16417_450", "16417_475"]
kind = "random"
chains = [1, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350, 375, 400, 425, 450, 475]
resolution = 1024
nthreads = 32
for chain_index in chains:
fname = write_submit(chain_index, kind, resolution, nthreads)
system(f"sbatch {fname}")

View file

@ -0,0 +1,31 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from os import system
if __name__ == "__main__":
# Quijote chains
chains = [1]
simname = "quijote"
mode = 2
env = "/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
memory = 64
for chain in chains:
out = f"output_{simname}_{chain}_%j.out"
cmd = f"addqueue -q berg -o {out} -n 1x1 -m {memory} {env} process_snapshot.py --nsim {chain} --simname {simname} --mode {mode}" # noqa
print(cmd)
system(cmd)
print()

View file

@ -0,0 +1,77 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# NOTE: the imports below are assumed for the function to run; `fprint` is the
# package's verbose-print helper and its exact import path is not shown in
# this diff.
from gc import collect

import h5py
import numpy

import csiborgtools
from csiborgtools import fprint


def add_initial_snapshot(nsim, simname, halo_finder, verbose):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fname = paths.processed_output(nsim, simname, halo_finder)
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
fprint(f"processing simulation `{nsim}`.", verbose)
if simname == "csiborg":
nsnap0 = 1
elif simname == "quijote":
nsnap0 = -1
else:
raise ValueError(f"Unknown simulation `{simname}`.")
fprint("loading and sorting the initial PID.", verbose)
sort_indxs = numpy.argsort(partreader.read_snapshot(nsnap0, nsim, "pid"))
fprint("loading the final particles.", verbose)
with h5py.File(fname, "r") as f:
sort_indxs_final = f["snapshot_final/pid"][:]
f.close()
fprint("sorting the particles according to the final snapshot.", verbose)
sort_indxs_final = numpy.argsort(numpy.argsort(sort_indxs_final))
sort_indxs = sort_indxs[sort_indxs_final]
del sort_indxs_final
collect()
fprint("loading and sorting the initial particle position.", verbose)
pos = partreader.read_snapshot(nsnap0, nsim, "pos")[sort_indxs]
del sort_indxs
collect()
# In Quijote some particles are positioned precisely at the edge of the
# box. Move them to be just inside.
if simname == "quijote":
mask = pos >= 1
if numpy.any(mask):
spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-5
pos[mask] -= spacing
fprint(f"dumping particles for `{nsim}` to `{fname}`.", verbose)
with h5py.File(fname, "r+") as f:
if "snapshot_initial" in f.keys():
del f["snapshot_initial"]
group = f.create_group("snapshot_initial")
group.attrs["header"] = "Initial snapshot data."
dset = group.create_dataset("pos", data=pos)
dset.attrs["header"] = "DM particle positions in box units."
f.close()

File diff suppressed because one or more lines are too long

View file

@ -307,7 +307,7 @@ def plot_projected_field(kind, nsim, grid, in_rsp, smooth_scale, MAS="PCS",
print(f"Plotting projected field `{kind}`. ", flush=True)
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
-box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
+box = csiborgtools.read.CSiBORG1Box(nsnap, nsim, paths)
if kind == "overdensity":
field = load_field("density", nsim, grid, MAS=MAS, in_rsp=in_rsp)
@ -437,128 +437,6 @@ def plot_projected_field(kind, nsim, grid, in_rsp, smooth_scale, MAS="PCS",
plt.close()
###############################################################################
# Sky distribution #
###############################################################################
def get_sky_label(kind, volume_weight: bool):
"""
Get the sky label for a given field kind.
"""
if volume_weight:
if kind == "density":
label = r"$\log \int_{0}^{R} r^2 \rho(r, \mathrm{RA}, \mathrm{dec}) \mathrm{d} r$" # noqa
if kind == "overdensity":
label = r"$\log \int_{0}^{R} r^2 \left[\delta(r, \mathrm{RA}, \mathrm{dec}) + 1\right] \mathrm{d} r$" # noqa
elif kind == "potential":
label = r"$\int_{0}^{R} r^2 \phi(r, \mathrm{RA}, \mathrm{dec}) \mathrm{d} r$" # noqa
elif kind == "radvel":
label = r"$\int_{0}^{R} r^2 v_r(r, \mathrm{RA}, \mathrm{dec}) \mathrm{d} r$" # noqa
else:
label = None
else:
if kind == "density":
label = r"$\log \int_{0}^{R} \rho(r, \mathrm{RA}, \mathrm{dec}) \mathrm{d} r$" # noqa
if kind == "overdensity":
label = r"$\log \int_{0}^{R} \left[\delta(r, \mathrm{RA}, \mathrm{dec}) + 1\right] \mathrm{d} r$" # noqa
elif kind == "potential":
label = r"$\int_{0}^{R} \phi(r, \mathrm{RA}, \mathrm{dec}) \mathrm{d} r$" # noqa
elif kind == "radvel":
label = r"$\int_{0}^{R} v_r(r, \mathrm{RA}, \mathrm{dec}) \mathrm{d} r$" # noqa
else:
label = None
return label
def plot_sky_distribution(field, nsim, grid, nside, smooth_scale=None,
MAS="PCS", plot_groups=True, dmin=0, dmax=220,
plot_halos=None, volume_weight=True, pdf=False):
r"""
Plot the sky distribution of a given field kind on the sky along with halos
and selected observations.
TODO
----
- Add distance for groups.
Parameters
----------
field : str
Field kind.
nsim : int
Simulation index.
grid : int
Grid size.
nside : int
Healpix nside of the sky projection.
smooth_scale : float
Smoothing scale in :math:`\mathrm{Mpc} / h`.
MAS : str, optional
Mass assignment scheme.
plot_groups : bool, optional
Whether to plot the 2M++ groups.
dmin : float, optional
Minimum projection distance in :math:`\mathrm{Mpc}/h`.
dmax : float, optional
Maximum projection distance in :math:`\mathrm{Mpc}/h`.
plot_halos : list, optional
Minimum halo mass to plot in :math:`M_\odot`.
volume_weight : bool, optional
Whether to volume weight the field.
pdf : bool, optional
Whether to save the figure as a pdf.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
if field == "overdensity":
field = load_field("density", nsim, grid, MAS=MAS, in_rsp=False,
smooth_scale=smooth_scale)
density_gen = csiborgtools.field.DensityField(box, MAS)
field = density_gen.overdensity_field(field) + 1
else:
field = load_field(kind, nsim, grid, MAS=MAS, in_rsp=False,
smooth_scale=smooth_scale)
angpos = csiborgtools.field.nside2radec(nside)
dist = numpy.linspace(dmin, dmax, 500)
out = csiborgtools.field.make_sky(field, angpos=angpos, dist=dist, box=box,
volume_weight=volume_weight)
with plt.style.context(plt_utils.mplstyle):
label = get_sky_label(kind, volume_weight)
if kind in ["density", "overdensity"]:
out = numpy.log10(out)
healpy.mollview(out, fig=0, title="", unit=label, rot=90)
if plot_halos is not None:
bounds = {"dist": (dmin, dmax),
"totpartmass": (plot_halos, None)}
cat = csiborgtools.read.CSiBORGHaloCatalogue(nsim, paths,
bounds=bounds)
X = cat.position(cartesian=False)
healpy.projscatter(numpy.deg2rad(X[:, 2] + 90),
numpy.deg2rad(X[:, 1]),
s=5, c="red", label="CSiBORG haloes")
if plot_groups:
groups = csiborgtools.read.TwoMPPGroups(fpath="/mnt/extraspace/rstiskalek/catalogs/2M++_group_catalog.dat") # noqa
healpy.projscatter(numpy.deg2rad(groups["DEC"] + 90),
numpy.deg2rad(groups["RA"]), s=1, c="blue",
label="2M++ groups")
if plot_halos is not None or plot_groups:
plt.legend(markerscale=5)
for ext in ["png"] if pdf is False else ["png", "pdf"]:
fout = join(plt_utils.fout, f"sky_{kind}_{nsim}_from_{dmin}_to_{dmax}_vol{volume_weight}.{ext}") # noqa
print(f"Saving to `{fout}`.")
plt.savefig(fout, dpi=plt_utils.dpi, bbox_inches="tight")
plt.close()
###############################################################################
# Command line interface #
###############################################################################