Add pynbody and other support (#92)

* Simplify box units

* Move old scripts

* Add printing

* Update readers

* Disable boundscheck

* Add new ordering

* Clean up imports

* Enforce dtype and add mass to quijote

* Simplify print statements

* Fix little typos

* Fix key bug

* Bug fixing

* Delete boring comments

* Improve ultimate clumps for PHEW

* Delete boring comments

* Add basic reading

* Remove 0th index HID

* Add flipping of X and Z

* Updates to halo catalogues

* Add ordered caching

* Fix flipping

* Add new flags

* Fix PHEW empty clumps

* Stop over-writing

* Little improvements to angular neighbours

* Add catalogue masking

* Change if-else statements

* Cache only filtered data

* Add PHEW cats

* Add comments

* Sort imports

* Get Quijote working

* Docs

* Add HMF calculation

* Move to old

* Fix angular

* Add great circle distance

* Update imports

* Update imports

* Update docs

* Remove unused import

* Fix a quick bug

* Update compatibility

* Rename files

* Renaming

* Improve compatibility

* Rename snapshot

* Fix snapshot bug

* Update interface

* Finish updating interface

* Update all paths

* Add old scripts

* Add basic halo

* Update imports

* Improve snapshot processing

* Update ordering

* Fix how CM positions accessed

* Add merger paths

* Add imports

* Add merger reading

* Add making a merger tree

* Add a basic merger tree reader

* Add imports

* Add main branch walking + comments + debugging

* Get tree running

* Add working merger tree walking along main branch

* Add units conversion for merger data

* Add hid_to_array_index

* Update merger tree

* Add mergertree mass to PHEWcat

* Edit comments

* Add this to track changes...

* Fix a little bug

* Add mergertree mass

* Add cache clearing

* Improve summing substructure code

* Little bug fix

* Little updates to the merger tree reader

* Update .gitignore

* Add box selection

* Add optional deletion of a group

* add to keep track of changes

* Update changes

* Remove

* Add manual tracker

* Fix bug

* Add m200c_to_r200c

* Add manual halo tracking

* Remove skipped snapshots

* update cosmo params to match csiborg

* remove old comments

* Add SDSSxALFALFA

* Fix bugs

* Rename

* Edit paths

* Updates

* Add comments

* Add comment

* Add hour conversion

* Add imports

* Add new observation class

* Add selection

* Add imports

* Fix small bug

* Add field copying for safety

* Add matching to survey without masking

* Add P(k) calculation

* Add nb

* Edit comment

* Move files

* Remove merger import

* Edit setup.py

* Fix typo

* Edit import warnings

* update nb

* Update README

* Update README

* Update README

* Add skeleton

* Add skeleton
Richard Stiskalek 2023-12-07 14:23:32 +00:00 committed by GitHub
parent 5500fbd2b9
commit e972f8e3f2
53 changed files with 4627 additions and 1774 deletions

.gitignore (vendored, 2 changed lines)

@ -24,3 +24,5 @@ scripts_plots/submit.sh
scripts_plots/*.out scripts_plots/*.out
scripts_plots/*.sh scripts_plots/*.sh
notebooks/test.ipynb notebooks/test.ipynb
scripts/mgtree.py
scripts/makemerger.py


@ -1,3 +1,22 @@
# CSiBORG Tools # CSiBORG Tools
A compendium of tools for analysing the suite of Constrained Simulations in BORG (CSiBORG) simulations. Tools for analysing the suite of Constrained Simulations in BORG (CSiBORG). The interface is designed to work with the following simulation suites:
- CSiBORG1 dark matter-only RAMSES simulations (full support),
- CSiBORG2 dark matter-only Gadget4 simulations (planned full support),
- Quijote dark matter-only Gadget2 simulations (partial support),
However, with little effort it can support other simulations as well.
## TODO
- [ ] Add full support for CSiBORG2 suite of simulations.
- [ ] Add SPH field calculation from cosmotools.
## Adding a new simulation suite
- box units
- paths
- readsim
- halo_cat
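A minimal sketch of the box-units component for a hypothetical new suite (the class name and cosmology values are illustrative assumptions; the methods mirror the `BaseBox` interface that appears later in this diff):

```python
from astropy.cosmology import LambdaCDM


class MyNewSuiteBox:
    """Illustrative box-units class for a hypothetical simulation suite."""

    def __init__(self, boxsize, Om0=0.3175, H0=67.11):
        self._boxsize = boxsize  # cMpc / h
        self._cosmo = LambdaCDM(H0=H0, Om0=Om0, Ode0=1 - Om0)

    @property
    def boxsize(self):
        """Box size in cMpc / h."""
        return self._boxsize

    def box2mpc(self, length):
        """Convert a length from box units to cMpc / h."""
        return length * self._boxsize

    def mpc2box(self, length):
        """Convert a length from cMpc / h to box units."""
        return length / self._boxsize
```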


@ -12,12 +12,12 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from csiborgtools import clustering, field, match, read, summary # noqa from csiborgtools import clustering, field, halo, match, read, summary # noqa
from .utils import (center_of_mass, delta2ncells, number_counts, # noqa
periodic_distance, periodic_distance_two_points, # noqa
binned_statistic, cosine_similarity) # noqa
from .utils import (center_of_mass, delta2ncells, number_counts, # noqa
periodic_distance, periodic_distance_two_points, # noqa
binned_statistic, cosine_similarity, fprint, # noqa
hms_to_degrees, dms_to_degrees, great_circle_distance) # noqa
# Arguments to csiborgtools.read.Paths. # Arguments to csiborgtools.read.Paths.
paths_glamdring = {"srcdir": "/mnt/extraspace/hdesmond/", paths_glamdring = {"srcdir": "/mnt/extraspace/hdesmond/",
@ -46,5 +46,34 @@ class SDSS:
(lambda x: cls[x] < 155, ("DIST", )) (lambda x: cls[x] < 155, ("DIST", ))
] ]
def __call__(self): def __call__(self, fpath=None, apply_selection=True):
return read.SDSS(h=1, sel_steps=self.steps) if fpath is None:
fpath = "/mnt/extraspace/rstiskalek/catalogs/nsa_v1_0_1.fits"
sel_steps = self.steps if apply_selection else None
return read.SDSS(fpath, h=1, sel_steps=sel_steps)
class SDSSxALFALFA:
@staticmethod
def steps(cls):
return [(lambda x: cls[x], ("IN_DR7_LSS",)),
(lambda x: cls[x] < 17.6, ("ELPETRO_APPMAG_r", )),
(lambda x: cls[x] < 155, ("DIST", ))
]
def __call__(self, fpath=None, apply_selection=True):
if fpath is None:
fpath = "/mnt/extraspace/rstiskalek/catalogs/5asfullmatch.fits"
sel_steps = self.steps if apply_selection else None
return read.SDSS(fpath, h=1, sel_steps=sel_steps)
###############################################################################
# Clusters #
###############################################################################
clusters = {"Virgo": read.ObservedCluster(RA=hms_to_degrees(12, 27),
dec=dms_to_degrees(12, 43),
dist=16.5 * 0.7,
name="Virgo"),
}
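The survey wrappers now take an explicit `fpath` and an `apply_selection` toggle. A hedged usage sketch (the default paths point at the cluster filesystem, so a local catalogue path would typically be passed):

```python
import csiborgtools

# SDSS without the selection cuts; pass `fpath=` to read a local FITS copy.
sdss = csiborgtools.SDSS()(apply_selection=False)

# SDSS cross-matched with ALFALFA, with the default selection steps applied.
combined = csiborgtools.SDSSxALFALFA()()
```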


@ -15,12 +15,11 @@
from warnings import warn from warnings import warn
try: try:
import MAS_library as MASL # noqa import MAS_library as MASL # noqa
from .density import (DensityField, PotentialField, TidalTensorField, # noqa
from .density import (DensityField, PotentialField, # noqa VelocityField, power_spectrum) # noqa
TidalTensorField, VelocityField) from .interp import (evaluate_cartesian, evaluate_sky, field2rsp, # noqa
from .interp import (evaluate_cartesian, evaluate_sky, field2rsp, # noqa fill_outside, make_sky, observer_vobs) # noqa
fill_outside, make_sky, observer_vobs) from .utils import nside2radec, smoothen_field # noqa
from .utils import nside2radec, smoothen_field # noqa
except ImportError: except ImportError:
warn("MAS_library not found, `DensityField` will not be available", UserWarning) # noqa warn("MAS_library not found, `DensityField` and related Pylians-based routines will not be available") # noqa


@ -18,6 +18,7 @@ Density field and cross-correlation calculations.
from abc import ABC from abc import ABC
import MAS_library as MASL import MAS_library as MASL
import Pk_library as PKL
import numpy import numpy
from numba import jit from numba import jit
from tqdm import trange from tqdm import trange
@ -33,13 +34,7 @@ class BaseField(ABC):
@property @property
def box(self): def box(self):
""" """Simulation box information and transformations."""
Simulation box information and transformations.
Returns
-------
:py:class:`csiborgtools.units.CSiBORGBox`
"""
return self._box return self._box
@box.setter @box.setter
@ -52,13 +47,7 @@ class BaseField(ABC):
@property @property
def MAS(self): def MAS(self):
""" """Mass-assignment scheme."""
Mass-assignment scheme.
Returns
-------
str
"""
if self._MAS is None: if self._MAS is None:
raise ValueError("`MAS` is not set.") raise ValueError("`MAS` is not set.")
return self._MAS return self._MAS
@ -103,7 +92,6 @@ class DensityField(BaseField):
Calculate the overdensity field from the density field. Calculate the overdensity field from the density field.
Defined as :math:`\rho/ <\rho> - 1`. Overwrites the input array. Defined as :math:`\rho/ <\rho> - 1`. Overwrites the input array.
Parameters Parameters
---------- ----------
delta : 3-dimensional array of shape `(grid, grid, grid)` delta : 3-dimensional array of shape `(grid, grid, grid)`
@ -117,7 +105,7 @@ class DensityField(BaseField):
delta -= 1 delta -= 1
return delta return delta
def __call__(self, parts, grid, flip_xz=True, nbatch=30, verbose=True): def __call__(self, pos, mass, grid, nbatch=30, verbose=True):
""" """
Calculate the density field using a Pylians routine [1, 2]. Calculate the density field using a Pylians routine [1, 2].
Iteratively loads the particles into memory, flips their `x` and `z` Iteratively loads the particles into memory, flips their `x` and `z`
@ -126,13 +114,12 @@ class DensityField(BaseField):
Parameters Parameters
---------- ----------
parts : 2-dimensional array of shape `(n_parts, 7)` pos : 2-dimensional array of shape `(n_parts, 3)`
Particle positions, velocities and masses. Particle positions
Columns are: `x`, `y`, `z`, `vx`, `vy`, `vz`, `M`. mass : 1-dimensional array of shape `(n_parts,)`
Particle masses
grid : int grid : int
Grid size. Grid size.
flip_xz : bool, optional
Whether to flip the `x` and `z` coordinates.
nbatch : int, optional nbatch : int, optional
Number of batches to split the particle loading into. Number of batches to split the particle loading into.
verbose : bool, optional verbose : bool, optional
@ -150,24 +137,20 @@ class DensityField(BaseField):
""" """
rho = numpy.zeros((grid, grid, grid), dtype=numpy.float32) rho = numpy.zeros((grid, grid, grid), dtype=numpy.float32)
nparts = parts.shape[0] nparts = pos.shape[0]
batch_size = nparts // nbatch batch_size = nparts // nbatch
start = 0 start = 0
for __ in trange(nbatch + 1, disable=not verbose, for __ in trange(nbatch + 1, disable=not verbose,
desc="Loading particles for the density field"): desc="Loading particles for the density field"):
end = min(start + batch_size, nparts) end = min(start + batch_size, nparts)
pos = parts[start:end] batch_pos = pos[start:end]
pos, vel, mass = pos[:, :3], pos[:, 3:6], pos[:, 6] batch_mass = mass[start:end]
pos = force_single_precision(pos) batch_pos = force_single_precision(batch_pos)
vel = force_single_precision(vel) batch_mass = force_single_precision(batch_mass)
mass = force_single_precision(mass)
if flip_xz:
pos[:, [0, 2]] = pos[:, [2, 0]]
vel[:, [0, 2]] = vel[:, [2, 0]]
MASL.MA(pos, rho, 1., self.MAS, W=mass, verbose=False) MASL.MA(batch_pos, rho, 1., self.MAS, W=batch_mass, verbose=False)
if end == nparts: if end == nparts:
break break
start = end start = end
@ -178,8 +161,105 @@ class DensityField(BaseField):
return rho return rho
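The new `__call__` takes separate `pos` and `mass` arrays instead of a packed `(n_parts, 7)` array, and the x/z flip is gone. A standalone sketch of the same batched mass assignment (assumes Pylians' `MAS_library` is installed; positions are in box units in [0, 1], as in the routine above, and the grid and box size below are illustrative):

```python
import numpy as np
import MAS_library as MASL  # Pylians


def density_field(pos, mass, grid, boxsize, MAS="PCS", nbatch=30):
    """Batched mass assignment onto a cubic grid, mirroring the diff above."""
    rho = np.zeros((grid, grid, grid), dtype=np.float32)
    nparts = len(pos)
    batch_size = max(1, nparts // nbatch)
    start = 0
    while True:
        end = min(start + batch_size, nparts)
        batch_pos = pos[start:end].astype(np.float32)
        batch_mass = mass[start:end].astype(np.float32)
        MASL.MA(batch_pos, rho, 1., MAS, W=batch_mass, verbose=False)
        if end == nparts:
            break
        start = end
    # Divide by the cell volume in (kpc / h)^3, with `boxsize` in Mpc / h.
    rho /= (boxsize / grid * 1e3)**3
    return rho


rng = np.random.default_rng(0)
rho = density_field(rng.random((100_000, 3)), np.ones(100_000),
                    grid=64, boxsize=1000.0)
```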
# class SPHDensityVelocity(BaseField):
# r"""
# Density field calculation. Based primarily on routines of Pylians [1].
#
# Parameters
# ----------
# box : :py:class:`csiborgtools.read.CSiBORGBox`
# The simulation box information and transformations.
# MAS : str
# Mass assignment scheme. Options are: 'NGP' (nearest grid
# point), 'CIC' (cloud-in-cell), 'TSC' (triangular-shape cloud), 'PCS'
# (piecewise cubic spline).
# paths : :py:class:`csiborgtools.read.Paths`
# The simulation paths.
#
# References
# ----------
# [1] https://pylians3.readthedocs.io/
# """
#
# def __init__(self, box, MAS):
# self.box = box
# self.MAS = MAS
#
# def overdensity_field(self, delta):
# r"""
# Calculate the overdensity field from the density field.
# Defined as :math:`\rho/ <\rho> - 1`. Overwrites the input array.
#
# Parameters
# ----------
# delta : 3-dimensional array of shape `(grid, grid, grid)`
# The density field.
#
# Returns
# -------
# 3-dimensional array of shape `(grid, grid, grid)`.
# """
# delta /= delta.mean()
# delta -= 1
# return delta
#
# def __call__(self, pos, mass, grid, nbatch=30, verbose=True):
# """
# Calculate the density field using a Pylians routine [1, 2].
# Iteratively loads the particles into memory, flips their `x` and `z`
# coordinates. Particles are assumed to be in box units, with positions
# in [0, 1]
#
# Parameters
# ----------
# pos : 2-dimensional array of shape `(n_parts, 3)`
# Particle positions
# mass : 1-dimensional array of shape `(n_parts,)`
# Particle masses
# grid : int
# Grid size.
# nbatch : int, optional
# Number of batches to split the particle loading into.
# verbose : bool, optional
# Verbosity flag.
#
# Returns
# -------
# 3-dimensional array of shape `(grid, grid, grid)`.
#
# References
# ----------
# [1] https://pylians3.readthedocs.io/
# [2] https://github.com/franciscovillaescusa/Pylians3/blob/master
# /library/MAS_library/MAS_library.pyx
# """
# rho = numpy.zeros((grid, grid, grid), dtype=numpy.float32)
#
# nparts = pos.shape[0]
# batch_size = nparts // nbatch
# start = 0
#
# for __ in trange(nbatch + 1, disable=not verbose,
# desc="Loading particles for the density field"):
# end = min(start + batch_size, nparts)
# batch_pos = pos[start:end]
# batch_mass = mass[start:end]
#
# batch_pos = force_single_precision(batch_pos)
# batch_mass = force_single_precision(batch_mass)
#
# MASL.MA(batch_pos, rho, 1., self.MAS, W=batch_mass, verbose=False)
# if end == nparts:
# break
# start = end
#
# # Divide by the cell volume in (kpc / h)^3
# rho /= (self.box.boxsize / grid * 1e3)**3
#
# return rho
############################################################################### ###############################################################################
# Density field calculation # # Velocity field calculation #
############################################################################### ###############################################################################
@ -242,7 +322,7 @@ class VelocityField(BaseField):
/ numpy.sqrt(px**2 + py**2 + pz**2)) / numpy.sqrt(px**2 + py**2 + pz**2))
return radvel return radvel
def __call__(self, parts, grid, flip_xz=True, nbatch=30, def __call__(self, pos, vel, mass, grid, flip_xz=True, nbatch=30,
verbose=True): verbose=True):
""" """
Calculate the velocity field using a Pylians routine [1, 2]. Calculate the velocity field using a Pylians routine [1, 2].
@ -251,9 +331,12 @@ class VelocityField(BaseField):
Parameters Parameters
---------- ----------
parts : 2-dimensional array of shape `(n_parts, 7)` pos : 2-dimensional array of shape `(n_parts, 3)`
Particle positions, velocities and masses. Particle positions.
Columns are: `x`, `y`, `z`, `vx`, `vy`, `vz`, `M`. vel : 2-dimensional array of shape `(n_parts, 3)`
Particle velocities.
mass : 1-dimensional array of shape `(n_parts,)`
Particle masses.
grid : int grid : int
Grid size. Grid size.
flip_xz : bool, optional flip_xz : bool, optional
@ -273,26 +356,26 @@ class VelocityField(BaseField):
[2] https://github.com/franciscovillaescusa/Pylians3/blob/master [2] https://github.com/franciscovillaescusa/Pylians3/blob/master
/library/MAS_library/MAS_library.pyx /library/MAS_library/MAS_library.pyx
""" """
rho_velx = numpy.zeros((grid, grid, grid), dtype=numpy.float32) rho_vel = [numpy.zeros((grid, grid, grid), dtype=numpy.float32),
rho_vely = numpy.zeros((grid, grid, grid), dtype=numpy.float32) numpy.zeros((grid, grid, grid), dtype=numpy.float32),
rho_velz = numpy.zeros((grid, grid, grid), dtype=numpy.float32) numpy.zeros((grid, grid, grid), dtype=numpy.float32),
rho_vel = [rho_velx, rho_vely, rho_velz] ]
cellcounts = numpy.zeros((grid, grid, grid), dtype=numpy.float32) cellcounts = numpy.zeros((grid, grid, grid), dtype=numpy.float32)
nparts = parts.shape[0] nparts = pos.shape[0]
batch_size = nparts // nbatch batch_size = nparts // nbatch
start = 0 start = 0
for __ in trange(nbatch + 1) if verbose else range(nbatch + 1): for __ in trange(nbatch + 1) if verbose else range(nbatch + 1):
end = min(start + batch_size, nparts) end = min(start + batch_size, nparts)
pos = parts[start:end]
pos, vel, mass = pos[:, :3], pos[:, 3:6], pos[:, 6]
pos = force_single_precision(pos) batch_pos = pos[start:end]
vel = force_single_precision(vel) batch_vel = vel[start:end]
mass = force_single_precision(mass) batch_mass = mass[start:end]
if flip_xz:
pos[:, [0, 2]] = pos[:, [2, 0]] batch_pos = force_single_precision(batch_pos)
vel[:, [0, 2]] = vel[:, [2, 0]] batch_vel = force_single_precision(batch_vel)
batch_mass = force_single_precision(batch_mass)
vel *= mass.reshape(-1, 1) vel *= mass.reshape(-1, 1)
for i in range(3): for i in range(3):
@ -308,7 +391,7 @@ class VelocityField(BaseField):
for i in range(3): for i in range(3):
divide_nonzero(rho_vel[i], cellcounts) divide_nonzero(rho_vel[i], cellcounts)
return numpy.stack([rho_velx, rho_vely, rho_velz]) return numpy.stack(rho_vel)
############################################################################### ###############################################################################
@ -505,3 +588,35 @@ def eigenvalues_to_environment(eigvals, th):
else: else:
env[i, j, k] = 3 env[i, j, k] = 3
return env return env
###############################################################################
# Power spectrum calculation #
###############################################################################
def power_spectrum(delta, boxsize, MAS, threads=1, verbose=True):
"""
Calculate the monopole power spectrum of the density field.
Parameters
----------
delta : 3-dimensional array of shape `(grid, grid, grid)`
The over-density field.
boxsize : float
The simulation box size in `Mpc / h`.
MAS : str
Mass assignment scheme used to calculate the density field.
threads : int, optional
Number of threads to use.
verbose : bool, optional
Verbosity flag.
Returns
-------
k, Pk : 1-dimensional arrays of shape `(grid,)`
The wavenumbers and the power spectrum.
"""
axis = 2 # Axis along which to compute the quadrupole and hexadecapole
Pk = PKL.Pk(delta, boxsize, axis, MAS, threads, verbose)
return Pk.k3D, Pk.Pk[:, 0]
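A usage sketch for `power_spectrum` (assumes Pylians' `Pk_library` is installed; the white-noise field and box size are illustrative stand-ins for a real overdensity grid):

```python
import numpy as np
from csiborgtools.field import power_spectrum

# Stand-in overdensity field; in practice `delta` comes from
# DensityField.overdensity_field(...). Pylians expects float32.
delta = np.random.randn(128, 128, 128).astype(np.float32)

# `MAS` must match the scheme used to construct the density field.
k, Pk = power_spectrum(delta, boxsize=1000.0, MAS="PCS", threads=1,
                       verbose=False)
```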


@ -98,9 +98,12 @@ def evaluate_sky(*fields, pos, mpc2box, smooth_scales=None, verbose=False):
------- -------
(list of) 1-dimensional array of shape `(n_samples, len(smooth_scales))` (list of) 1-dimensional array of shape `(n_samples, len(smooth_scales))`
""" """
pos = force_single_precision(pos) # Make a copy of the positions to avoid modifying the input.
pos = numpy.copy(pos)
pos = force_single_precision(pos)
pos[:, 0] *= mpc2box pos[:, 0] *= mpc2box
cart_pos = radec_to_cartesian(pos) + 0.5 cart_pos = radec_to_cartesian(pos) + 0.5
if smooth_scales is not None: if smooth_scales is not None:


@ -0,0 +1,16 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .prop import density_profile # noqa

csiborgtools/halo/prop.py (new file, 46 lines)

@ -0,0 +1,46 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import numpy
from scipy.stats import binned_statistic
from ..utils import periodic_distance
def density_profile(pos, mass, center, nbins, boxsize):
"""
Calculate a density profile.
"""
raise NotImplementedError("Not implemented yet.")
rdist = periodic_distance(pos, center, boxsize)
rmin, rmax = numpy.min(rdist), numpy.max(rdist)
bin_edges = numpy.logspace(numpy.log10(rmin), numpy.log10(rmax), nbins)
rho, __, __ = binned_statistic(rdist, mass, statistic='sum',
bins=bin_edges)
rho /= 4. / 3 * numpy.pi * (bin_edges[1:]**3 - bin_edges[:-1]**3)
r = 0.5 * (bin_edges[1:] + bin_edges[:-1])
# r = numpy.sqrt(bin_edges[1:] * bin_edges[:-1])
return r, rho
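The normalisation divides the binned mass by the spherical-shell volumes. A self-contained sketch of the same computation on toy data (without the periodic-distance step, which the stub above handles via `periodic_distance`):

```python
import numpy as np

rng = np.random.default_rng(42)
pos = rng.normal(size=(10_000, 3))  # toy tracers around the origin
mass = np.ones(len(pos))            # equal masses

rdist = np.linalg.norm(pos, axis=1)
bin_edges = np.logspace(np.log10(rdist.min()), np.log10(rdist.max()), 10)
rho, _ = np.histogram(rdist, bins=bin_edges, weights=mass)
# Divide the binned mass by the shell volumes to get a density.
rho = rho / (4 / 3 * np.pi * (bin_edges[1:]**3 - bin_edges[:-1]**3))
r = 0.5 * (bin_edges[1:] + bin_edges[:-1])
```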


@ -12,7 +12,5 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import (ParticleOverlap, RealisationsMatcher, # noqa from .match import (ParticleOverlap, RealisationsMatcher, calculate_overlap, # noqa
calculate_overlap, calculate_overlap_indxs, pos2cell, # noqa pos2cell, find_neighbour, matching_max) # noqa
find_neighbour, get_halo_cell_limits, # noqa
matching_max) # noqa


@ -13,7 +13,8 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
""" """
Support for matching halos between CSiBORG IC realisations. Support for matching halos between CSiBORG IC realisations based on their
Lagrangian patch overlap.
""" """
from abc import ABC from abc import ABC
from datetime import datetime from datetime import datetime
@ -21,30 +22,19 @@ from functools import lru_cache
from math import ceil from math import ceil
import numpy import numpy
from scipy.ndimage import gaussian_filter
from numba import jit from numba import jit
from scipy.ndimage import gaussian_filter
from tqdm import tqdm, trange from tqdm import tqdm, trange
from ..read import load_halo_particles
class BaseMatcher(ABC): class BaseMatcher(ABC):
""" """Base class for `RealisationsMatcher` and `ParticleOverlap`."""
Base class for `RealisationsMatcher` and `ParticleOverlap`.
"""
_box_size = None _box_size = None
_bckg_halfsize = None _bckg_halfsize = None
@property @property
def box_size(self): def box_size(self):
""" """Number of cells in the box."""
Number of cells in the box.
Returns
-------
box_size : int
"""
if self._box_size is None: if self._box_size is None:
raise RuntimeError("`box_size` has not been set.") raise RuntimeError("`box_size` has not been set.")
return self._box_size return self._box_size
@ -64,10 +54,6 @@ class BaseMatcher(ABC):
grid distance from the center of the box to each side over which to grid distance from the center of the box to each side over which to
evaluate the background density field. Must be less than or equal to evaluate the background density field. Must be less than or equal to
half the box size. half the box size.
Returns
-------
bckg_halfsize : int
""" """
if self._bckg_halfsize is None: if self._bckg_halfsize is None:
raise RuntimeError("`bckg_halfsize` has not been set.") raise RuntimeError("`bckg_halfsize` has not been set.")
@ -130,10 +116,6 @@ class RealisationsMatcher(BaseMatcher):
""" """
Multiplier of the sum of the initial Lagrangian patch sizes of a halo Multiplier of the sum of the initial Lagrangian patch sizes of a halo
pair. Determines the range within which neighbors are returned. pair. Determines the range within which neighbors are returned.
Returns
-------
nmult : float
""" """
return self._nmult return self._nmult
@ -148,10 +130,6 @@ class RealisationsMatcher(BaseMatcher):
""" """
Tolerance on the absolute logarithmic mass difference of potential Tolerance on the absolute logarithmic mass difference of potential
matches. matches.
Returns
-------
float
""" """
return self._dlogmass return self._dlogmass
@ -166,10 +144,6 @@ class RealisationsMatcher(BaseMatcher):
""" """
Mass kind whose similarity is to be checked. Must be a valid key in the Mass kind whose similarity is to be checked. Must be a valid key in the
halo catalogue. halo catalogue.
Returns
-------
str
""" """
return self._mass_kind return self._mass_kind
@ -181,17 +155,10 @@ class RealisationsMatcher(BaseMatcher):
@property @property
def overlapper(self): def overlapper(self):
""" """The overlapper object."""
The overlapper object.
Returns
-------
:py:class:`csiborgtools.match.ParticleOverlap`
"""
return self._overlapper return self._overlapper
def cross(self, cat0, catx, particles0, particlesx, halo_map0, halo_mapx, def cross(self, cat0, catx, delta_bckg, cache_size=10000, verbose=True):
delta_bckg, cache_size=10000, verbose=True):
r""" r"""
Find all neighbours whose CM separation is less than `nmult` times the Find all neighbours whose CM separation is less than `nmult` times the
sum of their initial Lagrangian patch sizes and calculate their sum of their initial Lagrangian patch sizes and calculate their
@ -204,16 +171,6 @@ class RealisationsMatcher(BaseMatcher):
Halo catalogue of the reference simulation. Halo catalogue of the reference simulation.
catx : instance of :py:class:`csiborgtools.read.BaseCatalogue` catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the cross simulation. Halo catalogue of the cross simulation.
particles0 : 2-dimensional array
Particles archive file of the reference simulation. The columns
must be `x`, `y`, `z` and `M`.
particlesx : 2-dimensional array
Particles archive file of the cross simulation. The columns must be
`x`, `y`, `z` and `M`.
halo_map0 : 2-dimensional array
Halo map of the reference simulation.
halo_mapx : 2-dimensional array
Halo map of the cross simulation.
delta_bckg : 3-dimensional array delta_bckg : 3-dimensional array
Summed background density field of the reference and cross Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the simulations calculated with particles assigned to halos at the
@ -250,14 +207,11 @@ class RealisationsMatcher(BaseMatcher):
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i])) aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
match_indxs[i] = match_indxs[i][aratio < self.dlogmass] match_indxs[i] = match_indxs[i][aratio < self.dlogmass]
hid2map0 = {hid: i for i, hid in enumerate(halo_map0[:, 0])}
hid2mapx = {hid: i for i, hid in enumerate(halo_mapx[:, 0])}
# We will cache the halos from the cross simulation to speed up the I/O # We will cache the halos from the cross simulation to speed up the I/O
@lru_cache(maxsize=cache_size) @lru_cache(maxsize=cache_size)
def load_cached_halox(hid): def load_cached_halox(hid):
return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx, return load_processed_halo(hid, catx, nshift=0,
nshift=0, ncells=self.box_size) ncells=self.box_size)
iterator = tqdm( iterator = tqdm(
cat0["index"], cat0["index"],
@ -273,8 +227,7 @@ class RealisationsMatcher(BaseMatcher):
# Next, we find this halo's particles, total mass, minimum and # Next, we find this halo's particles, total mass, minimum and
# maximum cells and convert positions to cells. # maximum cells and convert positions to cells.
pos0, mass0, totmass0, mins0, maxs0 = load_processed_halo( pos0, mass0, totmass0, mins0, maxs0 = load_processed_halo(
k0, particles0, halo_map0, hid2map0, nshift=0, k0, cat0, nshift=0, ncells=self.box_size)
ncells=self.box_size)
# We now loop over matches of this halo and calculate their # We now loop over matches of this halo and calculate their
# overlap, storing them in `_cross`. # overlap, storing them in `_cross`.
@ -298,9 +251,8 @@ class RealisationsMatcher(BaseMatcher):
cross = numpy.asanyarray(cross, dtype=object) cross = numpy.asanyarray(cross, dtype=object)
return match_indxs, cross return match_indxs, cross
def smoothed_cross(self, cat0, catx, particles0, particlesx, halo_map0, def smoothed_cross(self, cat0, catx, delta_bckg, match_indxs,
halo_mapx, delta_bckg, match_indxs, smooth_kwargs, smooth_kwargs, cache_size=10000, verbose=True):
cache_size=10000, verbose=True):
r""" r"""
Calculate the smoothed overlaps for pairs previously identified via Calculate the smoothed overlaps for pairs previously identified via
`self.cross(...)` to have a non-zero NGP overlap. `self.cross(...)` to have a non-zero NGP overlap.
@ -311,16 +263,6 @@ class RealisationsMatcher(BaseMatcher):
Halo catalogue of the reference simulation. Halo catalogue of the reference simulation.
catx : instance of :py:class:`csiborgtools.read.BaseCatalogue` catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the cross simulation. Halo catalogue of the cross simulation.
particles0 : 2-dimensional array
Particles archive file of the reference simulation. The columns
must be `x`, `y`, `z` and `M`.
particlesx : 2-dimensional array
Particles archive file of the cross simulation. The columns must be
`x`, `y`, `z` and `M`.
halo_map0 : 2-dimensional array
Halo map of the reference simulation.
halo_mapx : 2-dimensional array
Halo map of the cross simulation.
delta_bckg : 3-dimensional array delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the simulations calculated with particles assigned to halos at the
@ -339,13 +281,11 @@ class RealisationsMatcher(BaseMatcher):
overlaps : 1-dimensional array of arrays overlaps : 1-dimensional array of arrays
""" """
nshift = read_nshift(smooth_kwargs) nshift = read_nshift(smooth_kwargs)
hid2map0 = {hid: i for i, hid in enumerate(halo_map0[:, 0])}
hid2mapx = {hid: i for i, hid in enumerate(halo_mapx[:, 0])}
@lru_cache(maxsize=cache_size) @lru_cache(maxsize=cache_size)
def load_cached_halox(hid): def load_cached_halox(hid):
return load_processed_halo(hid, particlesx, halo_mapx, hid2mapx, return load_processed_halo(hid, catx, nshift=nshift,
nshift=nshift, ncells=self.box_size) ncells=self.box_size)
iterator = tqdm( iterator = tqdm(
cat0["index"], cat0["index"],
@ -355,8 +295,7 @@ class RealisationsMatcher(BaseMatcher):
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
for i, k0 in enumerate(iterator): for i, k0 in enumerate(iterator):
pos0, mass0, __, mins0, maxs0 = load_processed_halo( pos0, mass0, __, mins0, maxs0 = load_processed_halo(
k0, particles0, halo_map0, hid2map0, nshift=nshift, k0, cat0, nshift=nshift, ncells=self.box_size)
ncells=self.box_size)
# Now loop over the matches and calculate the smoothed overlap. # Now loop over the matches and calculate the smoothed overlap.
_cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32) _cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32)
@ -396,8 +335,7 @@ class ParticleOverlap(BaseMatcher):
self.box_size = box_size self.box_size = box_size
self.bckg_halfsize = bckg_halfsize self.bckg_halfsize = bckg_halfsize
def make_bckg_delta(self, particles, halo_map, hid2map, halo_cat, def make_bckg_delta(self, cat, delta=None, verbose=False):
delta=None, verbose=False):
""" """
Calculate a NGP density field of particles belonging to halos of a Calculate a NGP density field of particles belonging to halos of a
halo catalogue `halo_cat`. Particles are only counted within the halo catalogue `halo_cat`. Particles are only counted within the
@ -406,15 +344,8 @@ class ParticleOverlap(BaseMatcher):
Parameters Parameters
---------- ----------
particles : 2-dimensional array cat : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Particles archive file. The columns must be `x`, `y`, `z` and `M`. Halo catalogue of the reference simulation.
halo_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each halo.
hid2map : dict
Dictionary mapping halo IDs to `halo_map` array positions.
halo_cat : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue.
delta : 3-dimensional array, optional delta : 3-dimensional array, optional
Array to store the density field. If `None` a new array is Array to store the density field. If `None` a new array is
created. created.
@ -436,16 +367,17 @@ class ParticleOverlap(BaseMatcher):
& (delta.dtype == numpy.float32)) & (delta.dtype == numpy.float32))
iterator = tqdm( iterator = tqdm(
halo_cat["index"], cat["index"],
desc=f"{datetime.now()} Calculating the background field", desc=f"{datetime.now()} Calculating the background field",
disable=not verbose disable=not verbose
) )
for hid in iterator: for hid in iterator:
pos = load_halo_particles(hid, particles, halo_map, hid2map) pos = cat.halo_particles(hid, "pos", in_initial=True)
if pos is None: if pos is None:
continue continue
pos, mass = pos[:, :3], pos[:, 3] mass = cat.halo_particles(hid, "mass", in_initial=True)
pos = pos2cell(pos, self.box_size) pos = pos2cell(pos, self.box_size)
# We mask out particles outside the cubical high-resolution region # We mask out particles outside the cubical high-resolution region
@ -874,7 +806,7 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
return intersect / (mass1 + mass2 - intersect) return intersect / (mass1 + mass2 - intersect)
def load_processed_halo(hid, particles, halo_map, hid2map, ncells, nshift): def load_processed_halo(hid, cat, ncells, nshift):
""" """
Load a processed halo from the `.h5` file. This is to be wrapped by a Load a processed halo from the `.h5` file. This is to be wrapped by a
cacher. cacher.
@ -883,14 +815,8 @@ def load_processed_halo(hid, particles, halo_map, hid2map, ncells, nshift):
---------- ----------
hid : int hid : int
Halo ID. Halo ID.
particles : 2-dimensional array cat : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Array of particles in box units. The columns must be `x`, `y`, `z` Halo catalogue.
and `M`.
halo_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each halo.
hid2map : dict
Dictionary mapping halo IDs to `halo_map` array positions.
ncells : int ncells : int
Number of cells in the box density field. Number of cells in the box density field.
nshift : int nshift : int
@ -909,8 +835,8 @@ def load_processed_halo(hid, particles, halo_map, hid2map, ncells, nshift):
maxs : len-3 tuple maxs : len-3 tuple
Maximum cell indices of the halo. Maximum cell indices of the halo.
""" """
pos = load_halo_particles(hid, particles, halo_map, hid2map) pos = cat.halo_particles(hid, "pos", in_initial=True)
pos, mass = pos[:, :3], pos[:, 3] mass = cat.halo_particles(hid, "mass", in_initial=True)
pos = pos2cell(pos, ncells) pos = pos2cell(pos, ncells)
mins, maxs = get_halo_cell_limits(pos, ncells=ncells, nshift=nshift) mins, maxs = get_halo_cell_limits(pos, ncells=ncells, nshift=nshift)
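The per-halo I/O is memoised with `functools.lru_cache`, as in `load_cached_halox` above; a self-contained illustration of the pattern (and of the cache clearing mentioned in the commit list):

```python
from functools import lru_cache


@lru_cache(maxsize=10_000)
def load_cached(hid):
    print(f"loading halo {hid}")  # executed once per distinct `hid`
    return hid ** 2


load_cached(42)            # computes and caches
load_cached(42)            # served from the cache, no print
load_cached.cache_clear()  # drop cached haloes, e.g. between simulations
```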


@ -12,9 +12,12 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .box_units import CSiBORGBox, QuijoteBox # noqa from .box_units import CSiBORGBox, QuijoteBox # noqa
from .halo_cat import CSiBORGHaloCatalogue, QuijoteHaloCatalogue, fiducial_observers # noqa from .halo_cat import (CSiBORGCatalogue, QuijoteCatalogue, # noqa
from .obs import SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, TwoMPPGroups # noqa CSiBORGPHEWCatalogue, fiducial_observers) # noqa
from .paths import Paths # noqa from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa
from .readsim import MmainReader, CSiBORGReader, QuijoteReader, halfwidth_mask, load_halo_particles # noqa TwoMPPGroups, ObservedCluster, match_array_to_no_masking) # noqa
from .utils import cols_to_structured, read_h5 # noqa from .paths import Paths # noqa
from .readsim import (CSiBORGReader, QuijoteReader, load_halo_particles, # noqa
make_halomap_dict) # noqa
from .utils import cols_to_structured, read_h5 # noqa


@ -17,6 +17,7 @@ Simulation box unit transformations.
""" """
from abc import ABC, abstractmethod, abstractproperty from abc import ABC, abstractmethod, abstractproperty
import numpy
from astropy import constants, units from astropy import constants, units
from astropy.cosmology import LambdaCDM from astropy.cosmology import LambdaCDM
@ -28,80 +29,39 @@ from .readsim import CSiBORGReader, QuijoteReader
class BaseBox(ABC): class BaseBox(ABC):
"""
Base class for box units.
"""
_name = "box_units" _name = "box_units"
_cosmo = None _cosmo = None
@property @property
def cosmo(self): def cosmo(self):
"""
The box cosmology.
Returns
-------
cosmo : `astropy.cosmology.LambdaCDM`
"""
if self._cosmo is None: if self._cosmo is None:
raise ValueError("Cosmology not set.") raise ValueError("Cosmology not set.")
return self._cosmo return self._cosmo
@property @property
def H0(self): def H0(self):
r""" r"""Present Hubble parameter in :math:`\mathrm{km} \mathrm{s}^{-1}`"""
The Hubble parameter at the time of the snapshot in units of
:math:`\mathrm{km} \mathrm{s}^{-1} \mathrm{Mpc}^{-1}`.
Returns
-------
H0 : float
"""
return self.cosmo.H0.value return self.cosmo.H0.value
@property @property
def rho_crit0(self): def rho_crit0(self):
r""" """Present-day critical density in M_sun h^2 / cMpc^3."""
Present-day critical density in :math:`M_\odot h^2 / \mathrm{cMpc}^3`.
Returns
-------
rho_crit0 : float
"""
rho_crit0 = self.cosmo.critical_density0 rho_crit0 = self.cosmo.critical_density0
return rho_crit0.to_value(units.solMass / units.Mpc**3) return rho_crit0.to_value(units.solMass / units.Mpc**3)
@property @property
def h(self): def h(self):
r""" """The little 'h' parameter at the time of the snapshot."""
The little 'h' parameter at the time of the snapshot.
Returns
-------
h : float
"""
return self._h return self._h
@property @property
def Om0(self): def Om0(self):
r""" """The present time matter density parameter."""
The matter density parameter.
Returns
-------
Om0 : float
"""
return self.cosmo.Om0 return self.cosmo.Om0
@abstractproperty @abstractproperty
def boxsize(self): def boxsize(self):
""" """Box size in cMpc."""
Box size in cMpc.
Returns
-------
boxsize : float
"""
pass pass
@abstractmethod @abstractmethod
@ -116,8 +76,7 @@ class BaseBox(ABC):
Returns Returns
------- -------
length : float float
Length in box units.
""" """
pass pass
@ -133,8 +92,7 @@ class BaseBox(ABC):
Returns Returns
------- -------
length : float float
Length in :math:`\mathrm{cMpc} / h`
""" """
pass pass
@ -150,8 +108,7 @@ class BaseBox(ABC):
Returns Returns
------- -------
mass : float float
Mass in box units.
""" """
pass pass
@ -167,8 +124,23 @@ class BaseBox(ABC):
Returns Returns
------- -------
mass : float float
Mass in :math:`M_\odot / h`. """
pass
@abstractmethod
def m200c_to_r200c(self, m200c):
"""
Convert M200c to R200c in units of cMpc / h.
Parameters
----------
m200c : float
M200c in units of M_sun / h.
Returns
-------
float
""" """
pass pass
@ -248,6 +220,12 @@ class CSiBORGBox(BaseBox):
def boxsize(self): def boxsize(self):
return self.box2mpc(1.) return self.box2mpc(1.)
def m200c_to_r200c(self, m200c):
rho_crit = self.cosmo.critical_density(1 / self._aexp - 1)
rho_crit = rho_crit.to_value(units.solMass / units.Mpc**3)
r200c = (3 * m200c / (4 * numpy.pi * 200 * rho_crit))**(1 / 3)
return r200c / self._aexp
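The conversion inverts the definition M200c = (4/3) pi * 200 * rho_crit(z) * R200c^3 and divides by the expansion factor to return a comoving radius. A standalone sketch (the cosmology values are illustrative):

```python
import numpy as np
from astropy import units
from astropy.cosmology import LambdaCDM


def m200c_to_r200c(m200c, cosmo, aexp):
    """Comoving R200c from M200c, mirroring the method above."""
    # Critical density at the snapshot redshift z = 1 / aexp - 1.
    rho_crit = cosmo.critical_density(1 / aexp - 1)
    rho_crit = rho_crit.to_value(units.solMass / units.Mpc**3)
    # Invert M200c = (4/3) * pi * 200 * rho_crit * R200c^3.
    r200c = (3 * m200c / (4 * np.pi * 200 * rho_crit))**(1 / 3)
    return r200c / aexp  # physical -> comoving


cosmo = LambdaCDM(H0=70.5, Om0=0.307, Ode0=0.693)
print(m200c_to_r200c(1e14, cosmo, aexp=1.0))  # roughly 1 Mpc
```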
############################################################################### ###############################################################################
# Quijote fiducial cosmology box # # Quijote fiducial cosmology box #
@ -256,7 +234,7 @@ class CSiBORGBox(BaseBox):
class QuijoteBox(BaseBox): class QuijoteBox(BaseBox):
""" """
Quijote fiducial cosmology box. Quijote cosmology box.
Parameters Parameters
---------- ----------
@ -289,33 +267,10 @@ class QuijoteBox(BaseBox):
return length / self.boxsize return length / self.boxsize
def solarmass2box(self, mass): def solarmass2box(self, mass):
r"""
Convert mass from :math:`M_\odot / h` to box units.
Parameters
----------
mass : float
Mass in :math:`M_\odot`.
Returns
-------
mass : float
Mass in box units.
"""
return mass / self._info["TotMass"] return mass / self._info["TotMass"]
def box2solarmass(self, mass): def box2solarmass(self, mass):
r"""
Convert mass from box units to :math:`M_\odot / h`.
Parameters
----------
mass : float
Mass in box units.
Returns
-------
mass : float
Mass in :math:`M_\odot / h`.
"""
return mass * self._info["TotMass"] return mass * self._info["TotMass"]
def m200c_to_r200c(self, m200c):
raise ValueError("Not implemented for Quijote boxes.")

File diff suppressed because it is too large.


@ -383,6 +383,9 @@ class FitsSurvey(ABC):
return out return out
return out[self.selection_mask] return out[self.selection_mask]
def __len__(self):
return self.size
############################################################################### ###############################################################################
# Planck clusters # # Planck clusters #
@ -560,8 +563,7 @@ class SDSS(FitsSurvey):
Parameters Parameters
---------- ----------
fpath : str, optional fpath : str, optional
Path to the FITS file. By default Path to the FITS file.
`/mnt/extraspace/rstiskalek/catalogs/nsa_v1_0_1.fits`.
h : float, optional h : float, optional
Little h. By default `h = 1`. The catalogue assumes this value. Little h. By default `h = 1`. The catalogue assumes this value.
The routine properties should take care of little h conversion. The routine properties should take care of little h conversion.
@ -581,9 +583,7 @@ class SDSS(FitsSurvey):
""" """
name = "SDSS" name = "SDSS"
def __init__(self, fpath=None, h=1, Om0=0.3175, sel_steps=None): def __init__(self, fpath, h=1, Om0=0.3175, sel_steps=None):
if fpath is None:
fpath = "/mnt/extraspace/rstiskalek/catalogs/nsa_v1_0_1.fits"
self._file = fits.open(fpath, memmap=False) self._file = fits.open(fpath, memmap=False)
self.h = h self.h = h
@ -719,3 +719,114 @@ class SDSS(FitsSurvey):
Get `IN_DR7_LSS` and turn to a boolean array. Get `IN_DR7_LSS` and turn to a boolean array.
""" """
return self.get_fitsitem("IN_DR7_LSS").astype(bool) return self.get_fitsitem("IN_DR7_LSS").astype(bool)
###############################################################################
# Individual observations #
###############################################################################
class BaseSingleObservation(ABC):
"""
Base class to hold information about a single object.
"""
def __init__(self):
self._spherical_pos = None
self._name = None
@property
def spherical_pos(self):
"""
Spherical position of the observation in dist/RA/dec in Mpc / h and
degrees, respectively.
Returns
-------
1-dimensional array of shape (3,)
"""
if self._spherical_pos is None:
raise ValueError("`spherical_pos` is not set!")
return self._spherical_pos
@spherical_pos.setter
def spherical_pos(self, pos):
if isinstance(pos, (list, tuple)):
pos = numpy.array(pos)
if not pos.shape == (3,):
raise ValueError("`spherical_pos` must be a of shape (3,).")
self._spherical_pos = pos
@property
def name(self):
"""
Observed object name.
Returns
-------
str
"""
if self._name is None:
raise ValueError("`name` is not set!")
return self._name
@name.setter
def name(self, name):
if not isinstance(name, str):
raise ValueError("`name` must be a string.")
self._name = name
class ObservedCluster(BaseSingleObservation):
"""
Class to hold information about an observed cluster.
Parameters
----------
RA : float
Right ascension in degrees.
dec : float
Declination in degrees.
dist : float
Distance in Mpc / h.
name : str
Cluster name.
"""
def __init__(self, RA, dec, dist, name):
super().__init__()
self.name = name
self.spherical_pos = [dist, RA, dec]
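Construction matches the `clusters` dictionary from `csiborgtools/__init__.py` earlier in this diff (coordinate values rounded):

```python
virgo = ObservedCluster(RA=186.75, dec=12.72, dist=16.5 * 0.7, name="Virgo")
print(virgo.name, virgo.spherical_pos)  # Virgo [ 11.55 186.75  12.72]
```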
###############################################################################
# Utility functions #
###############################################################################
def match_array_to_no_masking(arr, surv):
"""
Match an array to a survey without masking.
Parameters
----------
arr : n-dimensional array
Array to match.
surv : survey class
Survey class.
Returns
-------
out : n-dimensional array
"""
dtype = arr.dtype
if arr.ndim > 1:
shape = arr.shape
out = numpy.full((surv.selection_mask.size, *shape[1:]), numpy.nan,
dtype=dtype)
else:
out = numpy.full(surv.selection_mask.size, numpy.nan, dtype=dtype)
for i, indx in enumerate(surv["INDEX"]):
out[indx] = arr[i]
return out
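A toy check of `match_array_to_no_masking`, using a stand-in object that exposes only the two attributes the helper touches (`selection_mask` and the `"INDEX"` key):

```python
import numpy as np


class ToySurvey:
    """Stand-in survey: 4 objects, of which 2 survive the selection."""
    selection_mask = np.array([False, True, False, True])

    def __getitem__(self, key):
        assert key == "INDEX"
        return np.array([1, 3])  # pre-masking indices of the kept objects


full = match_array_to_no_masking(np.array([10.0, 30.0]), ToySurvey())
print(full)  # [nan 10. nan 30.]
```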


@ -13,7 +13,7 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""CSiBORG paths manager.""" """CSiBORG paths manager."""
from glob import glob from glob import glob, iglob
from os import makedirs from os import makedirs
from os.path import isdir, join from os.path import isdir, join
from warnings import warn from warnings import warn
@ -61,13 +61,7 @@ class Paths:
@property @property
def srcdir(self): def srcdir(self):
""" """Path to the folder where CSiBORG simulations are stored."""
Path to the folder where CSiBORG simulations are stored.
Returns
-------
str
"""
if self._srcdir is None: if self._srcdir is None:
raise ValueError("`srcdir` is not set!") raise ValueError("`srcdir` is not set!")
return self._srcdir return self._srcdir
@ -81,13 +75,7 @@ class Paths:
@property @property
def borg_dir(self): def borg_dir(self):
""" """Path to the folder where BORG MCMC chains are stored."""
Path to the folder where BORG MCMC chains are stored.
Returns
-------
str
"""
if self._borg_dir is None: if self._borg_dir is None:
raise ValueError("`borg_dir` is not set!") raise ValueError("`borg_dir` is not set!")
return self._borg_dir return self._borg_dir
@ -101,13 +89,7 @@ class Paths:
@property @property
def quijote_dir(self): def quijote_dir(self):
""" """Path to the folder where Quijote simulations are stored."""
Path to the folder where Quijote simulations are stored.
Returns
-------
str
"""
if self._quijote_dir is None: if self._quijote_dir is None:
raise ValueError("`quijote_dir` is not set!") raise ValueError("`quijote_dir` is not set!")
return self._quijote_dir return self._quijote_dir
@ -121,13 +103,7 @@ class Paths:
@property @property
def postdir(self): def postdir(self):
""" """Path to the folder where post-processed files are stored."""
Path to the folder where post-processed files are stored.
Returns
-------
str
"""
if self._postdir is None: if self._postdir is None:
raise ValueError("`postdir` is not set!") raise ValueError("`postdir` is not set!")
return self._postdir return self._postdir
@ -139,19 +115,6 @@ class Paths:
check_directory(path) check_directory(path)
self._postdir = path self._postdir = path
@property
def temp_dumpdir(self):
"""
Path to a temporary dumping folder.
Returns
-------
str
"""
fpath = join(self.postdir, "temp")
try_create_directory(fpath)
return fpath
@staticmethod @staticmethod
def quijote_fiducial_nsim(nsim, nobs=None): def quijote_fiducial_nsim(nsim, nobs=None):
""" """
@ -167,7 +130,7 @@ class Paths:
Returns Returns
------- -------
id : str str
""" """
if nobs is None: if nobs is None:
assert isinstance(nsim, str) assert isinstance(nsim, str)
@ -190,36 +153,14 @@ class Paths:
""" """
return join(self.borg_dir, "mcmc", f"mcmc_{nsim}.h5") return join(self.borg_dir, "mcmc", f"mcmc_{nsim}.h5")
def fof_membership(self, nsim, simname, sorted=False): def fof_cat(self, nsnap, nsim, simname, from_quijote_backup=False):
"""
Path to the file containing the FoF particle membership.
Parameters
----------
nsim : int
IC realisation index.
simname : str
Simulation name. Must be one of `csiborg` or `quijote`.
sorted : bool, optional
Whether to return path to the file that is sorted in the same
order as the PHEW output.
"""
assert simname in ["csiborg", "quijote"]
if simname == "quijote":
raise RuntimeError("Quijote FoF membership is in the FoF cats.")
fdir = join(self.postdir, "FoF_membership", )
try_create_directory(fdir)
fout = join(fdir, f"fof_membership_{nsim}.npy")
if sorted:
fout = fout.replace(".npy", "_sorted.npy")
return fout
def fof_cat(self, nsim, simname, from_quijote_backup=False):
r""" r"""
Path to the :math:`z = 0` FoF halo catalogue. Path to the :math:`z = 0` FoF halo catalogue.
Parameters Parameters
---------- ----------
nsnap : int
Snapshot index.
nsim : int nsim : int
IC realisation index. IC realisation index.
simname : str simname : str
@ -228,15 +169,15 @@ class Paths:
Whether to return the path to the Quijote FoF catalogue from the Whether to return the path to the Quijote FoF catalogue from the
backup. backup.
Returns Returns
------- -------
str str
""" """
if simname == "csiborg": if simname == "csiborg":
fdir = join(self.postdir, "FoF_membership", ) fdir = join(self.postdir, "halo_maker", f"ramses_{nsim}",
f"output_{str(nsnap).zfill(5)}", "FOF")
try_create_directory(fdir) try_create_directory(fdir)
return join(fdir, f"halo_catalog_{nsim}_FOF.txt") return join(fdir, "fort.132")
elif simname == "quijote": elif simname == "quijote":
if from_quijote_backup: if from_quijote_backup:
return join(self.quijote_dir, "halos_backup", str(nsim)) return join(self.quijote_dir, "halos_backup", str(nsim))
@ -245,57 +186,6 @@ class Paths:
else: else:
raise ValueError(f"Unknown simulation name `{simname}`.") raise ValueError(f"Unknown simulation name `{simname}`.")
def mmain(self, nsnap, nsim):
"""
Path to the `mmain` CSiBORG files of summed substructure.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
str
"""
fdir = join(self.postdir, "mmain")
try_create_directory(fdir)
return join(
fdir, f"mmain_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz")
def initmatch(self, nsim, simname, kind):
"""
Path to the `initmatch` files where the halo match between the
initial and final snapshot of a CSiBORG realisaiton is stored.
Parameters
----------
nsim : int
IC realisation index.
simname : str
Simulation name. Must be one of `csiborg` or `quijote`.
kind : str
Type of match. Must be one of `particles` or `fit`.
Returns
-------
str
"""
assert kind in ["particles", "fit"]
ftype = "npy" if kind == "fit" else "h5"
if simname == "csiborg":
fdir = join(self.postdir, "initmatch")
elif simname == "quijote":
fdir = join(self.quijote_dir, "initmatch")
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
try_create_directory(fdir)
return join(fdir, f"{kind}_{str(nsim).zfill(5)}.{ftype}")
def get_ics(self, simname, from_quijote_backup=False): def get_ics(self, simname, from_quijote_backup=False):
""" """
Get available IC realisation IDs for either the CSiBORG or Quijote Get available IC realisation IDs for either the CSiBORG or Quijote
@ -411,7 +301,7 @@ class Paths:
Returns Returns
------- -------
snapstr str
""" """
simpath = self.snapshots(nsim, simname, tonew=nsnap == 1) simpath = self.snapshots(nsim, simname, tonew=nsnap == 1)
if simname == "csiborg": if simname == "csiborg":
@ -422,7 +312,27 @@ class Paths:
nsnap = str(nsnap).zfill(3) nsnap = str(nsnap).zfill(3)
return join(simpath, f"snapdir_{nsnap}", f"snap_{nsnap}") return join(simpath, f"snapdir_{nsnap}", f"snap_{nsnap}")
def particles(self, nsim, simname): def merger_tree_file(self, nsnap, nsim):
"""
Path to the CSiBORG on-the-fly generated merger tree file.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
str
"""
nsim = str(nsim)
nsnap = str(nsnap).zfill(5)
return join(self.srcdir, f"ramses_out_{nsim}",
f"output_{nsnap}", f"mergertree_{nsnap}.dat")
def processed_output(self, nsim, simname, halo_finder):
""" """
Path to the files containing all particles of a CSiBORG realisation at Path to the files containing all particles of a CSiBORG realisation at
:math:`z = 0`. :math:`z = 0`.
@ -433,22 +343,80 @@ class Paths:
IC realisation index. IC realisation index.
simname : str simname : str
Simulation name. Must be one of `csiborg` or `quijote`. Simulation name. Must be one of `csiborg` or `quijote`.
halo_finder : str
Halo finder name.
Returns Returns
------- -------
str str
""" """
if simname == "csiborg": if simname == "csiborg":
fdir = join(self.postdir, "particles") fdir = join(self.postdir, "processed_output")
elif simname == "quijote": elif simname == "quijote":
fdir = join(self.quijote_dir, "Particles_fiducial") fdir = join(self.quijote_dir, "Particles_fiducial")
else: else:
raise ValueError(f"Unknown simulation name `{simname}`.") raise ValueError(f"Unknown simulation name `{simname}`.")
try_create_directory(fdir) try_create_directory(fdir)
fname = f"parts_{str(nsim).zfill(5)}.h5" fname = f"parts_{halo_finder}_{str(nsim).zfill(5)}.hdf5"
return join(fdir, fname) return join(fdir, fname)
def processed_phew(self, nsim):
"""
Path to the files containing PHEW CSiBORG catalogues.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
str
"""
fdir = join(self.postdir, "processed_output")
try_create_directory(fdir)
return join(fdir, f"phew_{str(nsim).zfill(5)}.hdf5")
def processed_merger_tree(self, nsim):
"""
Path to the files containing the processed original merger tree files.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
str
"""
fdir = join(self.postdir, "processed_output")
try_create_directory(fdir)
return join(fdir, f"merger_{str(nsim).zfill(5)}.hdf5")
def halomaker_particle_membership(self, nsnap, nsim, halo_finder):
"""
Path to the HaloMaker particle membership file (CSiBORG only).
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
halo_finder : str
Halo finder name.
Returns
-------
str
"""
fdir = join(self.postdir, "halo_maker", f"ramses_{nsim}",
f"output_{str(nsnap).zfill(5)}", halo_finder)
fpath = join(fdir, "*particle_membership*")
return next(iglob(fpath, recursive=True), None)
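A hedged sketch of the reworked `Paths` interface (the snapshot and IC indices below are illustrative and must refer to an existing realisation):

```python
import csiborgtools

paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)

nsnap, nsim = 951, 7444  # illustrative indices
print(paths.fof_cat(nsnap, nsim, "csiborg"))
print(paths.merger_tree_file(nsnap, nsim))
print(paths.processed_output(nsim, "csiborg", halo_finder="FOF"))
```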
def ascii_positions(self, nsim, kind): def ascii_positions(self, nsim, kind):
""" """
Path to ASCII files containing the positions of particles or halos. Path to ASCII files containing the positions of particles or halos.
@ -469,35 +437,6 @@ class Paths:
return join(fdir, fname) return join(fdir, fname)
def structfit(self, nsnap, nsim, simname):
"""
Path to the halo catalogue from `fit_halos.py`.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
simname : str
Simulation name. Must be one of `csiborg` or `quijote`.
Returns
-------
str
"""
if simname == "csiborg":
fdir = join(self.postdir, "structfit")
elif simname == "quijote":
fdir = join(self.quijote_dir, "structfit")
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
try_create_directory(fdir)
fname = f"out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
return join(fdir, fname)
def overlap(self, simname, nsim0, nsimx, min_logmass, smoothed): def overlap(self, simname, nsim0, nsimx, min_logmass, smoothed):
""" """
Path to the overlap files between two CSiBORG simulations. Path to the overlap files between two CSiBORG simulations.
@ -688,31 +627,6 @@ class Paths:
fname = f"obs_vp_{MAS}_{str(nsim).zfill(5)}_{grid}.npz" fname = f"obs_vp_{MAS}_{str(nsim).zfill(5)}_{grid}.npz"
return join(fdir, fname) return join(fdir, fname)
def halo_counts(self, simname, nsim, from_quijote_backup=False):
"""
Path to the files containing the binned halo counts.
Parameters
----------
simname : str
Simulation name. Must be `csiborg`, `quijote` or `quijote_full`.
nsim : int
IC realisation index.
from_quijote_backup : bool, optional
Whether to return the path to the Quijote halo counts from the
backup catalogues.
Returns
-------
str
"""
fdir = join(self.postdir, "HMF")
try_create_directory(fdir)
fname = f"halo_counts_{simname}_{str(nsim).zfill(5)}.npz"
if from_quijote_backup:
fname = fname.replace("halo_counts", "halo_counts_backup")
return join(fdir, fname)
    def cross_nearest(self, simname, run, kind, nsim=None, nobs=None):
        """
        Path to the files containing distance from a halo in a reference

File diff suppressed because it is too large

@@ -15,6 +15,7 @@
 """Collection of stand-off utility functions used in the scripts."""
 import numpy
 from numba import jit
+from datetime import datetime

 ###############################################################################
 #                                  Positions                                  #
 ###############################################################################

@@ -87,7 +88,7 @@ def periodic_distance_two_points(p1, p2, boxsize):
     return dist**0.5

-@jit(nopython=True)
+@jit(nopython=True, boundscheck=False)
 def periodic_wrap_grid(pos, boxsize=1):
     """Wrap positions in a periodic box."""
     for n in range(pos.shape[0]):

@@ -139,17 +140,34 @@ def radec_to_cartesian(X):
     """
     dist, ra, dec = X[:, 0], X[:, 1], X[:, 2]
-    ra *= numpy.pi / 180
-    dec *= numpy.pi / 180
-    cdec = numpy.cos(dec)
+    cdec = numpy.cos(dec * numpy.pi / 180)
     return numpy.vstack([
-        dist * cdec * numpy.cos(ra),
-        dist * cdec * numpy.sin(ra),
-        dist * numpy.sin(dec)
+        dist * cdec * numpy.cos(ra * numpy.pi / 180),
+        dist * cdec * numpy.sin(ra * numpy.pi / 180),
+        dist * numpy.sin(dec * numpy.pi / 180)
     ]).T
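A quick sanity check of the rewritten conversion (an editor's sketch, not part of the diff); the angles now stay in degrees until the trigonometric calls, so the input array is no longer modified in place:

    import numpy

    X = numpy.array([[1.0, 90.0, 0.0]])  # (dist, RA, dec), angles in degrees
    # radec_to_cartesian(X) is approximately [[0., 1., 0.]], a unit vector
    # along the y-axis, and X itself is left untouched.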
@jit(nopython=True, fastmath=True, boundscheck=False)
def great_circle_distance(x1, x2):
"""
Great circle distance between two points on a sphere, defined by RA and
dec, both in degrees.
"""
ra1, dec1 = x1
ra2, dec2 = x2
ra1 *= numpy.pi / 180
dec1 *= numpy.pi / 180
ra2 *= numpy.pi / 180
dec2 *= numpy.pi / 180
return 180 / numpy.pi * numpy.arccos(
numpy.sin(dec1) * numpy.sin(dec2)
+ numpy.cos(dec1) * numpy.cos(dec2) * numpy.cos(ra1 - ra2)
)
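Usage sketch for the new helper (coordinates illustrative, not from the commit):

    import numpy

    # (RA, dec) pairs in degrees for two nearby galaxies.
    m31 = numpy.array([10.68, 41.27])
    m33 = numpy.array([23.46, 30.66])
    sep = great_circle_distance(m31, m33)  # angular separation, ~14.8 deg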
 def cosine_similarity(x, y):
     r"""
     Calculate the cosine similarity between two Cartesian vectors. Defined
@@ -179,6 +197,36 @@ def cosine_similarity(x, y):
     return out[0] if out.size == 1 else out
def hms_to_degrees(hours, minutes=None, seconds=None):
"""
Convert hours, minutes and seconds to degrees.
Parameters
----------
hours, minutes, seconds : float
Returns
-------
float
"""
return hours * 15 + (minutes or 0) / 60 * 15 + (seconds or 0) / 3600 * 15
def dms_to_degrees(degrees, arcminutes=None, arcseconds=None):
"""
Convert degrees, arcminutes and arcseconds to decimal degrees.
Parameters
----------
degrees, arcminutes, arcseconds : float
Returns
-------
float
"""
return degrees + (arcminutes or 0) / 60 + (arcseconds or 0) / 3600
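The two converters in action (values illustrative): a right ascension of 13h 29m 52.7s and a declination of +47d 11m 43s in decimal degrees:

    ra = hms_to_degrees(13, 29, 52.7)  # 195 + 7.25 + 0.2196 ~ 202.4696
    dec = dms_to_degrees(47, 11, 43)   # 47 + 0.1833 + 0.0119 ~ 47.1953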
 def real2redshift(pos, vel, observer_location, observer_velocity, box,
                   periodic_wrap=True, make_copy=True):
     r"""
@@ -262,3 +310,9 @@ def binned_statistic(x, y, left_edges, bin_width, statistic):
         if numpy.any(mask):
             out[i] = statistic(y[mask])
     return out
def fprint(msg, verbose=True):
"""Print and flush a message with a timestamp."""
if verbose:
print(f"{datetime.now()}: {msg}", flush=True)

File diff suppressed because one or more lines are too long

@@ -66,7 +66,7 @@ jobs = csiborgtools.utils.split_jobs(nsims, nproc)[rank]
 for n in jobs:
     print(f"Rank {rank} at {datetime.now()}: saving {n}th delta.", flush=True)
     nsim = ics[n]
-    particles = reader.read_particle(max(paths.get_snapshots(nsim, "csiborg")),
+    particles = reader.read_snapshot(max(paths.get_snapshots(nsim, "csiborg")),
                                      nsim, ["x", "y", "z", "M"], verbose=False)
     # Halfwidth -- particle selection
     if args.halfwidth < 0.5:

old/cluster_crosspk.sh Normal file

@@ -0,0 +1,14 @@
nthreads=20
memory=40
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="cluster_crosspk.py"
grid=1024
halfwidth=0.13
cm="addqueue -q $queue -n $nthreads -m $memory $env $file --grid $grid --halfwidth $halfwidth"
echo "Submitting:"
echo $cm
echo
$cm

old/cluster_knn_auto.sh Normal file

@@ -0,0 +1,27 @@
nthreads=4
memory=4
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="cluster_knn_auto.py"
Rmax=219.8581560283688
verbose="true"
simname="quijote"
nsims="0 1 2"
# simname="csiborg"
# nsims="7444 7900 9052"
run="mass003"
pythoncm="$env $file --run $run --simname $simname --nsims $nsims --Rmax $Rmax --verbose $verbose"
echo $pythoncm
$pythoncm
# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm

old/cluster_knn_cross.sh Normal file

@@ -0,0 +1,18 @@
nthreads=151
memory=4
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="knn_cross.py"
runs="mass001"
pythoncm="$env $file --runs $runs"
echo $pythoncm
$pythoncm
# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm

old/cluster_tpcf_auto.sh Normal file

@@ -0,0 +1,26 @@
nthreads=26
memory=7
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="cluster_tpcf_auto.py"
Rmax=219.8581560283688
verbose="true"
# simname="quijote"
# nsims="0 1 2"
simname="csiborg"
nsims="7444 7900 9052"
run="mass003"
pythoncm="$env $file --run $run --simname $simname --nsims $nsims --Rmax $Rmax --verbose $verbose"
echo $pythoncm
$pythoncm
# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm

old/fit_hmf.sh Executable file

@@ -0,0 +1,24 @@
nthreads=11
memory=2
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="fit_hmf.py"
simname="quijote_full"
nsims="-1"
verbose=True
lower_lim=12.0
upper_lim=16.0
Rmax=155
from_quijote_backup="true"
bw=0.2
pythoncm="$env $file --simname $simname --nsims $nsims --Rmax $Rmax --lims $lower_lim $upper_lim --bw $bw --from_quijote_backup $from_quijote_backup --verbose $verbose"
$pythoncm
# cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
# echo "Submitting:"
# echo $cm
# echo
# $cm

old/merger.py Normal file

@@ -0,0 +1,686 @@
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Support for reading the PHEW/ACACIA CSiBORG merger trees. However, note that
the merger trees are very unreliable.
"""
from abc import ABC
from datetime import datetime
from gc import collect
import numpy
from h5py import File
from tqdm import tqdm, trange
from treelib import Tree
from ..utils import periodic_distance
from .paths import Paths
###############################################################################
# Utility functions. #
###############################################################################
def clump_identifier(clump, nsnap):
"""
Generate a unique identifier for a clump at a given snapshot.
Parameters
----------
clump : int
Clump ID.
nsnap : int
Snapshot index.
Returns
-------
str
"""
return f"{str(clump).rjust(9, 'x')}__{str(nsnap).rjust(4, 'x')}"
def extract_identifier(identifier):
"""
Extract the clump ID and snapshot index from an identifier generated by
`clump_identifier`.
Parameters
----------
identifier : str
Identifier.
Returns
-------
clump, nsnap : int
Clump ID and snapshot index.
"""
clump, nsnap = identifier.split('__')
return int(clump.lstrip('x')), int(nsnap.lstrip('x'))
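Round trip of the two helpers (IDs illustrative); the 'x'-padding keeps identifiers fixed-width so they sort and display consistently:

    ident = clump_identifier(42, 951)         # 'xxxxxxx42__x951'
    clump, nsnap = extract_identifier(ident)  # (42, 951)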
###############################################################################
# Merger tree reader class. #
###############################################################################
class BaseMergerReader(ABC):
"""
Base class for the CSiBORG merger tree reader.
"""
_paths = None
_nsim = None
_min_snap = None
_cache = {}
@property
def paths(self):
"""Paths manager."""
if self._paths is None:
raise ValueError("`paths` is not set.")
return self._paths
@paths.setter
def paths(self, paths):
assert isinstance(paths, Paths)
self._paths = paths
@property
def nsim(self):
"""Simulation index."""
if self._nsim is None:
raise ValueError("`nsim` is not set.")
return self._nsim
@nsim.setter
def nsim(self, nsim):
assert isinstance(nsim, (int, numpy.integer))
self._nsim = nsim
@property
def min_snap(self):
"""Minimum snapshot index to read."""
return self._min_snap
@min_snap.setter
def min_snap(self, min_snap):
if min_snap is not None:
assert isinstance(min_snap, (int, numpy.integer))
self._min_snap = int(min_snap)
def cache_length(self):
"""Length of the cache."""
return len(self._cache)
def cache_clear(self):
"""Clear the cache."""
self._cache = {}
collect()
def __getitem__(self, key):
try:
return self._cache[key]
except KeyError:
fname = self.paths.processed_merger_tree(self.nsim)
nsnap, kind = key.split("__")
with File(fname, "r") as f:
if kind == "clump_to_array":
cl = self[f"{nsnap}__clump"]
x = {}
for i, c in enumerate(cl):
if c in x:
x[c] += (i,)
else:
x[c] = (i,)
else:
x = f[f"{str(nsnap)}/{kind}"][:]
# Cache it
self._cache[key] = x
return x
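A sketch of how the lazy cache is exercised (realisation and snapshot indices hypothetical); keys follow the `"{nsnap}__{kind}"` convention of `__getitem__`:

    reader = MergerReader(7444, paths)
    desc_mass = reader["951__desc_mass"]    # read from HDF5 on first access
    lookup = reader["951__clump_to_array"]  # derived map: clump ID -> rows
    reader.cache_clear()                    # release the cache once done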
class MergerReader(BaseMergerReader):
"""
Merger tree reader.
Parameters
----------
nsim : int
Simulation index.
paths : Paths
Paths manager.
min_snap : int
Minimum snapshot index. Trees below this snapshot will not be read.
"""
def __init__(self, nsim, paths, min_snap=None):
self.nsim = nsim
self.paths = paths
self.min_snap = min_snap
def get_info(self, current_clump, current_snap, is_main=None):
"""
Make a list of information about a clump at a given snapshot.
Parameters
----------
current_clump : int
Clump ID.
current_snap : int
Snapshot index.
is_main : bool
Whether this is the main progenitor.
Returns
-------
list
"""
if current_clump < 0:
raise ValueError("Clump ID must be positive.")
if is_main is not None and not isinstance(is_main, bool):
raise ValueError("`is_main` must be a boolean.")
k = self[f"{current_snap}__clump_to_array"][current_clump][0]
out = [self[f"{current_snap}__desc_mass"][k],
*self[f"{current_snap}__desc_pos"][k][::-1]] # TODO REMOVE LATER
if is_main is not None:
return [is_main,] + out
return out
def get_mass(self, clump, snap):
"""
Get the mass of a clump at a given snapshot.
Parameters
----------
clump : int
Clump ID.
snap : int
Snapshot index.
Returns
-------
float
"""
if clump < 0:
raise ValueError("Clump ID must be positive.")
k = self[f"{snap}__clump_to_array"][clump][0]
return self[f"{snap}__desc_mass"][k]
def get_pos(self, clump, snap):
if clump < 0:
raise ValueError("Clump ID must be positive.")
k = self[f"{snap}__clump_to_array"][clump][0]
return self[f"{snap}__desc_pos"][k]
def find_main_progenitor(self, clump, nsnap):
"""
Find the main progenitor of a clump at a given snapshot. Cases are:
- `clump > 0`, `progenitor > 0`: main progenitor is in the adjacent
snapshot,
- `clump > 0`, `progenitor < 0`: main progenitor is not in the
adjacent snapshot.
- `clump < 0`, `progenitor = 0`: no progenitor, newly formed clump.
Parameters
----------
clump : int
Clump ID.
nsnap : int
Snapshot index.
Returns
-------
progenitor : int
Main progenitor clump ID.
progenitor_snap : int
Main progenitor snapshot index.
"""
if not clump > 0:
raise ValueError("Clump ID must be positive.")
cl2array = self[f"{nsnap}__clump_to_array"]
if clump in cl2array:
k = cl2array[clump]
else:
raise ValueError("Clump ID not found.")
if len(k) > 1:
raise ValueError("Found more than one main progenitor.")
k = k[0]
progenitor = abs(self[f"{nsnap}__progenitor"][k])
progenitor_snap = self[f"{nsnap}__progenitor_outputnr"][k]
if (self.min_snap is not None) and (nsnap < self.min_snap):
return 0, numpy.nan
return progenitor, progenitor_snap
def find_minor_progenitors(self, clump, nsnap):
"""
Find the minor progenitors of a clump at a given snapshot. This means
that `clump < 0`, `progenitor > 0`, i.e. this clump also has another
main progenitor.
If there are no minor progenitors, return `None` for both lists.
Parameters
----------
clump : int
Clump ID.
nsnap : int
Snapshot index.
Returns
-------
prog : list
List of minor progenitor clump IDs.
prog_snap : list
List of minor progenitor snapshot indices.
"""
if not clump > 0:
raise ValueError("Clump ID must be positive.")
try:
ks = self[f"{nsnap}__clump_to_array"][-clump]
except KeyError:
return None, None
prog = [self[f"{nsnap}__progenitor"][k] for k in ks]
prog_nsnap = [self[f"{nsnap}__progenitor_outputnr"][k] for k in ks]
if (self.min_snap is not None) and (nsnap < self.min_snap):
return None, None
return prog, prog_nsnap
def find_progenitors(self, clump, nsnap):
"""
Find all progenitors of a clump at a given snapshot. The main
progenitor is the first element of the list.
Parameters
----------
clump : int
Clump ID.
nsnap : int
Snapshot index.
Returns
-------
prog : list
List of progenitor clump IDs.
prog_nsnap : list
List of progenitor snapshot indices.
"""
main_prog, main_prog_nsnap = self.find_main_progenitor(clump, nsnap)
min_prog, min_prog_nsnap = self.find_minor_progenitors(clump, nsnap)
# Check that if the main progenitor is not in the adjacent snapshot,
# then the minor progenitor are also in that snapshot (if any).
if (min_prog is not None) and (main_prog_nsnap != nsnap - 1) and not all(main_prog_nsnap == mprog for mprog in min_prog_nsnap):  # noqa
raise ValueError(f"For clump {clump} at snapshot {nsnap} we have "
f"main progenitor at {main_prog_nsnap} and "
f"minor progenitors at {min_prog_nsnap}.")
if min_prog is None:
prog = [main_prog,]
prog_nsnap = [main_prog_nsnap,]
else:
prog = [main_prog,] + min_prog
prog_nsnap = [main_prog_nsnap,] + min_prog_nsnap
if prog[0] == 0 and len(prog) > 1:
raise ValueError("No main progenitor but minor progenitors "
f"found for clump {clump} at snapshot {nsnap}.")
return prog, prog_nsnap
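For example (IDs hypothetical), all progenitors of clump 10 at snapshot 900, with the main progenitor first:

    prog, prog_nsnap = reader.find_progenitors(10, 900)
    # prog[0] at prog_nsnap[0] is the main branch; the rest merged in here.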
def tree_mass_at_snapshot(self, clump, nsnap, target_snap):
"""
Calculate the total mass of nodes in a tree at a given snapshot.
"""
# If clump is 0 (i.e., we've reached the end of the tree), return 0
if clump == 0:
return 0
# Find the progenitors for the given clump and nsnap
prog, prog_nsnap = self.find_progenitors(clump, nsnap)
if prog[0] == 0:
print(prog)
return 0
# Sum the mass of the current clump's progenitors
tot = 0
for p, psnap in zip(prog, prog_nsnap):
if psnap == target_snap:
tot += self.get_mass(p, psnap)
# Recursively sum the mass of each progenitor's progenitors
for p, psnap in zip(prog, prog_nsnap):
# print("P ", p, psnap)
tot += self.mass_all_progenitor2(p, psnap, target_snap)
return tot
def is_jumper(self, clump, nsnap, nsnap_descendant):
pass
def make_tree(self, current_clump, current_nsnap,
above_clump=None, above_nsnap=None,
tree=None, is_main=None, verbose=False):
"""
Make a merger tree for a clump at a given snapshot.
Parameters
----------
current_clump : int
Clump ID of the descendant clump.
current_nsnap : int
Snapshot index of the descendent clump.
above_clump : int, optional
Clump ID of a clump above the current clump in the tree.
above_nsnap : int, optional
Snapshot index of a clump above the current clump in the tree.
tree : treelib.Tree, optional
Tree to add to.
is_main : bool, optional
Whether this is the main progenitor.
verbose : bool, optional
Verbosity flag.
Returns
-------
treelib.Tree
Tree with the current clump as the root.
"""
if verbose:
print(f"{datetime.now()}: Node of a clump {current_clump} at "
f"snapshot {current_nsnap}.", flush=True)
# Terminate if we are at the end of the tree
if current_clump == 0:
return
# Create the root node or add a new node
if tree is None:
tree = Tree()
tree.create_node(
"root",
identifier=clump_identifier(current_clump, current_nsnap),
data=self.get_info(current_clump, current_nsnap, True),
)
else:
tree.create_node(
identifier=clump_identifier(current_clump, current_nsnap),
parent=clump_identifier(above_clump, above_nsnap),
data=self.get_info(current_clump, current_nsnap, is_main),
)
# This returns a list of progenitors and their snapshots. The first
# element is the main progenitor.
prog, prog_nsnap = self.find_progenitors(current_clump, current_nsnap)
for i, (p, psnap) in enumerate(zip(prog, prog_nsnap)):
self.make_tree(p, psnap, current_clump, current_nsnap, tree,
is_main=i == 0, verbose=verbose)
return tree
def walk_main_progenitor(self, main_clump, main_nsnap, verbose=False):
"""
Walk the main progenitor branch of a clump.
Each snapshot contains information about the clump at that snapshot.
Parameters
----------
clump : int
Clump ID.
nsnap : int
Snapshot index.
Returns
-------
structured array
"""
out = []
pbar = tqdm(disable=not verbose)
while True:
prog, prog_nsnap = self.find_progenitors(main_clump, main_nsnap)
# Unpack the main and minor progenitor
mainprog, mainprog_nsnap = prog[0], prog_nsnap[0]
if len(prog) > 1:
minprog, minprog_nsnap = prog[1:], prog_nsnap[1:]
else:
minprog, minprog_nsnap = None, None
# If there is no progenitor, then set the main progenitor mass to 0
if mainprog == 0:
mainprog_mass = numpy.nan
else:
mainprog_mass = self.get_mass(mainprog, mainprog_nsnap)
totprog_mass = mainprog_mass
# Unpack masses of the progenitors
if minprog is not None:
minprog, minprog_nsnap = prog[1:], prog_nsnap[1:]
minprog_masses = [self.get_mass(c, n)
for c, n in zip(minprog, minprog_nsnap)]
max_minprog_mass = max(minprog_masses)
minprog_totmass = sum(minprog_masses)
totprog_mass += minprog_totmass
else:
minprog_totmass = numpy.nan
max_minprog_mass = numpy.nan
out += [
[main_nsnap,]
+ self.get_info(main_clump, main_nsnap)
+ [mainprog_nsnap, totprog_mass, mainprog_mass, minprog_totmass, max_minprog_mass / mainprog_mass] # noqa
]
pbar.update(1)
pbar.set_description(f"Clump {main_clump} ({main_nsnap})")
if mainprog == 0:
pbar.close()
break
main_clump = mainprog
main_nsnap = mainprog_nsnap
# Convert output to a structured array. We store integers as float
# to avoid errors because of converting NaNs to integers.
out = numpy.vstack(out)
dtype = [("desc_snapshot_index", numpy.float32),
("desc_mass", numpy.float32),
("desc_x", numpy.float32),
("desc_y", numpy.float32),
("desc_z", numpy.float32),
("prog_snapshot_index", numpy.float32),
("prog_totmass", numpy.float32),
("mainprog_mass", numpy.float32),
("minprog_totmass", numpy.float32),
("merger_ratio", numpy.float32),
]
return numpy.array([tuple(row) for row in out], dtype=dtype)
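Usage sketch (halo and snapshot indices hypothetical): walking the main branch yields one row per snapshot until the progenitor ID reaches zero.

    reader = MergerReader(7444, paths)
    branch = reader.walk_main_progenitor(1, 951, verbose=True)
    mass_history = branch["mainprog_mass"]  # main-branch mass vs. snapshot
    ratios = branch["merger_ratio"]         # minor-to-main progenitor ratio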
def match_mass_to_phewcat(self, phewcat):
"""
For each clump mass in the PHEW catalogue, find the corresponding
clump mass in the merger tree file. If no match is found returns NaN.
These are not equal because the PHEW catalogue mass is the mass without
unbinding.
Parameters
----------
phewcat : csiborgtools.read.CSiBORGPHEWReader
PHEW catalogue reader.
Returns
-------
mass : 1-dimensional array
"""
if phewcat.nsim != self.nsim:
raise ValueError("Simulation indices do not match.")
nsnap = phewcat.nsnap
indxs = phewcat["index"]
mergertree_mass = numpy.full(len(indxs), numpy.nan,
dtype=numpy.float32)
for i, ind in enumerate(indxs):
try:
mergertree_mass[i] = self.get_mass(ind, nsnap)
except KeyError:
continue
return mergertree_mass
def match_pos_to_phewcat(self, phewcat):
"""
For each clump mass in the PHEW catalogue, find the corresponding
clump mass in the merger tree file. If no match is found returns NaN.
These are not equal because the PHEW catalogue mass is the mass without
unbinding.
Parameters
----------
phewcat : csiborgtools.read.CSiBORGPHEWReader
PHEW catalogue reader.
Returns
-------
pos : 2-dimensional array
"""
if phewcat.nsim != self.nsim:
raise ValueError("Simulation indices do not match.")
nsnap = phewcat.nsnap
indxs = phewcat["index"]
mergertree_pos = numpy.full((len(indxs), 3), numpy.nan,
dtype=numpy.float32)
for i, ind in enumerate(indxs):
try:
mergertree_pos[i] = self.get_pos(ind, nsnap)
except KeyError:
continue
return mergertree_pos[:, ::-1] # TODO later remove
###############################################################################
# Manual halo tracking. #
###############################################################################
def track_halo_manually(cats, hid, maxdist=0.15, max_dlogm=0.35):
"""
Manually track a halo without using the merger tree, by searching for a
nearby halo of similar mass in adjacent snapshots. Supports only main
haloes and can only be trusted for the most massive haloes in a
simulation; even then significant care should be taken.
Selects the most massive halo within a search radius to be a match.
In case a progenitor is not found in the adjacent snapshot, the search
continues in the next snapshot; occasionally some haloes disappear.
Parameters
----------
cats : dict
Dictionary of halo catalogues, keys are snapshot indices.
hid : int
Halo ID.
maxdist : float, optional
Maximum comoving distance for a halo to move between adjacent
snapshots.
max_dlogm : float, optional
Maximum |log mass ratio| for a halo to be considered a progenitor.
Returns
-------
hist : structured array
History of the halo.
"""
nsnap0 = max(cats.keys())
k = cats[nsnap0]["hid_to_array_index"][hid]
pos = cats[nsnap0]["cartesian_pos"][k]
mass = cats[nsnap0]["summed_mass"][k]
if not cats[nsnap0]["is_main"][k]:
raise ValueError("Only main haloes are supported.")
if not mass > 1e13:
raise ValueError("Only the most massive haloes are supported.")
if not cats[nsnap0]["dist"][k] < 155.5:
raise ValueError("Only high-resolution region haloes are supported.")
dtype = [("snapshot_index", numpy.float32),
("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("mass", numpy.float32),
("desc_dist", numpy.float32),
]
hist = numpy.full(len(cats), numpy.nan, dtype=dtype)
hist["snapshot_index"][0] = nsnap0
hist["x"][0], hist["y"][0], hist["z"][0] = pos
hist["mass"][0] = mass
for n in trange(1, len(cats), desc="Tracking halo"):
nsnap = nsnap0 - n
hist["snapshot_index"][n] = nsnap
# Find indices of all main haloes within a box of width 2 * maxdist.
indxs = cats[nsnap].select_in_box(pos, 2 * maxdist)
if len(indxs) == 0:
continue
nearby_pos = cats[nsnap]["cartesian_pos"][indxs]
nearby_mass = cats[nsnap]["summed_mass"][indxs]
# Distance from the previous position and |log mass ratio|
dist = periodic_distance(nearby_pos, pos, cats[nsnap].box.boxsize)
dlogm = numpy.abs(numpy.log10(nearby_mass / mass))
k = numpy.argmin(dlogm)
if (dlogm[k] < max_dlogm) & (dist[k] < maxdist):
hist["x"][n], hist["y"][n], hist["z"][n] = nearby_pos[k]
hist["mass"][n] = nearby_mass[k]
hist["desc_dist"][n] = dist[k]
pos = nearby_pos[k]
mass = nearby_mass[k]
return hist
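Usage sketch, with a hypothetical way of assembling the `cats` dictionary (`load_catalogue` and `snapshots` are placeholders); any catalogue exposing the keys used above and a `select_in_box` method would do:

    # cats maps snapshot index -> halo catalogue of that snapshot.
    cats = {nsnap: load_catalogue(nsim, nsnap) for nsnap in snapshots}
    hist = track_halo_manually(cats, hid=123, maxdist=0.15, max_dlogm=0.35)
    print(hist["mass"])  # NaN entries mark snapshots without a match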


@@ -98,7 +98,7 @@ def sort_fofid(nsim, verbose=True):
     reader = csiborgtools.read.CSiBORGReader(paths)
     pars_extract = ["x"]  # Dummy variable
-    __, pids = reader.read_particle(nsnap, nsim, pars_extract,
+    __, pids = reader.read_snapshot(nsnap, nsim, pars_extract,
                                     return_structured=False, verbose=verbose)
     del __
     collect()

old/mv_fofmembership.sh Normal file

@@ -0,0 +1,17 @@
nthreads=1
memory=100
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="mv_fofmembership.py"
nsims="5511"
pythoncm="$env $file --nsims $nsims"
# echo $pythoncm
# $pythoncm
cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
echo "Submitting:"
echo $cm
echo
$cm


@@ -12,7 +12,7 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 r"""
-Script to load in the simulation particles, sort them by their FoF halo ID and
+Script to load in the simulation particles, sort them by their halo ID and
 dump into a HDF5 file. Stores the first and last index of each halo in the
 particle array. This can be used for fast slicing of the array to access
 particles of a single clump.

@@ -108,7 +108,7 @@ def main(nsim, simname, verbose):
         pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
     else:
         pars_extract = None
-    parts, pids = partreader.read_particle(
+    parts, pids = partreader.read_snapshot(
         nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
     # In case of CSiBORG, we need to convert the mass and velocities from

old/pre_dumppart.sh Normal file

@@ -0,0 +1,18 @@
nthreads=1
memory=40
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="pre_dumppart.py"
simname="csiborg"
nsims="5511"
pythoncm="$env $file --nsims $nsims --simname $simname"
# echo $pythoncm
# $pythoncm
cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
echo "Submitting:"
echo $cm
echo
$cm


@@ -67,14 +67,13 @@ def sort_particle_membership(nsim, nsnap, method):
     fout = fpath + "_sorted.hdf5"
     print(f"{datetime.now()}: saving the sorted data to ... `{fout}`")

-    header = """
-    This dataset represents halo indices for each particle.
-        - The particles are ordered as they appear in the simulation snapshot.
-        - Unassigned particles are given a halo index of 0.
-    """
     with h5py.File(fout, 'w') as hdf:
-        dset = hdf.create_dataset('hids_dataset', data=hids)
-        dset.attrs['header'] = header
+        dset = hdf.create_dataset('hids', data=hids)
+        dset.attrs['header'] = """
+        This dataset represents (sub)halo indices for each particle.
+            - The particles are ordered as they appear in the simulation snapshot.
+            - Unassigned particles are given an index of 0.
+        """

 if __name__ == "__main__":

old/sort_halomaker.sh Executable file

@@ -0,0 +1,19 @@
nthreads=1
memory=64
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="sort_halomaker.py"
method="FOF"
nsim="7444"
pythoncm="$env $file --method $method --nsim $nsim"
# echo $pythoncm
# $pythoncm
cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
echo "Submitting:"
echo $cm
echo
$cm


@@ -61,13 +61,13 @@ def positions_to_ascii(positions, output_filename, boxsize=None,
             out_file.write(chunk_str + "\n")

-def extract_positions(nsim, paths, kind):
+def extract_positions(nsim, simname, paths, kind):
     """
     Extract either the particle or halo positions.
     """
     if kind == "particles":
-        fname = paths.particles(nsim, args.simname)
-        return h5py.File(fname, 'r')["particles"]
+        fname = paths.processed_output(nsim, simname, "FOF")
+        return h5py.File(fname, 'r')["snapshot_final/pos"][:]

     if kind == "particles_rsp":
         raise NotImplementedError("RSP of particles is not implemented yet.")

@@ -75,23 +75,23 @@ def extract_positions(nsim, paths, kind):
     fpath = paths.observer_peculiar_velocity("PCS", 512, nsim)
     vpec_observer = numpy.load(fpath)["observer_vp"][0, :]
     cat = csiborgtools.read.CSiBORGHaloCatalogue(
-        nsim, paths, bounds={"dist": (0, 155.5)}, load_fitted=True,
-        load_initial=False, observer_velocity=vpec_observer, )
+        nsim, paths, "halo_catalogue", "FOF", bounds={"dist": (0, 155.5)},
+        observer_velocity=vpec_observer)

     if kind == "halos":
-        return cat.position()
+        return cat["cartesian_pos"]

     if kind == "halos_rsp":
-        return cat.redshift_space_position()
+        return cat["cartesian_redshift_pos"]

     raise ValueError(f"Unknown kind `{kind}`. Allowed values are: "
                      "`particles`, `particles_rsp`, `halos`, `halos_rsp`.")

-def main(nsim, paths, kind):
-    boxsize = 677.7 if "particles" in kind else None
-    pos = extract_positions(nsim, paths, kind)
-    output_filename = paths.ascii_positions(nsim, kind)
+def main(args, paths):
+    boxsize = 677.7 if "particles" in args.kind else None
+    pos = extract_positions(args.nsim, args.simname, paths, args.kind)
+    output_filename = paths.ascii_positions(args.nsim, args.kind)
     positions_to_ascii(pos, output_filename, boxsize=boxsize)


@@ -28,6 +28,16 @@ from taskmaster import work_delegation
 import csiborgtools
 from utils import get_nsims

+###############################################################################
+#                  Cosmotool SPH density & velocity field                    #
+###############################################################################
+
+
+def cosmotool_sph(nsim, parser_args):
+    pass
+
+
 ###############################################################################
 #                                Density field                               #
 ###############################################################################

@@ -40,13 +50,15 @@ def density_field(nsim, parser_args, to_save=True):
     paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
     nsnap = max(paths.get_snapshots(nsim, "csiborg"))
     box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
+    fname = paths.processed_output(nsim, "csiborg", "halo_catalogue")

     if not parser_args.in_rsp:
-        parts = csiborgtools.read.read_h5(paths.particles(nsim, "csiborg"))
-        parts = parts["particles"]
+        snap = csiborgtools.read.read_h5(fname)["snapshot_final"]
+        pos = snap["pos"]
+        mass = snap["mass"]
         gen = csiborgtools.field.DensityField(box, parser_args.MAS)
-        field = gen(parts, parser_args.grid, verbose=parser_args.verbose)
+        field = gen(pos, mass, parser_args.grid, verbose=parser_args.verbose)
     else:
         field = numpy.load(paths.field(
             "density", parser_args.MAS, parser_args.grid, nsim, False))

@@ -83,12 +95,15 @@ def velocity_field(nsim, parser_args, to_save=True):
     paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
     nsnap = max(paths.get_snapshots(nsim, "csiborg"))
     box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
+    fname = paths.processed_output(nsim, "csiborg", "halo_catalogue")

-    parts = csiborgtools.read.read_h5(paths.particles(nsim, "csiborg"))
-    parts = parts["particles"]
+    snap = csiborgtools.read.read_h5(fname)["snapshot_final"]
+    pos = snap["pos"]
+    vel = snap["vel"]
+    mass = snap["mass"]
     gen = csiborgtools.field.VelocityField(box, parser_args.MAS)
-    field = gen(parts, parser_args.grid, verbose=parser_args.verbose)
+    field = gen(pos, vel, mass, parser_args.grid, verbose=parser_args.verbose)

     if to_save:
         fout = paths.field("velocity", parser_args.MAS, parser_args.grid,

@@ -247,6 +262,7 @@ if __name__ == "__main__":
     parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                         help="Verbosity flag for reading in particles.")
     parser.add_argument("--simname", type=str, default="csiborg",
+                        choices=["csiborg", "csiborg2"],
                         help="Verbosity flag for reading in particles.")
     parser_args = parser.parse_args()
     comm = MPI.COMM_WORLD


@@ -53,12 +53,20 @@ def open_galaxy_positions(survey_name, comm):
     if rank == 0:
         if survey_name == "SDSS":
-            survey = csiborgtools.read.SDSS(
-                h=1, sel_steps=lambda cls: steps(cls, survey_name))
+            survey = csiborgtools.SDSS()()
             pos = numpy.vstack([survey["DIST_UNCORRECTED"],
                                 survey["RA"],
                                 survey["DEC"]],
                                ).T
+            pos = pos.astype(numpy.float32)
+            indxs = survey["INDEX"]
+        if survey_name == "SDSSxALFALFA":
+            survey = csiborgtools.SDSSxALFALFA()()
+            pos = numpy.vstack([survey["DIST_UNCORRECTED"],
+                                survey["RA_1"],
+                                survey["DEC_1"]],
+                               ).T
+            pos = pos.astype(numpy.float32)
             indxs = survey["INDEX"]
         elif survey_name == "GW170817":
             samples = File("/mnt/extraspace/rstiskalek/GWLSS/H1L1V1-EXTRACT_POSTERIOR_GW170817-1187008600-400.hdf", 'r')["samples"]  # noqa

@@ -110,7 +118,7 @@ def evaluate_field(field, pos, nrand, smooth_scales=None, seed=42,
             field_smoothed = csiborgtools.field.smoothen_field(
                 field, scale * MPC2BOX, boxsize=1, make_copy=True)
         else:
-            field_smoothed = field
+            field_smoothed = numpy.copy(field)
         val[:, i] = csiborgtools.field.evaluate_sky(
             field_smoothed, pos=pos, mpc2box=MPC2BOX)

@@ -164,7 +172,7 @@ if __name__ == "__main__":
     parser.add_argument("--nsims", type=int, nargs="+", default=None,
                         help="IC realisations. If `-1` processes all.")
     parser.add_argument("--survey", type=str, required=True,
-                        choices=["SDSS", "GW170817"],
+                        choices=["SDSS", "SDSSxALFALFA", "GW170817"],
                         help="Galaxy survey")
     parser.add_argument("--smooth_scales", type=float, nargs="+", default=None,
                         help="Smoothing scales in Mpc / h.")

@@ -189,12 +197,6 @@ if __name__ == "__main__":
     pos, indxs = open_galaxy_positions(args.survey, MPI.COMM_WORLD)

-    if MPI.COMM_WORLD.Get_rank() == 0 and args.survey != "GW170817":
-        fout = f"/mnt/extraspace/rstiskalek/CSiBORG/ascii_positions/{args.survey}_positions.npz"  # noqa
-        pos = csiborgtools.utils.radec_to_cartesian(pos) + 677.7 / 2
-        print(f"Saving to ... `{fout}`.")
-        numpy.savez(fout, pos=pos, indxs=indxs)

     def _main(nsim):
         main(nsim, args, pos, indxs, paths,
              verbose=MPI.COMM_WORLD.Get_size() == 1)


@@ -1,118 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass, Lagrangian patch size in the
initial snapshot.
The initial snapshot particles are read from the sorted files.
"""
from argparse import ArgumentParser
from datetime import datetime
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import tqdm
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def _main(nsim, simname, verbose):
"""
Calculate the Lagrangian halo centre of mass and Lagrangian patch size in
the initial snapshot.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cols = [("index", numpy.int32),
("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("lagpatch_size", numpy.float32),
("lagpatch_ncells", numpy.int32),]
fname = paths.initmatch(nsim, simname, "particles")
parts = csiborgtools.read.read_h5(fname)
parts = parts['particles']
halo_map = csiborgtools.read.read_h5(paths.particles(nsim, simname))
halo_map = halo_map["halomap"]
if simname == "csiborg":
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, bounds=None, load_fitted=False, load_initial=False)
else:
cat = csiborgtools.read.QuijoteHaloCatalogue(
nsim, paths, nsnap=4, load_fitted=False, load_initial=False)
hid2map = {hid: i for i, hid in enumerate(halo_map[:, 0])}
# Initialise the overlapper.
if simname == "csiborg":
kwargs = {"box_size": 2048, "bckg_halfsize": 512}
else:
kwargs = {"box_size": 512, "bckg_halfsize": 256}
overlapper = csiborgtools.match.ParticleOverlap(**kwargs)
out = csiborgtools.read.cols_to_structured(len(cat), cols)
for i, hid in enumerate(tqdm(cat["index"]) if verbose else cat["index"]):
out["index"][i] = hid
part = csiborgtools.read.load_halo_particles(hid, parts, halo_map,
hid2map)
# Skip if the halo has no particles or is too small.
if part is None or part.size < 40:
continue
pos, mass = part[:, :3], part[:, 3]
# Calculate the centre of mass and the Lagrangian patch size.
cm = csiborgtools.center_of_mass(pos, mass, boxsize=1.0)
distances = csiborgtools.periodic_distance(pos, cm, boxsize=1.0)
out["x"][i], out["y"][i], out["z"][i] = cm
out["lagpatch_size"][i] = numpy.percentile(distances, 99)
# Calculate the number of cells with > 0 density.
delta = overlapper.make_delta(pos, mass, subbox=True)
out["lagpatch_ncells"][i] = csiborgtools.delta2ncells(delta)
# Now save it
fout = paths.initmatch(nsim, simname, "fit")
if verbose:
print(f"{datetime.now()}: dumping fits to .. `{fout}`.", flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def main(nsim):
_main(nsim, args.simname, MPI.COMM_WORLD.Get_size() == 1)
work_delegation(main, nsims, MPI.COMM_WORLD)


@@ -69,7 +69,7 @@ def pair_match_max(nsim0, nsimx, simname, min_logmass, mult, verbose):
         raise ValueError(f"Unknown simulation `{simname}`.")

     reader = csiborgtools.summary.PairOverlap(cat0, catx, paths, min_logmass,
                                               maxdist=maxdist)
     out = csiborgtools.match.matching_max(
         cat0, catx, mass_kind, mult=mult, periodic=periodic,
         overlap=reader.overlap(from_smoothed=True),

@@ -106,54 +106,36 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
     """
     paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
     smooth_kwargs = {"sigma": sigma, "mode": "constant", "cval": 0}
+    bounds = {"lagpatch_size": (0, None)}

     if simname == "csiborg":
         overlapper_kwargs = {"box_size": 2048, "bckg_halfsize": 512}
         mass_kind = "fof_totpartmass"
-        bounds = {"dist": (0, 155), mass_kind: (10**min_logmass, None)}
-        cat0 = csiborgtools.read.CSiBORGHaloCatalogue(
-            nsim0, paths, bounds=bounds, load_fitted=False,
-            with_lagpatch=True)
-        catx = csiborgtools.read.CSiBORGHaloCatalogue(
-            nsimx, paths, bounds=bounds, load_fitted=False,
-            with_lagpatch=True)
+        bounds |= {"dist": (0, 155), mass_kind: (10**min_logmass, None)}
+        cat0 = csiborgtools.read.CSiBORGCatalogue(
+            nsim0, paths, "halo_catalogue", "FOF", mass_kind, bounds)
+        catx = csiborgtools.read.CSiBORGCatalogue(
+            nsimx, paths, "halo_catalogue", "FOF", mass_kind, bounds)
     elif simname == "quijote":
         overlapper_kwargs = {"box_size": 512, "bckg_halfsize": 256}
         mass_kind = "group_mass"
-        bounds = {mass_kind: (10**min_logmass, None)}
-        cat0 = csiborgtools.read.QuijoteHaloCatalogue(
-            nsim0, paths, 4, bounds=bounds, load_fitted=False,
-            with_lagpatch=True)
-        catx = csiborgtools.read.QuijoteHaloCatalogue(
-            nsimx, paths, 4, bounds=bounds, load_fitted=False,
-            with_lagpatch=True)
+        bounds |= {mass_kind: (10**min_logmass, None)}
+        cat0 = csiborgtools.read.QuijoteCatalogue(
+            nsim0, paths, "halo_catalogue", "FOF", mass_kind, bounds=bounds)
+        catx = csiborgtools.read.QuijoteCatalogue(
+            nsimx, paths, "halo_catalogue", "FOF", mass_kind, bounds=bounds)
     else:
         raise ValueError(f"Unknown simulation name: `{simname}`.")

-    halomap0 = csiborgtools.read.read_h5(
-        paths.particles(nsim0, simname))["halomap"]
-    parts0 = csiborgtools.read.read_h5(
-        paths.initmatch(nsim0, simname, "particles"))["particles"]
-    hid2map0 = {hid: i for i, hid in enumerate(halomap0[:, 0])}
-    halomapx = csiborgtools.read.read_h5(
-        paths.particles(nsimx, simname))["halomap"]
-    partsx = csiborgtools.read.read_h5(
-        paths.initmatch(nsimx, simname, "particles"))["particles"]
-    hid2mapx = {hid: i for i, hid in enumerate(halomapx[:, 0])}

     overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs)
-    delta_bckg = overlapper.make_bckg_delta(parts0, halomap0, hid2map0, cat0,
-                                            verbose=verbose)
-    delta_bckg = overlapper.make_bckg_delta(partsx, halomapx, hid2mapx, catx,
-                                            delta=delta_bckg, verbose=verbose)
+    delta_bckg = overlapper.make_bckg_delta(cat0, verbose=verbose)
+    delta_bckg = overlapper.make_bckg_delta(catx, delta=delta_bckg,
+                                            verbose=verbose)

-    matcher = csiborgtools.match.RealisationsMatcher(
-        mass_kind=mass_kind, **overlapper_kwargs)
-    match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx,
-                                             halomap0, halomapx, delta_bckg,
-                                             verbose=verbose)
+    matcher = csiborgtools.match.RealisationsMatcher(mass_kind=mass_kind,
+                                                     **overlapper_kwargs)
+    match_indxs, ngp_overlap = matcher.cross(cat0, catx, delta_bckg,
+                                             verbose=verbose)

     # We want to store the halo IDs of the matches, not their array positions

@@ -177,8 +159,7 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
     gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)

     # We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
-    smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
-                                              halomap0, halomapx, delta_bckg,
+    smoothed_overlap = matcher.smoothed_cross(cat0, catx, delta_bckg,
                                               match_indxs, smooth_kwargs,
                                               verbose=verbose)


@@ -0,0 +1,979 @@
# Copyright (C) 2023 Mladen Ivkovic, Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import copy
import os
from os.path import exists, join
from os import makedirs
from sys import argv
from datetime import datetime
import numpy as np
from joblib import dump, load
from tqdm import trange
errmsg = """
------------------------------------
mergertree-extract.py
------------------------------------
---------------
Usage
---------------
This script extracts the masses of clumps and haloes written by the mergertree
patch.
It needs output_XXXXX/mergertree_XXXXX.txtYYYYY and
output_XXXXX/clump_XXXXX.txtYYYYY files to work.
You need to run it from the directory in which the output_XXXXX directories
are located.
There are three working modes defined:
1) do for one clump only.
You need to provide the clump ID you want it done for.
You can provide a starting directory, but by default the script will
search for the directory where z = 0.
run with `python3 mergertree-extract.py <clumpid> [--options] `
this creates the file mergertree_XXXXX_halo-<halo-ID>.txt. Its contents are
discussed below.
2) do for one halo.
You need to provide the halo ID you want it done for, and the flag
-c or --children.
The script will by itself find all the child clumps and walk through
their main branches as well, and write them down.
run with `python3 mergertree-extract.py <haloid> -c [--options]`
or `python3 mergertree-extract.py <haloid> --children [--options]`
this creates the following files:
- halo_hierarchy_XXXXX-<halo-ID>.txt
contains the halo ID, how many children it has, and the children
IDs
- mergertree_XXXXX_halo-<halo-ID>.txt
mergertree data for halo that you chose.
- mergertree_XXXXX_subhalo-<child-ID>.txt
mergertree data for subhalos of the halo you chose. One file will
be created for each subhalo.
The contents of the mergertree_XXXXX* files are discussed below.
3) do for all haloes
The script will simply walk all haloes in the z = 0 directory. Note:
Haloes, not clumps!
run with `python3 mergertree-extract.py -a [--options]`
or `python3 mergertree-extract.py --all [--options]`
This will create the same type of files as in mode (2), just for all
haloes.
If only an integer is given as cmdline arg, mode (1) [one clump only] will be
run. If no cmd line argument is given, mode (3) [--all] will be run.
---------------
Output
---------------
the mergertree_XXXXX* files have 6 columns:
snapshot The snapshot from which this data is taken from
redshift The redshift of that snapshot
clump_ID The clump ID of the clump at that snapshot
mass The mass of the clump at that snapshot, based on what's in
the output_XXXXX/mergertree_XXXXX.txtYYYYY files, not the
output_XXXXX/clump_XXXXX.txtYYYYY files.
mass_from_mergers how much mass has been merged into this clump in this
snapshot, i.e. the sum of all the clump masses that have
been found to merge with this clump at this snapshot. This
does not include the mass of clumps which only seem to
merge with this clump, but re-emerge later.
mass_from_jumpers The mass of all clumps that seem to merge with this clump,
but re-emerge at a later time.
----------------
Options
----------------
List of all flags:
Running modes
-a, --all: make trees for all clumps in output where z = 0
-c --children: make trees for a halo and all its subhaloes. You need to
specify which halo via its halo ID.
-h, --help: print this help and exit.
Options:
--start-at=INT don't start at z = 0 snapshot, but with the specified
directory output_00INT.
--prefix=some/path/ path where you want your output written to.
-v, --verbose: be more verbose about what you're doing
-----------------
Requirements
-----------------
It needs output_XXXXX/mergertree_XXXXX.txtYYYYY and
output_XXXXX/clump_XXXXX.txtYYYYY files to work, which are created using the
mergertree patch in ramses.
Also needs numpy.
"""
###############################################################################
# Clump data #
###############################################################################
class ClumpData:
"""
Data from clump_XXXXX.txt
Parameters
----------
par : params object
"""
def __init__(self, par):
self.clumpids = np.zeros(1) # clump ID
self.parent = np.zeros(1) # parent ID
self.level = np.zeros(1) # clump level
def read_clumpdata(self, par):
"""Reads in the clump data for the z = 0 directory."""
if par.verbose:
print("Reading clump data.")
out = par.z0
raw_data = [None for i in range(par.ncpu)]
dirnrstr = str(par.outputnrs[out]).zfill(5)
dirname = 'output_' + dirnrstr
i = 0
for cpu in range(1):
fname = join(par.workdir, dirname, 'clump_' + dirnrstr + '.dat')
new_data = np.loadtxt(fname, dtype='int', skiprows=1,
usecols=[0, 1, 2])
if new_data.ndim == 2:
raw_data[i] = new_data
i += 1
elif new_data.shape[0] == 3: # if only 1 row is present in file
raw_data[i] = np.atleast_2d(new_data)
i += 1
fulldata = np.concatenate(raw_data[:i], axis=0)
self.clumpids = fulldata[:, 0]
self.level = fulldata[:, 1]
self.parent = fulldata[:, 2]
def cleanup_clumpdata(self, par, mtd):
"""
The particle unbinding can remove entire clumps from the catalogue.
If the corresponding option isn't set in the namelist, the clumpfinder
output is still written without the unbinding applied. In that case the
clumpfinder catalogue can contain clumps which the mergertree data
doesn't have, leading to problems, so remove those here.
"""
for i, c in enumerate(self.clumpids):
if c not in mtd.descendants[par.z0]:
self.clumpids[i] = 0
self.level[i] = 0
self.parent[i] = -1 # don't make it the same as clumpid
def find_children(self, clumpid):
"""Find the children for given clump ID."""
children = []
last_added = [clumpid]
loopcounter = 0
while True:
loopcounter += 1
this_level_parents = copy.copy(last_added)
children += this_level_parents
last_added = []
for i, cid in enumerate(self.clumpids):
if self.parent[i] in this_level_parents and cid != clumpid:
last_added.append(cid)
if len(last_added) == 0:
break
if loopcounter == 100:
print("Finished 100 iterations, we shouldn't be this deep")
break
return children[1:] # don't return top level parent
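A minimal illustration of the parent walk above, with made-up IDs (clump 1 is the halo; clumps 2 and 3 sit in it; clump 4 sits in clump 2):

    cd = ClumpData(par)  # `par` set up as in the main script
    cd.clumpids = np.array([1, 2, 3, 4])
    cd.parent = np.array([1, 1, 1, 2])
    cd.level = np.array([0, 1, 1, 2])
    print(cd.find_children(1))  # -> [2, 3, 4]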
def write_children(self, par, clumpid, children):
"""Write the children to file."""
hfile = join(par.outdir, f"{par.halofilename}-{str(clumpid)}.txt")
with open(hfile, 'w') as f:
f.write("# {0:>18} {1:>18} {2:>18}\n".format("halo", "nr_of_children", "children")) # noqa
nc = len(children)
dumpstring = " {0:18d} {1:18d}".format(clumpid, nc)
dumpstring = "".join([dumpstring] + [" {0:18d}".format(c) for c in children] + ['\n']) # noqa
f.write(dumpstring)
###############################################################################
# Constants object #
###############################################################################
class Constants:
"""
Class holding constants.
"""
def __init__(self):
self.Mpc = 3.086e24 # cm
self.M_Sol = 1.98855e33 # g
self.Gyr = (24 * 3600 * 365 * 1e9) # s
self.G = 4.492e-15 # Mpc^3/(M_sol Gyr^2)
self.H0 = 100 # km/s/Mpc
self.omega_m = 0.307000011205673
self.omega_l = 0.693000018596649
self.omega_k = 0.0
self.omega_b = 0.0
###############################################################################
# Params object #
###############################################################################
class Params:
"""
Global parameters to be stored
"""
def __init__(self):
# self.workdir = f"/mnt/extraspace/hdesmond/ramses_out_{self.nsim}"
# self.outdir = f"/mnt/extraspace/rstiskalek/CSiBORG/cleaned_mtree/ramses_out_{self.nsim}" # noqa
# if not exists(self.outdir):
# makedirs(self.outdir)
self.lastdir = "" # last output_XXXXX directory
self.lastdirnr = -1 # XXXX from lastdir
self.ncpu = 1 # Number of CPUs used
self.noutput = 1 # how many output_XXXXX dirs exist
self.nout = 1 # how many outputs we're gonna deal with. (Some might not have merger tree data) # noqa
self.outputnrs = None # numpy array of output numbers
self.output_lowest = 0 # lowest snapshot number that we're dealing with (>= 1) # noqa
self.z0 = 0 # index of z=0 snapshot (or whichever you want to start with) # noqa
# NOTE: params.nout will be defined such that you can easily loop
self.verbose = False # verbosity
self.start_at = 0 # output dir to start with, if given
self.output_prefix = "" # user given prefix for output files
self.outputfilename = "" # output filename. Stores prefix/mergertree_XXXXX part of name only # noqa
self.halofilename = "" # output filename for halo hierarchy. Stores prefix/halo_hierarchy_XXXXX part of filename only # noqa
self.one_halo_only = False # do the tree for one clump only
self.halo_and_children = False # do the tree for one halo, including subhaloes # noqa
self.do_all = False # do for all clumps at z=0 output
self.clumpid = 0 # which clump ID to work for.
self.nsim = None
# Dictionary of accepted keyword command line arguments
self.accepted_flags = {
'-a': self.set_do_all,
'--all': self.set_do_all,
'-r': self.set_halo_and_children,
'--recursive': self.set_halo_and_children,
'-c': self.set_halo_and_children,
'--children': self.set_halo_and_children,
'-h': self.get_help,
'--help': self.get_help,
'-v': self.set_verbose,
'--verbose': self.set_verbose,
}
self.accepted_flags_with_args = {
"--nsim": self.set_nsim,
'--start-at': self.set_startnr,
'--prefix': self.set_prefix,
}
# -----------------------------
# Setter methods
# -----------------------------
def set_do_all(self):
self.do_all = True
return
def set_halo_and_children(self):
self.halo_and_children = True
return
def get_help(self):
print(errmsg)
quit()
return
def set_verbose(self):
self.verbose = True
return
def set_startnr(self, arg):
flag, startnr = arg.split("=")
try:
self.start_at = int(startnr)
except ValueError:
print("given value for --start-at=INT isn't an integer?")
def set_prefix(self, arg):
flag, prefix = arg.split("=")
# try:
self.output_prefix = prefix
try:
os.makedirs(self.output_prefix)
except FileExistsError:
pass
return
def set_nsim(self, arg):
flag, nsim = arg.split("=")
try:
self.nsim = int(nsim)
except ValueError:
print("given value for --nsim=INT isn't an integer?")
def read_cmdlineargs(self):
"""
Reads in the command line arguments and store them in the
global_params object.
"""
nargs = len(argv)
i = 1 # first cmdlinearg is filename of this file, so skip it
while i < nargs:
arg = argv[i]
arg = arg.strip()
if arg in self.accepted_flags.keys():
self.accepted_flags[arg]()
else:
for key in self.accepted_flags_with_args.keys():
if arg.startswith(key):
self.accepted_flags_with_args[key](arg)
break
else:
try:
self.clumpid = int(arg)
except ValueError:
print(f"I didn't recognize the argument '{arg}'. Use "
"mergertre-extract.py -h or --help to print "
"help message.")
quit()
i += 1
if self.nsim is None:
raise ValueError("nsim not set. Use --nsim=INT to set it.")
@property
def workdir(self):
return f"/mnt/extraspace/hdesmond/ramses_out_{self.nsim}"
@property
def outdir(self):
fname = f"/mnt/extraspace/rstiskalek/CSiBORG/cleaned_mtree/ramses_out_{self.nsim}" # noqa
if not exists(fname):
makedirs(fname)
return fname
def get_output_info(self):
"""
Read in the output info based on the files in `self.workdir`.
Determines the last output directory, ncpu, and noutput; doesn't read
the infofiles.
"""
filelist = os.listdir(self.workdir)
outputlist = []
for filename in filelist:
if filename.startswith('output_'):
outputlist.append(filename)
if len(outputlist) < 1:
print("I didn't find any output_XXXXX directories in current "
"working directory. Are you in the correct workdir? "
"Use mergertree-extract.py -h or --help to print help "
"message.")
quit()
outputlist.sort()
self.lastdir = outputlist[-1]
self.lastdirnr = int(self.lastdir[-5:])
self.noutput = len(outputlist)
if (self.start_at > 0):
# check that directory exists
startnrstr = str(self.start_at).zfill(5)
if 'output_' + startnrstr not in outputlist:
print("Didn't find specified starting directory "
f"output_{startnrstr} use mergertree-extract.py -h or "
"--help to print help message.")
quit()
# read ncpu from infofile in last output directory
infofile = join(self.workdir, self.lastdir,
f"info_{self.lastdir[-5:]}.txt")
with open(infofile, 'r') as f:
ncpuline = f.readline()
line = ncpuline.split()
self.ncpu = int(line[-1])
def setup_and_checks(self, sd):
"""
Perform checks and additional setup once all the command line
arguments and output info are available.
Parameters
----------
sd : snapshotdata object
"""
# set running mode
if not self.do_all:
if self.clumpid <= 0:
print("No or wrong clump id given. Setting the --all mode.")
self.set_do_all()
else:
if not self.halo_and_children:
self.one_halo_only = True
# generate list of outputdirnumbers
startnr = self.lastdirnr
self.outputnrs = np.arange(startnr, startnr - self.noutput, -1)
# find starting output directory
self.z0 = np.argmin(np.absolute(sd.redshift))
if self.start_at > 0:
# replace z0 dir with starting dir
self.z0 = self.lastdirnr - self.start_at
# generate output filename
dirnrstr = str(self.outputnrs[self.z0]).zfill(5)
fname = "mergertree_" + dirnrstr
self.outputfilename = join(self.output_prefix, fname)
# generate halo output filename
fname = "halo_hierarchy_" + dirnrstr
self.halofilename = join(self.output_prefix, fname)
# rename output_prefix to something if it wasn't set
if self.output_prefix == "":
self.output_prefix = os.path.relpath(self.workdir)
# find self.nout; i.e. how many outputs we are actually going to have
for out in range(self.noutput - 1, -1, -1):
dirnrstr = str(self.outputnrs[out]).zfill(5)
mtreefile = join(self.workdir,
f"output_{dirnrstr}",
f"mergertree_{dirnrstr}.dat")
if os.path.exists(mtreefile):
print("Loading mergertree data from ", mtreefile)
# if there is a file, this is lowest snapshot number directory
# that we'll be dealing with, and hence will have the highest
# index number in the arrays I'm using
# NOTE: params.nout will be defined such that you can easily
# loop for out in range(p.z0, p.nout)
self.nout = out + 1
break
def print_params(self):
"""Prints out the parameters that are set."""
if self.do_all:
print("Working mode: all clumps")
else:
if self.halo_and_children:
print("Working mode: halo", self.clumpid, "and its children") # noqa
else:
print("Working mode: clump ", self.clumpid)
print("workdir: ", self.workdir)
print("snapshot of tree root: ", self.outputnrs[self.z0])
print("p.one_halo_only ", p.one_halo_only)
print("p.do_all ", p.do_all)
print("p.halo_and_children ", p.halo_and_children)
print("p.one_halo_only ", p.one_halo_only)
###############################################################################
# Merger tree data #
###############################################################################
class MTreeData:
"""
Merger tree data lists
Parameters
----------
par : params object
"""
def __init__(self, par):
self.progenitors = [np.zeros(1) for i in range(par.noutput)] # progenitor IDs # noqa
self.descendants = [np.zeros(1) for i in range(par.noutput)] # descendant IDs # noqa
self.progenitor_outputnrs = [np.zeros(1) for i in range(par.noutput)] # snapshot number of progenitor # noqa
self.mass = [np.zeros(1) for i in range(par.noutput)] # descendant mass # noqa
self.mass_to_remove = [np.zeros(1) for i in range(par.noutput)]  # mass to be removed from descendant due to jumpers  # noqa
def read_mergertree_data(self, par, sd):
"""Reads in mergertree data."""
if par.verbose:
print("Reading in mergertree data")
# Preparation
# define new datatype for mergertree output
mtree = np.dtype([('clump', 'i4'),
('prog', 'i4'),
('prog_outnr', 'i4'),
('mass', 'f8'),
('npart', 'f8'),
('x', 'f8'),
('y', 'f8'),
('z', 'f8'),
('vx', 'f8'),
('vy', 'f8'),
('vz', 'f8')
])
# ---------------------------
# Loop over directories
# ---------------------------
startnr = par.lastdirnr
# READ THE ONES BEFORE z0 TOO!
for output in trange(par.nout, desc="Reading merger"):
dirnr = str(startnr - output).zfill(5)
srcdir = 'output_' + dirnr
fnames = [join(par.workdir, srcdir, f"mergertree_{dirnr}.dat")]
datalist = [np.zeros((1, 3)) for i in range(par.ncpu)]
i = 0
nofile = 0
for f in fnames:
if os.path.exists(f):
datalist[i] = np.atleast_1d(np.genfromtxt(f, dtype=mtree,
skip_header=1))
i += 1
else:
nofile += 1
if nofile == par.ncpu:
print("Didn't find any mergertree data in", srcdir)
# ---------------------------------
# Sort out data
# ---------------------------------
if i > 0:
fulldata = np.concatenate(datalist[:i], axis=0)
self.descendants[output] = fulldata[:]['clump']
self.progenitors[output] = fulldata[:]['prog']
self.progenitor_outputnrs[output] = fulldata[:]['prog_outnr']
self.mass[output] = fulldata[:]['mass']
# --------------------------------------
# Transform units to physical units
# --------------------------------------
for i in range(len(self.descendants)):
self.mass[i] *= sd.unit_m[i]
# NOTE: positions and velocities are not transformed here; check for
# periodicity first if they are ever needed.
def clean_up_jumpers(self, par):
"""
Remove jumpers from the merger list. Take note of how much mass should
be removed from the descendant because the jumper is to be removed.
"""
# First initialize mass_to_remove arrays
self.mass_to_remove = [np.zeros(self.descendants[out].shape)
for out in range(par.noutput)]
nreplaced = 0
for out in trange(par.nout + par.z0 - 1, desc="Cleaning jumpers"):
for i, pr in enumerate(self.progenitors[out]):
if pr < 0:
# Subtract 1 here from snapind:
# progenitor_outputnrs gives the snapshot number where the
# jumper was a descendant for the last time
# so you need to overwrite the merging one snapshot later,
# where the clump is the progenitor
snapind = get_snap_ind(par, self.progenitor_outputnrs[out][i]) - 1  # noqa
# NOTE bottleneck
jumpind = self.progenitors[snapind] == -pr
# NOTE bottleneck
# find index of descendant into which this clump will
# appearingly merge into
mergerind = self.descendants[snapind] == - self.descendants[snapind][jumpind] # noqa
# overwrite merging event so it won't count
self.descendants[snapind][jumpind] = 0
# find mass of jumper in previous snapshot
jumpmassind = self.descendants[snapind + 1] == -pr
# note how much mass might need to be removed for whatever
# you need it
self.mass_to_remove[snapind][mergerind] += self.mass[snapind + 1][jumpmassind] # noqa
nreplaced += 1
print("Cleaned out", nreplaced, "jumpers")
def get_tree(self, par, tree, sd, clumpid):
"""Follow the main branch down."""
if par.verbose:
print("Computing tree for clump", clumpid)
dind = self.descendants[par.z0] == clumpid
desc_snap_ind = par.z0
desc = self.descendants[par.z0][dind]
prog = self.progenitors[par.z0][dind]
def get_prog_indices(prog, desc_snap_ind):
"""
Compute the snapshot index at which a given progenitor was a
descendant, and its location in that snapshot's arrays.
Parameters
----------
prog : progenitor ID
desc_snap_ind : snapshot index of the progenitor's descendant
Returns
-------
p_snap_ind : snapshot index of the progenitor
pind : boolean mask of the progenitor in the array where it is a
descendant
"""
if prog > 0: # if progenitor isn't jumper
# find progenitor's index in previous snapshot
p_snap_ind = desc_snap_ind + 1
pind = self.descendants[p_snap_ind] == prog
elif prog < 0:
p_snap_ind = get_snap_ind(
par, self.progenitor_outputnrs[desc_snap_ind][dind])
pind = self.descendants[p_snap_ind] == -prog
return p_snap_ind, pind
while True:
# first calculate merger mass
mergers = self.descendants[desc_snap_ind] == -desc
mergermass = 0.0
if mergers.any():
for m in self.progenitors[desc_snap_ind][mergers]:
# find mass of merger. That's been written down at the
# place where merger was descendant.
m_snap_ind, mergerind = get_prog_indices(m, desc_snap_ind)
mergermass += self.mass[m_snap_ind][mergerind]
# add the descendant to the tree
tree.add_snap(par.outputnrs[desc_snap_ind],
sd.redshift[desc_snap_ind], desc,
self.mass[desc_snap_ind][dind], mergermass,
self.mass_to_remove[desc_snap_ind][dind])
# now descend down the main branch
if prog != 0:
p_snap_ind, pind = get_prog_indices(prog, desc_snap_ind)
else:
# stop at progenitor = 0
break
# prepare for next round
desc_snap_ind = p_snap_ind
dind = pind
desc = abs(prog)
prog = self.progenitors[p_snap_ind][pind]
###############################################################################
# Snapshot data #
###############################################################################
class SnapshotData():
"""Snapshot specific data"""
def __init__(self, par):
# read in
self.aexp = np.zeros(par.noutput)
self.unit_l = np.zeros(par.noutput)
self.unit_m = np.zeros(par.noutput)
self.unit_t = np.zeros(par.noutput)
self.unit_dens = np.zeros(par.noutput)
# to be computed
self.redshift = np.zeros(par.noutput) # z
def read_infofiles(self, par, const):
"""Read the info_XXXXX.txt files."""
if par.verbose:
print("Reading info files.")
startnr = par.lastdirnr
for output in range(par.noutput):
# Start with last directory (e.g. output_00060),
# work your way to first directory (e.g. output_00001)
# p.z0 isn't decided yet, so just read in everything here.
dirnr = str(startnr - output).zfill(5)
srcdir = 'output_' + dirnr
try:
# ------------------------------------------------------
# get time, redshift, and units even for output_00001
# ------------------------------------------------------
fileloc = srcdir + '/info_' + dirnr + '.txt'
fileloc = join(par.workdir, fileloc)
infofile = open(fileloc)
for i in range(9):
infofile.readline() # skip first 9 lines
# get expansion factor
aline = infofile.readline()
astring, equal, aval = aline.partition("=")
self.aexp[output] = float(aval)
for i in range(5):
infofile.readline()  # skip 5 lines
# get unit_l
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
self.unit_l[output] = float(unitval)
# get unit_dens
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
self.unit_dens[output] = float(unitval)
# get unit_t
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
self.unit_t[output] = float(unitval)
infofile.close()
except IOError: # If file doesn't exist
print("Didn't find any info data in ", srcdir)
break
self.unit_m = self.unit_dens * self.unit_l ** 3 / const.M_Sol
self.unit_l /= const.Mpc
self.unit_t /= const.Gyr
self.redshift = 1. / self.aexp - 1
###############################################################################
# Tree object #
###############################################################################
class Tree:
"""
Holds tree result data. It's not really a tree, it's just the values along
the main branch, but let's call it a tree anyway. Sue me.
Parameters
----------
nelements : int
Estimate for how many snapshots you need to allocate space for.
"""
def __init__(self, nelements):
self.n = 0 # number of elements in tree # noqa
self.snapshotnr = -np.ones(nelements, dtype=int) # snapshot number of array values # noqa
self.redshift = -np.ones(nelements, dtype=float) # redshift at that snapshot # noqa
self.clumpids = -np.ones(nelements, dtype=int) # clump id of halo in that snapshot # noqa
self.mass = np.zeros(nelements, dtype=float) # mass at that snapshot # noqa
self.mergermass = np.zeros(nelements, dtype=float) # sum of mass of swallowed up clumps # noqa
self.mass_to_remove = np.zeros(nelements, dtype=float)  # sum of mass of jumpers removed from this clump  # noqa
def add_snap(self, nr, z, ID, m, mm, mdel):
"""Add new result."""
n = self.n
self.snapshotnr[n] = nr
self.redshift[n] = z
self.clumpids[n] = ID
self.mass[n] = m
self.mergermass[n] = mm
self.mass_to_remove[n] = mdel
self.n += 1
def write_tree(self, par, case='halo'):
"""Write the results to file."""
resfile = join(
par.outdir,
f"{par.outputfilename}_{case}-{str(self.clumpids[0])}.txt")
with open(resfile, 'w') as f:
f.write('# {0:>12} {1:>12} {2:>16} {3:>18} {4:>18} {5:>18}\n'.format( # noqa
"snapshot", "redshift", "clump_ID", "mass[M_sol]",
"mass_from_mergers", "mass_from_jumpers"))
for i in range(self.n):
f.write(' {0:12d} {1:12.4f} {2:16d} {3:18.6e} {4:18.6e} {5:18.6e}\n'.format( # noqa
self.snapshotnr[i], self.redshift[i], self.clumpids[i],
self.mass[i], self.mergermass[i], self.mass_to_remove[i]))
return
def get_snap_ind(p, snap):
"""
Computes the snapshot index in mtreedata/halodata/snapshotdata arrays for a
given snapshot number snap
"""
return (p.noutput - snap).item()
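# A quick worked example of this convention (assuming the usual case of
# no missing output directories, i.e. lastdirnr == noutput): with
# noutput = 60, outputnrs = [60, 59, ..., 1], so
#
#     get_snap_ind(p, np.int64(60))  # -> 0
#     get_snap_ind(p, np.int64(58))  # -> 2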
if __name__ == '__main__':
p = Params()
c = Constants()
# Read cmdlineargs, available output, get global parameters
p.read_cmdlineargs()
p.get_output_info()
sd = SnapshotData(p)
sd.read_infofiles(p, c)
# finish setup
p.setup_and_checks(sd)
p.print_params()
# now read in mergertree data
fname = join(p.outdir, "mtreedata.p")
if exists(fname):
print(f"{datetime.now()}: loading mergertree data from `{fname}`.",
flush=True)
mtd = load(fname)
print(f"{datetime.now()}: finished loading mergertree data from `{fname}`.", # noqa
flush=True)
else:
print("Generating mergertree data.", flush=True)
mtd = MTreeData(p)
mtd.read_mergertree_data(p, sd)
# clean up jumpers
mtd.clean_up_jumpers(p)
print("Saving mergertree data.", flush=True)
dump(mtd, fname)
# read in clump data if required
if p.do_all or p.halo_and_children:
cd = ClumpData(p)
cd.read_clumpdata(p)
# clean up halo catalogue
cd.cleanup_clumpdata(p, mtd)
# find children, and write them down
if p.verbose:
print("Searching for child clumps.")
if p.halo_and_children:
children = cd.find_children(p.clumpid)
cd.write_children(p, p.clumpid, children)
if p.do_all:
is_halo = cd.clumpids == cd.parent
childlist = [None for c in cd.clumpids[is_halo]]
for i, halo in enumerate(cd.clumpids[is_halo]):
children = cd.find_children(halo)
cd.write_children(p, halo, children)
childlist[i] = children
# finally, get the bloody tree
if p.one_halo_only:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, p.clumpid)
newtree.write_tree(p, 'halo')
if p.halo_and_children:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, p.clumpid)
newtree.write_tree(p, 'halo')
for c in children:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, c)
newtree.write_tree(p, 'subhalo')
if p.do_all:
for i, halo in enumerate(cd.clumpids[is_halo]):
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, halo)
newtree.write_tree(p, 'halo')
for c in childlist[i]:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, c)
newtree.write_tree(p, 'subhalo')
print('Finished.')
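For reference, a typical invocation of this script (the flag names come from `accepted_flags` above; the simulation index and clump ID are illustrative placeholders only):

    python mergertree-extract.py --nsim=7444 --all --verbose

or, for a single halo together with its subhaloes:

    python mergertree-extract.py --nsim=7444 --recursive 1234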

457
scripts/process_snapshot.py Normal file

@ -0,0 +1,457 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to process simulation files and create a single HDF5 file, in which
particles are sorted by the particle halo IDs.
"""
from argparse import ArgumentParser
from gc import collect
import h5py
import numpy
from mpi4py import MPI
import csiborgtools
from csiborgtools import fprint
from numba import jit
from taskmaster import work_delegation
from tqdm import trange, tqdm
from utils import get_nsims
@jit(nopython=True, boundscheck=False)
def minmax_halo(hid, halo_ids, start_loop=0):
"""
Find the start and end index of a halo in a sorted array of halo IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, halo_ids.size):
n = halo_ids[i]
if n == hid:
if start is None:
start = i
end = i
elif n > hid:
break
return start, end
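# A minimal usage sketch with toy data (not from any simulation): the
# `halo_ids` array must be sorted, and `start_loop` lets consecutive
# queries resume where the previous halo ended.
#
#     ids = numpy.array([1, 1, 2, 2, 2, 5])
#     minmax_halo(2, ids)                # -> (2, 4)
#     minmax_halo(5, ids, start_loop=4)  # -> (5, 5)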
def process_snapshot(nsim, simname, halo_finder, verbose):
"""
Read in the snapshot particles, sort them by their halo ID and dump
them into a HDF5 file. Stores the first and last index of each halo in
the particle array for fast slicing of the array to access particles
of a single halo.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, simname))
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
box = None
desc = {"hid": f"Halo finder ID ({halo_finder})of the particle.",
"pos": "DM particle positions in box units.",
"vel": "DM particle velocity in km / s.",
"mass": "DM particle mass in Msun / h.",
"pid": "DM particle ID",
}
fname = paths.processed_output(nsim, simname, halo_finder)
fprint(f"loading HIDs of IC {nsim}.", verbose)
hids = partreader.read_halo_id(nsnap, nsim, halo_finder, verbose)
collect()
fprint(f"sorting HIDs of IC {nsim}.")
sort_indxs = numpy.argsort(hids)
with h5py.File(fname, "w") as f:
group = f.create_group("snapshot_final")
group.attrs["header"] = "Snapshot data at z = 0."
fprint("dumping halo IDs.", verbose)
dset = group.create_dataset("halo_ids", data=hids[sort_indxs])
dset.attrs["header"] = desc["hid"]
del hids
collect()
fprint("reading, sorting and dumping the snapshot particles.", verbose)
for kind in ["pos", "vel", "mass", "pid"]:
x = partreader.read_snapshot(nsnap, nsim, kind)[sort_indxs]
if simname == "csiborg" and kind == "vel":
x = box.box2vel(x) if simname == "csiborg" else x
if simname == "csiborg" and kind == "mass":
x = box.box2solarmass(x) if simname == "csiborg" else x
dset = f["snapshot_final"].create_dataset(kind, data=x)
dset.attrs["header"] = desc[kind]
del x
collect()
del sort_indxs
collect()
fprint(f"creating a halo map for IC {nsim}.")
with h5py.File(fname, "r") as f:
part_hids = f["snapshot_final"]["halo_ids"][:]
# We loop over the unique halo IDs and remove the 0 halo ID
unique_halo_ids = numpy.unique(part_hids)
unique_halo_ids = unique_halo_ids[unique_halo_ids != 0]
# Fill with zeros, not NaN: NaN cannot be represented in uint64 and
# every row is overwritten in the loop below anyway.
halo_map = numpy.full((unique_halo_ids.size, 3), 0,
dtype=numpy.uint64)
start_loop, niters = 0, unique_halo_ids.size
for i in trange(niters, disable=not verbose):
hid = unique_halo_ids[i]
k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
halo_map[i, :] = hid, k0, kf
start_loop = kf
# Dump the halo mapping.
with h5py.File(fname, "r+") as f:
dset = f["snapshot_final"].create_dataset("halo_map", data=halo_map)
dset.attrs["header"] = """
Halo to particle mapping. Columns are HID, start index, end index.
"""
del part_hids
collect()
# Add the halo finder catalogue
with h5py.File(fname, "r+") as f:
group = f.create_group("halofinder_catalogue")
group.attrs["header"] = f"Original {halo_finder} halo catalogue."
cat = partreader.read_catalogue(nsnap, nsim, halo_finder)
hid2pos = {hid: i for i, hid in enumerate(unique_halo_ids)}
for key in cat.dtype.names:
x = numpy.full(unique_halo_ids.size, numpy.nan,
dtype=cat[key].dtype)
for i in range(len(cat)):
j = hid2pos[cat["index"][i]]
x[j] = cat[key][i]
group.create_dataset(key, data=x)
# Lastly create the halo catalogue
with h5py.File(fname, "r+") as f:
group = f.create_group("halo_catalogue")
group.attrs["header"] = f"{halo_finder} halo catalogue."
group.create_dataset("index", data=unique_halo_ids)
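# The `halo_map` dataset written above makes per-halo access a simple
# slice. A minimal sketch of reading the positions of one halo from the
# processed file (file path and halo ID are placeholder values):
#
#     import h5py
#     import numpy
#
#     hid = 123  # placeholder halo ID
#     with h5py.File("processed_output.hdf5", "r") as f:
#         halo_map = f["snapshot_final/halo_map"][:]
#         i = numpy.searchsorted(halo_map[:, 0], hid)  # rows sorted by HID
#         k0, kf = int(halo_map[i, 1]), int(halo_map[i, 2])
#         pos = f["snapshot_final/pos"][k0:kf + 1]  # end index is inclusive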
def add_initial_snapshot(nsim, simname, halo_finder, verbose):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fname = paths.processed_output(nsim, simname, halo_finder)
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
fprint(f"processing simulation `{nsim}`.", verbose)
if simname == "csiborg":
nsnap0 = 1
elif simname == "quijote":
nsnap0 = -1
else:
raise ValueError(f"Unknown simulation `{simname}`.")
fprint("loading and sorting the initial PID.", verbose)
sort_indxs = numpy.argsort(partreader.read_snapshot(nsnap0, nsim, "pid"))
fprint("loading the final particles.", verbose)
with h5py.File(fname, "r") as f:
sort_indxs_final = f["snapshot_final/pid"][:]
fprint("sorting the particles according to the final snapshot.", verbose)
sort_indxs_final = numpy.argsort(numpy.argsort(sort_indxs_final))
sort_indxs = sort_indxs[sort_indxs_final]
del sort_indxs_final
collect()
fprint("loading and sorting the initial particle position.", verbose)
pos = partreader.read_snapshot(nsnap0, nsim, "pos")[sort_indxs]
del sort_indxs
collect()
# In Quijote some particles are positioned precisely at the edge of
# the box. Move them to be just inside.
if simname == "quijote":
mask = pos >= 1
if numpy.any(mask):
spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-5
pos[mask] -= spacing
fprint(f"dumping particles for `{nsim}` to `{fname}`.", verbose)
with h5py.File(fname, "r+") as f:
if "snapshot_initial" in f.keys():
del f["snapshot_initial"]
group = f.create_group("snapshot_initial")
group.attrs["header"] = "Initial snapshot data."
dset = group.create_dataset("pos", data=pos)
dset.attrs["header"] = "DM particle positions in box units."
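# The double `argsort` above computes, for every particle, its rank in
# the final-snapshot PID ordering; indexing the initially-sorted array by
# those ranks aligns initial particles with final ones. A toy check of
# that identity:
#
#     pid_final = numpy.array([42, 7, 19])     # final-snapshot PID order
#     pid_initial = numpy.array([19, 42, 7])   # initial-snapshot PID order
#     ranks = numpy.argsort(numpy.argsort(pid_final))
#     aligned = pid_initial[numpy.argsort(pid_initial)][ranks]
#     assert (aligned == pid_final).all()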
def calculate_initial(nsim, simname, halo_finder, verbose):
"""Calculate the Lagrangian patch centre of mass and size."""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fname = paths.processed_output(nsim, simname, halo_finder)
fprint("loading the particle information.", verbose)
f = h5py.File(fname, "r")
pos = f["snapshot_initial/pos"]
mass = f["snapshot_final/mass"]
hid = f["halo_catalogue/index"][:]
hid2map = csiborgtools.read.make_halomap_dict(
f["snapshot_final/halo_map"][:])
if simname == "csiborg":
kwargs = {"box_size": 2048, "bckg_halfsize": 512}
else:
kwargs = {"box_size": 512, "bckg_halfsize": 256}
overlapper = csiborgtools.match.ParticleOverlap(**kwargs)
lagpatch_pos = numpy.full((len(hid), 3), numpy.nan, dtype=numpy.float32)
lagpatch_size = numpy.full(len(hid), numpy.nan, dtype=numpy.float32)
lagpatch_ncells = numpy.full(len(hid), numpy.nan, dtype=numpy.int32)
for i in trange(len(hid), disable=not verbose):
h = hid[i]
# These are unassigned particles.
if h == 0:
continue
parts_pos = csiborgtools.read.load_halo_particles(h, pos, hid2map)
parts_mass = csiborgtools.read.load_halo_particles(h, mass, hid2map)
# Skip if the halo has no particles or is too small.
if parts_pos is None or parts_pos.size < 5:
continue
cm = csiborgtools.center_of_mass(parts_pos, parts_mass, boxsize=1.0)
sep = csiborgtools.periodic_distance(parts_pos, cm, boxsize=1.0)
delta = overlapper.make_delta(parts_pos, parts_mass, subbox=True)
lagpatch_pos[i] = cm
lagpatch_size[i] = numpy.percentile(sep, 99)
lagpatch_ncells[i] = csiborgtools.delta2ncells(delta)
f.close()
collect()
with h5py.File(fname, "r+") as f:
grp = f["halo_catalogue"]
dset = grp.create_dataset("lagpatch_pos", data=lagpatch_pos)
dset.attrs["header"] = "Lagrangian patch centre of mass in box units."
dset = grp.create_dataset("lagpatch_size", data=lagpatch_size)
dset.attrs["header"] = "Lagrangian patch size in box units."
dset = grp.create_dataset("lagpatch_ncells", data=lagpatch_ncells)
dset.attrs["header"] = f"Lagrangian patch number of cells on a {kwargs['box_size']}^3 grid." # noqa
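# For reference, one common way to compute a mass-weighted centre of mass
# with periodic boundaries maps each coordinate onto a circle (this is
# assumed to be roughly what `csiborgtools.center_of_mass` does; the
# actual implementation may differ):
#
#     def periodic_center_of_mass(pos, mass, boxsize=1.0):
#         theta = 2 * numpy.pi * pos / boxsize
#         x = numpy.average(numpy.cos(theta), axis=0, weights=mass)
#         y = numpy.average(numpy.sin(theta), axis=0, weights=mass)
#         cm = numpy.arctan2(y, x) / (2 * numpy.pi) * boxsize
#         return cm % boxsize  # map back to [0, boxsize)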
def make_phew_halo_catalogue(nsim, verbose):
"""
Process the PHEW halo catalogue for a CSiBORG simulation at all snapshots.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
snapshots = paths.get_snapshots(nsim, "csiborg")
reader = csiborgtools.read.CSiBORGReader(paths)
keys_write = ["index", "x", "y", "z", "mass_cl", "parent",
"ultimate_parent", "summed_mass"]
# Create a HDF5 file to store all this.
fname = paths.processed_phew(nsim)
with h5py.File(fname, "w") as f:
f.close()
for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):
try:
data = reader.read_phew_clumps(nsnap, nsim, verbose=False)
except (FileNotFoundError, FileExistsError):  # snapshot lacks clump files
continue
with h5py.File(fname, "r+") as f:
if str(nsnap) in f:
print(f"Group {nsnap} already exists. Deleting.", flush=True)
del f[str(nsnap)]
grp = f.create_group(str(nsnap))
for key in keys_write:
grp.create_dataset(key, data=data[key])
grp.attrs["header"] = f"CSiBORG PHEW clumps at snapshot {nsnap}."
# Now write the redshifts
scale_factors = numpy.full(len(snapshots), numpy.nan, dtype=numpy.float32)
for i, nsnap in enumerate(snapshots):
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
scale_factors[i] = box._aexp
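# Redshift relative to the final snapshot, so that z = 0 there even if
# the final scale factor is not exactly unity.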
redshifts = scale_factors[-1] / scale_factors - 1
with h5py.File(fname, "r+") as f:
grp = f.create_group("info")
grp.create_dataset("redshift", data=redshifts)
grp.create_dataset("snapshots", data=snapshots)
grp.create_dataset("Om0", data=[box.Om0])
grp.create_dataset("boxsize", data=[box.boxsize])
def make_merger_tree_file(nsim, verbose):
"""
Process the `.dat` merger tree files and dump them into a HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
reader = csiborgtools.read.CSiBORGReader(paths)
snaps = paths.get_snapshots(nsim, "csiborg")
fname = paths.processed_merger_tree(nsim)
with h5py.File(fname, "w") as f:
f.close()
for nsnap in tqdm(snaps, desc="Loading merger files",
disable=not verbose):
try:
data = reader.read_merger_tree(nsnap, nsim)
except (FileNotFoundError, FileExistsError):  # snapshot lacks merger tree files
continue
with h5py.File(fname, "r+") as f:
grp = f.create_group(str(nsnap))
grp.create_dataset("clump",
data=data[:, 0].astype(numpy.int32))
grp.create_dataset("progenitor",
data=data[:, 1].astype(numpy.int32))
grp.create_dataset("progenitor_outputnr",
data=data[:, 2].astype(numpy.int32))
grp.create_dataset("desc_mass",
data=data[:, 3].astype(numpy.float32))
grp.create_dataset("desc_npart",
data=data[:, 4].astype(numpy.int32))
grp.create_dataset("desc_pos",
data=data[:, 5:8].astype(numpy.float32))
grp.create_dataset("desc_vel",
data=data[:, 8:11].astype(numpy.float32))
def append_merger_tree_mass_to_phew_catalogue(nsim, verbose):
"""
Append the mass of haloes from the merger tree files to the PHEW
catalogue. The difference from the PHEW value is that the latter is
written before unbinding is performed.
Note that currently this is done only for the highest snapshots.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
snapshots = paths.get_snapshots(nsim, "csiborg")
merger_reader = csiborgtools.read.MergerReader(nsim, paths)
for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):
# TODO do this for all later
if nsnap < 930:
continue
try:
phewcat = csiborgtools.read.CSiBORGPHEWCatalogue(nsnap, nsim,
paths)
except ValueError:
# `phewcat` is never assigned if the constructor fails, so just skip.
continue
mergertree_mass = merger_reader.match_mass_to_phewcat(phewcat)
phewcat.close()
fname = paths.processed_phew(nsim)
with h5py.File(fname, "r+") as f:
grp = f[str(nsnap)]
grp.create_dataset("mergertree_mass_new", data=mergertree_mass)
def main(nsim, args):
if args.make_final:
process_snapshot(nsim, args.simname, args.halofinder, True)
if args.make_initial:
add_initial_snapshot(nsim, args.simname, args.halofinder, True)
calculate_initial(nsim, args.simname, args.halofinder, True)
if args.make_phew:
make_phew_halo_catalogue(nsim, True)
if args.make_merger:
make_merger_tree_file(nsim, True)
if args.append_merger_mass:
append_merger_tree_mass_to_phew_catalogue(nsim, True)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
parser.add_argument("--halofinder", type=str, help="Halo finder")
parser.add_argument("--make_final", action="store_true", default=False,
help="Process the final snapshot.")
parser.add_argument("--make_initial", action="store_true", default=False,
help="Process the initial snapshot.")
parser.add_argument("--make_phew", action="store_true", default=False,
help="Process the PHEW halo catalogue.")
parser.add_argument("--make_merger", action="store_true", default=False,
help="Process the merger tree files.")
parser.add_argument("--append_merger_mass", action="store_true",
default=False,
help="Append the merger tree mass to the PHEW cat.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def _main(nsim):
main(nsim, args)
work_delegation(_main, nsims, MPI.COMM_WORLD)
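A typical invocation of this script (the IC index and halo finder name are illustrative placeholders) would be something like:

    mpiexec -n 4 python process_snapshot.py --simname csiborg --nsims 7444 --halofinder FOF --make_final --make_initial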


@ -1,114 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to sort the initial snapshot particles according to their final
snapshot ordering, which is sorted by the halo IDs.
Ensures the following units:
- Positions in box units.
- Masses in :math:`M_\odot / h`.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
import h5py
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
import csiborgtools
from utils import get_nsims
def _main(nsim, simname, verbose):
"""
Sort the initial snapshot particles according to their final snapshot
ordering and dump them into a HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
print(f"{datetime.now()}: processing simulation `{nsim}`.", flush=True)
# We first load the particle IDs in the final snapshot.
pidf = csiborgtools.read.read_h5(paths.particles(nsim, simname))
pidf = pidf["particle_ids"]
# Then we load the particles in the initial snapshot and make sure that
# their particle IDs are sorted as in the final snapshot. Again, because of
# precision this must be read as structured.
if simname == "csiborg":
pars_extract = ["x", "y", "z", "M", "ID"]
# CSiBORG's initial snapshot ID
nsnap = 1
else:
pars_extract = None
# Use this to point the reader to the ICs snapshot
nsnap = -1
part0, pid0 = partreader.read_particle(
nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
# In CSiBORG we need to convert particle masses from box units.
if simname == "csiborg":
box = csiborgtools.read.CSiBORGBox(
max(paths.get_snapshots(nsim, simname)), nsim, paths)
part0[:, 3] = box.box2solarmass(part0[:, 3])
# Quijote's initial snapshot information also contains velocities but we
# don't need those.
if simname == "quijote":
part0 = part0[:, [0, 1, 2, 6]]
# In Quijote some particles are positioned precisely at the edge of the
# box. Move them to be just inside.
pos = part0[:, :3]
mask = pos >= 1
if numpy.any(mask):
spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-5
pos[mask] -= spacing
# First sort the particles by their own IDs, then apply the reverse
# sorting from the final snapshot.
part0 = part0[numpy.argsort(pid0)]
del pid0
collect()
part0 = part0[numpy.argsort(numpy.argsort(pidf))]
fout = paths.initmatch(nsim, simname, "particles")
if verbose:
print(f"{datetime.now()}: dumping particles for `{nsim}` to `{fout}`",
flush=True)
with h5py.File(fout, "w") as f:
f.create_dataset("particles", data=part0)
if __name__ == "__main__":
# Argument parser
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def main(nsim):
_main(nsim, args.simname, MPI.COMM_WORLD.Get_size() == 1)
work_delegation(main, nsims, MPI.COMM_WORLD)

setup.py

@ -1,52 +1,28 @@
 from setuptools import find_packages, setup

-# List of dependencies:
-#  - Corrfunc -> To be moved to a separate package.
-#  - NumPy
-#  - SciPy
-#  - Numba
-#  - Pylians
-#  - tqdm
-#  - healpy
-#  - astropy
-#  - scikit-learn
-#  - joblib
-#  - h5py
-#  - MPI
-#  - pyyaml
-#  - taskmaster
-#  - matplotlib
-#  - scienceplots
-#  - cache_to_disk
-
 BUILD_REQ = ["numpy", "scipy"]
 INSTALL_REQ = BUILD_REQ
-INSTALL_REQ += ["Corrfunc",
-                "Pylians",
-                "numba",
-                "tqdm",
-                "healpy",
-                "astropy",
-                "scikit-learn",
-                "h5py",
-                "matplotlib",
-                "scienceplots",
-                "mpi4py",
-                "pyyaml",
-                "joblib",]
+INSTALL_REQ += [
+    "numba",
+    "tqdm",
+    "healpy",
+    "astropy",
+    "scikit-learn",
+    "h5py",
+    "pynbody",
+    "joblib",
+]

 setup(
     name="csiborgtools",
-    version="0.2",
+    version="0.3",
     description="CSiBORG analysis tools",
     url="https://github.com/Richard-Sti/csiborgtools",
     author="Richard Stiskalek",
     author_email="richard.stiskalek@protonmail.com",
     license="GPL-3.0",
     packages=find_packages(),
-    python_requires=">=3.8",
+    python_requires=">=3.6",
     build_requires=BUILD_REQ,
     setup_requires=BUILD_REQ,
     install_requires=INSTALL_REQ,
@ -55,5 +31,6 @@ setup(
         "Intended Audience :: Science/Research",
         "Operating System :: POSIX :: Linux",
         "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9"]
+        "Programming Language :: Python :: 3.9"
+    ]
 )
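With the trimmed dependency list above, installing the package is standard setuptools usage, e.g. an editable development install from the repository root:

    pip install -e .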