Add mmain and other major updates (#44)

* Move paths to a separate file

* Add mmain reader

* Add a verbosity flag

* Fix imports

* Fix bug

* Rename files

* Return ultimate parents

* Add script to generate mmain

* Remove mmain path

* edit path

* Add mmain path

* Change function name

* Rename function

* Turn off verbose

* Fix list requirement

* Edit init match paths

* Fix init pathing

* Edit paths docs

* Edit dumpdir name

* Rename path

* Fix split paths

* Remove unused import

* Add comment

* Update readme

* remove read mmain

* Rename halocatalogue

* Fix minor bugs

* Update nbs

* Add create directory option

* Move split jobs

* Move split jobs

* Remove splitting

* Add import

* Edit script

* Deeper split folder

* Fix paths bug

* Rename catalogue

* Rename Catalogue

* Add new clumpread

* Edit paths

* add knn paths

* Update commenting

* Update imports

* Add more conversions

* Update temp file

* Add a note

* Add catalogue

* Comment

* Update TODO

* Update script

* add nb

* Update

* pep8

* edit paths & pep8

* Fix knn auto paths

* add paths docs

* Add auto and cross knn paths

* Add new paths

* Simplify tpcf reading

* pep8 patch

* update readme

* Update progress

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* Pep 8 and restructure

* add lambda spin

* add clump and halo

* add checks

* Edit halo profile fit

* Update gitignore

* backup script
Richard Stiskalek 2023-04-18 11:02:36 +02:00 committed by GitHub
parent e0d3854277
commit fdb0df8d4c
50 changed files with 2152 additions and 1844 deletions

.gitignore vendored
View file

@ -17,3 +17,4 @@ Pylians3/*
scripts/plot_correlation.ipynb
scripts/*.sh
venv/
.trunk/*

View file

@ -2,8 +2,16 @@
## Project Overlap
- [ ] Calculate the overlap between all 101 IC realisations on DiRAC.
- [x] Clean up the kNN paths in the summary.
- [x] Clean up the 2PCF paths in the summary.
- [ ] Sort out the splitting of individual clumps.
- [ ] Update the fitting scripts to work for clumps and parent halos.
- [ ] Calculate fitted quantities for clumps and parent halos and add them to the catalogues.
- [ ] Update overlap scripts to work with summed parent halos.
- [ ] Update the clustering scripts to work with clumps instead.
- [ ] Calculate the overlap between all 101 IC realisations on DiRAC.
## Project Clustering
@ -20,7 +28,7 @@
- [x] For the cross-correlation try making the second field randoms.
- [x] Clean up the reader code.
- [x] Correct the crossing script.
- [ ] Get started with the 2PCF calculation.
- [x] Get started with the 2PCF calculation.
## Project Environmental Dependence
- [ ] Add gradient and Hessian of the overdensity field.

View file

@ -12,11 +12,10 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from csiborgtools import (read, match, utils, units, fits, field, clustering) # noqa
from csiborgtools import clustering, field, fits, match, read # noqa
# Arguments to csiborgtools.read.CSiBORGPaths.
paths_glamdring = {
"srcdir": "/mnt/extraspace/hdesmond/",
"dumpdir": "/mnt/extraspace/rstiskalek/csiborg/",
"mmain_path": "/mnt/zfsusers/hdesmond/Mmain",
"initmatch_path": "/mnt/extraspace/rstiskalek/csiborg/initmatch/"}
"postdir": "/mnt/extraspace/rstiskalek/csiborg/"
}
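
A minimal sketch of how these arguments are meant to be consumed, following the comment above; whether `CSiBORGPaths` accepts exactly the `srcdir`/`postdir` keywords is an assumption here:

import csiborgtools

# Hypothetical usage; keyword names taken from the dict above.
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)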

View file

@ -13,10 +13,19 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from warnings import warn
from .knn import kNN_CDF # noqa
from .utils import (RVSinsphere, RVSinbox, RVSonsphere, BaseRVS, normalised_marks) # noqa
from csiborgtools.clustering.knn import kNN_CDF # noqa
from csiborgtools.clustering.utils import ( # noqa
BaseRVS,
RVSinbox,
RVSinsphere,
RVSonsphere,
normalised_marks,
)
try:
import Corrfunc
import Corrfunc # noqa
from .tpcf import Mock2PCF # noqa
except ImportError:
warn("`Corrfunc` not installed. 2PCF modules will not be available (`Mock2PCF`).") # noqa

View file

@ -13,11 +13,12 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
kNN-CDF calculation
kNN-CDF calculation.
"""
import numpy
from scipy.interpolate import interp1d
from scipy.stats import binned_statistic
from .utils import BaseRVS
@ -105,7 +106,7 @@ class kNN_CDF:
Catalogue NN object.
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
Uniform RVS generator matching `knn`.
neighbours : int
nneighbours : int
Maximum number of neighbours to use for the kNN-CDF calculation.
nsamples : int
Number of random points to sample for the knn-CDF calculation.
@ -155,7 +156,7 @@ class kNN_CDF:
NN object of the second catalogue.
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
Uniform RVS generator matching `knn1` and `knn2`.
neighbours : int
nneighbours : int
Maximum number of neighbours to use for the kNN-CDF calculation.
Rmax : float
Maximum radius of the sphere in which to sample random points for
@ -246,7 +247,7 @@ class kNN_CDF:
Catalogue NN object.
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
Uniform RVS generator matching `knn1` and `knn2`.
neighbours : int
nneighbours : int
Maximum number of neighbours to use for the kNN-CDF calculation.
nsamples : int
Number of random points to sample for the knn-CDF calculation.

View file

@ -16,6 +16,7 @@
import numpy
from Corrfunc.theory.DD import DD
from Corrfunc.utils import convert_3d_counts_to_cf
from .utils import BaseRVS
@ -63,6 +64,7 @@ class Mock2PCF:
periodic=False)
ndata = pos.shape[0]
xi = convert_3d_counts_to_cf(ndata, ndata, nrandom, nrandom, dd, dr, dr, rr)
xi = convert_3d_counts_to_cf(ndata, ndata, nrandom, nrandom,
dd, dr, dr, rr)
rp = 0.5 * (bins[1:] + bins[:-1])
return rp, xi
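
For context, `convert_3d_counts_to_cf` called with `(dd, dr, dr, rr)` as above evaluates the Landy-Szalay estimator. A hedged numpy equivalent, up to Corrfunc's exact pair-count normalisation convention:

import numpy

def landy_szalay(dd, dr, rr, ndata, nrandom):
    # xi = (DD - 2 DR + RR) / RR with normalised pair counts.
    fdd = dd / (ndata * ndata)
    fdr = dr / (ndata * nrandom)
    frr = rr / (nrandom * nrandom)
    return (fdd - 2 * fdr + frr) / frr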

View file

@ -13,10 +13,10 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Clustering support functions."""
from abc import (ABC, abstractmethod)
from abc import ABC, abstractmethod
from warnings import warn
import numpy
import numpy
###############################################################################
# Random points #
@ -77,7 +77,7 @@ class RVSinsphere(BaseRVS):
class RVSinbox(BaseRVS):
"""
r"""
Generator of uniform RVS in a box of width `L` in Cartesian coordinates in
:math:`[0, L]^3`.
@ -100,7 +100,7 @@ class RVSinbox(BaseRVS):
class RVSonsphere(BaseRVS):
"""
r"""
Generator of uniform RVS on the surface of a unit sphere. RA is in
:math:`[0, 2\pi)` and dec in :math:`[-\pi / 2, \pi / 2]`, respectively.
If `indeg` is `True` then converted to degrees.
@ -148,7 +148,7 @@ def wrapRA(ra, indeg):
"""
mask = ra < 0
if numpy.sum(mask) == 0:
warn("No negative right ascension found.", UserWarning())
warn("No negative right ascension found.", UserWarning, stacklevel=1)
ra[mask] += 360 if indeg else 2 * numpy.pi
return ra
@ -177,7 +177,7 @@ def normalised_marks(x, y, nbins):
"""
assert x.ndim == y.ndim == 1
if y.dtype not in [numpy.float32, numpy.float64]:
raise NotImplemented("Marks from integers are not supported.")
raise NotImplementedError("Marks from integers are not supported.")
bins = numpy.percentile(x, q=numpy.linspace(0, 100, nbins + 1))
marks = numpy.full_like(y, numpy.nan)
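
The diff view cuts off before the loop that fills `marks`. A sketch of the likely behaviour, with the rank-normalisation details being an assumption: within each percentile bin of `x`, replace `y` by its normalised rank.

import numpy

def normalised_marks_sketch(x, y, nbins):
    """Rank-normalise `y` within percentile bins of `x` (assumed behaviour)."""
    bins = numpy.percentile(x, q=numpy.linspace(0, 100, nbins + 1))
    marks = numpy.full_like(y, numpy.nan)
    for i in range(nbins):
        if i + 1 < nbins:
            mask = (x >= bins[i]) & (x < bins[i + 1])
        else:  # Include the right edge in the last bin.
            mask = (x >= bins[i]) & (x <= bins[i + 1])
        ranks = numpy.argsort(numpy.argsort(y[mask]))
        marks[mask] = (ranks + 0.5) / mask.sum()
    return marks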

View file

@ -13,8 +13,10 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from warnings import warn
try:
import MAS_library as MASL
from .density import DensityField
import MAS_library as MASL # noqa
from .density import DensityField # noqa
except ImportError:
warn("MAS_library not found, `DensityField` will not be available", UserWarning) # noqa

View file

@ -16,12 +16,12 @@
Density field and cross-correlation calculations.
"""
from warnings import warn
from tqdm import trange
import numpy
import MAS_library as MASL
import numpy
import Pk_library as PKL
import smoothing_library as SL
from ..units import (BoxUnits, radec_to_cartesian)
from tqdm import trange
class DensityField:
@ -56,8 +56,7 @@ class DensityField:
def __init__(self, particles, boxsize, box, MAS="CIC"):
self.particles = particles
assert boxsize > 0
self._boxsize = boxsize
assert isinstance(box, BoxUnits)
self.boxsize = boxsize
self.box = box
assert MAS in ["NGP", "CIC", "TSC", "PCS"]
self._MAS = MAS
@ -103,6 +102,14 @@ class DensityField:
"""
return self._box
@box.setter
def box(self, box):
try:
assert box._name == "box_units"
self._box = box
except AttributeError as err:
raise TypeError from err
@property
def MAS(self):
"""
@ -117,7 +124,7 @@ class DensityField:
@staticmethod
def _force_f32(x, name):
if x.dtype != numpy.float32:
warn("Converting `{}` to float32.".format(name))
warn("Converting `{}` to float32.".format(name), stacklevel=1)
x = x.astype(numpy.float32)
return x
@ -348,13 +355,15 @@ class DensityField:
-------
interp_field : (list of) 1-dimensional array of shape `(n_samples,).
"""
self._force_f32(pos, "pos")
X = numpy.vstack(
radec_to_cartesian(*(pos[:, i] for i in range(3)), isdeg)).T
X = X.astype(numpy.float32)
# Place the observer at the center of the box
X += 0.5 * self.boxsize
return self.evaluate_field(*field, pos=X)
# TODO: implement this
raise NotImplementedError("This method is not yet implemented.")
# self._force_f32(pos, "pos")
# X = numpy.vstack(
# radec_to_cartesian(*(pos[:, i] for i in range(3)), isdeg)).T
# X = X.astype(numpy.float32)
# # Place the observer at the center of the box
# X += 0.5 * self.boxsize
# return self.evaluate_field(*field, pos=X)
@staticmethod
def gravitational_field_norm(gx, gy, gz):

View file

@ -12,7 +12,5 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .haloprofile import (NFWProfile, NFWPosterior) # noqa
from .halo import (distribute_halos, clump_with_particles, # noqa
dump_split_particles, load_split_particles, # noqa
split_jobs, pick_single_clump, Clump) # noqa
from .halo import Clump, Halo # noqa
from .haloprofile import NFWPosterior, NFWProfile # noqa

View file

@ -12,555 +12,195 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Tools for splitting the particles and a clump object.
"""
from os import remove
from warnings import warn
from os.path import join
"""A clump object."""
from abc import ABC
import numpy
from tqdm import trange
from ..read import ParticleReader
def clump_with_particles(particle_clumps, clumps):
class BaseStructure(ABC):
"""
Count how many particles does each clump have.
Parameters
----------
particle_clumps : 1-dimensional array
Array of particles' clump IDs.
clumps : structured array
Clumps array.
Returns
-------
with_particles : 1-dimensional boolean array
Array of whether a clump has any particles.
Basic structure object for handling operations on its particles.
"""
return numpy.isin(clumps["index"], particle_clumps)
_particles = None
_info = None
_box = None
def distribute_halos(n_splits, clumps):
"""
Evenly distribute clump indices to smaller splits. Clumps should only be
clumps that contain particles.
Parameters
----------
n_splits : int
Number of splits.
clumps : structured array
Clumps array.
Returns
-------
splits : 2-dimensional array of shape `(njobs, 2)`
Array of starting and ending indices of each CPU.
"""
# Make sure these are unique IDs
indxs = clumps["index"]
if indxs.size > numpy.unique((indxs)).size:
raise ValueError("`clump_indxs` constains duplicate indices.")
Ntotal = indxs.size
njobs_per_cpu = numpy.ones(n_splits, dtype=int) * Ntotal // n_splits
# Split the remainder Ntotal % njobs among the CPU
njobs_per_cpu[:Ntotal % n_splits] += 1
start = ParticleReader.nparts_to_start_ind(njobs_per_cpu)
return numpy.vstack([start, start + njobs_per_cpu]).T
def dump_split_particles(particles, particle_clumps, clumps, n_splits,
nsnap, nsim, paths, verbose=True):
"""
Save the data needed for each split so that a process does not have to load
everything.
Parameters
----------
particles : structured array
The particle array.
particle_clumps : 1-dimensional array
Array of particles' clump IDs.
clumps : structured array
The clumps array.
n_splits : int
Number of times to split the clumps.
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths-handling object with set `n_sim` and `n_snap`.
verbose : bool, optional
Verbosity flag. By default `True`.
Returns
-------
None
"""
if particles.size != particle_clumps.size:
raise ValueError("`particles` must correspond to `particle_clumps`.")
# Calculate which clumps have particles
with_particles = clump_with_particles(particle_clumps, clumps)
clumps = numpy.copy(clumps)[with_particles]
if verbose:
warn(r"There are {:.4f}% clumps that have identified particles."
.format(with_particles.sum() / with_particles.size * 100))
# The starting clump index of each split
splits = distribute_halos(n_splits, clumps)
fname = join(paths.temp_dumpdir, "out_{}_snap_{}_{}.npz")
tot = 0
for n in trange(n_splits) if verbose else range(n_splits):
# Lower and upper array index of the clumps array
i, j = splits[n, :]
# Clump indices in this split
indxs = clumps["index"][i:j]
hmin, hmax = indxs.min(), indxs.max()
mask = (particle_clumps >= hmin) & (particle_clumps <= hmax)
# Check number of clumps
npart_unique = numpy.unique(particle_clumps[mask]).size
if indxs.size > npart_unique:
raise RuntimeError(
"Split `{}` contains more unique clumps (`{}`) than there are "
"unique particles' clump indices (`{}`)after removing clumps "
"with no particles.".format(n, indxs.size, npart_unique))
# Dump it!
tot += mask.sum()
fout = fname.format(nsim, nsnap, n)
numpy.savez(fout, particles[mask], particle_clumps[mask], clumps[i:j])
# There are particles whose clump ID is > 1 and have no counterpart in the
# clump file. Therefore can save fewer particles, depending on the cut.
if tot > particle_clumps.size:
raise RuntimeError(
"Num. of dumped particles `{}` is greater than the particle file "
"size `{}`.".format(tot, particle_clumps.size))
def split_jobs(njobs, ncpu):
"""
Split `njobs` amongst `ncpu`.
Parameters
----------
njobs : int
Number of jobs.
ncpu : int
Number of CPUs.
Returns
-------
jobs : list of lists of integers
Outer list of each CPU and inner lists for CPU's jobs.
"""
njobs_per_cpu, njobs_remainder = divmod(njobs, ncpu)
jobs = numpy.arange(njobs_per_cpu * ncpu).reshape((njobs_per_cpu, ncpu)).T
jobs = jobs.tolist()
for i in range(njobs_remainder):
jobs[i].append(njobs_per_cpu * ncpu + i)
return jobs
def load_split_particles(nsplit, nsnap, nsim, paths, remove_split=False):
"""
Load particles of a split saved by `dump_split_particles`.
Parameters
--------
n_split : int
Split index.
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths-handling object with set `n_sim` and `n_snap`.
remove_split : bool, optional
Whether to remove the split file. By default `False`.
Returns
-------
particles : structured array
Particle array of this split.
clumps_indxs : 1-dimensional array
Array of particles' clump IDs of this split.
clumps : 1-dimensional array
Clumps belonging to this split.
"""
fname = join(paths.temp_dumpdir,
"out_{}_snap_{}_{}.npz".format(nsim, nsnap, nsplit))
file = numpy.load(fname)
particles, clump_indxs, clumps = (file[f] for f in file.files)
if remove_split:
remove(fname)
return particles, clump_indxs, clumps
def pick_single_clump(n, particles, particle_clumps, clumps):
"""
Get particles belonging to the `n`th clump in `clumps` arrays.
Parameters
----------
n : int
Clump position in `clumps` array. Not its halo finder index!
particles : structured array
@property
def particles(self):
"""
Particle array.
particle_clumps : 1-dimensional array
Array of particles' clump IDs.
clumps : structured array
Array of clumps.
Returns
-------
sel_particles : structured array
Particles belonging to the requested clump.
sel_clump : array
A slice of a `clumps` array corresponding to this clump. Must
contain `["peak_x", "peak_y", "peak_z", "mass_cl"]`.
"""
# Clump index on the nth position
k = clumps["index"][n]
# Mask of which particles belong to this clump
mask = particle_clumps == k
return particles[mask], clumps[n]
Returns
-------
particles : structured array
"""
return self._particles
@particles.setter
def particles(self, particles):
pars = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
assert all(p in particles.dtype.names for p in pars)
self._particles = particles
###############################################################################
# Clump object #
###############################################################################
@property
def info(self):
"""
Array containing information from the clump finder.
Returns
-------
info : structured array
"""
return self._info
class Clump:
r"""
A clump (halo) object to handle the particles and their clump's data.
@info.setter
def info(self, info):
# TODO turn this into a structured array and add some checks
self._info = info
Parameters
----------
x : 1-dimensional array
Particle coordinates along the x-axis.
y : 1-dimensional array
Particle coordinates along the y-axis.
z : 1-dimensional array
Particle coordinates along the z-axis.
m : 1-dimensional array
Particle masses.
x0 : float
Clump center coordinate along the x-axis.
y0 : float
Clump center coordinate along the y-axis.
z0 : float
Clump center coordinate along the z-axis.
clump_mass : float, optional
Mass of the clump. By default not set.
vx : 1-dimensional array, optional
Particle velocity along the x-axis. By default not set.
vy : 1-dimensional array, optional
Particle velocity along the y-axis. By default not set.
vz : 1-dimensional array, optional
Particle velocity along the z-axis. By default not set.
index : int, optional
The halo finder index of this clump. By default not set.
rhoc : float, optional
The critical density :math:`\rho_c` at this snapshot in box units. By
default not set.
G : float, optional
The gravitational constant :math:`G` in box units. By default not set.
"""
_pos = None
_clump_pos = None
_clump_mass = None
_vel = None
_index = None
_rhoc = None
_G = None
@property
def box(self):
"""
CSiBORG box object handling unit conversion.
def __init__(self, x, y, z, m, x0, y0, z0, clump_mass=None,
vx=None, vy=None, vz=None, index=None, rhoc=None, G=None):
self._pos = numpy.vstack([x - x0, y - y0, z - z0]).T
self._clump_pos = numpy.asarray((x0, y0, z0))
assert clump_mass is None or isinstance(clump_mass, float)
self._clump_mass = clump_mass
if all(v is not None for v in (vx, vy, vz)):
self._vel = numpy.vstack([vx, vy, vz]).T
assert self._vel.shape == self.pos.shape
assert m.ndim == 1 and m.size == self.Npart
self._m = m
assert index is None or (isinstance(index, (int, numpy.int64)) and index >= 0) # noqa
self._index = index
assert rhoc is None or rhoc > 0
self._rhoc = rhoc
assert G is None or G > 0
self._G = G
Returns
-------
box : :py:class:`csiborgtools.units.BoxUnits`
"""
return self._box
@box.setter
def box(self, box):
try:
assert box._name == "box_units"
self._box = box
except AttributeError as err:
raise TypeError from err
@property
def pos(self):
"""
Cartesian particle coordinates centered at the clump.
Cartesian particle coordinates centered at the object.
Returns
-------
pos : 2-dimensional array of shape `(n_particles, 3)`.
"""
return self._pos
@property
def Npart(self):
"""
Number of particles associated with this clump.
Returns
-------
Npart : int
"""
return self.pos.shape[0]
ps = ('x', 'y', 'z')
return numpy.vstack([self[p] - self.info[p] for p in ps]).T
@property
def r(self):
"""
Radial distance of the particles from the clump peak.
Radial separation of the particles from the centre of the object.
Returns
-------
r : 1-dimensional array of shape `(n_particles, )`.
"""
return numpy.sum(self.pos**2, axis=1)**0.5
@property
def rmin(self):
"""
The minimum radial distance of a particle.
Returns
-------
rmin : float
"""
return numpy.min(self.r)
@property
def rmax(self):
"""
The maximum radial distance of a particle.
Returns
-------
rmin : float
"""
return numpy.max(self.r)
@property
def clump_pos(self):
"""
Cartesian position components of the clump.
Returns
-------
pos : 1-dimensional array of shape `(3, )`
"""
return self._clump_pos
@property
def clump_mass(self):
"""
Clump mass.
Returns
-------
mass : float
"""
if self._clump_mass is None:
raise ValueError("Clump mass `clump_mass` has not been set.")
return self._clump_mass
return numpy.linalg.norm(self.pos, axis=1)
@property
def vel(self):
"""
Cartesian velocity components of the clump.
Cartesian particle velocity components.
Returns
-------
vel : 2-dimensional array of shape (`n_particles, 3`)
"""
if self._vel is None:
raise ValueError("Velocities `vel` have not been set.")
return self._vel
return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
@property
def m(self):
def cmass(self):
"""
Particle masses.
Returns
-------
m : 1-dimensional array of shape `(n_particles, )`
"""
return self._m
@property
def center_mass(self):
"""
Cartesian position components of the clump centre of mass. Note that
this is already in a frame centered at the clump's potential minimum.
Cartesian position components of the object's centre of mass. Note that
this is already in a frame centered at the clump's potential minimum,
so its distance from origin indicates the separation of the centre of
mass and potential minimum.
Returns
-------
cm : 1-dimensional array of shape `(3, )`
"""
return numpy.average(self.pos, axis=0, weights=self.m)
return numpy.average(self.pos, axis=0, weights=self['M'])
@property
def angular_momentum(self):
"""
Clump angular momentum in the box coordinates.
Angular momentum in the box coordinates.
NOTE: here also change velocities to the CM and appropriately edit the
docs.
Returns
-------
J : 1-dimensional array or shape `(3, )`
"""
J = numpy.cross(self.pos - self.center_mass, self.vel)
J = numpy.cross(self.pos - self.cmass, self.vel)
return numpy.einsum("i,ij->j", self['M'], J)
@property
def lambda200c(self):
r"""
Clump Bullock spin, see Eq. 5 in [1], in a radius of
:math:`R_{\rm 200c}`.
References
----------
[1] A Universal Angular Momentum Profile for Galactic Halos; 2001;
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R.
Returns
-------
lambda200c : float
def enclosed_mass(self, rmax, rmin=0):
"""
J = self.angular_momentum
R, M = self.spherical_overdensity_mass(200)
V = numpy.sqrt(self.G * M / R)
return numpy.linalg.norm(J) / (numpy.sqrt(2) * M * V * R)
@property
def index(self):
"""
Halo finder clump index.
Returns
-------
hindex : int
"""
if self._index is None:
raise ValueError("Halo index `hindex` has not been set.")
return self._index
@property
def rhoc(self):
r"""
Critical density :math:`\rho_c` at this snapshot in box units.
Returns
-------
rhoc : float
"""
if self._rhoc is None:
raise ValueError("The critical density `rhoc` has not been set.")
return self._rhoc
@property
def G(self):
r"""
Gravitational constant :math:`G` in box units.
Returns
-------
G : float
"""
if self._G is None:
raise ValueError("The grav. constant `G` has not been set.")
return self._G
@property
def total_particle_mass(self):
"""
Total mass of all particles.
Returns
-------
tot_mass : float
"""
return numpy.sum(self.m)
@property
def mean_particle_pos(self):
"""
Mean Cartesian particle coordinate. Not centered at the halo!
Returns
-------
pos : 1-dimensional array of shape `(3, )`
"""
return numpy.mean(self.pos + self.clump_pos, axis=0)
def enclosed_spherical_mass(self, rmax, rmin=0):
"""
Enclosed spherical mass between two radii in box units.
Parameters
----------
rmax : float
The maximum radial distance.
rmin : float, optional
The minimum radial distance. By default 0.
Returns
-------
M_enclosed : float
The enclosed mass.
"""
return numpy.sum(self.m[(self.r >= rmin) & (self.r <= rmax)])
def enclosed_spherical_volume(self, rmax, rmin=0):
"""
Enclosed spherical volume within two radii in box units.
Sum of particle masses between two radii.
Parameters
----------
rmax : float
Maximum radial distance.
rmin : float, optional
Minimum radial distance. By default 0.
Minimum radial distance.
Returns
-------
vol : float
enclosed_mass : float
"""
return 4 * numpy.pi / 3 * (rmax**3 - rmin**3)
r = self.r
return numpy.sum(self['M'][(r >= rmin) & (r <= rmax)])
def spherical_overdensity_mass(self, delta, n_particles_min=10):
def lambda_bullock(self, radius, npart_min=10):
r"""
Spherical overdensity mass and radius. The mass is defined as the
enclosed mass within a radius of where the mean enclosed spherical
density reaches a multiple of the critical radius at a given redshift
`self.rho_c`.
Starts from the furthest particle, working its way inside the halo
through an ordered list of particles. The corresponding values is the
radial distance of the first particle whose addition sufficiently
increases the mean density.
Bullock spin, see Eq. 5 in [1], computed within `radius`, which should
correspond to some overdensity radius.
Parameters
----------
delta : list of int or float
The :math:`\delta_{\rm x}` parameters where :math:`\mathrm{x}` is
the overdensity multiple.
n_particles_min : int
radius : float
Radius in which to calculate the spin.
npart_min : int
Minimum number of enclosed particles for a radius to be
considered trustworthy.
Returns
-------
lambda_bullock : float
References
----------
[1] A Universal Angular Momentum Profile for Galactic Halos; 2001;
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R.
"""
mask = self.r <= radius
if numpy.sum(mask) < npart_min:
return numpy.nan
mass = self.enclosed_mass(radius)
V = numpy.sqrt(self.box.box_G * mass / radius)
return (numpy.linalg.norm(self.angular_momentum[mask])
/ (numpy.sqrt(2) * mass * V * radius))
def spherical_overdensity_mass(self, delta_mult, npart_min=10):
r"""
Calculate spherical overdensity mass and radius. The mass is defined as
the enclosed mass within an outermost radius where the mean enclosed
spherical density reaches a multiple of the critical density `delta`.
Parameters
----------
delta_mult : list of int or float
Overdensity multiple.
npart_min : int
Minimum number of enclosed particles for a radius to be
considered trustworthy.
@ -571,83 +211,75 @@ class Clump:
mx : float
Corresponding spherical enclosed mass.
"""
# If single `delta` turn to list
delta = [delta] if isinstance(delta, (float, int)) else delta
# If given a list or tuple turn to array
_istlist = isinstance(delta, (list, tuple))
delta = numpy.asarray(delta, dtype=float) if _istlist else delta
# We first sort the particles by increasing separation
rs = self.r
order = numpy.argsort(rs)
rs = rs[order]
cmass = numpy.cumsum(self['M'][order]) # Cumulative mass in radial order
# We calculate the enclosed volume and indices where it is above target
vol = 4 * numpy.pi / 3 * (rs**3 - rs[0]**3)
ks = numpy.where(cmass / vol > delta_mult * self.box.rhoc)[0]
if ks.size == 0: # Never above the threshold?
return numpy.nan, numpy.nan
k = numpy.max(ks)
if k < npart_min: # Too few particles?
return numpy.nan, numpy.nan
return rs[k], cmass[k]
# Ordering of deltas
order_delta = numpy.argsort(delta)
# Sort the particles
order_particles = numpy.argsort(self.r)[::-1]
# Density to aim for
n_delta = delta.size
target_density = delta * self.rhoc
# The sum of particle masses, starting from the outside
# Adds the furtherst particle ensure that the 0th index is tot mass
cummass_ordered = (self.total_particle_mass
+ self.m[order_particles][0]
- numpy.cumsum(self.m[order_particles]))
# Enclosed volumes at particle radii
volumes = self.enclosed_spherical_volume(self.r[order_particles])
densities = cummass_ordered / volumes
# Pre-allocate arrays
rfound = numpy.full_like(delta, numpy.nan)
mfound = numpy.full_like(rfound, numpy.nan)
for n in order_delta:
overdense_mask = densities > target_density[n]
# Enforce that we have at least several particles enclosed
if numpy.sum(overdense_mask) < n_particles_min:
continue
# The outermost particle radius where the overdensity is achieved
k = numpy.where(overdense_mask)[0][0]
rfound[n] = self.r[order_particles][k]
mfound[n] = cummass_ordered[k]
# If only one delta return simply numbers
if n_delta == 1:
rfound = rfound[0]
mfound = mfound[0]
return rfound, mfound
@classmethod
def from_arrays(cls, particles, clump, rhoc=None, G=None):
r"""
Initialises `Clump` from `particles` containing the relevant particle
information and its `clump` information.
Paramaters
----------
particles : structured array
Array of particles belonging to this clump. Must contain
`["x", "y", "z", "M"]` and optionally also `["vx", "vy", "vz"]`.
clump : array
A slice of a `clumps` array corresponding to this clump. Must
contain `["peak_x", "peak_y", "peak_z", "mass_cl"]`.
rhoc : float, optional
The critical density :math:`\rho_c` at this snapshot in box units.
By default not set.
G : float, optional
The gravitational constant :math:`G` in box units. By default not
set.
@property
def keys(self):
"""
Particle array keys.
Returns
-------
clump : `Clump`
key : list of str
"""
x, y, z, m = (particles[p] for p in ["x", "y", "z", "M"])
x0, y0, z0, cl_mass, hindex = (
clump[p] for p in ["peak_x", "peak_y", "peak_z", "mass_cl",
"index"])
try:
vx, vy, vz = (particles[p] for p in ["vx", "vy", "vz"])
except ValueError:
vx, vy, vz = None, None, None
return cls(x, y, z, m, x0, y0, z0, cl_mass,
vx, vy, vz, hindex, rhoc, G)
return self.particles.dtype.names
def __getitem__(self, key):
if key not in self.keys:
raise RuntimeError("Invalid key `{}`!".format(key))
return self.particles[key]
def __len__(self):
return self.particles.size
class Clump(BaseStructure):
"""
Clump object to handle operations on its particles.
Parameters
----------
particles : structured array
Particle array. Must contain `['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']`.
info : structured array
Array containing information from the clump finder.
box : :py:class:`csiborgtools.read.BoxUnits`
Box units object.
"""
def __init__(self, particles, info, box):
self.particles = particles
self.info = info
self.box = box
class Halo(BaseStructure):
"""
Halo object to handle operations on the particles of an ultimate parent
halo, i.e. the particles summed over its clumps.
Parameters
----------
particles : structured array
Particle array. Must contain `['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']`.
info : structured array
Array containing information from the clump finder.
box : :py:class:`csiborgtools.read.BoxUnits`
Box units object.
"""
def __init__(self, particles, info, box):
self.particles = particles
self.info = info
self.box = box
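
A hypothetical round trip through the new interface; the particle values, the `info` record, and the `FakeBox` stand-in are invented for illustration, where a real `box` would be a `BoxUnits` instance:

import numpy
from csiborgtools.fits import Clump

# Fake particle array with the fields required by the `particles` setter.
dtype = {"names": ["x", "y", "z", "vx", "vy", "vz", "M"],
         "formats": [numpy.float32] * 7}
particles = numpy.zeros(1000, dtype=dtype)
for p in ("x", "y", "z"):
    particles[p] = numpy.random.uniform(0.45, 0.55, size=1000)
particles["M"] = 1.0

# Clump-finder record carrying the centre; only the keys used by `pos`.
info = numpy.array([(0.5, 0.5, 0.5)],
                   dtype={"names": ["x", "y", "z"],
                          "formats": [numpy.float32] * 3})[0]

class FakeBox:
    _name = "box_units"  # satisfies the duck-typed `box` setter

clump = Clump(particles, info, FakeBox())
print(clump.r.max(), clump.cmass)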

View file

@ -12,109 +12,78 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Halo profiles functions and posteriors.
"""
from jax import numpy as jnumpy
from jax import grad
"""Halo profiles functions and posteriors."""
import numpy
from scipy.optimize import minimize_scalar
from scipy.stats import uniform
from .halo import Clump
class NFWProfile:
r"""
The Navarro-Frenk-White (NFW) density profile defined as
The Navarro-Frenk-White (NFW) density profile.
.. math::
\rho(r) = \frac{\rho_0}{x(1 + x)^2}
\rho(r) = \frac{\rho_0}{x(1 + x)^2},
where :math:`x = r / R_s` with free parameters :math:`R_s, \rho_0`.
Parameters
----------
Rs : float
Scale radius :math:`R_s`.
rho0 : float
NFW density parameter :math:`\rho_0`.
:math:`x = r / R_s` and its free parameters are :math:`R_s, \rho_0`: scale
radius and NFW density parameter.
"""
@staticmethod
def profile(r, Rs, rho0):
r"""
Halo profile evaluated at :math:`r`.
"""
Evaluate the halo profile at `r`.
Parameters
----------
r : float or 1-dimensional array
Radial distance :math:`r`.
r : 1-dimensional array
Radial distance.
Rs : float
Scale radius :math:`R_s`.
Scale radius.
rho0 : float
NFW density parameter :math:`\rho_0`.
NFW density parameter.
Returns
-------
density : float or 1-dimensional array
Density of the NFW profile at :math:`r`.
density : 1-dimensional array
"""
x = r / Rs
return rho0 / (x * (1 + x)**2)
@staticmethod
def logprofile(r, Rs, rho0, use_jax=False):
r"""
Natural logarithm of the halo profile evaluated at :math:`r`.
Parameters
----------
r : float or 1-dimensional array
Radial distance :math:`r`.
Rs : float
Scale radius :math:`R_s`.
rho0 : float
NFW density parameter :math:`\rho_0`.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
Returns
-------
logdensity : float or 1-dimensional array
Logarithmic density of the NFW profile at :math:`r`.
"""
log = jnumpy.log if use_jax else numpy.log
def _logprofile(r, Rs, rho0):
"""Natural logarithm of `NFWPprofile.profile(...)`."""
x = r / Rs
return log(rho0) - log(x) - 2 * log(1 + x)
return numpy.log(rho0) - numpy.log(x) - 2 * numpy.log(1 + x)
@staticmethod
def enclosed_mass(r, Rs, rho0, use_jax=False):
def mass(r, Rs, rho0):
r"""
Enclosed mass of a NFW profile in radius :math:`r`.
Calculate the enclosed mass of a NFW profile in radius `r`.
Parameters
----------
r : float or 1-dimensional array
Radial distance :math:`r`.
r : 1-dimensional array
Radial distance.
Rs : float
Scale radius :math:`R_s`.
Scale radius.
rho0 : float
NFW density parameter :math:`\rho_0`.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
NFW density parameter.
Returns
-------
M : float or 1-dimensional array
M : 1-dimensional array
The enclosed mass.
"""
log = jnumpy.log if use_jax else numpy.log
x = r / Rs
out = log(1 + x) - x / (1 + x)
out = numpy.log(1 + x) - x / (1 + x)
return 4 * numpy.pi * rho0 * Rs**3 * out
def bounded_enclosed_mass(self, rmin, rmax, Rs, rho0, use_jax=False):
def bounded_mass(self, rmin, rmax, Rs, rho0):
r"""
Calculate the enclosed mass between :math:`r_min <= r <= r_max`.
Calculate the enclosed mass between `rmin` and `rmax`.
Parameters
----------
@ -125,51 +94,46 @@ class NFWProfile:
Rs : float
Scale radius :math:`R_s`.
rho0 : float
NFW density parameter :math:`\rho_0`.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
NFW density parameter.
Returns
-------
M : float
Enclosed mass within the radial range.
"""
return (self.enclosed_mass(rmax, Rs, rho0, use_jax)
- self.enclosed_mass(rmin, Rs, rho0, use_jax))
return self.mass(rmax, Rs, rho0) - self.mass(rmin, Rs, rho0)
def pdf(self, r, Rs, rmin, rmax):
r"""
The radial probability density function of the NFW profile calculated
as
Calculate the radial PDF of the NFW profile, defined below.
.. math::
\frac{4\pi r^2 \rho(r)} {M(r_\min, r_\max)}
\frac{4\pi r^2 \rho(r)} {M(r_\min, r_\max)},
where :math:`M(r_\min, r_\max)` is the enclosed mass between
:math:`r_\min` and :math:`r_\max`. Note that the dependence on
:math:`\rho_0` is cancelled.
:math:`\rho_0` is cancelled and must be accounted for in the
normalisation term to match the total mass.
Parameters
----------
r : float or 1-dimensional array
Radial distance :math:`r`.
r : 1-dimensional array
Radial distance.
Rs : float
Scale radius :math:`R_s`.
Scale radius.
rmin : float
Minimum radius.
Minimum radius to evaluate the PDF (denominator term).
rmax : float
Maximum radius.
Maximum radius to evaluate the PDF (denominator term).
Returns
-------
pdf : float or 1-dimensional array
Probability density of the NFW profile at :math:`r`.
pdf : 1-dimensional array
"""
norm = self.bounded_enclosed_mass(rmin, rmax, Rs, 1)
return 4 * numpy.pi * r**2 * self.profile(r, Rs, 1) / norm
def rvs(self, rmin, rmax, Rs, N=1):
def rvs(self, rmin, rmax, Rs, size=1):
"""
Generate random samples from the NFW profile via rejection sampling.
@ -180,8 +144,8 @@ class NFWProfile:
rmax : float
Maximum radius.
Rs : float
Scale radius :math:`R_s`.
N : int, optional
Scale radius.
size : int, optional
Number of samples to generate. By default 1.
Returns
@ -190,15 +154,15 @@ class NFWProfile:
Samples following the NFW profile.
"""
gen = uniform(rmin, rmax-rmin)
samples = numpy.full(N, numpy.nan)
for i in range(N):
samples = numpy.full(size, numpy.nan)
for i in range(size):
while True:
r = gen.rvs()
if self.pdf(r, Rs, rmin, rmax) > numpy.random.rand():
samples[i] = r
break
if N == 1:
if size == 1:
return samples[0]
return samples
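
A hypothetical draw from the profile; the numbers are illustrative only:

prof = NFWProfile()
samples = prof.rvs(rmin=0.05, rmax=1.0, Rs=0.2, size=100)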
@ -206,11 +170,10 @@ class NFWProfile:
class NFWPosterior(NFWProfile):
r"""
Posterior for fitting the NFW profile in the range specified by the
closest particle and the :math:`r_{200c}` radius. The likelihood is
calculated as
closest particle and the :math:`r_{200c}` radius, calculated as below.
.. math::
\frac{4\pi r^2 \rho(r)} {M(r_{\min} r_{200c})} \frac{m}{M / N}
\frac{4\pi r^2 \rho(r)} {M(r_{\min}, r_{200c})} \frac{m}{M / N},
where :math:`M(r_{\min}, r_{200c})` is the NFW enclosed mass between the
closest particle and the :math:`r_{200c}` radius, :math:`m` is the particle
@ -223,18 +186,9 @@ class NFWPosterior(NFWProfile):
clump : `Clump`
Clump object containing the particles and clump information.
"""
_clump = None
_binsguess = 10
_r = None
_Npart = None
_m = None
_rmin = None
_rmax = None
def __init__(self, clump):
# Initialise the NFW profile
def __init__(self):
super().__init__()
self.clump = clump
@property
def clump(self):
@ -248,97 +202,12 @@ class NFWPosterior(NFWProfile):
"""
return self._clump
@property
def r(self):
r"""
Radial distance of particles used to fit the NFW profile, i.e. the ones
whose radial distance is less than :math:`R_{\rm 200c}`.
Returns
-------
r : 1-dimensional array
"""
return self._r
@property
def Npart(self):
r"""
Number of particles used to fit the NFW profile, i.e. the ones
whose radial distance is less than :math:`R_{\rm 200c}`.
Returns
-------
Npart : int
"""
return self._Npart
@property
def m(self):
r"""
Mass of particles used to fit the NFW profile, i.e. the ones
whose radial distance is less than :math:`R_{\rm 200c}`.
Returns
-------
r : 1-dimensional array
"""
return self._m
@property
def rmin(self):
"""
The minimum radial distance of a particle.
Returns
-------
rmin : float
"""
return self._rmin
@property
def rmax(self):
r"""
The maximum radial distance used to fit the profile, here takem to be
the :math:`R_{\rm 200c}`.
Returns
-------
rmax : float
"""
return self._rmax
@clump.setter
def clump(self, clump):
"""Sets `clump` and precalculates useful things."""
if not isinstance(clump, Clump):
raise TypeError(
"`clump` must be :py:class:`csiborgtools.fits.Clump` type. "
"Currently `{}`".format(type(clump)))
assert isinstance(clump, Clump)
self._clump = clump
# The minimum separation
rmin = self.clump.rmin
rmax, __ = self.clump.spherical_overdensity_mass(200)
# Set the distances
self._rmin = rmin
self._rmax = rmax
# Set particles that will be used to fit the halo
mask_r200 = (self.clump.r >= rmin) & (self.clump.r <= rmax)
self._r = self.clump.r[mask_r200]
self._m = self.clump.m[mask_r200]
self._Npart = self._r.size
# Ensure that the minimum separation is > 0 for finite log
if self.rmin > 0:
self._logrmin = numpy.log10(self.rmin)
else:
self._logrmin = numpy.log10(numpy.min(self.r[self.r > 0]))
self._logrmax = numpy.log10(self.rmax)
self._logprior_volume = numpy.log(self._logrmax - self._logrmin)
# Precalculate useful things
self._logMtot = numpy.log(numpy.sum(self.m))
gamma = 4 * numpy.pi * self.r**2 * self.m * self.Npart
self._ll0 = numpy.sum(numpy.log(gamma)) - self.Npart * self._logMtot
def rho0_from_Rs(self, Rs):
def rho0_from_Rs(self, Rs, rmin, rmax, mass):
r"""
Obtain :math:`\rho_0` of the NFW profile from the integral constraint
on total mass. Calculated as the ratio between the total particle mass
@ -346,75 +215,96 @@ class NFWPosterior(NFWProfile):
Parameters
----------
logRs : float
Rs : float
Scale radius in units matching the coordinates.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
mass : float
Mass enclosed within the radius used to fit the NFW profile.
Returns
-------
rho0: float
"""
Mtot = numpy.exp(self._logMtot)
Mnfw_norm = self.bounded_enclosed_mass(self.rmin, self.rmax, Rs, 1)
return Mtot / Mnfw_norm
return mass / self.bounded_enclosed_mass(rmin, rmax, Rs, 1)
def logprior(self, logRs):
def initlogRs(self, r, rmin, rmax, binsguess=10):
r"""
Logarithmic uniform prior on :math:`\log R_{\rm s}`.
Calculate the most often occurring value of :math:`r` used as initial
guess of :math:`R_{\rm s}` since :math:`r^2 \rho(r)` peaks at
:math:`r = R_{\rm s}`.
Parameters
----------
r : 1-dimensional array
Radial distance of particles used to fit the profile.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
binsguess : int
Number of bins to initially guess :math:`R_{\rm s}`.
Returns
-------
initlogRs : float
"""
bins = numpy.linspace(rmin, rmax, binsguess)
counts, edges = numpy.histogram(r, bins)
return numpy.log10(edges[numpy.argmax(counts)])
def logprior(self, logRs, rmin, rmax):
r"""
Logarithmic uniform prior on :math:`\log R_{\rm s}`. Unnormalised but
that does not matter.
Parameters
----------
logRs : float
Logarithmic scale factor in units matching the coordinates.
Logarithmic scale factor.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
Returns
-------
lp : float
"""
if not self._logrmin < logRs < self._logrmax:
if not rmin < 10**logRs < rmax:
return - numpy.infty
return - self._logprior_volume
return 0.
def loglikelihood(self, logRs, use_jax=False):
def loglikelihood(self, logRs, r, rmin, rmax, npart):
"""
Logarithmic likelihood.
Parameters
----------
r : 1-dimensional array
Radial distance of particles used to fit the profile.
logRs : float
Logarithmic scale factor in units matching the coordinates.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
npart : int
Number of particles used to fit the profile.
Returns
-------
ll : float
"""
Rs = 10**logRs
log = jnumpy.log if use_jax else numpy.log
# Expected enclosed mass from a NFW
Mnfw = self.bounded_enclosed_mass(self.rmin, self.rmax,
Rs, 1, use_jax)
fsum = jnumpy.sum if use_jax else numpy.sum
ll = fsum(self.logprofile(self.r, Rs, 1, use_jax)) + self._ll0
return ll - self.Npart * log(Mnfw)
mnfw = self.bounded_mass(rmin, rmax, Rs, 1)
return numpy.sum(self._logprofile(r, Rs, 1)) - npart * numpy.log(mnfw)
@property
def initlogRs(self):
r"""
The most often occuring value of :math:`r` used as initial guess of
:math:`R_{\rm s}` since :math:`r^2 \rho(r)` peaks at
:math:`r = R_{\rm s}`.
Returns
-------
initlogRs : float
"""
bins = numpy.linspace(self.rmin, self.rmax,
self._binsguess)
counts, edges = numpy.histogram(self.r, bins)
return numpy.log10(edges[numpy.argmax(counts)])
def __call__(self, logRs, use_jax=False):
def __call__(self, logRs, r, rmin, rmax, npart):
"""
Logarithmic posterior. Sum of the logarithmic prior and likelihood.
@ -422,83 +312,64 @@ class NFWPosterior(NFWProfile):
----------
logRs : float
Logarithmic scale factor in units matching the coordinates.
use_jax : bool, optional
Whether to use `JAX` expressions. By default `False`.
r : 1-dimensional array
Radial distance of particles used to fit the profile.
rmin : float
Minimum radial distance of particles used to fit the profile.
rmax : float
Maximum radial distance of particles used to fit the profile.
npart : int
Number of particles used to fit the profile.
Returns
-------
lpost : float
"""
lp = self.logprior(logRs)
lp = self.logprior(logRs, rmin, rmax)
if not numpy.isfinite(lp):
return - numpy.infty
return self.loglikelihood(logRs, use_jax) + lp
return self.loglikelihood(logRs, r, rmin, rmax, npart) + lp
def uncertainty_at_maxpost(self, logRs_max):
def fit(self, clump, eps=1e-4):
r"""
Calculate Gaussian approximation of the uncertainty at `logRs_max`, the
maximum a-posteriori estimate. This is the square root of the negative
inverse 2nd derivate of the logarithimic posterior with respect to the
logarithm of the scale factor. This is only valid `logRs_max` is the
maximum of the posterior!
Fit the NFW profile. If the fit is not converged returns NaNs.
This uses `JAX`. The functions should be compiled but unless there is
a need for more speed this is fine as it is.
Parameters
----------
logRs_max : float
Position :math:`\log R_{\rm s}` to evaluate the uncertainty. Must
be the maximum.
Returns
-------
uncertainty : float
"""
def f(x):
return self(x, use_jax=True)
# Evaluate the second derivative
h = grad(grad(f))(logRs_max)
h = float(h)
if not h < 0:
return numpy.nan
return (- 1 / h)**0.5
def maxpost_logRs(self, calc_err=False, eps=1e-4):
r"""
Maximum a-posteriori estimate of the scale radius
:math:`\log R_{\rm s}`. Returns the scale radius if the fit converged,
otherwise `numpy.nan`. Checks whether
:math:`log r_{\rm max} / R_{\rm s} > \epsilon`, where
Checks whether :math:`\log(r_{\rm max} / R_{\rm s}) > \epsilon`,
to ensure that the scale radius is not too close to the boundary which
occurs if the fit fails.
Parameters
----------
calc_err : bool, optional
Optional toggle to calculate the uncertainty on the scale radius.
By default false.
clump : :py:class:`csiborgtools.fits.Clump`
Clump being fitted.
eps : float
Tolerance to ensure we are sufficiently far from :math:`R_{200c}`.
Returns
-------
logRs: float
Log scale radius.
uncertainty : float
Uncertainty on the scale radius. Calculated following
`self.uncertainty_at_maxpost`.
Rs: float
Best fit scale radius.
rho0: float
Best fit NFW central density.
"""
assert isinstance(clump, Clump)
r = clump.r
rmin = numpy.min(r)
rmax, mtot = clump.spherical_overdensity_mass(200)
npart = numpy.sum((rmin <= r) & (r <= rmax))
# Loss function to optimize
def loss(logRs):
return - self(logRs)
return - self(logRs, r, rmin, rmax, npart)
res = minimize_scalar(loss, bounds=(self._logrmin, self._logrmax),
method='bounded')
res = minimize_scalar(
loss, bounds=(numpy.log10(rmin), numpy.log10(rmax)),
method='bounded')
if self._logrmax - res.x < eps:
if numpy.log10(rmax) - res.x < eps:
res.success = False
if not res.success:
return numpy.nan, numpy.nan
e_logRs = self.uncertainty_at_maxpost(res.x) if calc_err else numpy.nan
return res.x, e_logRs
rho0 = self.rho0_from_Rs(10**res.x, rmin, rmax, mtot)
return 10**res.x, rho0
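
As a sanity check on the closed-form enclosed mass used throughout this class, the analytic expression can be compared against direct numerical integration of the profile (values illustrative, package import assumed):

import numpy
from scipy.integrate import quad
from csiborgtools.fits import NFWProfile

Rs, rho0, r = 0.2, 1.5, 0.7
analytic = NFWProfile.mass(r, Rs, rho0)
numeric, _ = quad(
    lambda s: 4 * numpy.pi * s**2 * NFWProfile.profile(s, Rs, rho0), 0, r)
assert numpy.isclose(analytic, numeric)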

View file

@ -12,9 +12,16 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import (RealisationsMatcher, cosine_similarity, # noqa
ParticleOverlap, get_clumplims, fill_delta, fill_delta_indxs, # noqa
calculate_overlap, calculate_overlap_indxs, # noqa
dist_centmass, dist_percentile) # noqa
from .num_density import (binned_counts, number_density) # noqa
from csiborgtools.match.match import ( # noqa
ParticleOverlap,
RealisationsMatcher,
calculate_overlap,
calculate_overlap_indxs,
cosine_similarity,
dist_centmass,
dist_percentile,
fill_delta,
fill_delta_indxs,
get_clumplims,
)
from csiborgtools.match.num_density import binned_counts, number_density # noqa

View file

@ -15,14 +15,15 @@
"""
Support for matching halos between CSiBORG IC realisations.
"""
from datetime import datetime
from gc import collect
import numpy
from scipy.ndimage import gaussian_filter
from numba import jit
from tqdm import (tqdm, trange)
from ..read import concatenate_clumps
from ..utils import now
import numpy
from numba import jit
from scipy.ndimage import gaussian_filter
from tqdm import tqdm, trange
from .utils import concatenate_clumps
###############################################################################
# Realisations matcher for calculating overlaps #
@ -47,24 +48,20 @@ class RealisationsMatcher:
The mass kind whose similarity is to be checked. Must be a valid
catalogue key. By default `totpartmass`, i.e. the total particle
mass associated with a halo.
overlapper_kwargs : dict, optional
Keyword arguments passed to `ParticleOverlapper`.
"""
_nmult = None
_dlogmass = None
_mass_kind = None
_overlapper = None
def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass",
overlapper_kwargs={}):
def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass"):
assert nmult > 0
assert dlogmass > 0
assert isinstance(mass_kind, str)
self._nmult = nmult
self._dlogmass = dlogmass
self._mass_kind = mass_kind
self._overlapper = ParticleOverlap(**overlapper_kwargs)
self._overlapper = ParticleOverlap()
@property
def nmult(self):
@ -121,9 +118,9 @@ class RealisationsMatcher:
Parameters
----------
cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
cat0 : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.HaloCatalogue`
catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the cross simulation.
clumps0 : list of structured arrays
List of clump structured arrays of the reference simulation, keys
@ -133,7 +130,7 @@ class RealisationsMatcher:
List of clump structured arrays of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
delta_bcgk : 3-dimensional array
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
@ -153,7 +150,8 @@ class RealisationsMatcher:
Overlaps with the cross catalogue.
"""
# Query the KNN
verbose and print("{}: querying the KNN.".format(now()), flush=True)
verbose and print("{}: querying the KNN."
.format(datetime.now()), flush=True)
match_indxs = radius_neighbours(
catx.knn(select_initial=True), cat0.positions(in_initial=True),
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
@ -229,7 +227,7 @@ class RealisationsMatcher:
List of clump structured arrays of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
delta_bcgk : 3-dimensional array
delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
@ -582,7 +580,7 @@ class ParticleOverlap:
must include `x`, `y`, `z` and `M`.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta_bcgk : 3-dimensional array
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
@ -735,7 +733,7 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
Density field of the second halo.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta_bcgk : 3-dimensional array
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross simulations
calculated with particles assigned to halos at the final snapshot.
Assumed to only be sampled in cells :math:`[512, 1536)^3`.
@ -787,7 +785,7 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
Density field of the second halo.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta_bcgk : 3-dimensional array
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross simulations
calculated with particles assigned to halos at the final snapshot.
Assumed to only be sampled in cells :math:`[512, 1536)^3`.
@ -876,8 +874,8 @@ def dist_percentile(dist, qs, distmax=0.075):
return x
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., enforce_in32=False,
verbose=True):
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.,
enforce_int32=False, verbose=True):
"""
Find all neighbours of a trained KNN model whose center of mass separation
is less than `nmult` times the sum of their respective radii.
@ -922,7 +920,7 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., enforce_in32=False,
# so we take the first item where appropriate
mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
indxs[i] = indx[0][mask]
if enforce_in32:
if enforce_int32:
indxs[i] = indxs[i].astype(numpy.int32)
return numpy.asarray(indxs, dtype=object)
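
A self-contained sketch of the pairing criterion implemented by `radius_neighbours`: two objects match when their separation is below `nmult` times the sum of their radii. The kNN query details here are an assumption, not the function's actual internals:

import numpy
from sklearn.neighbors import NearestNeighbors

def radius_neighbours_sketch(posKNN, X, radiusX, radiusKNN, nmult=1.0):
    knn = NearestNeighbors().fit(posKNN)
    indxs = [None] * X.shape[0]
    for i in range(X.shape[0]):
        # Loose query radius guaranteeing a superset of candidates.
        dist, indx = knn.radius_neighbors(
            X[i].reshape(1, -1),
            radius=nmult * (radiusX[i] + radiusKNN.max()))
        # Keep neighbours within `nmult` times the sum of the two radii.
        mask = dist[0] / (radiusX[i] + radiusKNN[indx[0]]) < nmult
        indxs[i] = indx[0][mask]
    return numpy.asarray(indxs, dtype=object)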

View file

@ -0,0 +1,55 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Useful functions."""
import numpy
def concatenate_clumps(clumps):
"""
Concatenate an array of clumps to a single array containing all particles.
Parameters
----------
clumps : list of structured arrays
List of clumps. Each clump must be a structured array with keys
`x`, `y`, `z` and `M`.
Returns
-------
particles : structured array
"""
# Count how large array will be needed
N = 0
for clump, __ in clumps:
N += clump.size
# Infer dtype of positions
if clumps[0][0]['x'].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32
else:
posdtype = numpy.float32
# Pre-allocate array
dtype = {"names": ['x', 'y', 'z', 'M'],
"formats": [posdtype] * 3 + [numpy.float32]}
particles = numpy.full(N, numpy.nan, dtype)
# Fill it one clump by another
start = 0
for clump, __ in clumps:
end = start + clump.size
for p in ('x', 'y', 'z', 'M'):
particles[p][start:end] = clump[p]
start = end
return particles
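
A hypothetical call, matching the `(clump, __)` unpacking above; the second tuple element is ignored:

import numpy

dtype = {"names": ["x", "y", "z", "M"], "formats": [numpy.float32] * 4}
a, b = numpy.zeros(5, dtype=dtype), numpy.zeros(3, dtype=dtype)
parts = concatenate_clumps([(a, None), (b, None)])
assert parts.size == 8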

View file

@ -12,13 +12,18 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa
from .halo_cat import (HaloCatalogue, concatenate_clumps) # noqa
from .obs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups, SDSS) # noqa
from .outsim import (dump_split, combine_splits) # noqa
from .overlap_summary import (PairOverlap, NPairsOverlap, binned_resample_mean) # noqa
from .halo_cat import ClumpsCatalogue, HaloCatalogue # noqa
from .knn_summary import kNNCDFReader # noqa
from .obs import ( # noqa
SDSS,
MCXCClusters,
PlanckClusters,
TwoMPPGalaxies,
TwoMPPGroups,
)
from .outsim import combine_splits, dump_split # noqa
from .overlap_summary import NPairsOverlap, PairOverlap, binned_resample_mean # noqa
from .paths import CSiBORGPaths # noqa
from .pk_summary import PKReader # noqa
from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm # noqa
from .tpcf_summary import TPCFReader # noqa

View file

@ -16,17 +16,17 @@
Simulation box unit transformations.
"""
import numpy
from scipy.interpolate import interp1d
from astropy import constants, units
from astropy.cosmology import LambdaCDM
from astropy import (constants, units)
from ..read import ParticleReader
from scipy.interpolate import interp1d
from .readsim import ParticleReader
# Map of unit conversions
CONV_NAME = {
"length": ["peak_x", "peak_y", "peak_z", "Rs", "rmin", "rmax", "r200",
"r500", "x0", "y0", "z0", "lagpatch"],
"mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain"],
"length": ['x', 'y', 'z', "peak_x", "peak_y", "peak_z", "Rs", "rmin",
"rmax", "r200", "r500", "x0", "y0", "z0", "lagpatch"],
"mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain", 'M'],
"density": ["rho0"]
}
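
A hedged sketch of how a map like `CONV_NAME` is typically consumed, looping over catalogue columns and applying the matching transform; the `box2mpc`/`box2solarmass` method names are assumptions rather than the actual `BoxUnits` API:

def convert_columns_sketch(data, box):
    """Apply unit conversions column by column (illustrative only)."""
    for col in data.dtype.names:
        if col in CONV_NAME["length"]:
            data[col] = box.box2mpc(data[col])        # assumed method name
        elif col in CONV_NAME["mass"]:
            data[col] = box.box2solarmass(data[col])  # assumed method name
    return data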
@ -44,6 +44,7 @@ class BoxUnits:
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
"""
_name = "box_units"
_cosmo = None
def __init__(self, nsnap, nsim, paths):

View file

@ -12,54 +12,75 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Functions to read in the particle and clump files.
"""
"""CSiBORG halo catalogue."""
from abc import ABC
import numpy
from os.path import join
from sklearn.neighbors import NearestNeighbors
from .readsim import (CSiBORGPaths, read_mmain, read_initcm)
from ..utils import (flip_cols, add_columns)
from ..units import (BoxUnits, cartesian_to_radec)
from .box_units import BoxUnits
from .paths import CSiBORGPaths
from .readsim import ParticleReader, read_initcm
from .utils import add_columns, cartesian_to_radec, flip_cols
class HaloCatalogue:
r"""
Processed halo catalogue, the data should be calculated in `run_fit_halos`.
Parameters
----------
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
min_mass : float, optional
The minimum :math:`M_{\rm tot} / M_\odot` mass. By default no threshold.
max_dist : float, optional
The maximum comoving distance of a halo. By default no upper limit.
load_init : bool, optional
Whether to load the initial snapshot information. By default False.
class BaseCatalogue(ABC):
"""
Base (sub)halo catalogue.
"""
_data = None
_paths = None
_nsim = None
_selmask = None
def __init__(self, nsim, paths, min_mass=None, max_dist=None,
load_init=False):
assert isinstance(paths, CSiBORGPaths)
self._nsim = nsim
self._paths = paths
self._set_data(min_mass, max_dist, load_init)
@property
def nsim(self):
"""
The IC realisation index.
Returns
-------
nsim : int
"""
if self._nsim is None:
raise RuntimeError("`nsim` is not set!")
return self._nsim
@nsim.setter
def nsim(self, nsim):
assert isinstance(nsim, int)
self._nsim = nsim
@property
def paths(self):
"""
CSiBORG paths manager.
Returns
-------
paths : :py:class:`csiborgtools.read.CSiBORGPaths`
"""
if self._paths is None:
raise RuntimeError("`paths` is not set!")
return self._paths
@paths.setter
def paths(self, paths):
assert isinstance(paths, CSiBORGPaths)
self._paths = paths
@property
def data(self):
"""
The catalogue.
Returns
-------
data : structured array
"""
if self._data is None:
raise RuntimeError("Catalogue data not loaded!")
return self._data
@property
def nsnap(self):
"""
@ -83,163 +104,51 @@ class HaloCatalogue:
"""
return BoxUnits(self.nsnap, self.nsim, self.paths)
@property
def data(self):
"""
Halo catalogue.
@box.setter
def box(self, box):
try:
assert box._name == "box_units"
self._box = box
except AttributeError as err:
raise TypeError from err
Returns
-------
cat : structured array
"""
return self._data
def _set_data(self, min_mass, max_dist, load_init):
"""
Loads the data, merges with mmain, does various coordinate transforms.
"""
# Load the processed data
data = numpy.load(self.paths.hcat_path(self.nsim))
# Load the mmain file and add it to the data
mmain = read_mmain(self.nsim, self.paths.mmain_path)
data = self.merge_mmain_to_clumps(data, mmain)
flip_cols(data, "peak_x", "peak_z")
# Cut on number of particles and finite m200. Do not change! Hardcoded
data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
# Now also load the initial positions
if load_init:
initcm = read_initcm(self.nsim, self.paths.initmatch_path)
if initcm is not None:
data = self.merge_initmatch_to_clumps(data, initcm)
flip_cols(data, "x0", "z0")
# # Calculate redshift
# pos = [data["peak_{}".format(p)] - 0.5 for p in ("x", "y", "z")]
# vel = [data["v{}".format(p)] for p in ("x", "y", "z")]
# zpec = self.box.box2pecredshift(*vel, *pos)
# zobs = self.box.box2obsredshift(*vel, *pos)
# zcosmo = self.box.box2cosmoredshift(
# sum(pos[i]**2 for i in range(3))**0.5)
# data = add_columns(data, [zpec, zobs, zcosmo],
# ["zpec", "zobs", "zcosmo"])
# Unit conversion
convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
"r200", "r500", "Rs", "rho0",
"peak_x", "peak_y", "peak_z"]
data = self.box.convert_from_boxunits(data, convert_cols)
# Now calculate spherical coordinates
d, ra, dec = cartesian_to_radec(
data["peak_x"], data["peak_y"], data["peak_z"])
data = add_columns(data, [d, ra, dec], ["dist", "ra", "dec"])
# And do the unit transform
if load_init and initcm is not None:
data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "lagpatch"])
# Convert all that is not an integer to float32
names = list(data.dtype.names)
formats = []
for name in names:
if data[name].dtype.char in numpy.typecodes["AllInteger"]:
formats.append(numpy.int32)
else:
formats.append(numpy.float32)
dtype = numpy.dtype({"names": names, "formats": formats})
# Apply cuts on distance and total particle mass if any
data = data[data["dist"] < max_dist] if max_dist is not None else data
data = (data[data["totpartmass"] > min_mass]
if min_mass is not None else data)
self._data = data.astype(dtype)
def merge_mmain_to_clumps(self, clumps, mmain):
"""
Merge columns from the `mmain` files into the `clump` array, matching
by halo index and assuming that `index` is sorted in both arrays.
Parameters
----------
clumps : structured array
Clumps structured array.
mmain : structured array
Parent halo array whose information is to be merged into `clumps`.
Returns
-------
out : structured array
Array with added columns.
"""
X = numpy.full((clumps.size, 2), numpy.nan)
# Mask of which clumps have a mmain index
mask = numpy.isin(clumps["index"], mmain["index"])
X[mask, 0] = mmain["mass_cl"]
X[mask, 1] = mmain["sub_frac"]
return add_columns(clumps, X, ["mass_mmain", "sub_frac"])
def merge_initmatch_to_clumps(self, clumps, initcat):
"""
Merge columns from the `init_cm` files to the `clump` file.
Parameters
----------
clumps : structured array
Clumps structured array.
initcat : structured array
Catalogue with the clumps' initial centre of mass at z = 70.
Returns
-------
out : structured array
"""
# There are more initcat clumps, so check which ones have z = 0
# and then downsample
mask = numpy.isin(initcat["ID"], clumps["index"])
initcat = initcat[mask]
# Now the index ordering should match
if not numpy.alltrue(initcat["ID"] == clumps["index"]):
raise ValueError(
"Ordering of `initcat` and `clumps` is inconsistent.")
X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z', "lagpatch"]):
X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0", "lagpatch"])
def positions(self, in_initial=False):
def position(self, in_initial=False, cartesian=True):
r"""
Cartesian position components of halos in :math:`\mathrm{cMpc}`.
Position components. If Cartesian, then in :math:`\mathrm{cMpc}`. If
spherical, then radius is in :math:`\mathrm{cMpc}`, RA in
:math:`[0, 360)` degrees and DEC in :math:`[-90, 90]` degrees. Note
that the position is defined as the minimum of the gravitational
potential.
Parameters
----------
in_initial : bool, optional
Whether to define the kNN on the initial or final snapshot.
Whether to return the initial snapshot positions.
cartesian : bool, optional
Whether to return the Cartesian or spherical position components.
By default Cartesian.
Returns
-------
pos : 2-dimensional array of shape `(nhalos, 3)`
pos : 2-dimensional array of shape `(nobjects, 3)`
"""
if in_initial:
ps = ["x0", "y0", "z0"]
ps = ['x0', 'y0', 'z0']
else:
ps = ["peak_x", "peak_y", "peak_z"]
return numpy.vstack([self[p] for p in ps]).T
ps = ['x', 'y', 'z']
pos = [self[p] for p in ps]
if cartesian:
return numpy.vstack(pos).T
else:
return numpy.vstack([cartesian_to_radec(*pos)]).T
def velocities(self):
def velocity(self):
"""
Cartesian velocity components of halos. Likely in box units.
Cartesian velocity components in box units.
Returns
-------
vel : 2-dimensional array of shape `(nhalos, 3)`
vel : 2-dimensional array of shape `(nobjects, 3)`
"""
return numpy.vstack([self["v{}".format(p)] for p in ("x", "y", "z")]).T
@ -250,13 +159,13 @@ class HaloCatalogue:
Returns
-------
angmom : 2-dimensional array of shape `(nhalos, 3)`
angmom : 2-dimensional array of shape `(nobjects, 3)`
"""
return numpy.vstack([self["L{}".format(p)] for p in ("x", "y", "z")]).T
def knn(self, in_initial):
"""
kNN object of all halo positions.
kNN object fitted on all catalogue objects.
Parameters
----------
@ -307,50 +216,205 @@ class HaloCatalogue:
initpars = ["x0", "y0", "z0"]
if key in initpars and key not in self.keys:
raise RuntimeError("Initial positions are not set!")
return self._data[key]
return self.data[key]
def __len__(self):
return self.data.size
###############################################################################
# Useful functions #
###############################################################################
class ClumpsCatalogue(BaseCatalogue):
r"""
Clumps catalogue, defined in the final snapshot.
def concatenate_clumps(clumps):
"""
Concatenate an array of clumps to a single array containing all particles.
TODO:
Add fitted quantities.
Add threshold on number of particles
Parameters
----------
clumps : list of structured arrays
Returns
-------
particles : structured array
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
maxdist : float, optional
The maximum comoving distance of a halo. By default
:math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`,
which corresponds to the high-resolution region.
"""
# Count how large an array will be needed
N = 0
for clump, __ in clumps:
N += clump.size
# Infer dtype of positions
if clumps[0][0]['x'].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32
else:
posdtype = numpy.float32
def __init__(self, nsim, paths, maxdist=155.5 / 0.705):
self.nsim = nsim
self.paths = paths
# Read in the clumps from the final snapshot
partreader = ParticleReader(self.paths)
cols = ["index", "parent", 'x', 'y', 'z', "mass_cl"]
data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols)
# Overwrite the parent with the ultimate parent
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
data["parent"] = mmain["ultimate_parent"]
# Pre-allocate array
dtype = {"names": ['x', 'y', 'z', 'M'],
"formats": [posdtype] * 3 + [numpy.float32]}
particles = numpy.full(N, numpy.nan, dtype)
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(data, "x", "z")
for p in ('x', 'y', 'z'):
data[p] -= 0.5
data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', "mass_cl"])
# Fill it one clump by another
start = 0
for clump, __ in clumps:
end = start + clump.size
for p in ('x', 'y', 'z', 'M'):
particles[p][start:end] = clump[p]
start = end
mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist
self._data = data[mask]
return particles
@property
def ismain(self):
"""
Whether the clump is a main halo.
Returns
-------
ismain : 1-dimensional array
"""
return self["index"] == self["parent"]
def _set_data(self, min_mass, max_dist, load_init):
"""
TODO: old later remove.
Loads the data, merges with mmain, does various coordinate transforms.
"""
# Load the processed data
data = numpy.load(self.paths.hcat_path(self.nsim))
# Load the mmain file and add it to the data
# TODO: read the mmain here
# mmain = read_mmain(self.nsim, self.paths.mmain_dir)
# data = self.merge_mmain_to_clumps(data, mmain)
flip_cols(data, "peak_x", "peak_z")
# Cut on number of particles and finite m200. Do not change! Hardcoded
data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
# Now also load the initial positions
if load_init:
initcm = read_initcm(self.nsim,
self.paths.initmatch_path(self.nsim, "cm"))
if initcm is not None:
data = self.merge_initmatch_to_clumps(data, initcm)
flip_cols(data, "x0", "z0")
# Unit conversion
convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
"r200", "r500", "Rs", "rho0",
"peak_x", "peak_y", "peak_z"]
data = self.box.convert_from_boxunits(data, convert_cols)
# And do the unit transform
if load_init and initcm is not None:
data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "lagpatch"])
# Convert all that is not an integer to float32
names = list(data.dtype.names)
formats = []
for name in names:
if data[name].dtype.char in numpy.typecodes["AllInteger"]:
formats.append(numpy.int32)
else:
formats.append(numpy.float32)
dtype = numpy.dtype({"names": names, "formats": formats})
# Apply cuts on distance and total particle mass if any
data = data[data["dist"] < max_dist] if max_dist is not None else data
data = (data[data["totpartmass"] > min_mass]
if min_mass is not None else data)
self._data = data.astype(dtype)
def merge_mmain_to_clumps(self, clumps, mmain):
"""
TODO: old, later remove.
Merge columns from the `mmain` files into the `clump` array, matching
by halo index and assuming that `index` is sorted in both arrays.
Parameters
----------
clumps : structured array
Clumps structured array.
mmain : structured array
Parent halo array whose information is to be merged into `clumps`.
Returns
-------
out : structured array
Array with added columns.
"""
X = numpy.full((clumps.size, 2), numpy.nan)
# Mask of which clumps have a mmain index
mask = numpy.isin(clumps["index"], mmain["index"])
X[mask, 0] = mmain["mass_cl"]
X[mask, 1] = mmain["sub_frac"]
return add_columns(clumps, X, ["mass_mmain", "sub_frac"])
def merge_initmatch_to_clumps(self, clumps, initcat):
"""
TODO: old, later remove.
Merge columns from the `init_cm` files to the `clump` file.
Parameters
----------
clumps : structured array
Clumps structured array.
initcat : structured array
Catalogue with the clumps' initial centre of mass at z = 70.
Returns
-------
out : structured array
"""
# There are more initcat clumps, so check which ones have z = 0
# and then downsample
mask = numpy.isin(initcat["ID"], clumps["index"])
initcat = initcat[mask]
# Now the index ordering should match
if not numpy.alltrue(initcat["ID"] == clumps["index"]):
raise ValueError(
"Ordering of `initcat` and `clumps` is inconsistent.")
X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z', "lagpatch"]):
X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0", "lagpatch"])
class HaloCatalogue(BaseCatalogue):
r"""
Halo catalogue, i.e. parent halos with summed substructure, defined in the
final snapshot.
TODO:
Add the fitted quantities
Add threshold on number of particles
Parameters
----------
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
maxdist : float, optional
The maximum comoving distance of a halo. By default
:math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`,
which corresponds to the high-resolution region.
"""
def __init__(self, nsim, paths, maxdist=155.5 / 0.705):
self.nsim = nsim
self.paths = paths
# Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
data = mmain["mmain"]
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(data, "x", "z")
for p in ('x', 'y', 'z'):
data[p] -= 0.5
data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', 'M'])
mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist
self._data = data[mask]
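A usage sketch for the two catalogues, assuming the `mmain` files have already been generated and that the Glamdring defaults from the package `__init__` apply (7444 is one of the CSiBORG realisations used in the notebooks):

import csiborgtools

paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
cat = csiborgtools.read.HaloCatalogue(7444, paths)
pos = cat.position()                  # Cartesian components in cMpc
sph = cat.position(cartesian=False)   # distance, RA, dec

clumps = csiborgtools.read.ClumpsCatalogue(7444, paths)
main_masses = clumps["mass_cl"][clumps.ismain]  # main haloes only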

View file

@ -13,18 +13,41 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""kNN-CDF reader."""
from os.path import join
from glob import glob
import joblib
import numpy
from scipy.special import factorial
import joblib
class kNNCDFReader:
"""
Shortcut object to read in the kNN CDF data.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
"""
def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
_paths = None
def __init__(self, paths):
self.paths = paths
@property
def paths(self):
"""
Paths manager.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
"""
return self._paths
@paths.setter
def paths(self, paths):
# assert isinstance(paths, CSiBORGPaths) # REMOVE
self._paths = paths
def read(self, run, kind, rmin=None, rmax=None, to_clip=True):
"""
Read the auto- or cross-correlation kNN-CDF data for a given run.
@ -33,8 +56,8 @@ class kNNCDFReader:
----------
run : str
Run ID to read in.
folder : str
Path to the folder where the auto-correlation kNN-CDF is stored.
kind : str
Type of correlation. Can be either `auto` or `cross`.
rmin : float, optional
Minimum separation. By default ignored.
rmax : float, optional
@ -50,10 +73,13 @@ class kNNCDFReader:
out : 3-dimensional array of shape `(len(files), len(ks), neval)`
Array of CDFs or cross-correlations.
"""
run += ".p"
files = [f for f in glob(join(folder, "*")) if run in f]
assert kind in ["auto", "cross"]
if kind == "auto":
files = self.paths.knnauto_path(run)
else:
files = self.paths.knncross_path(run)
if len(files) == 0:
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
raise RuntimeError("No files found for run `{}`.".format(run))
for i, file in enumerate(files):
data = joblib.load(file)
@ -200,22 +226,3 @@ class kNNCDFReader:
"""
V = 4 * numpy.pi / 3 * rs**3
return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
@staticmethod
def cross_files(ic, folder):
"""
Return the file paths corresponding to the cross-correlation of a given
IC.
Parameters
----------
ic : int
The desired IC.
folder : str
The folder containing the cross-correlation files.
Returns
-------
filepath : list of str
"""
return [file for file in glob(join(folder, "*")) if str(ic) in file]
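For reference, the closing expression above is the standard result for a homogeneous Poisson point process with number density :math:`\bar{n}`: the probability of finding exactly k points within a sphere of radius r, and hence the analytical baseline the measured kNN-CDFs can be compared against, is

P_k(r) = \frac{(\bar{n} V)^{k}}{k!} \, e^{-\bar{n} V},
\qquad V = \frac{4}{3} \pi r^{3},
\qquad \mathrm{CDF}_k(r) = 1 - \sum_{i=0}^{k-1} P_i(r).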

View file

@ -18,13 +18,14 @@ Scripts to read in observation.
from abc import ABC, abstractproperty
from os.path import join
from warnings import warn
import numpy
from scipy import constants
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy import units
from ..utils import (cols_to_structured)
import numpy
from astropy import units
from astropy.coordinates import SkyCoord
from astropy.io import fits
from scipy import constants
from .utils import cols_to_structured
###############################################################################
# Text survey base class #
@ -101,10 +102,8 @@ class TwoMPPGalaxies(TextSurvey):
self._set_data(fpath)
def _set_data(self, fpath):
"""
Set the catalogue
"""
from scipy.constants import c
# Read the catalogue and select non-fake galaxies
cat = numpy.genfromtxt(fpath, delimiter="|", )
cat = cat[cat[:, 12] == 0, :]
@ -151,9 +150,6 @@ class TwoMPPGroups(TextSurvey):
self._set_data(fpath)
def _set_data(self, fpath):
"""
Set the catalogue
"""
cat = numpy.genfromtxt(fpath, delimiter="|", )
# Pre-allocate and fill the array
cols = [("RA", numpy.float64), ("DEC", numpy.float64),
@ -218,13 +214,12 @@ class FitsSurvey(ABC):
@h.setter
def h(self, h):
"""Sets the little h."""
self._h = h
@staticmethod
def _check_in_list(member, members, kind):
"""
Checks that `member` is a member of a list `members`, `kind` is a
Check that `member` is a member of a list `members`, `kind` is a
member type name.
"""
if member not in members:
@ -247,7 +242,7 @@ class FitsSurvey(ABC):
@abstractproperty
def size(self):
"""
Number of samples in the catalogue.
Return the number of samples in the catalogue.
Returns
-------
@ -274,7 +269,7 @@ class FitsSurvey(ABC):
@selection_mask.setter
def selection_mask(self, mask):
"""Sets the selection mask."""
"""Set the selection mask."""
if not (isinstance(mask, numpy.ndarray)
and mask.ndim == 1
and mask.dtype == bool):
@ -311,6 +306,7 @@ class FitsSurvey(ABC):
Parameters
----------
key : str
FITS key.
Returns
-------
@ -331,7 +327,7 @@ class FitsSurvey(ABC):
def make_mask(self, steps):
"""
Make a survey mask from a series of steps. Expected to look e.g. like
Make a survey mask from a series of steps, expected to look as below.
```
def steps(cls):
@ -343,6 +339,7 @@ class FitsSurvey(ABC):
Parameters
----------
steps : list of steps
Selection steps.
Returns
-------
@ -359,20 +356,17 @@ class FitsSurvey(ABC):
return out
def __getitem__(self, key):
"""
Return values for this `key`. If in both return from `routine_keys`.
"""
# Check duplicates
if key in self.routine_keys and key in self.fits_keys:
warn("Key `{}` found in both `routine_keys` and `fits_keys`. "
"Returning `routine_keys` value.".format(key), UserWarning)
"Returning `routine_keys` value.".format(key), stacklevel=1)
if key in self.routine_keys:
func, args = self.routines[key]
out = func(*args)
elif key in self.fits_keys:
warn("Returning a FITS property. Be careful about little h!",
UserWarning)
stacklevel=1)
out = self.get_fitsitem(key)
else:
raise KeyError("Unrecognised key `{}`.".format(key))
@ -541,7 +535,7 @@ class MCXCClusters(FitsSurvey):
return self.get_fitsitem(key) * 1e14 * (self._hdata / self.h)**2
def _lum(self, key):
"""Get luminosity. Puts back units to be in ergs/s"""
"""Get luminosity, puts back units to be in ergs/s."""
return self.get_fitsitem(key) * 1e44 * (self._hdata / self.h)**2
###############################################################################
@ -669,14 +663,14 @@ class SDSS(FitsSurvey):
return self._absmag(photo, band1) - self._absmag(photo, band2)
def _dist(self):
"""
Get the corresponding distance estimate from `ZDIST`, which is defined
as:
r"""
Get the corresponding distance estimate from `ZDIST`, defined as below.
"Distance estimate using pecular velocity model of Willick et al.
(1997), expressed as a redshift equivalent; multiply by c/H0 for
Mpc"
Converts little h.
Distance is converted to :math:`h \neq 1` units.
"""
return self.get_fitsitem("ZDIST") * constants.c * 1e-3 / (100 * self.h)

View file

@ -15,9 +15,10 @@
"""
I/O functions for analysing the CSiBORG realisations.
"""
import numpy
from os.path import join
from os import remove
from os.path import join
import numpy
from tqdm import trange

View file

@ -15,7 +15,8 @@
"""
Tools for summarising various results.
"""
from os.path import (join, isfile)
from os.path import isfile, join
import numpy
from tqdm import tqdm
@ -26,7 +27,7 @@ class PairOverlap:
Parameters
----------
cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue`
cat0, catx: :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogues corresponding to the reference and cross
simulations.
fskel : str, optional
@ -121,9 +122,9 @@ class PairOverlap:
inv_ngp_overlap = [[] for __ in range(cross_size)]
inv_smoothed_overlap = [[] for __ in range(cross_size)]
for ref_id in range(match_indxs.size):
for cross_id, ngp_cross, smoothed_cross in zip(match_indxs[ref_id],
ngp_overlap[ref_id],
smoothed_overlap[ref_id]): # noqa
iters = zip(match_indxs[ref_id], ngp_overlap[ref_id],
smoothed_overlap[ref_id], strict=True)
for cross_id, ngp_cross, smoothed_cross in iters:
inv_match_indxs[cross_id].append(ref_id)
inv_ngp_overlap[cross_id].append(ngp_cross)
inv_smoothed_overlap[cross_id].append(smoothed_cross)
@ -198,8 +199,8 @@ class PairOverlap:
def summed_overlap(self, from_smoothed):
"""
Summed overlap of each halo in the reference simulation with the cross
simulation.
Calculate summed overlap of each halo in the reference simulation with
the cross simulation.
Parameters
----------
@ -319,7 +320,7 @@ class PairOverlap:
simulation from the crossed simulation.
Parameters
-----------
----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
overlap_threshold : float, optional
@ -408,7 +409,7 @@ class PairOverlap:
Returns
-------
out : :py:class:`csiborgtools.read.HaloCatalogue` or array
out : :py:class:`csiborgtools.read.ClumpsCatalogue` or array
"""
if key is None:
return self._cat0
@ -429,7 +430,7 @@ class PairOverlap:
Returns
-------
out : :py:class:`csiborgtools.read.HaloCatalogue` or array
out : :py:class:`csiborgtools.read.ClumpsCatalogue` or array
"""
if key is None:
return self._catx
@ -456,9 +457,9 @@ class NPairsOverlap:
Parameters
----------
cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
cat0 : :py:class:`csiborgtools.read.ClumpsCatalogue`
Reference simulation halo catalogue.
catxs : list of :py:class:`csiborgtools.read.HaloCatalogue`
catxs : list of :py:class:`csiborgtools.read.ClumpsCatalogue`
List of cross simulation halo catalogues.
fskel : str, optional
Path to the overlap. By default `None`, i.e.
@ -478,8 +479,8 @@ class NPairsOverlap:
def summed_overlap(self, from_smoothed, verbose=False):
"""
Summed overlap of each halo in the reference simulation with the cross
simulations.
Calculate summed overlap of each halo in the reference simulation with
the cross simulations.
Parameters
----------
@ -526,7 +527,7 @@ class NPairsOverlap:
simulation from the crossed simulation.
Parameters
-----------
----------
from_smoothed : bool
Whether to use the smoothed overlap or not.
overlap_threshold : float, optional

366
csiborgtools/read/paths.py Normal file
View file

@ -0,0 +1,366 @@
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""CSiBORG paths manager."""
from glob import glob
from os import makedirs, mkdir
from os.path import isdir, join
from warnings import warn
import numpy
class CSiBORGPaths:
"""
Paths manager for CSiBORG IC realisations.
Parameters
----------
srcdir : str
Path to the folder where the RAMSES outputs are stored.
postdir: str
Path to the folder where post-processed files are stored.
"""
_srcdir = None
_postdir = None
def __init__(self, srcdir=None, postdir=None):
self.srcdir = srcdir
self.postdir = postdir
@staticmethod
def _check_directory(path):
if not isdir(path):
raise IOError("Invalid directory `{}`!".format(path))
@property
def srcdir(self):
"""
Path to the folder where CSiBORG simulations are stored.
Returns
-------
path : str
"""
if self._srcdir is None:
raise ValueError("`srcdir` is not set!")
return self._srcdir
@srcdir.setter
def srcdir(self, path):
if path is None:
return
self._check_directory(path)
self._srcdir = path
@property
def postdir(self):
"""
Path to the folder where post-processed files are stored.
Returns
-------
path : str
"""
if self._postdir is None:
raise ValueError("`postdir` is not set!")
return self._postdir
@postdir.setter
def postdir(self, path):
if path is None:
return
self._check_directory(path)
self._postdir = path
@property
def temp_dumpdir(self):
"""
Path to a temporary dumping folder.
Returns
-------
path : str
"""
fpath = join(self.postdir, "temp")
if not isdir(fpath):
mkdir(fpath)
warn("Created directory `{}`.".format(fpath), UserWarning,
stacklevel=1)
return fpath
def mmain_path(self, nsnap, nsim):
"""
Path to the `mmain` summed substructure files.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "mmain")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
return join(
fdir,
"mmain_{}_{}.npz".format(str(nsim).zfill(5), str(nsnap).zfill(5))
)
def initmatch_path(self, nsim, kind):
"""
Path to the `initmatch` files where the clump match between the
initial and final snapshot is stored.
Parameters
----------
nsim : int
IC realisation index.
kind : str
Type of match. Can be either `cm` or `particles`.
Returns
-------
path : str
"""
assert kind in ["cm", "particles"]
fdir = join(self.postdir, "initmatch")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
return join(fdir, "{}_{}.npy".format(kind, str(nsim).zfill(5)))
def split_path(self, nsnap, nsim):
"""
Path to the `split` files from `pre_splithalos`.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "split")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
return join(fdir, "clumps_{}_{}.npz"
.format(str(nsim).zfill(5), str(nsnap).zfill(5)))
def get_ics(self, tonew):
"""
Get CSiBORG IC realisation IDs from the list of folders in
`self.srcdir`.
Parameters
----------
tonew : bool
If `True`, path to the '_new' ICs is returned.
Returns
-------
ids : 1-dimensional array
"""
files = glob(join(self.srcdir, "ramses_out*"))
files = [f.split("/")[-1] for f in files] # Select only file names
if tonew:
files = [f for f in files if "_new" in f]
ids = [int(f.split("_")[2]) for f in files] # Take the IC IDs
else:
files = [f for f in files if "_inv" not in f] # Remove inv. ICs
files = [f for f in files if "_new" not in f] # Remove _new
files = [f for f in files if "OLD" not in f] # Remove _old
ids = [int(f.split("_")[-1]) for f in files]
try:
ids.remove(5511)
except ValueError:
pass
return numpy.sort(ids)
def ic_path(self, nsim, tonew=False):
"""
Path to a CSiBORG IC realisation folder.
Parameters
----------
nsim : int
IC realisation index.
tonew : bool, optional
Whether to return the path to the '_new' IC realisation.
Returns
-------
path : str
"""
fname = "ramses_out_{}"
if tonew:
fname += "_new"
return join(self.srcdir, fname.format(nsim))
def get_snapshots(self, nsim):
"""
List of available snapshots of a CSiBORG IC realisation.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
snapshots : 1-dimensional array
"""
simpath = self.ic_path(nsim, tonew=False)
# Get all files in simpath that start with output_
snaps = glob(join(simpath, "output_*"))
# Take just the last _00XXXX from each file and strip zeros
snaps = [int(snap.split('_')[-1].lstrip('0')) for snap in snaps]
return numpy.sort(snaps)
def snapshot_path(self, nsnap, nsim):
"""
Path to a CSiBORG IC realisation snapshot.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
snappath : str
"""
tonew = nsnap == 1
simpath = self.ic_path(nsim, tonew=tonew)
return join(simpath, "output_{}".format(str(nsnap).zfill(5)))
def hcat_path(self, nsim):
"""
Path to the final snapshot halo catalogue from `fit_halos.py`.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
path : str
"""
nsnap = str(max(self.get_snapshots(nsim))).zfill(5)
fname = "ramses_out_{}_{}.npy".format(str(self.nsim).zfill(5), nsnap)
return join(self.postdir, fname)
def knnauto_path(self, run, nsim=None):
"""
Path to the `knn` auto-correlation files. If `nsim` is not specified
returns a list of files for this run for all available simulations.
Parameters
----------
run : str
Type of run.
nsim : int, optional
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "knn", "auto")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
if nsim is not None:
return join(fdir, "knncdf_{}_{}.p".format(str(nsim).zfill(5), run))
files = glob(join(fdir, "knncdf*"))
run = "__" + run
return [f for f in files if run in f]
def knncross_path(self, run, nsims=None):
"""
Path to the `knn` cross-correlation files. If `nsims` is not specified
returns a list of files for this run for all available simulations.
Parameters
----------
run : str
Type of run.
nsims : len-2 tuple of int, optional
IC realisation indices.
Returns
-------
path : str
"""
fdir = join(self.postdir, "knn", "cross")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
if nsims is not None:
assert isinstance(nsims, (list, tuple)) and len(nsims) == 2
nsim0 = str(nsims[0]).zfill(5)
nsimx = str(nsims[1]).zfill(5)
return join(fdir, "knncdf_{}_{}__{}.p".format(nsim0, nsimx, run))
files = glob(join(fdir, "knncdf*"))
run = "__" + run
return [f for f in files if run in f]
def tpcfauto_path(self, run, nsim=None):
"""
Path to the `tpcf` auto-correlation files. If `nsim` is not specified
returns a list of files for this run for all available simulations.
Parameters
----------
run : str
Type of run.
nsim : int, optional
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "tpcf", "auto")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
if nsim is not None:
return join(fdir, "tpcf{}_{}.p".format(str(nsim).zfill(5), run))
files = glob(join(fdir, "tpcf*"))
run = "__" + run
return [f for f in files if run in f]
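A usage sketch of the new manager; the directories below are the Glamdring defaults from the package `__init__`, and any existing `srcdir`/`postdir` pair works since the setters validate the directories:

from csiborgtools.read import CSiBORGPaths

paths = CSiBORGPaths(srcdir="/mnt/extraspace/hdesmond/",
                     postdir="/mnt/extraspace/rstiskalek/csiborg/")
nsim = paths.get_ics(False)[0]           # first available IC realisation
nsnap = max(paths.get_snapshots(nsim))   # its final snapshot
print(paths.mmain_path(nsnap, nsim))     # .../mmain/mmain_XXXXX_YYYYY.npz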

View file

@ -13,8 +13,8 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Power spectrum reader."""
import numpy
import joblib
import numpy
from tqdm import tqdm
@ -24,7 +24,7 @@ class PKReader:
Parameters
----------
ic_ids : list of int
get_ics : list of int
IC IDs to be read.
hw : float
Box half-width.
@ -35,8 +35,8 @@ class PKReader:
dtype : dtype, optional
Output precision. By default `numpy.float32`.
"""
def __init__(self, ic_ids, hw, fskel=None, dtype=numpy.float32):
self.ic_ids = ic_ids
def __init__(self, get_ics, hw, fskel=None, dtype=numpy.float32):
self.get_ics = get_ics
self.hw = hw
if fskel is None:
fskel = "/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p"
@ -46,7 +46,7 @@ class PKReader:
@staticmethod
def _set_klim(kmin, kmax):
"""
Sets limits on the wavenumber to 0 and infinity if `None`s provided.
Set limits on the wavenumber to 0 and infinity if `None`s provided.
"""
if kmin is None:
kmin = 0
@ -62,27 +62,27 @@ class PKReader:
----------
kmin : float, optional
The minimum wavenumber. By default `None`, i.e. 0.
kmin : float, optional
kmax : float, optional
The maximum wavenumber. By default `None`, i.e. infinity.
Returns
-------
ks : 1-dimensional array
Array of wavenumbers.
pks : 2-dimensional array of shape `(len(self.ic_ids), ks.size)`
pks : 2-dimensional array of shape `(len(self.get_ics), ks.size)`
Autocorrelation of each simulation.
"""
kmin, kmax = self._set_klim(kmin, kmax)
ks, pks, sel = None, None, None
for i, nsim in enumerate(self.ic_ids):
for i, nsim in enumerate(self.get_ics):
pk = joblib.load(self.fskel.format(nsim, nsim, self.hw))
# Get cuts and pre-allocate arrays
if i == 0:
x = pk.k3D
sel = (kmin < x) & (x < kmax)
ks = x[sel].astype(self.dtype)
pks = numpy.full((len(self.ic_ids), numpy.sum(sel)), numpy.nan,
dtype=self.dtype)
pks = numpy.full((len(self.get_ics), numpy.sum(sel)),
numpy.nan, dtype=self.dtype)
pks[i, :] = pk.Pk[sel, 0, 0]
return ks, pks
@ -99,7 +99,7 @@ class PKReader:
The second IC ID.
kmin : float, optional
The minimum wavenumber. By default `None`, i.e. 0.
kmin : float, optional
kmax : float, optional
The maximum wavenumber. By default `None`, i.e. infinity.
Returns
@ -133,7 +133,7 @@ class PKReader:
----------
kmin : float, optional
The minimum wavenumber. By default `None`, i.e. 0.
kmin : float, optional
kmax : float, optional
The maximum wavenumber. By default `None`, i.e. infinity.
Returns
@ -144,12 +144,12 @@ class PKReader:
Cross-correlations. The first column is the IC being cross-correlated
with the remaining ICs in the second column.
"""
nics = len(self.ic_ids)
nics = len(self.get_ics)
ks, xpks = None, None
for i, ic0 in enumerate(tqdm(self.ic_ids)):
for i, ic0 in enumerate(tqdm(self.get_ics)):
k = 0
for ic1 in self.ic_ids:
for ic1 in self.get_ics:
# Skip the auto-correlation terms
if ic0 == ic1:
continue

View file

@ -15,274 +15,18 @@
"""
Functions to read in the particle and clump files.
"""
from os.path import (join, isfile, isdir)
from glob import glob
from os.path import isfile, join
from warnings import warn
import numpy
from scipy.io import FortranFile
from tqdm import tqdm
from ..utils import (cols_to_structured)
from tqdm import tqdm, trange
from .paths import CSiBORGPaths
from .utils import cols_to_structured
###############################################################################
# Paths manager #
###############################################################################
class CSiBORGPaths:
"""
Paths manager for CSiBORG IC realisations.
Parameters
----------
srcdir : str
Path to the folder where CSiBORG simulations are stored.
dumpdir : str
Path to the folder where files from `run_fit_halos` are stored.
mmain_path : str
Path to folder where mmain files are stored.
initmatch_path : str
Path to the folder where particle ID match between the first and final
snapshot is stored.
"""
_srcdir = None
_dumpdir = None
_mmain_path = None
_initmatch_path = None
def __init__(self, srcdir=None, dumpdir=None, mmain_path=None,
initmatch_path=None):
self.srcdir = srcdir
self.dumpdir = dumpdir
self.mmain_path = mmain_path
self.initmatch_path = initmatch_path
@staticmethod
def _check_directory(path):
if not isdir(path):
raise IOError("Invalid directory `{}`!".format(path))
@property
def srcdir(self):
"""
Path to the folder where CSiBORG simulations are stored.
Returns
-------
path : str
"""
if self._srcdir is None:
raise ValueError("`srcdir` is not set!")
return self._srcdir
@srcdir.setter
def srcdir(self, path):
if path is None:
return
self._check_directory(path)
self._srcdir = path
@property
def dumpdir(self):
"""
Path to the folder where files from `run_fit_halos` are stored.
Returns
-------
path : str
"""
if self._dumpdir is None:
raise ValueError("`dumpdir` is not set!")
return self._dumpdir
@dumpdir.setter
def dumpdir(self, path):
if path is None:
return
self._check_directory(path)
self._dumpdir = path
@property
def temp_dumpdir(self):
"""
Path to a temporary dumping folder.
Returns
-------
path : str
"""
fpath = join(self.dumpdir, "temp")
if not isdir(fpath):
raise IOError("Invalid directory `{}`.".format(fpath))
return fpath
@property
def mmain_path(self):
"""
Path to the folder where mmain files are stored.
Returns
-------
path : str
"""
if self._mmain_path is None:
raise ValueError("`mmain_path` is not set!")
return self._mmain_path
@mmain_path.setter
def mmain_path(self, path):
if path is None:
return
self._check_directory(path)
self._mmain_path = path
@property
def initmatch_path(self):
"""
Path to the folder where the match between the first and final
snapshot is stored.
Returns
-------
path : str
"""
if self._initmatch_path is None:
raise ValueError("`initmatch_path` is not set!")
return self._initmatch_path
@initmatch_path.setter
def initmatch_path(self, path):
if path is None:
return
self._check_directory(path)
self._initmatch_path = path
def ic_ids(self, tonew):
"""
CSiBORG IC realisation IDs from the list of folders in `self.srcdir`.
Parameters
----------
tonew : bool
If `True`, path to the '_new' ICs is returned.
Returns
-------
ids : 1-dimensional array
"""
files = glob(join(self.srcdir, "ramses_out*"))
files = [f.split("/")[-1] for f in files] # Select only file names
if tonew:
files = [f for f in files if "_new" in f]
ids = [int(f.split("_")[2]) for f in files] # Take the IC IDs
else:
files = [f for f in files if "_inv" not in f] # Remove inv. ICs
files = [f for f in files if "_new" not in f] # Remove _new
files = [f for f in files if "OLD" not in f] # Remove _old
ids = [int(f.split("_")[-1]) for f in files]
try:
ids.remove(5511)
except ValueError:
pass
return numpy.sort(ids)
def ic_path(self, nsim, tonew=False):
"""
Path to a CSiBORG IC realisation folder.
Parameters
----------
nsim : int
IC realisation index.
tonew : bool, optional
Whether to return the path to the '_new' IC realisation.
Returns
-------
path : str
"""
fname = "ramses_out_{}"
if tonew:
fname += "_new"
return join(self.srcdir, fname.format(nsim))
def get_snapshots(self, nsim):
"""
List of available snapshots of a CSiBORG IC realisation.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
snapshots : 1-dimensional array
"""
simpath = self.ic_path(nsim, tonew=False)
# Get all files in simpath that start with output_
snaps = glob(join(simpath, "output_*"))
# Take just the last _00XXXX from each file and strip zeros
snaps = [int(snap.split('_')[-1].lstrip('0')) for snap in snaps]
return numpy.sort(snaps)
def clump0_path(self, nsim):
"""
Path to a single dumped clump's particles. Expected to point to a
dictionary whose keys are the clump indices and whose values are
structured arrays with the clump's particles in the initial snapshot.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
path : str
"""
cdir = join(self.dumpdir, "initmatch")
return join(cdir, "clump_{}_{}.npy".format(nsim, "particles"))
def snapshot_path(self, nsnap, nsim):
"""
Path to a CSiBORG IC realisation snapshot.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
snappath : str
"""
if nsnap == 1:
tonew = True
simpath = self.ic_path(nsim, tonew=tonew)
return join(simpath, "output_{}".format(str(nsnap).zfill(5)))
def hcat_path(self, nsim):
"""
Path to the final snapshot halo catalogue from `fit_halos.py`.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
path : str
"""
nsnap = str(max(self.get_snapshots(nsim))).zfill(5)
fname = "ramses_out_{}_{}.npy".format(str(self.nsim).zfill(5), nsnap)
return join(self.dumpdir, fname)
###############################################################################
# Fortran readers #
# Fortran particle reader #
###############################################################################
@ -297,13 +41,24 @@ class ParticleReader:
_paths = None
def __init__(self, paths):
# assert isinstance(paths, CSiBORGPaths)
self._paths = paths
self.paths = paths
@property
def paths(self):
"""
Paths manager.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
"""
return self._paths
@paths.setter
def paths(self, paths):
# assert isinstance(paths, CSiBORGPaths) # REMOVE
self._paths = paths
def read_info(self, nsnap, nsim):
"""
Read CSiBORG simulation snapshot info.
@ -332,7 +87,7 @@ class ParticleReader:
keys = info[eqs - 1]
vals = info[eqs + 1]
return {key: val for key, val in zip(keys, vals)}
return {key: val for key, val in zip(keys, vals, strict=True)}
def open_particle(self, nsnap, nsim, verbose=True):
"""
@ -392,7 +147,7 @@ class ParticleReader:
@staticmethod
def read_sp(dtype, partfile):
"""
Utility function to read a single particle file.
Read a single particle file.
Parameters
----------
@ -490,7 +245,7 @@ class ParticleReader:
for cpu in iters:
i = start_ind[cpu]
j = nparts[cpu]
for (fname, fdtype) in zip(fnames, fdtypes):
for (fname, fdtype) in zip(fnames, fdtypes, strict=True):
if fname in pars_extract:
out[fname][i:i + j] = self.read_sp(fdtype, partfiles[cpu])
else:
@ -522,7 +277,7 @@ class ParticleReader:
"""
nsnap = str(nsnap).zfill(5)
cpu = str(cpu + 1).zfill(5)
fpath = join(self.paths.ic_path(nsim, to_new=False),
fpath = join(self.paths.ic_path(nsim, tonew=False),
"output_{}".format(nsnap),
"unbinding_{}.out{}".format(nsnap, cpu))
return FortranFile(fpath)
@ -561,31 +316,9 @@ class ParticleReader:
return clumpid
@staticmethod
def drop_zero_indx(clump_ids, particles):
"""
Drop from `clump_ids` and `particles` entries whose clump index is 0.
Parameters
----------
clump_ids : 1-dimensional array
Array of clump IDs.
particles : structured array
Array of the particle data.
Returns
-------
clump_ids : 1-dimensional array
The array of clump IDs after removing zero clump ID entries.
particles : structured array
The particle data after removing zero clump ID entries.
"""
mask = clump_ids != 0
return clump_ids[mask], particles[mask]
def read_clumps(self, nsnap, nsim, cols=None):
"""
Read in a clump file `clump_Nsnap.dat`.
Read in a clump file `clump_xxXXX.dat`.
Parameters
----------
@ -593,7 +326,6 @@ class ParticleReader:
Snapshot index.
nsim : int
IC realisation index.
cols : list of str, optional.
Columns to extract. By default `None` and all columns are
extracted.
@ -601,84 +333,150 @@ class ParticleReader:
Returns
-------
out : structured array
Structured array of the clumps.
"""
nsnap = str(nsnap).zfill(5)
fname = join(self.paths.ic_path(nsim, to_new=False),
fname = join(self.paths.ic_path(nsim, tonew=False),
"output_{}".format(nsnap),
"clump_{}.dat".format(nsnap))
# Check the file exists.
if not isfile(fname):
raise FileExistsError(
"Clump file `{}` does not exist.".format(fname))
# Read in the clump array. This is how the columns must be written!
raise FileExistsError("Clump file `{}` does not exist."
.format(fname))
data = numpy.genfromtxt(fname)
clump_cols = [("index", numpy.int64), ("level", numpy.int64),
("parent", numpy.int64), ("ncell", numpy.float64),
("peak_x", numpy.float64), ("peak_y", numpy.float64),
("peak_z", numpy.float64), ("rho-", numpy.float64),
("rho+", numpy.float64), ("rho_av", numpy.float64),
("mass_cl", numpy.float64), ("relevance", numpy.float64)]
out0 = cols_to_structured(data.shape[0], clump_cols)
for i, name in enumerate(out0.dtype.names):
out0[name] = data[:, i]
# If take all cols then return
if cols is None:
return out0
# Make sure we have a list
# How the data is stored in the clump file.
clump_cols = {"index": (0, numpy.int32),
"level": (1, numpy.int32),
"parent": (2, numpy.int32),
"ncell": (3, numpy.float32),
"x": (4, numpy.float32),
"y": (5, numpy.float32),
"z": (6, numpy.float32),
"rho-": (7, numpy.float32),
"rho+": (8, numpy.float32),
"rho_av": (9, numpy.float32),
"mass_cl": (10, numpy.float32),
"relevance": (11, numpy.float32),
}
# Return the requested columns.
cols = [cols] if isinstance(cols, str) else cols
# Get the indxs of clump_cols to output
clump_names = [col[0] for col in clump_cols]
indxs = [None] * len(cols)
for i, col in enumerate(cols):
if col not in clump_names:
raise KeyError("...")
indxs[i] = clump_names.index(col)
# Make an array and fill it
out = cols_to_structured(out0.size, [clump_cols[i] for i in indxs])
for name in out.dtype.names:
out[name] = out0[name]
cols = list(clump_cols.keys()) if cols is None else cols
dtype = [(col, clump_cols[col][1]) for col in cols]
out = cols_to_structured(data.shape[0], dtype)
for col in cols:
out[col] = data[:, clump_cols[col][0]]
return out
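A sketch of reading only the columns needed to walk the substructure tree; `paths` is assumed to be a set-up `CSiBORGPaths` instance:

reader = ParticleReader(paths)
nsim = paths.get_ics(False)[0]
nsnap = max(paths.get_snapshots(nsim))
# Any subset of the column names listed above may be requested.
clumparr = reader.read_clumps(nsnap, nsim, cols=["index", "parent", "mass_cl"])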
###############################################################################
# Supplementary reading functions #
# Summed substructure catalogue #
###############################################################################
def read_mmain(nsim, srcdir, fname="Mmain_{}.npy"):
class MmainReader:
"""
Read `mmain` numpy arrays of central halos whose mass contains their
substructure contribution.
Parameters
----------
nsim : int
IC realisation index.
srcdir : str
Path to the folder containing the files.
fname : str, optional
File name convention. By default `Mmain_{}.npy`, where the
substituted value is `n`.
Returns
-------
out : structured array
Array with the central halo information.
Object to generate the summed substructure catalogue.
"""
fpath = join(srcdir, fname.format(nsim))
arr = numpy.load(fpath)
_paths = None
cols = [("index", numpy.int64), ("peak_x", numpy.float64),
("peak_y", numpy.float64), ("peak_z", numpy.float64),
("mass_cl", numpy.float64), ("sub_frac", numpy.float64)]
out = cols_to_structured(arr.shape[0], cols)
for i, name in enumerate(out.dtype.names):
out[name] = arr[:, i]
def __init__(self, paths):
assert isinstance(paths, CSiBORGPaths) # REMOVE
self._paths = paths
return out
@property
def paths(self):
return self._paths
def find_parents(self, clumparr, verbose=False):
"""
Find ultimate parent haloes for every clump in a final snapshot.
Parameters
----------
clumparr : structured array
Clump array. Read from `ParticleReader.read_clumps`. Must contain
`index` and `parent` columns.
verbose : bool, optional
Verbosity flag.
Returns
-------
parent_arr : 1-dimensional array of shape `(nclumps, )`
The ultimate parent halo index for every clump, i.e. referring to
its ultimate parent clump.
"""
clindex = clumparr["index"]
parindex = clumparr["parent"]
# The ultimate parent for every clump
parent_arr = numpy.zeros(clindex.size, dtype=numpy.int32)
for i in trange(clindex.size) if verbose else range(clindex.size):
tocont = clindex[i] != parindex[i] # Continue if not a main halo
par = parindex[i] # First we try the parent of this clump
while tocont:
# The element of the array corresponding to the parent clump to
# the one we're looking at
element = numpy.where(clindex == par)[0][0]
# We stop if the parent is its own parent, so a main halo. Else
# move onto the parent of the parent. Eventually this is its
# own parent and we stop, with ultimate parent=par
if clindex[element] == parindex[element]:
tocont = False
else:
par = parindex[element]
parent_arr[i] = par
return parent_arr
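A toy check of the walk with the corrected stopping condition above: clump 3 points to clump 2, which points to the main halo 1, so every clump resolves to ultimate parent 1:

clumparr = numpy.zeros(3, dtype={"names": ["index", "parent"],
                                 "formats": [numpy.int32] * 2})
clumparr["index"] = [1, 2, 3]
clumparr["parent"] = [1, 1, 2]
# MmainReader(paths).find_parents(clumparr) -> array([1, 1, 1], dtype=int32)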
def make_mmain(self, nsim, verbose=False):
"""
Make the summed substructure catalogue for a final snapshot. Includes
the position of the parent, the summed mass and the fraction of mass in
substructure.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
Returns
-------
mmain : structured array
The `mmain` catalogue.
ultimate_parent : 1-dimensional array of shape `(nclumps,)`
The ultimate parent halo index for every clump, i.e. referring to
its ultimate parent clump.
"""
nsnap = max(self.paths.get_snapshots(nsim))
partreader = ParticleReader(self.paths)
cols = ["index", "parent", "mass_cl", 'x', 'y', 'z']
clumparr = partreader.read_clumps(nsnap, nsim, cols)
ultimate_parent = self.find_parents(clumparr, verbose=verbose)
mask_main = clumparr["index"] == clumparr["parent"]
nmain = numpy.sum(mask_main)
# Preallocate already the output array
out = cols_to_structured(
nmain, [("ID", numpy.int32), ("x", numpy.float32),
("y", numpy.float32), ("z", numpy.float32),
("M", numpy.float32), ("subfrac", numpy.float32)])
out["ID"] = clumparr["index"][mask_main]
# Because for these index == parent
for p in ('x', 'y', 'z'):
out[p] = clumparr[p][mask_main]
# We want a total mass for each halo in ID_main
for i in range(nmain):
# Should include the main halo itself, i.e. its own ultimate parent
out["M"][i] = numpy.sum(
clumparr["mass_cl"][ultimate_parent == out["ID"][i]])
out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"]
return out, ultimate_parent
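A sketch of how the mmain generation script is expected to use this; the `.npz` keys match what `ClumpsCatalogue` and `HaloCatalogue` load:

mmain_reader = MmainReader(paths)
for nsim in paths.get_ics(False):
    mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
    nsnap = max(paths.get_snapshots(nsim))
    numpy.savez(paths.mmain_path(nsnap, nsim),
                mmain=mmain, ultimate_parent=ultimate_parent)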
###############################################################################
# Supplementary reading functions #
###############################################################################
def read_initcm(nsim, srcdir, fname="clump_{}_cm.npy"):
@ -704,7 +502,8 @@ def read_initcm(nsim, srcdir, fname="clump_{}_cm.npy"):
try:
return numpy.load(fpath)
except FileNotFoundError:
warn("File {} does not exist.".format(fpath))
warn("File {} does not exist.".format(fpath), UserWarning,
stacklevel=1)
return None

View file

@ -13,17 +13,42 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""2PCF reader."""
from os.path import join
from glob import glob
import numpy
import joblib
import numpy
from .paths import CSiBORGPaths
class TPCFReader:
"""
Shortcut object to read in the 2PCF data.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
"""
def read(self, run, folder):
_paths = None
def __init__(self, paths):
self.paths = paths
@property
def paths(self):
"""
Paths manager.
Parameters
----------
paths : py:class`csiborgtools.read.CSiBORGPaths`
"""
return self._paths
@paths.setter
def paths(self, paths):
assert isinstance(paths, CSiBORGPaths)
self._paths = paths
def read(self, run):
"""
Read the auto-correlation 2PCF data for a given run.
@ -32,8 +57,6 @@ class TPCFReader:
----------
run : str
Run ID to read in.
folder : str
Path to the folder where the auto-2PCF is stored.
Returns
-------
@ -42,8 +65,7 @@ class TPCFReader:
out : 2-dimensional array of shape `(len(files), len(rp))`
Array of 2PCFs.
"""
run += ".p"
files = [f for f in glob(join(folder, "*")) if run in f]
files = self.paths.tpcfauto_path(run)
if len(files) == 0:
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))

View file

@ -13,10 +13,68 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Utility functions for manipulating structured arrays.
Various coordinate transformations.
"""
import numpy
###############################################################################
# Coordinate transforms #
###############################################################################
def cartesian_to_radec(x, y, z):
"""
Calculate the radial distance, right ascension in [0, 360) degrees and
declination [-90, 90] degrees. Note, the observer should be placed in the
middle of the box.
Parameters
----------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
Returns
-------
dist, ra, dec : 1-dimensional arrays
Radial distance, right ascension and declination.
"""
dist = numpy.sqrt(x**2 + y**2 + z**2)
dec = numpy.rad2deg(numpy.arcsin(z/dist))
ra = numpy.rad2deg(numpy.arctan2(y, x))
# Make sure RA in the correct range
ra[ra < 0] += 360
return dist, ra, dec
def radec_to_cartesian(dist, ra, dec, isdeg=True):
"""
Convert distance, right ascension and declination to Cartesian coordinates.
Parameters
----------
dist, ra, dec : 1-dimensional arrays
Spherical coordinates.
isdeg : bool, optional
Whether `ra` and `dec` are in degrees. By default `True`.
Returns
-------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
"""
if isdeg:
ra = numpy.deg2rad(ra)
dec = numpy.deg2rad(dec)
x = dist * numpy.cos(dec) * numpy.cos(ra)
y = dist * numpy.cos(dec) * numpy.sin(ra)
z = dist * numpy.sin(dec)
return x, y, z
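A quick round-trip sanity check of the two transforms:

rng = numpy.random.default_rng(42)
x, y, z = rng.normal(size=(3, 100))
dist, ra, dec = cartesian_to_radec(x, y, z)
assert numpy.allclose(radec_to_cartesian(dist, ra, dec), [x, y, z])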
###############################################################################
# Array manipulation #
###############################################################################
def cols_to_structured(N, cols):
"""
@ -108,7 +166,7 @@ def rm_columns(arr, cols):
# Get a new dtype without the cols to be deleted
new_dtype = []
for dtype, name in zip(arr.dtype.descr, arr.dtype.names):
for dtype, name in zip(arr.dtype.descr, arr.dtype.names, strict=True):
if name not in cols:
new_dtype.append(dtype)

View file

@ -1,17 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .transforms import cartesian_to_radec, radec_to_cartesian # noqa
from .box_units import BoxUnits # noqa

View file

@ -1,66 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Various coordinate transformations.
"""
import numpy
def cartesian_to_radec(x, y, z):
"""
Calculate the radial distance, right ascension in [0, 360) degrees and
declination [-90, 90] degrees. Note, the observer should be placed in the
middle of the box.
Parameters
----------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
Returns
-------
dist, ra, dec : 1-dimensional arrays
Radial distance, right ascension and declination.
"""
dist = numpy.sqrt(x**2 + y**2 + z**2)
dec = numpy.rad2deg(numpy.arcsin(z/dist))
ra = numpy.rad2deg(numpy.arctan2(y, x))
# Make sure RA in the correct range
ra[ra < 0] += 360
return dist, ra, dec
def radec_to_cartesian(dist, ra, dec, isdeg=True):
"""
Convert distance, right ascension and declination to Cartesian coordinates.
Parameters
----------
dist, ra, dec : 1-dimensional arrays
Spherical coordinates.
isdeg : bool, optional
Whether `ra` and `dec` are in degres. By default `True`.
Returns
-------
x, y, z : 1-dimensional arrays
Cartesian coordinates.
"""
if isdeg:
ra = numpy.deg2rad(ra)
dec = numpy.deg2rad(dec)
x = dist * numpy.cos(dec) * numpy.cos(ra)
y = dist * numpy.cos(dec) * numpy.sin(ra)
z = dist * numpy.sin(dec)
return x, y, z

View file

@ -1,24 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from datetime import datetime
from .recarray_manip import (cols_to_structured, add_columns, rm_columns, # noqa
list_to_ndarray, array_to_structured, # noqa
flip_cols, extract_from_structured) # noqa
def now(tz=None):
"""Shortcut to `datetime.datetime.now`."""
return datetime.now(tz=tz)

View file

@ -51,8 +51,8 @@
},
"outputs": [],
"source": [
"cat0 = csiborgtools.read.HaloCatalogue(7468)\n",
"catxs = [csiborgtools.read.HaloCatalogue(nsim) for nsim in (7588, 8020, 8452, 8836)]\n",
"cat0 = csiborgtools.read.ClumpsCatalogue(7468)\n",
"catxs = [csiborgtools.read.ClumpsCatalogue(nsim) for nsim in (7588, 8020, 8452, 8836)]\n",
"reader = csiborgtools.read.NPairsOverlap(cat0, catxs, max_dist=150 / 0.705)"
]
},

View file

@ -75,6 +75,17 @@
"wp3 = reader.mean_wp(wp3)"
]
},
{
"cell_type": "code",
"execution_count": 33,
@ -657,7 +668,7 @@
},
"outputs": [],
"source": [
"cat = csiborgtools.read.HaloCatalogue(7444, paths, min_mass=1e12, max_dist=155/0.705)"
"cat = csiborgtools.read.ClumpsCatalogue(7444, paths, min_mass=1e12, max_dist=155/0.705)"
]
},
{
@ -813,7 +824,7 @@
},
"outputs": [],
"source": [
"cat = csiborgtools.read.HaloCatalogue(7444, paths)"
"cat = csiborgtools.read.ClumpsCatalogue(7444, paths)"
]
},
{
@ -831,7 +842,7 @@
"from tqdm import trange\n",
"x = np.full((len(ics), 3), np.nan)\n",
"for i in trange(len(ics)):\n",
" cat = csiborgtools.read.HaloCatalogue(ics[i], paths, max_dist=155 / 0.705)\n",
" cat = csiborgtools.read.ClumpsCatalogue(ics[i], paths, max_dist=155 / 0.705)\n",
" for j, th in enumerate([1e12, 1e13, 1e14]):\n",
" mask = cat[\"totpartmass\"] > th\n",
" x[i, j] = np.nanmedian(cat[\"lambda200c\"][mask])"
@ -1149,8 +1160,8 @@
},
"outputs": [],
"source": [
"cat1 = csiborgtools.read.HaloCatalogue(7444, min_mass=1e13, max_dist=155 / 0.705)\n",
"cat2 = csiborgtools.read.HaloCatalogue(7468, min_mass=1e13, max_dist=155 / 0.705)"
"cat1 = csiborgtools.read.ClumpsCatalogue(7444, min_mass=1e13, max_dist=155 / 0.705)\n",
"cat2 = csiborgtools.read.ClumpsCatalogue(7468, min_mass=1e13, max_dist=155 / 0.705)"
]
},
{

View file

@ -62,8 +62,8 @@
},
"outputs": [],
"source": [
"cat0 = csiborgtools.read.HaloCatalogue(7468)\n",
"catx = csiborgtools.read.HaloCatalogue(7588)"
"cat0 = csiborgtools.read.ClumpsCatalogue(7468)\n",
"catx = csiborgtools.read.ClumpsCatalogue(7588)"
]
},
{

View file

@ -78,7 +78,7 @@
},
"outputs": [],
"source": [
"pkreader = csiborgtools.read.PKReader(paths.ic_ids, hw)\n",
"pkreader = csiborgtools.read.PKReader(paths.get_ics, hw)\n",
"\n",
"autoks, pks = pkreader.read_autos()\n",
"\n",
@ -1134,7 +1134,7 @@
"axs[0].set_title(\"hw = {}\".format(hw))\n",
"m = autoks < 40\n",
"mu = np.mean(pks, axis=0)\n",
"for i in range(len(paths.ic_ids)):\n",
"for i in range(len(paths.get_ics)):\n",
" axs[0].plot(autoks[m], pks[i, m], c=\"k\", lw=0.1)\n",
" axs[1].plot(autoks[m], pks[i, m] / mu[m], c=\"k\", lw=0.1)\n",
"axs[0].plot(autoks[m], mu[m], c=\"red\", lw=1, label=r\"$\\langle P(k) \\rangle$\")\n",
@ -2156,7 +2156,7 @@
"axs[0].set_title(r\"$\\mathrm{{hw}} = {}$\".format(hw))\n",
"m = autoks < 22\n",
"mu = np.mean(pks, axis=0)\n",
"for i in range(len(paths.ic_ids)):\n",
"for i in range(len(paths.get_ics)):\n",
" axs[0].plot(autoks[m], pks[i, m], c=\"k\", lw=0.1)\n",
" axs[1].plot(autoks[m], pks[i, m] / mu[m], c=\"k\", lw=0.1)\n",
"axs[0].plot(autoks[m], mu[m], c=\"red\", lw=1, label=r\"$\\langle P(k) \\rangle$\")\n",
@ -3242,13 +3242,13 @@
"fskel = \"/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p\"\n",
"\n",
"autoks, autopks = None, None\n",
"for i, nsim in enumerate(paths.ic_ids):\n",
"for i, nsim in enumerate(paths.get_ics):\n",
" pk = joblib.load(fskel.format(nsim, nsim, hw))\n",
" x = pk.k3D\n",
" y = pk.Pk[:, 0, 0]\n",
" sel = x < 20\n",
" if i == 0:\n",
" autoks = np.full((len(paths.ic_ids), np.sum(sel)), np.nan)\n",
" autoks = np.full((len(paths.get_ics), np.sum(sel)), np.nan)\n",
" autopks = np.full_like(autoks, np.nan)\n",
" \n",
" autoks[i, :] = x[sel]\n",
@ -3268,7 +3268,7 @@
"outputs": [],
"source": [
"plt.figure()\n",
"for i in range(len(paths.ic_ids)):\n",
"for i in range(len(paths.get_ics)):\n",
" plt.plot(autoks[i, :], autopks[i, :], c=\"k\", lw=0.1)\n",
"plt.plot(np.mean(autoks, axis=0), np.mean(autopks, axis=0), label=\"CSiBORG\", c=\"k\", lw=1)\n",
" \n",
@ -3308,10 +3308,10 @@
"source": [
"fskel = \"/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p\"\n",
"\n",
"ic0 = paths.ic_ids[25]\n",
"ic0 = paths.get_ics[25]\n",
"crossks, crosspks = None, None\n",
"i = 0\n",
"for ic in paths.ic_ids:\n",
"for ic in paths.get_ics:\n",
" if ic == ic0:\n",
" continue\n",
" ics = (ic0, ic)\n",
@ -3700,7 +3700,7 @@
"outputs": [],
"source": [
"paths = csiborgtools.read.CSiBORGPaths()\n",
"cat = csiborgtools.read.CombinedHaloCatalogue(paths, min_m500=1e13, max_dist=210)"
"cat = csiborgtools.read.CombinedCatalogue(paths, min_m500=1e13, max_dist=210)"
]
},
{
@ -4144,7 +4144,7 @@
},
"outputs": [],
"source": [
"cat = csiborgtools.io.HaloCatalogue(9844, 1016, minimum_m500=0)"
"cat = csiborgtools.io.Catalogue(9844, 1016, minimum_m500=0)"
]
},
{

View file

@ -515,7 +515,7 @@
}
],
"source": [
"len(paths.ic_ids(tonew=True))"
"len(paths.get_ics(tonew=True))"
]
},
{
@ -542,8 +542,8 @@
},
"outputs": [],
"source": [
"cat0 = csiborgtools.read.HaloCatalogue(7468)\n",
"catx = csiborgtools.read.HaloCatalogue(7588)"
"cat0 = csiborgtools.read.ClumpsCatalogue(7468)\n",
"catx = csiborgtools.read.ClumpsCatalogue(7588)"
]
},
{

322
notebooks/test_mmain.ipynb Normal file
View file

@ -0,0 +1,322 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "5a38ed25",
"metadata": {
"ExecuteTime": {
"end_time": "2022-12-31T17:12:28.663839Z",
"start_time": "2022-12-31T17:12:25.134607Z"
}
},
"outputs": [],
"source": [
"import sys\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import joblib\n",
"import scienceplots\n",
"sys.path.append(\"../\")\n",
"import csiborgtools\n",
"\n",
"plt.style.use([\"science\", \"notebook\"])\n",
"%matplotlib widget\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "22130d0b",
"metadata": {},
"outputs": [],
"source": [
"d = np.load(\"/mnt/extraspace/rstiskalek/csiborg/split/clumps_07444_00951.npz\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "4d9d9d11",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"772 µs ± 4.01 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
"source": [
"%timeit d[\"232\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66f32cef",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8a24c0fa",
"metadata": {},
"outputs": [],
"source": [
"paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)\n",
"# cat = csiborgtools.read.ClumpsCatalogue(7444, paths)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6e3ba9f4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/mnt/extraspace/rstiskalek/csiborg/knn/auto/knncdf_07444_la.npz'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"paths.knn_path(7444, \"auto\", \"la\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "2f4793b9",
"metadata": {},
"outputs": [],
"source": [
"np.savez(\"test.npz\", a=np.random.rand(510, 510, 510), b=np.random.rand(510, 510, 510))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "041d80d8",
"metadata": {},
"outputs": [],
"source": [
"d = np.load(\"test.npz\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "dc320130",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['a', 'b']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.files"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "11231e20",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['a', 'b']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.files"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b6d02f8",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 26,
"id": "4ae2a2a8",
"metadata": {},
"outputs": [],
"source": [
"np.save(\"test.npy\", np.array([]))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "b675510f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([], dtype=float64)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.load(\"test.npy\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f8f96b7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4b63c20",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 21,
"id": "1cdcf448",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/mnt/extraspace/rstiskalek/csiborg/split/ic_00952/out_07444_123.npz'"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# paths.split_path(123, 7444, 952)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d07431f5",
"metadata": {},
"outputs": [],
"source": [
"nsim = 7444\n",
"nsnap = max(paths.get_snapshots(7444))\n",
"reader = csiborgtools.read.ParticleReader(paths)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "def3c21f",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# clumpind = reader.read_clumps(nsnap, nsim, cols=\"index\")[\"index\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "832e82ce",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa69261b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 17,
"id": "105dd2e2",
"metadata": {},
"outputs": [],
"source": [
"parts = np.load(\"/mnt/extraspace/rstiskalek/csiborg/initmatch/clump_7468_particles.npy\", allow_pickle=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c49f174b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "38e9490d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv_galomatch",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
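The `%timeit d["232"]` cell above reflects that an `NpzFile` re-reads the array from disk on every key access (~772 µs here), so a clump that is indexed repeatedly is worth binding once (hypothetical usage):

arr = d["232"]  # read once
# ... reuse `arr` instead of indexing d["232"] inside a loop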

View file

@ -16,16 +16,18 @@
MPI script to calculate the matter cross power spectrum between CSiBORG
IC realisations. Units are Mpc/h.
"""
from gc import collect
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from itertools import combinations
from os import remove
from os.path import join
from itertools import combinations
from datetime import datetime
import numpy
import joblib
from mpi4py import MPI
import numpy
import Pk_library as PKL
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
@ -47,9 +49,9 @@ nproc = comm.Get_size()
MAS = "CIC" # mass asignment scheme
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
box = csiborgtools.units.BoxUnits(paths)
box = csiborgtools.read.BoxUnits(paths)
reader = csiborgtools.read.ParticleReader(paths)
ics = paths.ic_ids(tonew=False)
ics = paths.get_ics(tonew=False)
nsims = len(ics)
# File paths
@ -59,7 +61,7 @@ fout = join(dumpdir, "crosspk",
"out_{}_{}" + "_{}.p".format(args.halfwidth))
jobs = csiborgtools.fits.split_jobs(nsims, nproc)[rank]
jobs = csiborgtools.utils.split_jobs(nsims, nproc)[rank]
for n in jobs:
print("Rank {}@{}: saving {}th delta.".format(rank, datetime.now(), n))
nsim = ics[n]
@ -99,7 +101,7 @@ for i in range(nsims):
combs.append((i, i))
prev_delta = [-1, None, None, None] # i, delta, aexp, length
jobs = csiborgtools.fits.split_jobs(len(combs), nproc)[rank]
jobs = csiborgtools.utils.split_jobs(len(combs), nproc)[rank]
for n in jobs:
i, j = combs[n]
print("Rank {}@{}: combination {}.".format(rank, datetime.now(), (i, j)))

View file

@ -13,17 +13,18 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
from os.path import join
from warnings import warn
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
import numpy
from sklearn.neighbors import NearestNeighbors
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from TaskmasterMPI import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
@ -58,8 +59,6 @@ ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
fout = join(dumpdir, "auto", "knncdf_{}_{}.p")
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
knncdf = csiborgtools.clustering.kNN_CDF()
@ -67,6 +66,7 @@ knncdf = csiborgtools.clustering.kNN_CDF()
# Analysis #
###############################################################################
def read_single(selection, cat):
"""Positions for single catalogue auto-correlation."""
mmask = numpy.ones(len(cat), dtype=bool)
@ -101,11 +101,13 @@ def read_single(selection, cat):
return pos[smask, ...]
def do_auto(run, cat, ic):
"""Calculate the kNN-CDF single catalgoue autocorrelation."""
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -119,13 +121,15 @@ def do_auto(run, cat, ic):
batch_size=int(config["batch_size"]), random_state=config["seed"])
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
fout.format(str(ic).zfill(5), run))
paths.knnauto_path(run, ic))
def do_cross_rand(run, cat, ic):
"""Calculate the kNN-CDF cross catalogue random correlation."""
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -143,14 +147,11 @@ def do_cross_rand(run, cat, ic):
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, fout.format(str(ic).zfill(5), run))
joblib.dump({"rs": rs, "corr": corr}, paths.knnauto_path(run, ic))
def do_runs(ic):
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax,
min_mass=minmass)
cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
for run in args.runs:
if "random" in run:
do_cross_rand(run, cat, ic)

View file

@ -13,18 +13,19 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
from warnings import warn
from os.path import join
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from itertools import combinations
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
import numpy
from sklearn.neighbors import NearestNeighbors
from os.path import join
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from TaskmasterMPI import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
@ -67,6 +68,7 @@ knncdf = csiborgtools.clustering.kNN_CDF()
# Analysis #
###############################################################################
def read_single(selection, cat):
mmask = numpy.ones(len(cat), dtype=bool)
pos = cat.positions(False)
@ -79,19 +81,20 @@ def read_single(selection, cat):
mmask &= (cat[psel["name"]] < pmax)
return pos[mmask, ...]
def do_cross(run, ics):
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
knn1, knn2 = NearestNeighbors(), NearestNeighbors()
cat1 = csiborgtools.read.HaloCatalogue(ics[0], paths, max_dist=Rmax)
cat1 = csiborgtools.read.ClumpsCatalogue(ics[0], paths, max_dist=Rmax)
pos1 = read_single(_config, cat1)
knn1.fit(pos1)
cat2 = csiborgtools.read.HaloCatalogue(ics[1], paths, max_dist=Rmax)
cat2 = csiborgtools.read.ClumpsCatalogue(ics[1], paths, max_dist=Rmax)
pos2 = read_single(_config, cat2)
knn2.fit(pos2)
@ -102,9 +105,8 @@ def do_cross(run, ics):
batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knncross_path(run, ics))
joblib.dump({"rs": rs, "corr": corr},
fout.format(str(ics[0]).zfill(5), str(ics[1]).zfill(5), run))
def do_runs(ics):
print(ics)

View file

@ -13,16 +13,18 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the auto-2PCF of CSiBORG catalogues."""
from os.path import join
from warnings import warn
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from os.path import join
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
import numpy
import joblib
import yaml
try:
import csiborgtools
except ModuleNotFoundError:
@ -65,6 +67,7 @@ tpcf = csiborgtools.clustering.Mock2PCF()
# Analysis #
###############################################################################
def read_single(selection, cat):
"""Positions for single catalogue auto-correlation."""
mmask = numpy.ones(len(cat), dtype=bool)
@ -99,10 +102,11 @@ def read_single(selection, cat):
return pos[smask, ...]
def do_auto(run, cat, ic):
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -112,12 +116,11 @@ def do_auto(run, cat, ic):
nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
joblib.dump({"rp": rp, "wp": wp}, fout.format(str(ic).zfill(5), run))
joblib.dump({"rp": rp, "wp": wp}, paths.tpcfauto_path(run, ic))
def do_runs(ic):
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax,
min_mass=minmass)
cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
for run in args.runs:
do_auto(run, cat, ic)

View file

@ -16,17 +16,20 @@
MPI script to evaluate field properties at the galaxy positions.
"""
from argparse import ArgumentParser
from os.path import join
from os import remove
from datetime import datetime
from os import remove
from os.path import join
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/"
@ -61,16 +64,16 @@ dtype = {"names": ["delta", "phi"], "formats": [numpy.float32] * 2}
# CSiBORG simulation paths
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
ics = paths.ic_ids(tonew=False)
ics = paths.get_ics(tonew=False)
nsims = len(ics)
for n in csiborgtools.fits.split_jobs(nsims, nproc)[rank]:
for n in csiborgtools.utils.split_jobs(nsims, nproc)[rank]:
print("Rank {}@{}: working on {}th IC.".format(rank, datetime.now(), n),
flush=True)
nsim = ics[n]
nsnap = max(paths.get_snapshots(nsim))
reader = csiborgtools.read.ParticleReader(paths)
box = csiborgtools.units.BoxUnits(nsnap, nsim, paths)
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
# Read particles and select a subset of them
particles = reader.read_particle(nsnap, nsim, ["x", "y", "z", "M"],

View file

@ -13,17 +13,20 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate overlap between two CSiBORG realisations."""
from os.path import join
from argparse import ArgumentParser
from datetime import datetime
from os.path import join
import numpy
from scipy.ndimage import gaussian_filter
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Argument parser
@ -44,18 +47,19 @@ overlapper = csiborgtools.match.ParticleOverlap()
# Load catalogues
print("{}: loading catalogues {} and {}."
.format(datetime.now(), args.nsim0, args.nsimx), flush=True)
cat0 = csiborgtools.read.HaloCatalogue(args.nsim0, paths)
catx = csiborgtools.read.HaloCatalogue(args.nsimx, paths)
cat0 = csiborgtools.read.ClumpsCatalogue(args.nsim0, paths)
catx = csiborgtools.read.ClumpsCatalogue(args.nsimx, paths)
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsim0), flush=True)
with open(paths.clump0_path(args.nsim0), "rb") as f:
with open(paths.initmatch_path(args.nsim0, "particles"), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumps0)
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsimx), flush=True)
with open(paths.clump0_path(args.nsimx), 'rb') as f:
with open(paths.initmatch_path(args.nsimx, "particles"), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumpsx)

View file

@ -16,17 +16,17 @@
A script to fit halos (concentration, ...). The particle array of each CSiBORG
realisation must have been split in advance by `pre_splithalos`.
"""
from os.path import join
from datetime import datetime
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Get MPI things
@ -35,8 +35,8 @@ rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/"
loaddir = join(dumpdir, "temp")
partreader = csiborgtools.read.ParticleReader(paths)
cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64),
("Rs", numpy.float64), ("vx", numpy.float64),
("vy", numpy.float64), ("vz", numpy.float64),
@ -47,14 +47,48 @@ cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64),
("r500", numpy.float64), ("m200", numpy.float64),
("m500", numpy.float64), ("lambda200c", numpy.float64)]
def fit_clump(particles, clump, box):
for i, nsim in enumerate(paths.ic_ids(tonew=False)):
out["npart"][n] = clump.Npart
out["rmin"][n] = clump.rmin
out["rmax"][n] = clump.rmax
out["totpartmass"][n] = clump.total_particle_mass
out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m)
out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m)
out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m)
out["Lx"][n], out["Ly"][n], out["Lz"][n] = clump.angular_momentum
for i, nsim in enumerate(paths.get_ics(tonew=False)):
if rank == 0:
print("{}: calculating {}th simulation.".format(datetime.now(), i))
print("{}: calculating {}th simulation `{}`."
.format(datetime.now(), i, nsim), flush=True)
nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.units.BoxUnits(nsnap, nsim, paths)
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
jobs = csiborgtools.fits.split_jobs(utils.Nsplits, nproc)[rank]
# Archive of clumps, keywords are their clump IDs
particle_archive = numpy.load(paths.split_path(nsnap, nsim))
clumpsarr = partreader.read_clumps(nsnap, nsim,
cols=["index", 'x', 'y', 'z'])
clumpid2arrpos = {ind: ii for ii, ind in enumerate(clumpsarr["index"])}
nclumps = len(particle_archive.files)
# Fit 5000 clumps at a time, then dump results
batchsize = 5000
# This rank does these `batchsize` clumps/halos
jobs = csiborgtools.utils.split_jobs(nclumps, nclumps // batchsize)[rank]
for clumpid in jobs:
    out = fit_clump(particle_archive[str(clumpid)],
                    clumpsarr[clumpid2arrpos[clumpid]], box)
jobs = csiborgtools.utils.split_jobs(nclumps, nproc)[rank]
for nsplit in jobs:
parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
nsplit, nsnap, nsim, paths, remove_split=False)
@ -111,18 +145,18 @@ for i, nsim in enumerate(paths.ic_ids(tonew=False)):
# Wait until all jobs finished before moving to another simulation
comm.Barrier()
# Use the rank 0 to combine outputs for this CSiBORG realisation
if rank == 0:
print("Collecting results!")
partreader = csiborgtools.read.ParticleReader(paths)
out_collected = csiborgtools.read.combine_splits(
utils.Nsplits, nsnap, nsim, partreader, cols_collect,
remove_splits=True, verbose=False)
fname = paths.hcat_path(nsim)
print("Saving results to `{}`.".format(fname))
numpy.save(fname, out_collected)
comm.Barrier()
if rank == 0:
print("All finished! See ya!")
# # Use the rank 0 to combine outputs for this CSiBORG realisation
# if rank == 0:
# print("Collecting results!")
# partreader = csiborgtools.read.ParticleReader(paths)
# out_collected = csiborgtools.read.combine_splits(
# utils.Nsplits, nsnap, nsim, partreader, cols_collect,
# remove_splits=True, verbose=False)
# fname = paths.hcat_path(nsim)
# print("Saving results to `{}`.".format(fname))
# numpy.save(fname, out_collected)
#
# comm.Barrier()
#
# if rank == 0:
# print("All finished! See ya!")

View file

@ -19,14 +19,16 @@ are grouped in a clump at present redshift.
Optionally also dumps the clump information; watch out, as this will eat up a
lot of memory.
"""
from gc import collect
from os.path import join
from os import remove
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
from gc import collect
from os import remove
from os.path import join
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
@ -45,12 +47,10 @@ parser.add_argument("--dump_clumps", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
nsims = paths.ic_ids(tonew=True)
nsims = paths.get_ics(tonew=True)
# Output files
ftemp = join(paths.dumpdir, "temp_initmatch", "temp_{}_{}_{}.npy")
fpermcm = join(paths.dumpdir, "initmatch", "clump_{}_cm.npy")
fpermpart = join(paths.dumpdir, "initmatch", "clump_{}_particles.npy")
# Temporary output file
ftemp = join(paths.dumpdir, "temp", "initmatch_{}_{}_{}.npy")
for nsim in nsims:
if rank == 0:
@ -87,7 +87,7 @@ for nsim in nsims:
unique_clumpids = numpy.unique(clump_ids)
njobs = unique_clumpids.size
jobs = csiborgtools.fits.split_jobs(njobs, nproc)[rank]
jobs = csiborgtools.utils.split_jobs(njobs, nproc)[rank]
for i in jobs:
n = unique_clumpids[i]
x0 = part0[clump_ids == n]
@ -139,8 +139,8 @@ for nsim in nsims:
out["ID"][i] = n
print("{}: dumping to .. `{}`.".format(
datetime.now(), fpermcm.format(nsim)), flush=True)
with open(fpermcm.format(nsim), 'wb') as f:
datetime.now(), paths.initmatch_path(nsim, "cm")), flush=True)
with open(paths.initmatch_path(nsim, "cm"), 'wb') as f:
numpy.save(f, out)
if args.dump_clumps:
@ -157,9 +157,11 @@ for nsim in nsims:
out["clump"][i] = fin
out["ID"][i] = n
remove(fpath)
print("{}: dumping to .. `{}`.".format(
datetime.now(), fpermpart.format(nsim)), flush=True)
with open(fpermpart.format(nsim), "wb") as f:
fout = paths.initmatch_path(nsim, "particles")
print("{}: dumping to .. `{}`.".format(datetime.now(), fout),
flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)
del out
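A hypothetical read-back of the particle dump; `allow_pickle=True` is needed because the structured array holds per-clump object entries (cf. the matching script above):

with open(paths.initmatch_path(nsim, "particles"), "rb") as f:
    clumps = numpy.load(f, allow_pickle=True)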

64
scripts/pre_mmain.py Normal file
View file

@ -0,0 +1,64 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to generate the mmain files, i.e. to sum up the substructure of children.
"""
from datetime import datetime
import numpy
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
mmain_reader = csiborgtools.read.MmainReader(paths)
def do_mmain(nsim):
nsnap = max(paths.get_snapshots(nsim))
# NOTE: currently works for highest snapshot anyway
mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
numpy.savez(paths.mmain_path(nsnap, nsim),
mmain=mmain, ultimate_parent=ultimate_parent)
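A hypothetical read-back of the saved mmain file:

with numpy.load(paths.mmain_path(nsnap, nsim)) as f:
    mmain, ultimate_parent = f["mmain"], f["ultimate_parent"]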
###############################################################################
# MPI task delegation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = list(paths.get_ics(tonew=False))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_mmain, comm, verbose=False)
else:
tasks = paths.get_ics(tonew=False)
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_mmain(task)
comm.Barrier()

115
scripts/pre_splithalos.py Normal file
View file

@ -0,0 +1,115 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Script to split particles to indivudual files according to their clump."""
from datetime import datetime
from gc import collect
from glob import glob
from os import remove
from os.path import join
import numpy
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
verbose = nproc == 1
partcols = ['x', 'y', 'z', "vx", "vy", "vz", 'M']
def do_split(nsim):
nsnap = max(paths.get_snapshots(nsim))
reader = csiborgtools.read.ParticleReader(paths)
ftemp_base = join(
paths.temp_dumpdir,
"split_{}_{}".format(str(nsim).zfill(5), str(nsnap).zfill(5))
)
ftemp = ftemp_base + "_{}.npz"
# Load the particles and their clump IDs
particles = reader.read_particle(nsnap, nsim, partcols, verbose=verbose)
particle_clumps = reader.read_clumpid(nsnap, nsim, verbose=verbose)
# Drop all particles whose clump index is 0 (not assigned to any clump)
assigned_mask = particle_clumps != 0
particle_clumps = particle_clumps[assigned_mask]
particles = particles[assigned_mask]
del assigned_mask
collect()
# Load the clump indices
clumpinds = reader.read_clumps(nsnap, nsim, cols="index")["index"]
# Some of the clumps have no particles, so we do not loop over them
clumpinds = clumpinds[numpy.isin(clumpinds, particle_clumps)]
# Loop over the clump indices and save the particles to a temporary file
# every 10000 clumps. We will later read this back and combine into a
# single file.
out = {}
for i, clind in enumerate(tqdm(clumpinds) if verbose else clumpinds):
key = str(clind)
out.update({key: particles[particle_clumps == clind]})
# REMOVE bump this back up
if i % 10000 == 0 or i == clumpinds.size - 1:
numpy.savez(ftemp.format(i), **out)
out = {}
# Clear up memory because we will be loading everything back
del particles, particle_clumps, clumpinds
collect()
# Now load back in every temporary file, combine them into a single
# dictionary and save as a single .npz file.
out = {}
for file in glob(ftemp_base + '*'):
inp = numpy.load(file)
for key in inp.files:
out.update({key: inp[key]})
remove(file)
numpy.savez(paths.split_path(nsnap, nsim), **out)
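Because `.npz` archives are indexed lazily, a single clump's particles can later be fetched without loading the whole split file (hypothetical read-back; `clumpid` stands for any clump index saved above):

with numpy.load(paths.split_path(nsnap, nsim)) as archive:
    parts = archive[str(clumpid)]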
###############################################################################
# MPI task delegation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = list(paths.get_ics(tonew=False))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_split, comm, verbose=False)
else:
tasks = paths.get_ics(tonew=False)
tasks = [tasks[0]] # REMOVE
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_split(task)
comm.Barrier()

View file

@ -1,58 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to split particles into smaller files according to their clump
membership for faster manipulation. Currently does this for the maximum
snapshot of each simulation. Running this requires a lot of memory.
"""
from mpi4py import MPI
from datetime import datetime
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
sims = paths.ic_ids(False)
partcols = ["x", "y", "z", "vx", "vy", "vz", "M", "level"]
jobs = csiborgtools.fits.split_jobs(len(sims), nproc)[rank]
for icount, sim_index in enumerate(jobs):
print("{}: rank {} working {} / {} jobs."
.format(datetime.now(), rank, icount + 1, len(jobs)), flush=True)
nsim = sims[sim_index]
nsnap = max(paths.get_snapshots(nsim))
partreader = csiborgtools.read.ParticleReader(paths)
# Load the clumps, particles' clump IDs and particles.
clumps = partreader.read_clumps(nsnap, nsim)
particle_clumps = partreader.read_clumpid(nsnap, nsim, verbose=False)
particles = partreader.read_particle(nsnap, nsim, partcols, verbose=False)
# Drop all particles whose clump index is 0 (not assigned to any halo)
particle_clumps, particles = partreader.drop_zero_indx(
particle_clumps, particles)
# Dump it!
csiborgtools.fits.dump_split_particles(particles, particle_clumps, clumps,
utils.Nsplits, nsnap, nsim, paths,
verbose=False)
print("All finished!", flush=True)