Correct fitting script for both clumps and halos (#46)

* Minor typos

* fix minor bugs

* pep8

* formatting

* pep8

* Fix minor bugs

* New path & pep8

* add split

* Updates

* Improve calculation within radius

* pep8

* pep8

* get the script working

* Add matter overdensity

* Add m200m to the script

* Fix looping bug

* add parents support

* add import

* Optionally concatenate velocities

* Make masking optional

* Ignore the error message

* Start reading in raw data

* Fix cat reading

* Additional units conversions

* Add clump reading

* Fix indexing

* Remove old comment

* Remove old comment

* set npart to 0 instead of overflow from NaN

* fix docs

* rm boring stuff

* Remove old stuff

* Remove old stuff

* Remove old comment

* Update nb
Commit 39b3498621 (parent c2fde1566b)
Richard Stiskalek, 2023-04-19 16:39:35 +02:00, committed via GitHub
17 changed files with 709 additions and 357 deletions

View file

@ -14,3 +14,4 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .halo import Clump, Halo # noqa
from .haloprofile import NFWPosterior, NFWProfile # noqa
from .utils import split_jobs # noqa

View file

@ -22,6 +22,7 @@ class BaseStructure(ABC):
"""
Basic structure object for handling operations on its particles.
"""
_particles = None
_info = None
_box = None
@ -39,7 +40,7 @@ class BaseStructure(ABC):
@particles.setter
def particles(self, particles):
pars = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
pars = ["x", "y", "z", "vx", "vy", "vz", "M"]
assert all(p in particles.dtype.names for p in pars)
self._particles = particles
@ -73,7 +74,7 @@ class BaseStructure(ABC):
@box.setter
def box(self, box):
try:
assert box._name == "box_units"
self._box = box
except AttributeError as err:
raise TypeError from err
@ -87,20 +88,9 @@ class BaseStructure(ABC):
-------
pos : 2-dimensional array of shape `(n_particles, 3)`.
"""
ps = ('x', 'y', 'z')
ps = ("x", "y", "z")
return numpy.vstack([self[p] - self.info[p] for p in ps]).T
@property
def r(self):
"""
Radial separation of the particles from the centre of the object.
Returns
-------
r : 1-dimensional array of shape `(n_particles, )`.
"""
return numpy.linalg.norm(self.pos, axis=1)
@property
def vel(self):
"""
@ -112,34 +102,61 @@ class BaseStructure(ABC):
"""
return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
@property
def cmass(self):
def r(self):
"""
Cartesian position components of the object's centre of mass. Note that
this is already in a frame centered at the clump's potential minimum,
so its distance from origin indicates the separation of the centre of
mass and potential minimum.
Calculate the radial separation of the particles from the centre of the
object.
Returns
-------
r : 1-dimensional array of shape `(n_particles, )`.
"""
return numpy.linalg.norm(self.pos, axis=1)
def cmass(self, rmax, rmin):
"""
Calculate Cartesian position components of the object's centre of mass.
Note that this is already in a frame centered at the clump's potential
minimum, so its distance from origin indicates the separation of the
centre of mass and potential minimum.
Parameters
----------
rmax : float
Maximum radius for particles to be included in the calculation.
rmin : float
Minimum radius for particles to be included in the calculation.
Returns
-------
cm : 1-dimensional array of shape `(3, )`
"""
return numpy.average(self.pos, axis=0, weights=self['M'])
r = self.r()
mask = (r >= rmin) & (r <= rmax)
return numpy.average(self.pos[mask], axis=0, weights=self["M"][mask])
@property
def angular_momentum(self):
def angular_momentum(self, rmax, rmin=0):
"""
Angular momentum in the box coordinates.
Calculate angular momentum in the box coordinates. Note that the particle
velocities are taken relative to the centre-of-mass velocity of the
selected particles.
Parameters
----------
rmax : float
Maximum radius for particles to be included in the calculation.
rmin : float
Minimum radius for particles to be included in the calculation.
Returns
-------
J : 1-dimensional array of shape `(3, )`
"""
J = numpy.cross(self.pos - self.cmass, self.vel)
return numpy.einsum("i,ij->j", self.m, J)
r = self.r()
mask = (r >= rmin) & (r <= rmax)
pos = self.pos[mask] - self.cmass(rmax, rmin)
# Velocities in the object's CM frame
vel = self.vel[mask]
vel -= numpy.average(self.vel[mask], axis=0, weights=self["M"][mask])
return numpy.einsum("i,ij->j", self["M"][mask], numpy.cross(pos, vel))
def enclosed_mass(self, rmax, rmin=0):
"""
@ -156,8 +173,8 @@ class BaseStructure(ABC):
-------
enclosed_mass : float
"""
r = self.r
return numpy.sum(self['M'][(r >= rmin) & (r <= rmax)])
r = self.r()
return numpy.sum(self["M"][(r >= rmin) & (r <= rmax)])
def lambda_bullock(self, radius, npart_min=10):
r"""
@ -182,19 +199,21 @@ class BaseStructure(ABC):
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R.
"""
mask = self.r <= radius
mask = self.r() <= radius
if numpy.sum(mask) < npart_min:
return numpy.nan
mass = self.enclosed_mass(radius)
V = numpy.sqrt(self.box.box_G * mass / radius)
return (numpy.linalg.norm(self.angular_momentum[mask])
/ (numpy.sqrt(2) * mass * V * radius))
return numpy.linalg.norm(self.angular_momentum(radius)) / (
numpy.sqrt(2) * mass * V * radius
)
def spherical_overdensity_mass(self, delta_mult, npart_min=10):
def spherical_overdensity_mass(self, delta_mult, npart_min=10, kind="crit"):
r"""
Calculate spherical overdensity mass and radius. The mass is defined as
the enclosed mass within an outermost radius where the mean enclosed
spherical density reaches a multiple of the critical density `delta`.
spherical density reaches `delta_mult` times the critical density (or
`delta_mult` times the mean matter density if `kind` is `matter`).
Parameters
----------
@ -203,6 +222,8 @@ class BaseStructure(ABC):
npart_min : int
Minimum number of enclosed particles for a radius to be
considered trustworthy.
kind : str
Either `crit` or `matter`, for critical or matter overdensity,
respectively.
Returns
-------
@ -211,17 +232,25 @@ class BaseStructure(ABC):
mx : float
Corresponding spherical enclosed mass.
"""
# Quick check of inputs
assert kind in ["crit", "matter"]
# We first sort the particles by increasing separation
rs = self.r
rs = self.r()
order = numpy.argsort(rs)
rs = rs[order]
cmass = numpy.cumsum(self['M']) # Cumulative mass
cmass = numpy.cumsum(self["M"][order]) # Cumulative mass
# We calculate the enclosed volume and indices where it is above target
vol = 4 * numpy.pi / 3 * (rs**3 - rs[0]**3)
ks = numpy.where([cmass / vol > delta_mult * self.box.rhoc])[0]
vol = 4 * numpy.pi / 3 * (rs**3 - rs[0] ** 3)
target_density = delta_mult * self.box.box_rhoc
if kind == "matter":
target_density *= self.box.cosmo.Om0
with numpy.errstate(divide="ignore"):
ks = numpy.where(cmass / vol > target_density)[0]
if ks.size == 0: # Never above the threshold?
return numpy.nan, numpy.nan
k = numpy.maximum(ks)
k = numpy.max(ks)
if k < npart_min: # Too few particles?
return numpy.nan, numpy.nan
return rs[k], cmass[k]
@ -235,7 +264,7 @@ class BaseStructure(ABC):
-------
key : list of str
"""
return self.data.dtype.names
return self.particles.dtype.names
def __getitem__(self, key):
if key not in self.keys:
@ -259,6 +288,7 @@ class Clump(BaseStructure):
box : :py:class:`csiborgtools.read.BoxUnits`
Box units object.
"""
def __init__(self, particles, info, box):
self.particles = particles
self.info = info
@ -279,6 +309,7 @@ class Halo(BaseStructure):
box : :py:class:`csiborgtools.read.BoxUnits`
Box units object.
"""
def __init__(self, particles, info, box):
self.particles = particles
self.info = info
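For intuition, the spherical overdensity logic used by `spherical_overdensity_mass` boils down to the following self-contained sketch (toy particle data, and a made-up `rho_target` standing in for `delta_mult * box_rhoc`):

import numpy

# Toy particles: radii and masses (hypothetical values).
r = numpy.random.uniform(0.01, 1.0, 1000)
m = numpy.full(1000, 1e-8)

order = numpy.argsort(r)
rs, cmass = r[order], numpy.cumsum(m[order])
vol = 4 * numpy.pi / 3 * (rs**3 - rs[0] ** 3)
rho_target = 2e-3  # stand-in for delta_mult * box_rhoc

with numpy.errstate(divide="ignore"):
    ks = numpy.where(cmass / vol > rho_target)[0]
if ks.size == 0:  # never above the threshold
    rx, mx = numpy.nan, numpy.nan
else:
    k = numpy.max(ks)  # outermost radius above the target density
    rx, mx = rs[k], cmass[k]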

View file

@ -50,7 +50,7 @@ class NFWProfile:
density : 1-dimensional array
"""
x = r / Rs
return rho0 / (x * (1 + x)**2)
return rho0 / (x * (1 + x) ** 2)
@staticmethod
def _logprofile(r, Rs, rho0):
@ -153,7 +153,7 @@ class NFWProfile:
samples : float or 1-dimensional array
Samples following the NFW profile.
"""
gen = uniform(rmin, rmax-rmin)
gen = uniform(rmin, rmax - rmin)
samples = numpy.full(size, numpy.nan)
for i in range(size):
while True:
@ -187,9 +187,6 @@ class NFWPosterior(NFWProfile):
Clump object containing the particles and clump information.
"""
def __init__(self):
super().__init__()
@property
def clump(self):
"""
@ -228,7 +225,7 @@ class NFWPosterior(NFWProfile):
-------
rho0: float
"""
return mass / self.bounded_enclosed_mass(rmin, rmax, Rs, 1)
return mass / self.bounded_mass(rmin, rmax, Rs, 1)
def initlogRs(self, r, rmin, rmax, binsguess=10):
r"""
@ -255,7 +252,6 @@ class NFWPosterior(NFWProfile):
counts, edges = numpy.histogram(r, bins)
return numpy.log10(edges[numpy.argmax(counts)])
def logprior(self, logRs, rmin, rmax):
r"""
Logarithmic uniform prior on :math:`\log R_{\rm s}`. Unnormalised but
@ -275,8 +271,8 @@ class NFWPosterior(NFWProfile):
lp : float
"""
if not rmin < 10**logRs < rmax:
return - numpy.infty
return 0.
return -numpy.infty
return 0.0
def loglikelihood(self, logRs, r, rmin, rmax, npart):
"""
@ -303,7 +299,6 @@ class NFWPosterior(NFWProfile):
mnfw = self.bounded_mass(rmin, rmax, Rs, 1)
return numpy.sum(self._logprofile(r, Rs, 1)) - npart * numpy.log(mnfw)
def __call__(self, logRs, r, rmin, rmax, npart):
"""
Logarithmic posterior. Sum of the logarithmic prior and likelihood.
@ -327,7 +322,7 @@ class NFWPosterior(NFWProfile):
"""
lp = self.logprior(logRs, rmin, rmax)
if not numpy.isfinite(lp):
return - numpy.infty
return -numpy.infty
return self.loglikelihood(logRs, r, rmin, rmax, npart) + lp
def fit(self, clump, eps=1e-4):
@ -353,18 +348,20 @@ class NFWPosterior(NFWProfile):
Best fit NFW central density.
"""
assert isinstance(clump, Clump)
r = clump.r
r = clump.r()
rmin = numpy.min(r)
rmax, mtot = clump.spherical_overdensity_mass(200)
npart = numpy.sum((rmin <= r) & (r <= rmax))
mask = (rmin <= r) & (r <= rmax)
npart = numpy.sum(mask)
r = r[mask]
# Loss function to optimize
def loss(logRs):
return - self(logRs, r, rmin, rmax, npart)
return -self(logRs, r, rmin, rmax, npart)
res = minimize_scalar(
loss, bounds=(numpy.log10(rmin), numpy.log10(rmax)),
method='bounded')
loss, bounds=(numpy.log10(rmin), numpy.log10(rmax)), method="bounded"
)
if numpy.log10(rmax) - res.x < eps:
res.success = False
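The fit above is a bounded one-dimensional maximisation of the log-posterior. The same idiom in a minimal standalone sketch (the quadratic `logpost` is a hypothetical stand-in for the NFW log-posterior):

import numpy
from scipy.optimize import minimize_scalar

def logpost(logRs):
    # Hypothetical stand-in for the NFW log-posterior.
    return -((logRs - 0.5) ** 2)

# Minimise the negative log-posterior within the bounds.
res = minimize_scalar(lambda x: -logpost(x), bounds=(-1.0, 1.0), method="bounded")
eps = 1e-4
if 1.0 - res.x < eps:  # best fit pushed against the upper bound
    res.success = False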

View file

@ -0,0 +1,43 @@
# Copyright (C) 2022 Richard Stiskalek, Deaglan Bartlett
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Fitting utility functions."""
import numpy
def split_jobs(njobs, ncpu):
"""
Split `njobs` amongst `ncpu`.
Parameters
----------
njobs : int
Number of jobs.
ncpu : int
Number of CPUs.
Returns
-------
jobs : list of lists of integers
Outer list over CPUs; each inner list contains the job indices
assigned to that CPU.
"""
njobs_per_cpu, njobs_remainder = divmod(njobs, ncpu)
jobs = numpy.arange(njobs_per_cpu * ncpu).reshape((njobs_per_cpu, ncpu)).T
jobs = jobs.tolist()
for i in range(njobs_remainder):
jobs[i].append(njobs_per_cpu * ncpu + i)
return jobs
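For reference, a quick example of the splitting, worked out by hand from the code above: the remainder jobs go to the first CPUs.

>>> split_jobs(10, 3)
[[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]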

View file

@ -12,7 +12,7 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from csiborgtools.match.match import ( # noqa
from .match import ( # noqa
ParticleOverlap,
RealisationsMatcher,
calculate_overlap,
@ -24,4 +24,5 @@ from csiborgtools.match.match import ( # noqa
fill_delta_indxs,
get_clumplims,
)
from csiborgtools.match.num_density import binned_counts, number_density # noqa
from .num_density import binned_counts, number_density # noqa
from .utils import concatenate_clumps # noqa

View file

@ -16,7 +16,7 @@
import numpy
def concatenate_clumps(clumps):
def concatenate_clumps(clumps, include_velocities=False):
"""
Concatenate an array of clumps to a single array containing all particles.
@ -24,6 +24,8 @@ def concatenate_clumps(clumps):
----------
clumps : list of structured arrays
List of clumps. Each clump must be a structured array with keys
include_velocities : bool, optional
Whether to include velocities in the output array.
Returns
-------
@ -34,21 +36,31 @@ def concatenate_clumps(clumps):
for clump, __ in clumps:
N += clump.size
# Infer dtype of positions
if clumps[0][0]['x'].dtype.char in numpy.typecodes["AllInteger"]:
if clumps[0][0]["x"].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32
else:
posdtype = numpy.float32
# Pre-allocate array
dtype = {"names": ['x', 'y', 'z', 'M'],
"formats": [posdtype] * 3 + [numpy.float32]}
# Pre-allocate the output array. By default it contains only particle
# positions (which may be cell IDs if integers) and masses; velocities
# are appended optionally.
if include_velocities:
dtype = {
"names": ["x", "y", "z", "vx", "vy", "vz", "M"],
"formats": [posdtype] * 3 + [numpy.float32] * 4,
}
else:
dtype = {
"names": ["x", "y", "z", "M"],
"formats": [posdtype] * 3 + [numpy.float32],
}
particles = numpy.full(N, numpy.nan, dtype)
# Fill it one clump by another
start = 0
for clump, __ in clumps:
end = start + clump.size
for p in ('x', 'y', 'z', 'M'):
for p in dtype["names"]:
particles[p][start:end] = clump[p]
start = end
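A minimal usage sketch, assuming each list entry is a `(particle_array, anything)` pair as the loop above expects and that the function returns the filled `particles` array:

import numpy

dtype = {"names": ["x", "y", "z", "M"], "formats": [numpy.float32] * 4}
clump1 = numpy.zeros(3, dtype=dtype)
clump2 = numpy.ones(2, dtype=dtype)
# The second element of each pair is ignored by `concatenate_clumps`.
parts = concatenate_clumps([(clump1, None), (clump2, None)])
assert parts.size == 5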

View file

@ -12,6 +12,7 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .box_units import BoxUnits # noqa
from .halo_cat import ClumpsCatalogue, HaloCatalogue # noqa
from .knn_summary import kNNCDFReader # noqa
from .obs import ( # noqa
@ -26,4 +27,4 @@ from .paths import CSiBORGPaths # noqa
from .pk_summary import PKReader # noqa
from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm # noqa
from .tpcf_summary import TPCFReader # noqa
from .utils import cartesian_to_radec, radec_to_cartesian
from .utils import cartesian_to_radec, cols_to_structured, radec_to_cartesian # noqa

View file

@ -24,11 +24,27 @@ from .readsim import ParticleReader
# Map of unit conversions
CONV_NAME = {
"length": ['x', 'y', 'z', "peak_x", "peak_y", "peak_z", "Rs", "rmin",
"rmax", "r200", "r500", "x0", "y0", "z0", "lagpatch"],
"mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain", 'M'],
"density": ["rho0"]
}
"length": [
"x",
"y",
"z",
"peak_x",
"peak_y",
"peak_z",
"Rs",
"rmin",
"rmax",
"r200c",
"r500c",
"r200m",
"x0",
"y0",
"z0",
"lagpatch",
],
"mass": ["mass_cl", "totpartmass", "m200c", "m500c", "mass_mmain", "M", "m200m"],
"density": ["rho0"],
}
class BoxUnits:
@ -53,15 +69,29 @@ class BoxUnits:
"""
partreader = ParticleReader(paths)
info = partreader.read_info(nsnap, nsim)
pars = ["boxlen", "time", "aexp", "H0",
"omega_m", "omega_l", "omega_k", "omega_b",
"unit_l", "unit_d", "unit_t"]
pars = [
"boxlen",
"time",
"aexp",
"H0",
"omega_m",
"omega_l",
"omega_k",
"omega_b",
"unit_l",
"unit_d",
"unit_t",
]
for par in pars:
setattr(self, "_" + par, float(info[par]))
self._cosmo = LambdaCDM(H0=self._H0, Om0=self._omega_m,
Ode0=self._omega_l, Tcmb0=2.725 * units.K,
Ob0=self._omega_b)
self._cosmo = LambdaCDM(
H0=self._H0,
Om0=self._omega_m,
Ode0=self._omega_l,
Tcmb0=2.725 * units.K,
Ob0=self._omega_b,
)
self._Msuncgs = constants.M_sun.cgs.value # Solar mass in grams
@property
@ -108,7 +138,7 @@ class BoxUnits:
-------
G : float
"""
return constants.G.cgs.value * (self._unit_d * self._unit_t ** 2)
return constants.G.cgs.value * (self._unit_d * self._unit_t**2)
@property
def box_H0(self):
@ -142,7 +172,7 @@ class BoxUnits:
rhoc : float
"""
return 3 * self.box_H0 ** 2 / (8 * numpy.pi * self.box_G)
return 3 * self.box_H0**2 / (8 * numpy.pi * self.box_G)
def box2kpc(self, length):
r"""
@ -227,7 +257,7 @@ class BoxUnits:
cosmo_redshift : float
Cosmological redshift.
"""
x = numpy.linspace(0., 1., 5001)
x = numpy.linspace(0.0, 1.0, 5001)
y = self.cosmo.comoving_distance(x)
return interp1d(y, x)(self.box2mpc(dist))
@ -254,7 +284,7 @@ class BoxUnits:
"""
# Peculiar velocity along the radial distance
r = numpy.vstack([px - p0x, py - p0y, pz - p0z]).T
norm = numpy.sum(r**2, axis=1)**0.5
norm = numpy.sum(r**2, axis=1) ** 0.5
v = numpy.vstack([vx, vy, vz]).T
vpec = numpy.sum(r * v, axis=1) / norm
# Ratio between the peculiar velocity and speed of light
@ -284,7 +314,7 @@ class BoxUnits:
Observed redshift.
"""
r = numpy.vstack([px - p0x, py - p0y, pz - p0z]).T
zcosmo = self.box2cosmoredshift(numpy.sum(r**2, axis=1)**0.5)
zcosmo = self.box2cosmoredshift(numpy.sum(r**2, axis=1) ** 0.5)
zpec = self.box2pecredshift(vx, vy, vz, px, py, pz, p0x, p0y, p0z)
return (1 + zpec) * (1 + zcosmo) - 1
@ -337,8 +367,7 @@ class BoxUnits:
density : float
Density in :math:`M_\odot / \mathrm{pc}^3`.
"""
return (density * self._unit_d / self._Msuncgs
* (units.Mpc.to(units.cm))**3)
return density * self._unit_d / self._Msuncgs * (units.Mpc.to(units.cm)) ** 3
def dens2box(self, density):
r"""
@ -355,8 +384,7 @@ class BoxUnits:
density : float
Density in box units.
"""
return (density / self._unit_d * self._Msuncgs
/ (units.Mpc.to(units.cm))**3)
return density / self._unit_d * self._Msuncgs / (units.Mpc.to(units.cm)) ** 3
def convert_from_boxunits(self, data, names):
r"""
@ -389,8 +417,8 @@ class BoxUnits:
transforms = {
"length": self.box2mpc,
"mass": self.box2solarmass,
"density": self.box2dens
}
"density": self.box2dens,
}
for name in names:
# Check that the name is even in the array
@ -407,7 +435,8 @@ class BoxUnits:
# If nothing found
if not found:
raise NotImplementedError(
"Conversion of `{}` is not defined.".format(name))
"Conversion of `{}` is not defined.".format(name)
)
# Center at the observer
if name in ["peak_x", "peak_y", "peak_z", "x0", "y0", "z0"]:
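The conversion machinery above is a name-to-transform dispatch; a toy sketch of the same pattern (the transform factors are made up, not the real box units):

import numpy

transforms = {"length": lambda x: 677.7 * x,  # hypothetical box -> Mpc factor
              "mass": lambda x: 1e15 * x}     # hypothetical box -> Msun factor
conv_name = {"length": ["x"], "mass": ["M"]}

data = numpy.ones(2, dtype={"names": ["x", "M"], "formats": [numpy.float32] * 2})
for name in data.dtype.names:
    for unit, names in conv_name.items():
        if name in names:
            data[name] = transforms[unit](data[name])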

View file

@ -21,13 +21,14 @@ from sklearn.neighbors import NearestNeighbors
from .box_units import BoxUnits
from .paths import CSiBORGPaths
from .readsim import ParticleReader
from .utils import cartesian_to_radec, flip_cols, radec_to_cartesian
from .utils import add_columns, cartesian_to_radec, flip_cols, radec_to_cartesian
class BaseCatalogue(ABC):
"""
Base (sub)halo catalogue.
"""
_data = None
_paths = None
_nsim = None
@ -106,7 +107,7 @@ class BaseCatalogue(ABC):
@box.setter
def box(self, box):
try:
assert box._name == "box_units"
self._box = box
except AttributeError as err:
raise TypeError from err
@ -132,9 +133,9 @@ class BaseCatalogue(ABC):
pos : 2-dimensional array of shape `(nobjects, 3)`
"""
if in_initial:
ps = ['x0', 'y0', 'z0']
ps = ["x0", "y0", "z0"]
else:
ps = ['x', 'y', 'z']
ps = ["x", "y", "z"]
pos = numpy.vstack([self[p] for p in ps]).T
if not cartesian:
pos = cartesian_to_radec(pos)
@ -248,8 +249,7 @@ class BaseCatalogue(ABC):
knn.fit(pos)
# Convert angular radius to cosine difference.
metric_maxdist = 1 - numpy.cos(numpy.deg2rad(ang_radius))
dist, ind = knn.radius_neighbors(X, radius=metric_maxdist,
sort_results=True)
dist, ind = knn.radius_neighbors(X, radius=metric_maxdist, sort_results=True)
# And the cosine difference to angular distance.
for i in range(X.shape[0]):
dist[i] = numpy.rad2deg(numpy.arccos(1 - dist[i]))
@ -282,13 +282,9 @@ class ClumpsCatalogue(BaseCatalogue):
r"""
Clumps catalogue, defined in the final snapshot.
TODO:
Add fitted quantities.
Add threshold on number of particles
Parameters
----------
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
@ -299,28 +295,68 @@ class ClumpsCatalogue(BaseCatalogue):
minmass : len-2 tuple, optional
Minimum mass. The first element is the catalogue key and the second is
the value.
load_fitted : bool, optional
Whether to load fitted quantities.
rawdata : bool, optional
Whether to return the raw data. If `True`, no cuts or unit
transformations are applied.
"""
def __init__(self, nsim, paths, maxdist=155.5 / 0.705,
minmass=("mass_cl", 1e12)):
def __init__(
self,
nsim,
paths,
maxdist=155.5 / 0.705,
minmass=("mass_cl", 1e12),
load_fitted=True,
rawdata=False,
):
self.nsim = nsim
self.paths = paths
# Read in the clumps from the final snapshot
partreader = ParticleReader(self.paths)
cols = ["index", "parent", 'x', 'y', 'z', "mass_cl"]
data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols)
cols = ["index", "parent", "x", "y", "z", "mass_cl"]
self._data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols)
# Overwrite the parent with the ultimate parent
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
data["parent"] = mmain["ultimate_parent"]
self._data["parent"] = mmain["ultimate_parent"]
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(data, "x", "z")
for p in ('x', 'y', 'z'):
data[p] -= 0.5
data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', "mass_cl"])
if load_fitted:
fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "clumps"))
cols = [col for col in fits.dtype.names if col != "index"]
X = [fits[col] for col in cols]
self._data = add_columns(self._data, X, cols)
mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist
mask &= data[minmass[0]] > minmass[1]
self._data = data[mask]
# If the raw data is not required, then start applying transformations
# and cuts.
if not rawdata:
flip_cols(self._data, "x", "z")
for p in ("x", "y", "z"):
self._data[p] -= 0.5
self._data = self.box.convert_from_boxunits(
self._data,
[
"x",
"y",
"z",
"mass_cl",
"totpartmass",
"rho0",
"r200c",
"r500c",
"m200c",
"m500c",
"r200m",
"m200m",
],
)
if maxdist is not None:
dist = numpy.sqrt(
self._data["x"] ** 2 + self._data["y"] ** 2 + self._data["z"] ** 2
)
self._data = self._data[dist < maxdist]
if minmass is not None:
self._data = self._data[self._data[minmass[0]] > minmass[1]]
@property
def ismain(self):
@ -341,7 +377,6 @@ class HaloCatalogue(BaseCatalogue):
TODO:
Add the fitted quantities
Add threshold on number of particles
Parameters
----------
@ -356,20 +391,32 @@ class HaloCatalogue(BaseCatalogue):
minmass : len-2 tuple
Minimum mass. The first element is the catalogue key and the second is
the value.
rawdata : bool, optional
Whether to return the raw data. If `True`, no cuts or unit
transformations are applied.
"""
def __init__(self, nsim, paths, maxdist=155.5 / 0.705,
minmass=('M', 1e12)):
def __init__(
self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12), rawdata=False
):
self.nsim = nsim
self.paths = paths
# Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
data = mmain["mmain"]
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(data, "x", "z")
for p in ('x', 'y', 'z'):
data[p] -= 0.5
data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', 'M'])
self._data = mmain["mmain"]
if not rawdata:
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(self._data, "x", "z")
for p in ("x", "y", "z"):
self._data[p] -= 0.5
self._data = self.box.convert_from_boxunits(
self._data, ["x", "y", "z", "M"]
)
mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist
mask &= data[minmass[0]] > minmass[1]
self._data = data[mask]
if maxdist is not None:
dist = numpy.sqrt(
self._data["x"] ** 2 + self._data["y"] ** 2 + self._data["z"] ** 2
)
self._data = self._data[dist < maxdist]
if minmass is not None:
self._data = self._data[self._data[minmass[0]] > minmass[1]]
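Both optional cuts reduce to boolean masks over the structured array; for intuition, a toy example with made-up values:

import numpy

cat = numpy.zeros(4, dtype={"names": ["x", "y", "z", "M"],
                            "formats": [numpy.float64] * 4})
cat["x"] = [10, 50, 300, 100]
cat["M"] = [1e13, 1e11, 1e13, 1e13]

maxdist, minmass = 155.5 / 0.705, ("M", 1e12)
dist = numpy.sqrt(cat["x"] ** 2 + cat["y"] ** 2 + cat["z"] ** 2)
cat = cat[dist < maxdist]                # drops the object at 300 Mpc
cat = cat[cat[minmass[0]] > minmass[1]]  # drops the 1e11 Msun object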

View file

@ -23,15 +23,16 @@ import numpy
class CSiBORGPaths:
"""
Paths manager for CSiBORG IC realisations.
Parameters
----------
srcdir : str
Path to the folder where the RAMSES outputs are stored.
postdir: str
Path to the folder where post-processed files are stored.
"""
_srcdir = None
_postdir = None
@ -96,8 +97,7 @@ class CSiBORGPaths:
fpath = join(self.postdir, "temp")
if not isdir(fpath):
mkdir(fpath)
warn("Created directory `{}`.".format(fpath), UserWarning,
stacklevel=1)
warn("Created directory `{}`.".format(fpath), UserWarning, stacklevel=1)
return fpath
def mmain_path(self, nsnap, nsim):
@ -118,12 +118,10 @@ class CSiBORGPaths:
fdir = join(self.postdir, "mmain")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
return join(
fdir,
"mmain_{}_{}.npz".format(str(nsim).zfill(5), str(nsnap).zfill(5))
)
fdir, "mmain_{}_{}.npz".format(str(nsim).zfill(5), str(nsnap).zfill(5))
)
def initmatch_path(self, nsim, kind):
"""
@ -145,8 +143,7 @@ class CSiBORGPaths:
fdir = join(self.postdir, "initmatch")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
return join(fdir, "{}_{}.npy".format(kind, str(nsim).zfill(5)))
def split_path(self, nsnap, nsim):
@ -167,10 +164,10 @@ class CSiBORGPaths:
fdir = join(self.postdir, "split")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
return join(fdir, "clumps_{}_{}.npz"
.format(str(nsim).zfill(5), str(nsnap).zfill(5)))
warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
return join(
fdir, "clumps_{}_{}.npz".format(str(nsim).zfill(5), str(nsnap).zfill(5))
)
def get_ics(self, tonew):
"""
@ -194,7 +191,7 @@ class CSiBORGPaths:
else:
files = [f for f in files if "_inv" not in f] # Remove inv. ICs
files = [f for f in files if "_new" not in f] # Remove _new
files = [f for f in files if "OLD" not in f] # Remove _old
files = [f for f in files if "OLD" not in f] # Remove _old
ids = [int(f.split("_")[-1]) for f in files]
try:
ids.remove(5511)
@ -239,7 +236,7 @@ class CSiBORGPaths:
# Get all files in simpath that start with output_
snaps = glob(join(simpath, "output_*"))
# Take just the last _00XXXX from each file and strip zeros
snaps = [int(snap.split('_')[-1].lstrip('0')) for snap in snaps]
snaps = [int(snap.split("_")[-1].lstrip("0")) for snap in snaps]
return numpy.sort(snaps)
def snapshot_path(self, nsnap, nsim):
@ -261,22 +258,31 @@ class CSiBORGPaths:
simpath = self.ic_path(nsim, tonew=tonew)
return join(simpath, "output_{}".format(str(nsnap).zfill(5)))
def hcat_path(self, nsim):
def structfit_path(self, nsnap, nsim, kind):
"""
Path to the final snapshot halo catalogue from `fit_halos.py`.
Path to the clump or halo catalogue from `fit_halos.py`.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
kind : str
Type of catalogue. Can be either `clumps` or `halos`.
Returns
-------
path : str
"""
nsnap = str(max(self.get_snapshots(nsim))).zfill(5)
fname = "ramses_out_{}_{}.npy".format(str(self.nsim).zfill(5), nsnap)
return join(self.postdir, fname)
assert kind in ["clumps", "halos"]
fdir = join(self.postdir, "structfit")
if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
fname = "{}_out_{}_{}.npy".format(kind, str(nsim).zfill(5), str(nsnap).zfill(5))
return join(fdir, fname)
def knnauto_path(self, run, nsim=None):
"""
@ -297,8 +303,7 @@ class CSiBORGPaths:
fdir = join(self.postdir, "knn", "auto")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
if nsim is not None:
return join(fdir, "knncdf_{}_{}.p".format(str(nsim).zfill(5), run))
@ -325,8 +330,7 @@ class CSiBORGPaths:
fdir = join(self.postdir, "knn", "cross")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
if nsims is not None:
assert isinstance(nsims, (list, tuple)) and len(nsims) == 2
nsim0 = str(nsims[0]).zfill(5)
@ -356,8 +360,7 @@ class CSiBORGPaths:
fdir = join(self.postdir, "tpcf", "auto")
if not isdir(fdir):
makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning,
stacklevel=1)
warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
if nsim is not None:
return join(fdir, "tpcf{}_{}.p".format(str(nsim).zfill(5), run))

View file

@ -41,7 +41,7 @@ def cartesian_to_radec(X, indeg=True):
"""
x, y, z = X[:, 0], X[:, 1], X[:, 2]
dist = numpy.linalg.norm(X, axis=1)
dec = numpy.arcsin(z/dist)
dec = numpy.arcsin(z / dist)
ra = numpy.arctan2(y, x)
ra[ra < 0] += 2 * numpy.pi # Wrap RA to [0, 2pi)
if indeg:
@ -101,8 +101,7 @@ def cols_to_structured(N, cols):
if not (isinstance(cols, list) and all(isinstance(c, tuple) for c in cols)):
raise TypeError("`cols` must be a list of tuples.")
dtype = {"names": [col[0] for col in cols],
"formats": [col[1] for col in cols]}
dtype = {"names": [col[0] for col in cols], "formats": [col[1] for col in cols]}
return numpy.full(N, numpy.nan, dtype=dtype)
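A short usage example of `cols_to_structured` (column names chosen arbitrarily); note that integer fields cannot represent the NaN fill and end up as -2147483648, which is why `fit_halos.py` below assigns indices explicitly:

import numpy

cols = [("index", numpy.int32), ("m200c", numpy.float32)]
out = cols_to_structured(5, cols)  # 5 rows, fields initialised from NaN
out["m200c"][0] = 1e14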
@ -237,8 +236,9 @@ def array_to_structured(arr, cols):
"""
cols = [cols] if isinstance(cols, str) else cols
if arr.ndim != 2 or arr.shape[1] != len(cols):
raise TypeError("`arr` must be a 2-dimensional array of "
"shape `(n_samples, n_cols)`.")
raise TypeError(
"`arr` must be a 2-dimensional array of " "shape `(n_samples, n_cols)`."
)
dtype = {"names": cols, "formats": [arr.dtype] * len(cols)}
out = numpy.full(arr.shape[0], numpy.nan, dtype=dtype)
@ -299,5 +299,7 @@ def extract_from_structured(arr, cols):
out[:, i] = arr[col]
# Optionally flatten
if len(cols) == 1:
return out.reshape(-1,)
return out.reshape(
-1,
)
return out

notebooks/fitting.ipynb — new file, 140 lines (diff suppressed because one or more lines are too long)

View file

@ -29,6 +29,7 @@ try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
@ -43,23 +44,13 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
args = parser.parse_args()
with open('../scripts/knn_auto.yml', 'r') as file:
with open("../scripts/knn_auto.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
totvol = 4 * numpy.pi * Rmax**3 / 3
minmass = 1e12
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
ics = paths.get_ics(False)
knncdf = csiborgtools.clustering.kNN_CDF()
###############################################################################
@ -75,9 +66,9 @@ def read_single(selection, cat):
psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None:
mmask &= (cat[psel["name"]] >= pmin)
mmask &= cat[psel["name"]] >= pmin
if pmax is not None:
mmask &= (cat[psel["name"]] < pmax)
mmask &= cat[psel["name"]] < pmax
pos = pos[mmask, ...]
# Secondary selection
@ -92,12 +83,13 @@ def read_single(selection, cat):
if ssel.get("marked", True):
x = cat[psel["name"]][mmask]
prop = csiborgtools.clustering.normalised_marks(
x, prop, nbins=config["nbins_marks"])
x, prop, nbins=config["nbins_marks"]
)
if smin is not None:
smask &= (prop >= smin)
smask &= prop >= smin
if smax is not None:
smask &= (prop < smax)
smask &= prop < smax
return pos[smask, ...]
@ -106,8 +98,7 @@ def do_auto(run, cat, ic):
"""Calculate the kNN-CDF single catalgoue autocorrelation."""
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
warn("No configuration for run {}.".format(run), UserWarning, stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -115,21 +106,28 @@ def do_auto(run, cat, ic):
knn = NearestNeighbors()
knn.fit(pos)
rs, cdf = knncdf(
knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
knn,
rvs_gen=rvs_gen,
nneighbours=config["nneighbours"],
rmin=config["rmin"],
rmax=config["rmax"],
nsamples=int(config["nsamples"]),
neval=int(config["neval"]),
batch_size=int(config["batch_size"]),
random_state=config["seed"],
)
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
paths.knnauto_path(run, ic))
joblib.dump(
{"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
paths.knnauto_path(run, ic),
)
def do_cross_rand(run, cat, ic):
"""Calculate the kNN-CDF cross catalogue random correlation."""
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
warn("No configuration for run {}.".format(run), UserWarning, stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -142,10 +140,17 @@ def do_cross_rand(run, cat, ic):
knn2.fit(pos2)
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
knn1,
knn2,
rvs_gen=rvs_gen,
nneighbours=int(config["nneighbours"]),
rmin=config["rmin"],
rmax=config["rmax"],
nsamples=int(config["nsamples"]),
neval=int(config["neval"]),
batch_size=int(config["batch_size"]),
random_state=config["seed"],
)
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knnauto_path(run, ic))

View file

@ -16,7 +16,6 @@
from argparse import ArgumentParser
from datetime import datetime
from itertools import combinations
from os.path import join
from warnings import warn
import joblib
@ -30,6 +29,7 @@ try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
@ -44,24 +44,12 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
args = parser.parse_args()
with open('../scripts/knn_cross.yml', 'r') as file:
with open("../scripts/knn_cross.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
minmass = 1e12
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
fout = join(dumpdir, "cross", "knncdf_{}_{}_{}.p")
ics = paths.get_ics(False)
knncdf = csiborgtools.clustering.kNN_CDF()
###############################################################################
@ -76,9 +64,9 @@ def read_single(selection, cat):
psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None:
mmask &= (cat[psel["name"]] >= pmin)
mmask &= cat[psel["name"]] >= pmin
if pmax is not None:
mmask &= (cat[psel["name"]] < pmax)
mmask &= cat[psel["name"]] < pmax
return pos[mmask, ...]
@ -99,10 +87,17 @@ def do_cross(run, ics):
knn2.fit(pos2)
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
knn1,
knn2,
rvs_gen=rvs_gen,
nneighbours=int(config["nneighbours"]),
rmin=config["rmin"],
rmax=config["rmax"],
nsamples=int(config["nsamples"]),
neval=int(config["neval"]),
batch_size=int(config["batch_size"]),
random_state=config["seed"],
)
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knncross_path(run, ics))

View file

@ -16,7 +16,6 @@
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from os.path import join
from warnings import warn
import joblib
@ -29,6 +28,7 @@ try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
@ -43,24 +43,12 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
args = parser.parse_args()
with open('../scripts/tpcf_auto.yml', 'r') as file:
with open("../scripts/tpcf_auto.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
minmass = 1e12
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/tpcf"
fout = join(dumpdir, "auto", "tpcf_{}_{}.p")
paths = csiborgtools.read.CSiBORGPaths()
ics = paths.get_ics(False)
tpcf = csiborgtools.clustering.Mock2PCF()
###############################################################################
@ -76,9 +64,9 @@ def read_single(selection, cat):
psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None:
mmask &= (cat[psel["name"]] >= pmin)
mmask &= cat[psel["name"]] >= pmin
if pmax is not None:
mmask &= (cat[psel["name"]] < pmax)
mmask &= cat[psel["name"]] < pmax
pos = pos[mmask, ...]
# Secondary selection
@ -93,12 +81,13 @@ def read_single(selection, cat):
if ssel.get("marked", True):
x = cat[psel["name"]][mmask]
prop = csiborgtools.clustering.normalised_marks(
x, prop, nbins=config["nbins_marks"])
x, prop, nbins=config["nbins_marks"]
)
if smin is not None:
smask &= (prop >= smin)
smask &= prop >= smin
if smax is not None:
smask &= (prop < smax)
smask &= prop < smax
return pos[smask, ...]
@ -110,8 +99,11 @@ def do_auto(run, cat, ic):
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
bins = numpy.logspace(numpy.log10(config["rpmin"]),
numpy.log10(config["rpmax"]), config["nrpbins"] + 1)
bins = numpy.logspace(
numpy.log10(config["rpmin"]),
numpy.log10(config["rpmax"]),
config["nrpbins"] + 1,
)
pos = read_single(_config, cat)
nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins)

View file

@ -16,18 +16,26 @@
A script to fit halos (concentration, ...). The particle array of each CSiBORG
realisation must have been split in advance by `runsplit_halos`.
"""
from argparse import ArgumentParser
from datetime import datetime
from os.path import join
import numpy
from mpi4py import MPI
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
args = parser.parse_args()
# Get MPI things
comm = MPI.COMM_WORLD
@ -35,128 +43,170 @@ rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader =csiborgtools.read.ParticleReader(paths)
cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64),
("Rs", numpy.float64), ("vx", numpy.float64),
("vy", numpy.float64), ("vz", numpy.float64),
("Lx", numpy.float64), ("Ly", numpy.float64),
("Lz", numpy.float64), ("rho0", numpy.float64),
("conc", numpy.float64), ("rmin", numpy.float64),
("rmax", numpy.float64), ("r200", numpy.float64),
("r500", numpy.float64), ("m200", numpy.float64),
("m500", numpy.float64), ("lambda200c", numpy.float64)]
def fit_clump(particles, clump, box):
partreader = csiborgtools.read.ParticleReader(paths)
nfwpost = csiborgtools.fits.NFWPosterior()
ftemp = join(paths.temp_dumpdir, "fit_clump_{}_{}_{}.npy")
cols_collect = [
("index", numpy.int32),
("npart", numpy.int32),
("totpartmass", numpy.float32),
("vx", numpy.float32),
("vy", numpy.float32),
("vz", numpy.float32),
("conc", numpy.float32),
("rho0", numpy.float32),
("r200c", numpy.float32),
("r500c", numpy.float32),
("m200c", numpy.float32),
("m500c", numpy.float32),
("lambda200c", numpy.float32),
("r200m", numpy.float32),
("m200m", numpy.float32),
]
def fit_clump(particles, clump_info, box):
"""
Fit an object. Can be either a clump or a parent halo.
"""
obj = csiborgtools.fits.Clump(particles, clump_info, box)
out = {}
if numpy.isnan(clump_info["index"]):
print("Why am I NaN?", flush=True)
out["index"] = clump_info["index"]
out["npart"] = len(obj)
out["totpartmass"] = numpy.sum(obj["M"])
for i, v in enumerate(["vx", "vy", "vz"]):
out[v] = numpy.average(obj.vel[:, i], weights=obj["M"])
# Overdensity masses
out["r200c"], out["m200c"] = obj.spherical_overdensity_mass(200, kind="crit")
out["r500c"], out["m500c"] = obj.spherical_overdensity_mass(500, kind="crit")
out["r200m"], out["m200m"] = obj.spherical_overdensity_mass(200, kind="matter")
# NFW fit
if out["npart"] > 10 and numpy.isfinite(out["r200c"]):
Rs, rho0 = nfwpost.fit(obj)
out["conc"] = Rs / out["r200c"]
out["rho0"] = rho0
# Spin within R200c
if numpy.isfinite(out["r200c"]):
out["lambda200c"] = obj.lambda_bullock(out["r200c"])
return out
out["npart"][n] = clump.Npart
out["rmin"][n] = clump.rmin
out["rmax"][n] = clump.rmax
out["totpartmass"][n] = clump.total_particle_mass
out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m)
out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m)
out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m)
out["Lx"][n], out["Ly"][n], out["Lz"][n] = clump.angular_momentum
def load_clump_particles(clumpid, particle_archive):
"""
Load a clump's particles from the particle archive. If the clump is not
there, i.e. it has no associated particles, return `None`.
"""
try:
part = particle_archive[str(clumpid)]
except KeyError:
part = None
return part
def load_parent_particles(clumpid, particle_archive, clumps_cat):
"""
Load a parent halo's particles.
"""
indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
# We first load the particles of each clump belonging to this parent and then
# concatenate them for further analysis.
clumps = []
for ind in indxs:
parts = load_clump_particles(ind, particle_archive)
if parts is not None:
clumps.append([parts, None])
if len(clumps) == 0:
return None
return csiborgtools.match.concatenate_clumps(clumps, include_velocities=True)
# We now start looping over all simulations
for i, nsim in enumerate(paths.get_ics(tonew=False)):
if rank == 0:
print("{}: calculating {}th simulation `{}`."
.format(datetime.now(), i, nsim), flush=True)
print(
"{}: calculating {}th simulation `{}`.".format(datetime.now(), i, nsim),
flush=True,
)
nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
# Archive of clumps, keywords are their clump IDs
particle_archive = paths.split_path(nsnap, nsim)
clumpsarr = partreader.read_clumps(nsnap, nsim,
cols=["index", 'x', 'y', 'z'])
clumpid2arrpos = {ind: ii for ii, ind in enumerate(clumpsarr["index"])}
particle_archive = numpy.load(paths.split_path(nsnap, nsim))
clumps_cat = csiborgtools.read.ClumpsCatalogue(
nsim, paths, maxdist=None, minmass=None, rawdata=True, load_fitted=False
)
# We check whether we fit halos or clumps, will be indexing over different
# iterators.
if args.kind == "halos":
ismain = clumps_cat.ismain
else:
ismain = numpy.ones(len(clumps_cat), dtype=bool)
ntasks = len(clumps_cat)
# We split the clumps among the processes. Each CPU calculates a fraction
# of them and dumps the results in a structured array. Even if we are
# calculating parent halos, this index runs over all clumps.
jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]
out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect)
for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs):
# If we are fitting halos and this clump is not a main, then continue.
if args.kind == "halos" and not ismain[j]:
continue
clumpid = clumps_cat["index"][j]
if args.kind == "halos":
part = load_parent_particles(clumpid, particle_archive, clumps_cat)
else:
part = load_clump_particles(clumpid, particle_archive)
nclumps = len(particle_archive.files)
# Fit 5000 clumps at a time, then dump results
batchsize = 5000
# We fit the particles if there are any. If not, we still assign the
# index explicitly, since otherwise NaN would be cast to an integer
# (-2147483648) and cause an error further down.
if part is not None:
_out = fit_clump(part, clumps_cat[j], box)
for key in _out.keys():
out[key][i] = _out[key]
else:
out["index"][i] = clumpid
out["npart"][i] = 0
# This rank does these `batchsize` clumps/halos
jobs = csiborgtools.utils.split_jobs(nclumps, nclumps // batchsize)[rank]
for clumpid in jobs:
... = fit_clump(particle_archive[str(clumpid)], clumpsarr[clumpid2arrpos[clumpid]])
jobs = csiborgtools.utils.split_jobs(nclumps, nproc)[rank]
for nsplit in jobs:
parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
nsplit, nsnap, nsim, paths, remove_split=False)
N = clumps.size
cols = [("index", numpy.int64), ("npart", numpy.int64),
("totpartmass", numpy.float64), ("Rs", numpy.float64),
("rho0", numpy.float64), ("conc", numpy.float64),
("lambda200c", numpy.float64), ("vx", numpy.float64),
("vy", numpy.float64), ("vz", numpy.float64),
("Lx", numpy.float64), ("Ly", numpy.float64),
("Lz", numpy.float64), ("rmin", numpy.float64),
("rmax", numpy.float64), ("r200", numpy.float64),
("r500", numpy.float64), ("m200", numpy.float64),
("m500", numpy.float64)]
out = csiborgtools.utils.cols_to_structured(N, cols)
out["index"] = clumps["index"]
for n in range(N):
# Pick clump and its particles
xs = csiborgtools.fits.pick_single_clump(n, parts, part_clumps,
clumps)
clump = csiborgtools.fits.Clump.from_arrays(
*xs, rhoc=box.box_rhoc, G=box.box_G)
out["npart"][n] = clump.Npart
out["rmin"][n] = clump.rmin
out["rmax"][n] = clump.rmax
out["totpartmass"][n] = clump.total_particle_mass
out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m)
out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m)
out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m)
out["Lx"][n], out["Ly"][n], out["Lz"][n] = clump.angular_momentum
# Spherical overdensity radii and masses
rs, ms = clump.spherical_overdensity_mass([200, 500])
out["r200"][n] = rs[0]
out["r500"][n] = rs[1]
out["m200"][n] = ms[0]
out["m500"][n] = ms[1]
out["lambda200c"][n] = clump.lambda200c
# NFW profile fit
if clump.Npart > 10 and numpy.isfinite(out["r200"][n]):
nfwpost = csiborgtools.fits.NFWPosterior(clump)
logRs, __ = nfwpost.maxpost_logRs()
Rs = 10**logRs
if not numpy.isnan(logRs):
out["Rs"][n] = Rs
out["rho0"][n] = nfwpost.rho0_from_Rs(Rs)
out["conc"][n] = out["r200"][n] / Rs
csiborgtools.read.dump_split(out, nsplit, nsnap, nsim, paths)
# Wait until all jobs finished before moving to another simulation
fout = ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), rank)
if nproc == 1:
print(
"{}: rank {} saving to `{}`.".format(datetime.now(), rank, fout), flush=True
)
numpy.save(fout, out)
# We saved this CPU's results in a temporary file. Wait now for the other
# CPUs and then collect results from the 0th rank and save them.
comm.Barrier()
# # Use the rank 0 to combine outputs for this CSiBORG realisation
# if rank == 0:
# print("Collecting results!")
# partreader = csiborgtools.read.ParticleReader(paths)
# out_collected = csiborgtools.read.combine_splits(
# utils.Nsplits, nsnap, nsim, partreader, cols_collect,
# remove_splits=True, verbose=False)
# fname = paths.hcat_path(nsim)
# print("Saving results to `{}`.".format(fname))
# numpy.save(fname, out_collected)
#
# comm.Barrier()
#
# if rank == 0:
# print("All finished! See ya!")
if rank == 0:
print(
"{}: collecting results for simulation `{}`.".format(datetime.now(), nsim),
flush=True,
)
# We write to the output array. Load data from each CPU and append to
# the output array.
out = csiborgtools.read.cols_to_structured(ntasks, cols_collect)
clumpid2outpos = {indx: i for i, indx in enumerate(clumps_cat["index"])}
for i in range(nproc):
inp = numpy.load(ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), i))
for j, clumpid in enumerate(inp["index"]):
k = clumpid2outpos[clumpid]
for key in inp.dtype.names:
out[key][k] = inp[key][j]
# If we were analysing main halos, then remove array indices that do
# not correspond to parent halos.
if args.kind == "halos":
out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, "clumps")
print("Saving to `{}`.".format(fout), flush=True)
numpy.save(fout, out)
# We now wait before moving on to another simulation.
comm.Barrier()
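The collection step follows a common MPI idiom: every rank dumps its partial results to a temporary `.npy` file, all ranks meet at a barrier, and rank 0 merges the pieces. A minimal sketch of that pattern (temporary file names are hypothetical):

import numpy
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank, nproc = comm.Get_rank(), comm.Get_size()

out = numpy.arange(rank, rank + 3)  # stand-in for this rank's results
numpy.save("temp_{}.npy".format(rank), out)

comm.Barrier()  # wait until every rank has written its file
if rank == 0:
    collected = numpy.concatenate(
        [numpy.load("temp_{}.npy".format(i)) for i in range(nproc)])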

View file

@ -12,7 +12,10 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Script to split particles to indivudual files according to their clump."""
"""
Script to split particles to individual files according to their clump. This is
useful for calculating the halo properties directly from the particles.
"""
from datetime import datetime
from gc import collect
from glob import glob
@ -28,6 +31,7 @@ try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
@ -38,7 +42,7 @@ nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
verbose = nproc == 1
partcols = ['x', 'y', 'z', "vx", "vy", "vz", 'M']
partcols = ["x", "y", "z", "vx", "vy", "vz", "M"]
def do_split(nsim):
@ -46,8 +50,8 @@ def do_split(nsim):
reader = csiborgtools.read.ParticleReader(paths)
ftemp_base = join(
paths.temp_dumpdir,
"split_{}_{}".format(str(nsim).zfill(5), str(nsnap).zfill(5))
)
"split_{}_{}".format(str(nsim).zfill(5), str(nsnap).zfill(5)),
)
ftemp = ftemp_base + "_{}.npz"
# Load the particles and their clump IDs
@ -85,7 +89,7 @@ def do_split(nsim):
# Now load back in every temporary file, combine them into a single
# dictionary and save as a single .npz file.
out = {}
for file in glob(ftemp_base + '*'):
for file in glob(ftemp_base + "*"):
inp = numpy.load(file)
for key in inp.files:
out.update({key: inp[key]})
@ -107,7 +111,6 @@ if nproc > 1:
worker_process(do_split, comm, verbose=False)
else:
tasks = paths.get_ics(tonew=False)
tasks = [tasks[0]] # REMOVE
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_split(task)
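The archive written by this script is an `.npz` file keyed by clump-ID strings, which is exactly what `fit_halos.py` reads back. A toy sketch of writing and reading one (file name hypothetical):

import numpy

parts = {"1": numpy.ones(4, dtype=numpy.float32),
         "7": numpy.zeros(2, dtype=numpy.float32)}  # clump ID -> particles
numpy.savez("clumps_toy.npz", **parts)

archive = numpy.load("clumps_toy.npz")
print(archive.files)      # e.g. ['1', '7']
print(archive["7"].size)  # 2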