Correct fitting script for both clumps and halos ()

* Minor typos

* fix minor bugs

* pep8

* formatting

* pep8

* Fix minor bugs

* New path & pep8

* add splitt

* Updates

* Improve calculation within radius

* pep8

* pep8

* get the script working

* Add matter overdensity

* Add m200m to the script

* Fix looping bug

* add parents support

* add import

* Optionally concatenate velocities

* Make optional masking

* Ignore the error message

* Start reading in raw data

* Fix cat reading

* Additional units conversions

* Add clump reading

* Fix indexing

* Remove old comment

* Remove old comment

* set npart to 0 instead of overflow from NaN

* fix docs

* rm boring stuff

* Remove old stuff

* Remove old stuff

* Remove old comment

* Update nb
This commit is contained in:
Richard Stiskalek 2023-04-19 16:39:35 +02:00 committed by GitHub
parent c2fde1566b
commit 39b3498621
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 709 additions and 357 deletions

View file

@ -14,3 +14,4 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .halo import Clump, Halo # noqa from .halo import Clump, Halo # noqa
from .haloprofile import NFWPosterior, NFWProfile # noqa from .haloprofile import NFWPosterior, NFWProfile # noqa
from .utils import split_jobs # noqa

View file

@ -22,6 +22,7 @@ class BaseStructure(ABC):
""" """
Basic structure object for handling operations on its particles. Basic structure object for handling operations on its particles.
""" """
_particles = None _particles = None
_info = None _info = None
_box = None _box = None
@ -39,7 +40,7 @@ class BaseStructure(ABC):
@particles.setter @particles.setter
def particles(self, particles): def particles(self, particles):
pars = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M'] pars = ["x", "y", "z", "vx", "vy", "vz", "M"]
assert all(p in particles.dtype.names for p in pars) assert all(p in particles.dtype.names for p in pars)
self._particles = particles self._particles = particles
@ -87,20 +88,9 @@ class BaseStructure(ABC):
------- -------
pos : 2-dimensional array of shape `(n_particles, 3)`. pos : 2-dimensional array of shape `(n_particles, 3)`.
""" """
ps = ('x', 'y', 'z') ps = ("x", "y", "z")
return numpy.vstack([self[p] - self.info[p] for p in ps]).T return numpy.vstack([self[p] - self.info[p] for p in ps]).T
@property
def r(self):
"""
Radial separation of the particles from the centre of the object.
Returns
-------
r : 1-dimensional array of shape `(n_particles, )`.
"""
return numpy.linalg.norm(self.pos, axis=1)
@property @property
def vel(self): def vel(self):
""" """
@ -112,34 +102,61 @@ class BaseStructure(ABC):
""" """
return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
@property def r(self):
def cmass(self):
""" """
Cartesian position components of the object's centre of mass. Note that Calculate the radial separation of the particles from the centre of the
this is already in a frame centered at the clump's potential minimum, object.
so its distance from origin indicates the separation of the centre of
mass and potential minimum. Returns
-------
r : 1-dimensional array of shape `(n_particles, )`.
"""
return numpy.linalg.norm(self.pos, axis=1)
def cmass(self, rmax, rmin):
"""
Calculate Cartesian position components of the object's centre of mass.
Note that this is already in a frame centered at the clump's potential
minimum, so its distance from origin indicates the separation of the
centre of mass and potential minimum.
Parameters
----------
rmax : float
Maximum radius for particles to be included in the calculation.
rmin : float
Minimum radius for particles to be included in the calculation.
Returns Returns
------- -------
cm : 1-dimensional array of shape `(3, )` cm : 1-dimensional array of shape `(3, )`
""" """
return numpy.average(self.pos, axis=0, weights=self['M']) r = self.r()
mask = (r >= rmin) & (r <= rmax)
return numpy.average(self.pos[mask], axis=0, weights=self["M"][mask])
@property def angular_momentum(self, rmax, rmin=0):
def angular_momentum(self):
""" """
Angular momentum in the box coordinates. Calculate angular momentum in the box coordinates.
NOTE: here also change velocities to the CM and appropriately edit the Parameters
docs. ----------
rmax : float
Maximum radius for particles to be included in the calculation.
rmin : float
Minimum radius for particles to be included in the calculation.
Returns Returns
------- -------
J : 1-dimensional array or shape `(3, )` J : 1-dimensional array or shape `(3, )`
""" """
J = numpy.cross(self.pos - self.cmass, self.vel) r = self.r()
return numpy.einsum("i,ij->j", self.m, J) mask = (r >= rmin) & (r <= rmax)
pos = self.pos[mask] - self.cmass(rmax, rmin)
# Velocitities in the object CM frame
vel = self.vel[mask]
vel -= numpy.average(self.vel[mask], axis=0, weights=self["M"][mask])
return numpy.einsum("i,ij->j", self["M"][mask], numpy.cross(pos, vel))
def enclosed_mass(self, rmax, rmin=0): def enclosed_mass(self, rmax, rmin=0):
""" """
@ -156,8 +173,8 @@ class BaseStructure(ABC):
------- -------
enclosed_mass : float enclosed_mass : float
""" """
r = self.r r = self.r()
return numpy.sum(self['M'][(r >= rmin) & (r <= rmax)]) return numpy.sum(self["M"][(r >= rmin) & (r <= rmax)])
def lambda_bullock(self, radius, npart_min=10): def lambda_bullock(self, radius, npart_min=10):
r""" r"""
@ -182,19 +199,21 @@ class BaseStructure(ABC):
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.; Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R. Klypin, A. A.; Porciani, C.; Primack, J. R.
""" """
mask = self.r <= radius mask = self.r() <= radius
if numpy.sum(mask) < npart_min: if numpy.sum(mask) < npart_min:
return numpy.nan return numpy.nan
mass = self.enclosed_mass(radius) mass = self.enclosed_mass(radius)
V = numpy.sqrt(self.box.box_G * mass / radius) V = numpy.sqrt(self.box.box_G * mass / radius)
return (numpy.linalg.norm(self.angular_momentum[mask]) return numpy.linalg.norm(self.angular_momentum(radius)) / (
/ (numpy.sqrt(2) * mass * V * radius)) numpy.sqrt(2) * mass * V * radius
)
def spherical_overdensity_mass(self, delta_mult, npart_min=10): def spherical_overdensity_mass(self, delta_mult, npart_min=10, kind="crit"):
r""" r"""
Calculate spherical overdensity mass and radius. The mass is defined as Calculate spherical overdensity mass and radius. The mass is defined as
the enclosed mass within an outermost radius where the mean enclosed the enclosed mass within an outermost radius where the mean enclosed
spherical density reaches a multiple of the critical density `delta`. spherical density reaches a multiple of the critical density `delta`
(times the matter density if `kind` is `matter`).
Parameters Parameters
---------- ----------
@ -203,6 +222,8 @@ class BaseStructure(ABC):
npart_min : int npart_min : int
Minimum number of enclosed particles for a radius to be Minimum number of enclosed particles for a radius to be
considered trustworthy. considered trustworthy.
kind : str
Either `crit` or `matter`, for critical or matter overdensity
Returns Returns
------- -------
@ -211,17 +232,25 @@ class BaseStructure(ABC):
mx : float mx : float
Corresponding spherical enclosed mass. Corresponding spherical enclosed mass.
""" """
# Quick check of inputs
assert kind in ["crit", "matter"]
# We first sort the particles in an increasing separation # We first sort the particles in an increasing separation
rs = self.r rs = self.r()
order = numpy.argsort(rs) order = numpy.argsort(rs)
rs = rs[order] rs = rs[order]
cmass = numpy.cumsum(self['M']) # Cumulative mass cmass = numpy.cumsum(self["M"][order]) # Cumulative mass
# We calculate the enclosed volume and indices where it is above target # We calculate the enclosed volume and indices where it is above target
vol = 4 * numpy.pi / 3 * (rs**3 - rs[0] ** 3) vol = 4 * numpy.pi / 3 * (rs**3 - rs[0] ** 3)
ks = numpy.where([cmass / vol > delta_mult * self.box.rhoc])[0]
target_density = delta_mult * self.box.box_rhoc
if kind == "matter":
target_density *= self.box.cosmo.Om0
with numpy.errstate(divide="ignore"):
ks = numpy.where(cmass / vol > target_density)[0]
if ks.size == 0: # Never above the threshold? if ks.size == 0: # Never above the threshold?
return numpy.nan, numpy.nan return numpy.nan, numpy.nan
k = numpy.maximum(ks) k = numpy.max(ks)
if k < npart_min: # Too few particles? if k < npart_min: # Too few particles?
return numpy.nan, numpy.nan return numpy.nan, numpy.nan
return rs[k], cmass[k] return rs[k], cmass[k]
@ -235,7 +264,7 @@ class BaseStructure(ABC):
------- -------
key : list of str key : list of str
""" """
return self.data.dtype.names return self.particles.dtype.names
def __getitem__(self, key): def __getitem__(self, key):
if key not in self.keys: if key not in self.keys:
@ -259,6 +288,7 @@ class Clump(BaseStructure):
box : :py:class:`csiborgtools.read.BoxUnits` box : :py:class:`csiborgtools.read.BoxUnits`
Box units object. Box units object.
""" """
def __init__(self, particles, info, box): def __init__(self, particles, info, box):
self.particles = particles self.particles = particles
self.info = info self.info = info
@ -279,6 +309,7 @@ class Halo(BaseStructure):
box : :py:class:`csiborgtools.read.BoxUnits` box : :py:class:`csiborgtools.read.BoxUnits`
Box units object. Box units object.
""" """
def __init__(self, particles, info, box): def __init__(self, particles, info, box):
self.particles = particles self.particles = particles
self.info = info self.info = info

View file

@ -187,9 +187,6 @@ class NFWPosterior(NFWProfile):
Clump object containing the particles and clump information. Clump object containing the particles and clump information.
""" """
def __init__(self):
super().__init__()
@property @property
def clump(self): def clump(self):
""" """
@ -228,7 +225,7 @@ class NFWPosterior(NFWProfile):
------- -------
rho0: float rho0: float
""" """
return mass / self.bounded_enclosed_mass(rmin, rmax, Rs, 1) return mass / self.bounded_mass(rmin, rmax, Rs, 1)
def initlogRs(self, r, rmin, rmax, binsguess=10): def initlogRs(self, r, rmin, rmax, binsguess=10):
r""" r"""
@ -255,7 +252,6 @@ class NFWPosterior(NFWProfile):
counts, edges = numpy.histogram(r, bins) counts, edges = numpy.histogram(r, bins)
return numpy.log10(edges[numpy.argmax(counts)]) return numpy.log10(edges[numpy.argmax(counts)])
def logprior(self, logRs, rmin, rmax): def logprior(self, logRs, rmin, rmax):
r""" r"""
Logarithmic uniform prior on :math:`\log R_{\rm s}`. Unnormalised but Logarithmic uniform prior on :math:`\log R_{\rm s}`. Unnormalised but
@ -276,7 +272,7 @@ class NFWPosterior(NFWProfile):
""" """
if not rmin < 10**logRs < rmax: if not rmin < 10**logRs < rmax:
return -numpy.infty return -numpy.infty
return 0. return 0.0
def loglikelihood(self, logRs, r, rmin, rmax, npart): def loglikelihood(self, logRs, r, rmin, rmax, npart):
""" """
@ -303,7 +299,6 @@ class NFWPosterior(NFWProfile):
mnfw = self.bounded_mass(rmin, rmax, Rs, 1) mnfw = self.bounded_mass(rmin, rmax, Rs, 1)
return numpy.sum(self._logprofile(r, Rs, 1)) - npart * numpy.log(mnfw) return numpy.sum(self._logprofile(r, Rs, 1)) - npart * numpy.log(mnfw)
def __call__(self, logRs, r, rmin, rmax, npart): def __call__(self, logRs, r, rmin, rmax, npart):
""" """
Logarithmic posterior. Sum of the logarithmic prior and likelihood. Logarithmic posterior. Sum of the logarithmic prior and likelihood.
@ -353,18 +348,20 @@ class NFWPosterior(NFWProfile):
Best fit NFW central density. Best fit NFW central density.
""" """
assert isinstance(clump, Clump) assert isinstance(clump, Clump)
r = clump.r r = clump.r()
rmin = numpy.min(r) rmin = numpy.min(r)
rmax, mtot = clump.spherical_overdensity_mass(200) rmax, mtot = clump.spherical_overdensity_mass(200)
npart = numpy.sum((rmin <= r) & (r <= rmax)) mask = (rmin <= r) & (r <= rmax)
npart = numpy.sum(mask)
r = r[mask]
# Loss function to optimize # Loss function to optimize
def loss(logRs): def loss(logRs):
return -self(logRs, r, rmin, rmax, npart) return -self(logRs, r, rmin, rmax, npart)
res = minimize_scalar( res = minimize_scalar(
loss, bounds=(numpy.log10(rmin), numpy.log10(rmax)), loss, bounds=(numpy.log10(rmin), numpy.log10(rmax)), method="bounded"
method='bounded') )
if numpy.log10(rmax) - res.x < eps: if numpy.log10(rmax) - res.x < eps:
res.success = False res.success = False

View file

@ -0,0 +1,43 @@
# Copyright (C) 2022 Richard Stiskalek, Deaglan Bartlett
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Fitting utility functions."""
import numpy
def split_jobs(njobs, ncpu):
"""
Split `njobs` amongst `ncpu`.
Parameters
----------
njobs : int
Number of jobs.
ncpu : int
Number of CPUs.
Returns
-------
jobs : list of lists of integers
Outer list of each CPU and inner lists for CPU's jobs.
"""
njobs_per_cpu, njobs_remainder = divmod(njobs, ncpu)
jobs = numpy.arange(njobs_per_cpu * ncpu).reshape((njobs_per_cpu, ncpu)).T
jobs = jobs.tolist()
for i in range(njobs_remainder):
jobs[i].append(njobs_per_cpu * ncpu + i)
return jobs

View file

@ -12,7 +12,7 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from csiborgtools.match.match import ( # noqa from .match import ( # noqa
ParticleOverlap, ParticleOverlap,
RealisationsMatcher, RealisationsMatcher,
calculate_overlap, calculate_overlap,
@ -24,4 +24,5 @@ from csiborgtools.match.match import ( # noqa
fill_delta_indxs, fill_delta_indxs,
get_clumplims, get_clumplims,
) )
from csiborgtools.match.num_density import binned_counts, number_density # noqa from .num_density import binned_counts, number_density # noqa
from .utils import concatenate_clumps # noqa

View file

@ -16,7 +16,7 @@
import numpy import numpy
def concatenate_clumps(clumps): def concatenate_clumps(clumps, include_velocities=False):
""" """
Concatenate an array of clumps to a single array containing all particles. Concatenate an array of clumps to a single array containing all particles.
@ -24,6 +24,8 @@ def concatenate_clumps(clumps):
---------- ----------
clumps : list of structured arrays clumps : list of structured arrays
List of clumps. Each clump must be a structured array with keys List of clumps. Each clump must be a structured array with keys
include_velocities : bool, optional
Whether to include velocities in the output array.
Returns Returns
------- -------
@ -34,21 +36,31 @@ def concatenate_clumps(clumps):
for clump, __ in clumps: for clump, __ in clumps:
N += clump.size N += clump.size
# Infer dtype of positions # Infer dtype of positions
if clumps[0][0]['x'].dtype.char in numpy.typecodes["AllInteger"]: if clumps[0][0]["x"].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32 posdtype = numpy.int32
else: else:
posdtype = numpy.float32 posdtype = numpy.float32
# Pre-allocate array # We pre-allocate an empty array. By default, we include just particle positions,
dtype = {"names": ['x', 'y', 'z', 'M'], # which may be specified by cell IDs if integers, and masses. Additionally also
"formats": [posdtype] * 3 + [numpy.float32]} # outputs velocities.
if include_velocities:
dtype = {
"names": ["x", "y", "z", "vx", "vy", "vz", "M"],
"formats": [posdtype] * 3 + [numpy.float32] * 4,
}
else:
dtype = {
"names": ["x", "y", "z", "M"],
"formats": [posdtype] * 3 + [numpy.float32],
}
particles = numpy.full(N, numpy.nan, dtype) particles = numpy.full(N, numpy.nan, dtype)
# Fill it one clump by another # Fill it one clump by another
start = 0 start = 0
for clump, __ in clumps: for clump, __ in clumps:
end = start + clump.size end = start + clump.size
for p in ('x', 'y', 'z', 'M'): for p in dtype["names"]:
particles[p][start:end] = clump[p] particles[p][start:end] = clump[p]
start = end start = end

View file

@ -12,6 +12,7 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .box_units import BoxUnits # noqa
from .halo_cat import ClumpsCatalogue, HaloCatalogue # noqa from .halo_cat import ClumpsCatalogue, HaloCatalogue # noqa
from .knn_summary import kNNCDFReader # noqa from .knn_summary import kNNCDFReader # noqa
from .obs import ( # noqa from .obs import ( # noqa
@ -26,4 +27,4 @@ from .paths import CSiBORGPaths # noqa
from .pk_summary import PKReader # noqa from .pk_summary import PKReader # noqa
from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm # noqa from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm # noqa
from .tpcf_summary import TPCFReader # noqa from .tpcf_summary import TPCFReader # noqa
from .utils import cartesian_to_radec, radec_to_cartesian from .utils import cartesian_to_radec, cols_to_structured, radec_to_cartesian # noqa

View file

@ -24,10 +24,26 @@ from .readsim import ParticleReader
# Map of unit conversions # Map of unit conversions
CONV_NAME = { CONV_NAME = {
"length": ['x', 'y', 'z', "peak_x", "peak_y", "peak_z", "Rs", "rmin", "length": [
"rmax", "r200", "r500", "x0", "y0", "z0", "lagpatch"], "x",
"mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain", 'M'], "y",
"density": ["rho0"] "z",
"peak_x",
"peak_y",
"peak_z",
"Rs",
"rmin",
"rmax",
"r200c",
"r500c",
"r200m",
"x0",
"y0",
"z0",
"lagpatch",
],
"mass": ["mass_cl", "totpartmass", "m200c", "m500c", "mass_mmain", "M", "m200m"],
"density": ["rho0"],
} }
@ -53,15 +69,29 @@ class BoxUnits:
""" """
partreader = ParticleReader(paths) partreader = ParticleReader(paths)
info = partreader.read_info(nsnap, nsim) info = partreader.read_info(nsnap, nsim)
pars = ["boxlen", "time", "aexp", "H0", pars = [
"omega_m", "omega_l", "omega_k", "omega_b", "boxlen",
"unit_l", "unit_d", "unit_t"] "time",
"aexp",
"H0",
"omega_m",
"omega_l",
"omega_k",
"omega_b",
"unit_l",
"unit_d",
"unit_t",
]
for par in pars: for par in pars:
setattr(self, "_" + par, float(info[par])) setattr(self, "_" + par, float(info[par]))
self._cosmo = LambdaCDM(H0=self._H0, Om0=self._omega_m, self._cosmo = LambdaCDM(
Ode0=self._omega_l, Tcmb0=2.725 * units.K, H0=self._H0,
Ob0=self._omega_b) Om0=self._omega_m,
Ode0=self._omega_l,
Tcmb0=2.725 * units.K,
Ob0=self._omega_b,
)
self._Msuncgs = constants.M_sun.cgs.value # Solar mass in grams self._Msuncgs = constants.M_sun.cgs.value # Solar mass in grams
@property @property
@ -227,7 +257,7 @@ class BoxUnits:
cosmo_redshift : foat cosmo_redshift : foat
Cosmological redshift. Cosmological redshift.
""" """
x = numpy.linspace(0., 1., 5001) x = numpy.linspace(0.0, 1.0, 5001)
y = self.cosmo.comoving_distance(x) y = self.cosmo.comoving_distance(x)
return interp1d(y, x)(self.box2mpc(dist)) return interp1d(y, x)(self.box2mpc(dist))
@ -337,8 +367,7 @@ class BoxUnits:
density : float density : float
Density in :math:`M_\odot / \mathrm{pc}^3`. Density in :math:`M_\odot / \mathrm{pc}^3`.
""" """
return (density * self._unit_d / self._Msuncgs return density * self._unit_d / self._Msuncgs * (units.Mpc.to(units.cm)) ** 3
* (units.Mpc.to(units.cm))**3)
def dens2box(self, density): def dens2box(self, density):
r""" r"""
@ -355,8 +384,7 @@ class BoxUnits:
density : float density : float
Density in box units. Density in box units.
""" """
return (density / self._unit_d * self._Msuncgs return density / self._unit_d * self._Msuncgs / (units.Mpc.to(units.cm)) ** 3
/ (units.Mpc.to(units.cm))**3)
def convert_from_boxunits(self, data, names): def convert_from_boxunits(self, data, names):
r""" r"""
@ -389,7 +417,7 @@ class BoxUnits:
transforms = { transforms = {
"length": self.box2mpc, "length": self.box2mpc,
"mass": self.box2solarmass, "mass": self.box2solarmass,
"density": self.box2dens "density": self.box2dens,
} }
for name in names: for name in names:
@ -407,7 +435,8 @@ class BoxUnits:
# If nothing found # If nothing found
if not found: if not found:
raise NotImplementedError( raise NotImplementedError(
"Conversion of `{}` is not defined.".format(name)) "Conversion of `{}` is not defined.".format(name)
)
# Center at the observer # Center at the observer
if name in ["peak_x", "peak_y", "peak_z", "x0", "y0", "z0"]: if name in ["peak_x", "peak_y", "peak_z", "x0", "y0", "z0"]:

View file

@ -21,13 +21,14 @@ from sklearn.neighbors import NearestNeighbors
from .box_units import BoxUnits from .box_units import BoxUnits
from .paths import CSiBORGPaths from .paths import CSiBORGPaths
from .readsim import ParticleReader from .readsim import ParticleReader
from .utils import cartesian_to_radec, flip_cols, radec_to_cartesian from .utils import add_columns, cartesian_to_radec, flip_cols, radec_to_cartesian
class BaseCatalogue(ABC): class BaseCatalogue(ABC):
""" """
Base (sub)halo catalogue. Base (sub)halo catalogue.
""" """
_data = None _data = None
_paths = None _paths = None
_nsim = None _nsim = None
@ -132,9 +133,9 @@ class BaseCatalogue(ABC):
pos : 2-dimensional array of shape `(nobjects, 3)` pos : 2-dimensional array of shape `(nobjects, 3)`
""" """
if in_initial: if in_initial:
ps = ['x0', 'y0', 'z0'] ps = ["x0", "y0", "z0"]
else: else:
ps = ['x', 'y', 'z'] ps = ["x", "y", "z"]
pos = numpy.vstack([self[p] for p in ps]).T pos = numpy.vstack([self[p] for p in ps]).T
if not cartesian: if not cartesian:
pos = cartesian_to_radec(pos) pos = cartesian_to_radec(pos)
@ -248,8 +249,7 @@ class BaseCatalogue(ABC):
knn.fit(pos) knn.fit(pos)
# Convert angular radius to cosine difference. # Convert angular radius to cosine difference.
metric_maxdist = 1 - numpy.cos(numpy.deg2rad(ang_radius)) metric_maxdist = 1 - numpy.cos(numpy.deg2rad(ang_radius))
dist, ind = knn.radius_neighbors(X, radius=metric_maxdist, dist, ind = knn.radius_neighbors(X, radius=metric_maxdist, sort_results=True)
sort_results=True)
# And the cosine difference to angular distance. # And the cosine difference to angular distance.
for i in range(X.shape[0]): for i in range(X.shape[0]):
dist[i] = numpy.rad2deg(numpy.arccos(1 - dist[i])) dist[i] = numpy.rad2deg(numpy.arccos(1 - dist[i]))
@ -282,13 +282,9 @@ class ClumpsCatalogue(BaseCatalogue):
r""" r"""
Clumps catalogue, defined in the final snapshot. Clumps catalogue, defined in the final snapshot.
TODO:
Add fitted quantities.
Add threshold on number of particles
Parameters Parameters
---------- ----------
nsim : int sim : int
IC realisation index. IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths` paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object. CSiBORG paths object.
@ -299,28 +295,68 @@ class ClumpsCatalogue(BaseCatalogue):
minmass : len-2 tuple, optional minmass : len-2 tuple, optional
Minimum mass. The first element is the catalogue key and the second is Minimum mass. The first element is the catalogue key and the second is
the value. the value.
load_fitted : bool, optional
Whether to load fitted quantities.
rawdata : bool, optional
Whether to return the raw data. In this case applies no cuts and
transformations.
""" """
def __init__(self, nsim, paths, maxdist=155.5 / 0.705,
minmass=("mass_cl", 1e12)): def __init__(
self,
nsim,
paths,
maxdist=155.5 / 0.705,
minmass=("mass_cl", 1e12),
load_fitted=True,
rawdata=False,
):
self.nsim = nsim self.nsim = nsim
self.paths = paths self.paths = paths
# Read in the clumps from the final snapshot # Read in the clumps from the final snapshot
partreader = ParticleReader(self.paths) partreader = ParticleReader(self.paths)
cols = ["index", "parent", 'x', 'y', 'z', "mass_cl"] cols = ["index", "parent", "x", "y", "z", "mass_cl"]
data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols) self._data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols)
# Overwrite the parent with the ultimate parent # Overwrite the parent with the ultimate parent
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim)) mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
data["parent"] = mmain["ultimate_parent"] self._data["parent"] = mmain["ultimate_parent"]
# Flip positions and convert from code units to cMpc. Convert M too if load_fitted:
flip_cols(data, "x", "z") fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "clumps"))
for p in ('x', 'y', 'z'): cols = [col for col in fits.dtype.names if col != "index"]
data[p] -= 0.5 X = [fits[col] for col in cols]
data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', "mass_cl"]) self._data = add_columns(self._data, X, cols)
mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist # If the raw data is not required, then start applying transformations
mask &= data[minmass[0]] > minmass[1] # and cuts.
self._data = data[mask] if not rawdata:
flip_cols(self._data, "x", "z")
for p in ("x", "y", "z"):
self._data[p] -= 0.5
self._data = self.box.convert_from_boxunits(
self._data,
[
"x",
"y",
"z",
"mass_cl",
"totpartmass",
"rho0",
"r200c",
"r500c",
"m200c",
"m500c",
"r200m",
"m200m",
],
)
if maxdist is not None:
dist = numpy.sqrt(
self._data["x"] ** 2 + self._data["y"] ** 2 + self._data["z"] ** 2
)
self._data = self._data[dist < maxdist]
if minmass is not None:
self._data = self._data[self._data[minmass[0]] > minmass[1]]
@property @property
def ismain(self): def ismain(self):
@ -341,7 +377,6 @@ class HaloCatalogue(BaseCatalogue):
TODO: TODO:
Add the fitted quantities Add the fitted quantities
Add threshold on number of particles
Parameters Parameters
---------- ----------
@ -356,20 +391,32 @@ class HaloCatalogue(BaseCatalogue):
minmass : len-2 tuple minmass : len-2 tuple
Minimum mass. The first element is the catalogue key and the second is Minimum mass. The first element is the catalogue key and the second is
the value. the value.
rawdata : bool, optional
Whether to return the raw data. In this case applies no cuts and
transformations.
""" """
def __init__(self, nsim, paths, maxdist=155.5 / 0.705,
minmass=('M', 1e12)): def __init__(
self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12), rawdata=False
):
self.nsim = nsim self.nsim = nsim
self.paths = paths self.paths = paths
# Read in the mmain catalogue of summed substructure # Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim)) mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
data = mmain["mmain"] self._data = mmain["mmain"]
if not rawdata:
# Flip positions and convert from code units to cMpc. Convert M too # Flip positions and convert from code units to cMpc. Convert M too
flip_cols(data, "x", "z") flip_cols(self._data, "x", "z")
for p in ('x', 'y', 'z'): for p in ("x", "y", "z"):
data[p] -= 0.5 self._data[p] -= 0.5
data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', 'M']) self._data = self.box.convert_from_boxunits(
self._data, ["x", "y", "z", "M"]
)
mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist if maxdist is not None:
mask &= data[minmass[0]] > minmass[1] dist = numpy.sqrt(
self._data = data[mask] self._data["x"] ** 2 + self._data["y"] ** 2 + self._data["z"] ** 2
)
self._data = self._data[dist < maxdist]
if minmass is not None:
self._data = self._data[self._data[minmass[0]] > minmass[1]]

View file

@ -32,6 +32,7 @@ class CSiBORGPaths:
postdir: str postdir: str
Path to the folder where post-processed files are stored. Path to the folder where post-processed files are stored.
""" """
_srcdir = None _srcdir = None
_postdir = None _postdir = None
@ -96,8 +97,7 @@ class CSiBORGPaths:
fpath = join(self.postdir, "temp") fpath = join(self.postdir, "temp")
if not isdir(fpath): if not isdir(fpath):
mkdir(fpath) mkdir(fpath)
warn("Created directory `{}`.".format(fpath), UserWarning, warn("Created directory `{}`.".format(fpath), UserWarning, stacklevel=1)
stacklevel=1)
return fpath return fpath
def mmain_path(self, nsnap, nsim): def mmain_path(self, nsnap, nsim):
@ -118,11 +118,9 @@ class CSiBORGPaths:
fdir = join(self.postdir, "mmain") fdir = join(self.postdir, "mmain")
if not isdir(fdir): if not isdir(fdir):
mkdir(fdir) mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning, warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
stacklevel=1)
return join( return join(
fdir, fdir, "mmain_{}_{}.npz".format(str(nsim).zfill(5), str(nsnap).zfill(5))
"mmain_{}_{}.npz".format(str(nsim).zfill(5), str(nsnap).zfill(5))
) )
def initmatch_path(self, nsim, kind): def initmatch_path(self, nsim, kind):
@ -145,8 +143,7 @@ class CSiBORGPaths:
fdir = join(self.postdir, "initmatch") fdir = join(self.postdir, "initmatch")
if not isdir(fdir): if not isdir(fdir):
mkdir(fdir) mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning, warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
stacklevel=1)
return join(fdir, "{}_{}.npy".format(kind, str(nsim).zfill(5))) return join(fdir, "{}_{}.npy".format(kind, str(nsim).zfill(5)))
def split_path(self, nsnap, nsim): def split_path(self, nsnap, nsim):
@ -167,10 +164,10 @@ class CSiBORGPaths:
fdir = join(self.postdir, "split") fdir = join(self.postdir, "split")
if not isdir(fdir): if not isdir(fdir):
mkdir(fdir) mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning, warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
stacklevel=1) return join(
return join(fdir, "clumps_{}_{}.npz" fdir, "clumps_{}_{}.npz".format(str(nsim).zfill(5), str(nsnap).zfill(5))
.format(str(nsim).zfill(5), str(nsnap).zfill(5))) )
def get_ics(self, tonew): def get_ics(self, tonew):
""" """
@ -239,7 +236,7 @@ class CSiBORGPaths:
# Get all files in simpath that start with output_ # Get all files in simpath that start with output_
snaps = glob(join(simpath, "output_*")) snaps = glob(join(simpath, "output_*"))
# Take just the last _00XXXX from each file and strip zeros # Take just the last _00XXXX from each file and strip zeros
snaps = [int(snap.split('_')[-1].lstrip('0')) for snap in snaps] snaps = [int(snap.split("_")[-1].lstrip("0")) for snap in snaps]
return numpy.sort(snaps) return numpy.sort(snaps)
def snapshot_path(self, nsnap, nsim): def snapshot_path(self, nsnap, nsim):
@ -261,22 +258,31 @@ class CSiBORGPaths:
simpath = self.ic_path(nsim, tonew=tonew) simpath = self.ic_path(nsim, tonew=tonew)
return join(simpath, "output_{}".format(str(nsnap).zfill(5))) return join(simpath, "output_{}".format(str(nsnap).zfill(5)))
def hcat_path(self, nsim): def structfit_path(self, nsnap, nsim, kind):
""" """
Path to the final snapshot halo catalogue from `fit_halos.py`. Path to the clump or halo catalogue from `fit_halos.py`.
Parameters Parameters
---------- ----------
nsnap : int
Snapshot index.
nsim : int nsim : int
IC realisation index. IC realisation index.
kind : str
Type of catalogue. Can be either `clumps` or `halos`.
Returns Returns
------- -------
path : str path : str
""" """
nsnap = str(max(self.get_snapshots(nsim))).zfill(5) assert kind in ["clumps", "halos"]
fname = "ramses_out_{}_{}.npy".format(str(self.nsim).zfill(5), nsnap) fdir = join(self.postdir, "structfit")
return join(self.postdir, fname) if not isdir(fdir):
mkdir(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
fname = "{}_out_{}_{}.npy".format(kind, str(nsim).zfill(5), str(nsnap).zfill(5))
return join(fdir, fname)
def knnauto_path(self, run, nsim=None): def knnauto_path(self, run, nsim=None):
""" """
@ -297,8 +303,7 @@ class CSiBORGPaths:
fdir = join(self.postdir, "knn", "auto") fdir = join(self.postdir, "knn", "auto")
if not isdir(fdir): if not isdir(fdir):
makedirs(fdir) makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning, warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
stacklevel=1)
if nsim is not None: if nsim is not None:
return join(fdir, "knncdf_{}_{}.p".format(str(nsim).zfill(5), run)) return join(fdir, "knncdf_{}_{}.p".format(str(nsim).zfill(5), run))
@ -325,8 +330,7 @@ class CSiBORGPaths:
fdir = join(self.postdir, "knn", "cross") fdir = join(self.postdir, "knn", "cross")
if not isdir(fdir): if not isdir(fdir):
makedirs(fdir) makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning, warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
stacklevel=1)
if nsims is not None: if nsims is not None:
assert isinstance(nsims, (list, tuple)) and len(nsims) == 2 assert isinstance(nsims, (list, tuple)) and len(nsims) == 2
nsim0 = str(nsims[0]).zfill(5) nsim0 = str(nsims[0]).zfill(5)
@ -356,8 +360,7 @@ class CSiBORGPaths:
fdir = join(self.postdir, "tpcf", "auto") fdir = join(self.postdir, "tpcf", "auto")
if not isdir(fdir): if not isdir(fdir):
makedirs(fdir) makedirs(fdir)
warn("Created directory `{}`.".format(fdir), UserWarning, warn("Created directory `{}`.".format(fdir), UserWarning, stacklevel=1)
stacklevel=1)
if nsim is not None: if nsim is not None:
return join(fdir, "tpcf{}_{}.p".format(str(nsim).zfill(5), run)) return join(fdir, "tpcf{}_{}.p".format(str(nsim).zfill(5), run))

View file

@ -101,8 +101,7 @@ def cols_to_structured(N, cols):
if not isinstance(cols, list) and all(isinstance(c, tuple) for c in cols): if not isinstance(cols, list) and all(isinstance(c, tuple) for c in cols):
raise TypeError("`cols` must be a list of tuples.") raise TypeError("`cols` must be a list of tuples.")
dtype = {"names": [col[0] for col in cols], dtype = {"names": [col[0] for col in cols], "formats": [col[1] for col in cols]}
"formats": [col[1] for col in cols]}
return numpy.full(N, numpy.nan, dtype=dtype) return numpy.full(N, numpy.nan, dtype=dtype)
@ -237,8 +236,9 @@ def array_to_structured(arr, cols):
""" """
cols = [cols] if isinstance(cols, str) else cols cols = [cols] if isinstance(cols, str) else cols
if arr.ndim != 2 and arr.shape[1] != len(cols): if arr.ndim != 2 and arr.shape[1] != len(cols):
raise TypeError("`arr` must be a 2-dimensional array of " raise TypeError(
"shape `(n_samples, n_cols)`.") "`arr` must be a 2-dimensional array of " "shape `(n_samples, n_cols)`."
)
dtype = {"names": cols, "formats": [arr.dtype] * len(cols)} dtype = {"names": cols, "formats": [arr.dtype] * len(cols)}
out = numpy.full(arr.shape[0], numpy.nan, dtype=dtype) out = numpy.full(arr.shape[0], numpy.nan, dtype=dtype)
@ -299,5 +299,7 @@ def extract_from_structured(arr, cols):
out[:, i] = arr[col] out[:, i] = arr[col]
# Optionally flatten # Optionally flatten
if len(cols) == 1: if len(cols) == 1:
return out.reshape(-1,) return out.reshape(
-1,
)
return out return out

140
notebooks/fitting.ipynb Normal file

File diff suppressed because one or more lines are too long

View file

@ -29,6 +29,7 @@ try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
@ -43,23 +44,13 @@ nproc = comm.Get_size()
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+") parser.add_argument("--runs", type=str, nargs="+")
args = parser.parse_args() args = parser.parse_args()
with open('../scripts/knn_auto.yml', 'r') as file: with open("../scripts/knn_auto.yml", "r") as file:
config = yaml.safe_load(file) config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
totvol = 4 * numpy.pi * Rmax**3 / 3 totvol = 4 * numpy.pi * Rmax**3 / 3
minmass = 1e12
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
ics = paths.get_ics(False)
knncdf = csiborgtools.clustering.kNN_CDF() knncdf = csiborgtools.clustering.kNN_CDF()
############################################################################### ###############################################################################
@ -75,9 +66,9 @@ def read_single(selection, cat):
psel = selection["primary"] psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None) pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None: if pmin is not None:
mmask &= (cat[psel["name"]] >= pmin) mmask &= cat[psel["name"]] >= pmin
if pmax is not None: if pmax is not None:
mmask &= (cat[psel["name"]] < pmax) mmask &= cat[psel["name"]] < pmax
pos = pos[mmask, ...] pos = pos[mmask, ...]
# Secondary selection # Secondary selection
@ -92,12 +83,13 @@ def read_single(selection, cat):
if ssel.get("marked", True): if ssel.get("marked", True):
x = cat[psel["name"]][mmask] x = cat[psel["name"]][mmask]
prop = csiborgtools.clustering.normalised_marks( prop = csiborgtools.clustering.normalised_marks(
x, prop, nbins=config["nbins_marks"]) x, prop, nbins=config["nbins_marks"]
)
if smin is not None: if smin is not None:
smask &= (prop >= smin) smask &= prop >= smin
if smax is not None: if smax is not None:
smask &= (prop < smax) smask &= prop < smax
return pos[smask, ...] return pos[smask, ...]
@ -106,8 +98,7 @@ def do_auto(run, cat, ic):
"""Calculate the kNN-CDF single catalgoue autocorrelation.""" """Calculate the kNN-CDF single catalgoue autocorrelation."""
_config = config.get(run, None) _config = config.get(run, None)
if _config is None: if _config is None:
warn("No configuration for run {}.".format(run), UserWarning, warn("No configuration for run {}.".format(run), UserWarning, stacklevel=1)
stacklevel=1)
return return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax) rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -115,21 +106,28 @@ def do_auto(run, cat, ic):
knn = NearestNeighbors() knn = NearestNeighbors()
knn.fit(pos) knn.fit(pos)
rs, cdf = knncdf( rs, cdf = knncdf(
knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"], knn,
rmin=config["rmin"], rmax=config["rmax"], rvs_gen=rvs_gen,
nsamples=int(config["nsamples"]), neval=int(config["neval"]), nneighbours=config["nneighbours"],
batch_size=int(config["batch_size"]), random_state=config["seed"]) rmin=config["rmin"],
rmax=config["rmax"],
nsamples=int(config["nsamples"]),
neval=int(config["neval"]),
batch_size=int(config["batch_size"]),
random_state=config["seed"],
)
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol}, joblib.dump(
paths.knnauto_path(run, ic)) {"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
paths.knnauto_path(run, ic),
)
def do_cross_rand(run, cat, ic): def do_cross_rand(run, cat, ic):
"""Calculate the kNN-CDF cross catalogue random correlation.""" """Calculate the kNN-CDF cross catalogue random correlation."""
_config = config.get(run, None) _config = config.get(run, None)
if _config is None: if _config is None:
warn("No configuration for run {}.".format(run), UserWarning, warn("No configuration for run {}.".format(run), UserWarning, stacklevel=1)
stacklevel=1)
return return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax) rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@ -142,10 +140,17 @@ def do_cross_rand(run, cat, ic):
knn2.fit(pos2) knn2.fit(pos2)
rs, cdf0, cdf1, joint_cdf = knncdf.joint( rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]), knn1,
rmin=config["rmin"], rmax=config["rmax"], knn2,
nsamples=int(config["nsamples"]), neval=int(config["neval"]), rvs_gen=rvs_gen,
batch_size=int(config["batch_size"]), random_state=config["seed"]) nneighbours=int(config["nneighbours"]),
rmin=config["rmin"],
rmax=config["rmax"],
nsamples=int(config["nsamples"]),
neval=int(config["neval"]),
batch_size=int(config["batch_size"]),
random_state=config["seed"],
)
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf) corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knnauto_path(run, ic)) joblib.dump({"rs": rs, "corr": corr}, paths.knnauto_path(run, ic))

View file

@ -16,7 +16,6 @@
from argparse import ArgumentParser from argparse import ArgumentParser
from datetime import datetime from datetime import datetime
from itertools import combinations from itertools import combinations
from os.path import join
from warnings import warn from warnings import warn
import joblib import joblib
@ -30,6 +29,7 @@ try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
@ -44,24 +44,12 @@ nproc = comm.Get_size()
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+") parser.add_argument("--runs", type=str, nargs="+")
args = parser.parse_args() args = parser.parse_args()
with open('../scripts/knn_cross.yml', 'r') as file: with open("../scripts/knn_cross.yml", "r") as file:
config = yaml.safe_load(file) config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
minmass = 1e12
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn" ics = paths.get_ics(False)
fout = join(dumpdir, "cross", "knncdf_{}_{}_{}.p")
knncdf = csiborgtools.clustering.kNN_CDF() knncdf = csiborgtools.clustering.kNN_CDF()
############################################################################### ###############################################################################
@ -76,9 +64,9 @@ def read_single(selection, cat):
psel = selection["primary"] psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None) pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None: if pmin is not None:
mmask &= (cat[psel["name"]] >= pmin) mmask &= cat[psel["name"]] >= pmin
if pmax is not None: if pmax is not None:
mmask &= (cat[psel["name"]] < pmax) mmask &= cat[psel["name"]] < pmax
return pos[mmask, ...] return pos[mmask, ...]
@ -99,10 +87,17 @@ def do_cross(run, ics):
knn2.fit(pos2) knn2.fit(pos2)
rs, cdf0, cdf1, joint_cdf = knncdf.joint( rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]), knn1,
rmin=config["rmin"], rmax=config["rmax"], knn2,
nsamples=int(config["nsamples"]), neval=int(config["neval"]), rvs_gen=rvs_gen,
batch_size=int(config["batch_size"]), random_state=config["seed"]) nneighbours=int(config["nneighbours"]),
rmin=config["rmin"],
rmax=config["rmax"],
nsamples=int(config["nsamples"]),
neval=int(config["neval"]),
batch_size=int(config["batch_size"]),
random_state=config["seed"],
)
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf) corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knncross_path(run, ics)) joblib.dump({"rs": rs, "corr": corr}, paths.knncross_path(run, ics))

View file

@ -16,7 +16,6 @@
from argparse import ArgumentParser from argparse import ArgumentParser
from copy import deepcopy from copy import deepcopy
from datetime import datetime from datetime import datetime
from os.path import join
from warnings import warn from warnings import warn
import joblib import joblib
@ -29,6 +28,7 @@ try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
@ -43,24 +43,12 @@ nproc = comm.Get_size()
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+") parser.add_argument("--runs", type=str, nargs="+")
args = parser.parse_args() args = parser.parse_args()
with open('../scripts/tpcf_auto.yml', 'r') as file: with open("../scripts/tpcf_auto.yml", "r") as file:
config = yaml.safe_load(file) config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
minmass = 1e12
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/tpcf"
fout = join(dumpdir, "auto", "tpcf_{}_{}.p")
paths = csiborgtools.read.CSiBORGPaths() paths = csiborgtools.read.CSiBORGPaths()
ics = paths.get_ics(False)
tpcf = csiborgtools.clustering.Mock2PCF() tpcf = csiborgtools.clustering.Mock2PCF()
############################################################################### ###############################################################################
@ -76,9 +64,9 @@ def read_single(selection, cat):
psel = selection["primary"] psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None) pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None: if pmin is not None:
mmask &= (cat[psel["name"]] >= pmin) mmask &= cat[psel["name"]] >= pmin
if pmax is not None: if pmax is not None:
mmask &= (cat[psel["name"]] < pmax) mmask &= cat[psel["name"]] < pmax
pos = pos[mmask, ...] pos = pos[mmask, ...]
# Secondary selection # Secondary selection
@ -93,12 +81,13 @@ def read_single(selection, cat):
if ssel.get("marked", True): if ssel.get("marked", True):
x = cat[psel["name"]][mmask] x = cat[psel["name"]][mmask]
prop = csiborgtools.clustering.normalised_marks( prop = csiborgtools.clustering.normalised_marks(
x, prop, nbins=config["nbins_marks"]) x, prop, nbins=config["nbins_marks"]
)
if smin is not None: if smin is not None:
smask &= (prop >= smin) smask &= prop >= smin
if smax is not None: if smax is not None:
smask &= (prop < smax) smask &= prop < smax
return pos[smask, ...] return pos[smask, ...]
@ -110,8 +99,11 @@ def do_auto(run, cat, ic):
return return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax) rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
bins = numpy.logspace(numpy.log10(config["rpmin"]), bins = numpy.logspace(
numpy.log10(config["rpmax"]), config["nrpbins"] + 1) numpy.log10(config["rpmin"]),
numpy.log10(config["rpmax"]),
config["nrpbins"] + 1,
)
pos = read_single(_config, cat) pos = read_single(_config, cat)
nrandom = int(config["randmult"] * pos.shape[0]) nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins) rp, wp = tpcf(pos, rvs_gen, nrandom, bins)

View file

@ -16,18 +16,26 @@
A script to fit halos (concentration, ...). The particle array of each CSiBORG A script to fit halos (concentration, ...). The particle array of each CSiBORG
realisation must have been split in advance by `runsplit_halos`. realisation must have been split in advance by `runsplit_halos`.
""" """
from argparse import ArgumentParser
from datetime import datetime from datetime import datetime
from os.path import join
import numpy import numpy
from mpi4py import MPI from mpi4py import MPI
from tqdm import tqdm
try: try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
args = parser.parse_args()
# Get MPI things # Get MPI things
comm = MPI.COMM_WORLD comm = MPI.COMM_WORLD
@ -36,127 +44,169 @@ nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths) partreader = csiborgtools.read.ParticleReader(paths)
nfwpost = csiborgtools.fits.NFWPosterior()
cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64), ftemp = join(paths.temp_dumpdir, "fit_clump_{}_{}_{}.npy")
("Rs", numpy.float64), ("vx", numpy.float64), cols_collect = [
("vy", numpy.float64), ("vz", numpy.float64), ("index", numpy.int32),
("Lx", numpy.float64), ("Ly", numpy.float64), ("npart", numpy.int32),
("Lz", numpy.float64), ("rho0", numpy.float64), ("totpartmass", numpy.float32),
("conc", numpy.float64), ("rmin", numpy.float64), ("vx", numpy.float32),
("rmax", numpy.float64), ("r200", numpy.float64), ("vy", numpy.float32),
("r500", numpy.float64), ("m200", numpy.float64), ("vz", numpy.float32),
("m500", numpy.float64), ("lambda200c", numpy.float64)] ("conc", numpy.float32),
("rho0", numpy.float32),
def fit_clump(particles, clump, box): ("r200c", numpy.float32),
("r500c", numpy.float32),
("m200c", numpy.float32),
("m500c", numpy.float32),
("lambda200c", numpy.float32),
("r200m", numpy.float32),
("m200m", numpy.float32),
]
def fit_clump(particles, clump_info, box):
"""
Fit an object. Can be eithe a clump or a parent halo.
"""
obj = csiborgtools.fits.Clump(particles, clump_info, box)
out = {}
if numpy.isnan(clump_info["index"]):
print("Why am I NaN?", flush=True)
out["index"] = clump_info["index"]
out["npart"] = len(obj)
out["totpartmass"] = numpy.sum(obj["M"])
for i, v in enumerate(["vx", "vy", "vz"]):
out[v] = numpy.average(obj.vel[:, i], weights=obj["M"])
# Overdensity masses
out["r200c"], out["m200c"] = obj.spherical_overdensity_mass(200, kind="crit")
out["r500c"], out["m500c"] = obj.spherical_overdensity_mass(500, kind="crit")
out["r200m"], out["m200m"] = obj.spherical_overdensity_mass(200, kind="matter")
# NFW fit
if out["npart"] > 10 and numpy.isfinite(out["r200c"]):
Rs, rho0 = nfwpost.fit(obj)
out["conc"] = Rs / out["r200c"]
out["rho0"] = rho0
# Spin within R200c
if numpy.isfinite(out["r200c"]):
out["lambda200c"] = obj.lambda_bullock(out["r200c"])
return out
out["npart"][n] = clump.Npart def load_clump_particles(clumpid, particle_archive):
out["rmin"][n] = clump.rmin """
out["rmax"][n] = clump.rmax Load a clump's particles from the particle archive. If it is not there, i.e
out["totpartmass"][n] = clump.total_particle_mass clump has no associated particles, return `None`.
out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m) """
out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m) try:
out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m) part = particle_archive[str(clumpid)]
out["Lx"][n], out["Ly"][n], out["Lz"][n] = clump.angular_momentum except KeyError:
part = None
return part
def load_parent_particles(clumpid, particle_archive, clumps_cat):
"""
Load a parent halo's particles.
"""
indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
# We first load the particles of each clump belonging to this parent and then
# concatenate them for further analysis.
clumps = []
for ind in indxs:
parts = load_clump_particles(ind, particle_archive)
if parts is not None:
clumps.append([parts, None])
if len(clumps) == 0:
return None
return csiborgtools.match.concatenate_clumps(clumps, include_velocities=True)
# We now start looping over all simulations
for i, nsim in enumerate(paths.get_ics(tonew=False)): for i, nsim in enumerate(paths.get_ics(tonew=False)):
if rank == 0: if rank == 0:
print("{}: calculating {}th simulation `{}`." print(
.format(datetime.now(), i, nsim), flush=True) "{}: calculating {}th simulation `{}`.".format(datetime.now(), i, nsim),
flush=True,
)
nsnap = max(paths.get_snapshots(nsim)) nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths) box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
# Archive of clumps, keywords are their clump IDs # Archive of clumps, keywords are their clump IDs
particle_archive = paths.split_path(nsnap, nsim) particle_archive = numpy.load(paths.split_path(nsnap, nsim))
clumpsarr = partreader.read_clumps(nsnap, nsim, clumps_cat = csiborgtools.read.ClumpsCatalogue(
cols=["index", 'x', 'y', 'z']) nsim, paths, maxdist=None, minmass=None, rawdata=True, load_fitted=False
clumpid2arrpos = {ind: ii for ii, ind in enumerate(clumpsarr["index"])} )
# We check whether we fit halos or clumps, will be indexing over different
# iterators.
if args.kind == "halos":
ismain = clumps_cat.ismain
else:
ismain = numpy.ones(len(clumps_cat), dtype=bool)
ntasks = len(clumps_cat)
# We split the clumps among the processes. Each CPU calculates a fraction
# of them and dumps the results in a structured array. Even if we are
# calculating parent halo this index runs over all clumps.
jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]
out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect)
for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs):
# If we are fitting halos and this clump is not a main, then continue.
if args.kind == "halos" and not ismain[j]:
continue
clumpid = clumps_cat["index"][j]
if args.kind == "halos":
part = load_parent_particles(clumpid, particle_archive, clumps_cat)
else:
part = load_clump_particles(clumpid, particle_archive)
nclumps = len(particle_archive.files) # We fit the particles if there are any. If not we assign the index,
# Fit 5000 clumps at a time, then dump results # otherwise it would be NaN converted to integers (-2147483648) and
batchsize = 5000 # yield an error further down.
if part is not None:
_out = fit_clump(part, clumps_cat[j], box)
for key in _out.keys():
out[key][i] = _out[key]
else:
out["index"][i] = clumpid
out["npart"][i] = 0
# This rank does these `batchsize` clumps/halos fout = ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), rank)
jobs = csiborgtools.utils.split_jobs(nclumps, nclumps // batchsize)[rank] if nproc == 0:
for clumpid in jobs: print(
... = fit_clump(particle_archive[str(clumpid)], clumpsarr[clumpid2arrpos[clumpid]]) "{}: rank {} saving to `{}`.".format(datetime.now(), rank, fout), flush=True
)
numpy.save(fout, out)
# We saved this CPU's results in a temporary file. Wait now for the other
jobs = csiborgtools.utils.split_jobs(nclumps, nproc)[rank] # CPUs and then collect results from the 0th rank and save them.
for nsplit in jobs:
parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
nsplit, nsnap, nsim, paths, remove_split=False)
N = clumps.size
cols = [("index", numpy.int64), ("npart", numpy.int64),
("totpartmass", numpy.float64), ("Rs", numpy.float64),
("rho0", numpy.float64), ("conc", numpy.float64),
("lambda200c", numpy.float64), ("vx", numpy.float64),
("vy", numpy.float64), ("vz", numpy.float64),
("Lx", numpy.float64), ("Ly", numpy.float64),
("Lz", numpy.float64), ("rmin", numpy.float64),
("rmax", numpy.float64), ("r200", numpy.float64),
("r500", numpy.float64), ("m200", numpy.float64),
("m500", numpy.float64)]
out = csiborgtools.utils.cols_to_structured(N, cols)
out["index"] = clumps["index"]
for n in range(N):
# Pick clump and its particles
xs = csiborgtools.fits.pick_single_clump(n, parts, part_clumps,
clumps)
clump = csiborgtools.fits.Clump.from_arrays(
*xs, rhoc=box.box_rhoc, G=box.box_G)
out["npart"][n] = clump.Npart
out["rmin"][n] = clump.rmin
out["rmax"][n] = clump.rmax
out["totpartmass"][n] = clump.total_particle_mass
out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m)
out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m)
out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m)
out["Lx"][n], out["Ly"][n], out["Lz"][n] = clump.angular_momentum
# Spherical overdensity radii and masses
rs, ms = clump.spherical_overdensity_mass([200, 500])
out["r200"][n] = rs[0]
out["r500"][n] = rs[1]
out["m200"][n] = ms[0]
out["m500"][n] = ms[1]
out["lambda200c"][n] = clump.lambda200c
# NFW profile fit
if clump.Npart > 10 and numpy.isfinite(out["r200"][n]):
nfwpost = csiborgtools.fits.NFWPosterior(clump)
logRs, __ = nfwpost.maxpost_logRs()
Rs = 10**logRs
if not numpy.isnan(logRs):
out["Rs"][n] = Rs
out["rho0"][n] = nfwpost.rho0_from_Rs(Rs)
out["conc"][n] = out["r200"][n] / Rs
csiborgtools.read.dump_split(out, nsplit, nsnap, nsim, paths)
# Wait until all jobs finished before moving to another simulation
comm.Barrier() comm.Barrier()
# # Use the rank 0 to combine outputs for this CSiBORG realisation if rank == 0:
# if rank == 0: print(
# print("Collecting results!") "{}: collecting results for simulation `{}`.".format(datetime.now(), nsim),
# partreader = csiborgtools.read.ParticleReader(paths) flush=True,
# out_collected = csiborgtools.read.combine_splits( )
# utils.Nsplits, nsnap, nsim, partreader, cols_collect, # We write to the output array. Load data from each CPU and append to
# remove_splits=True, verbose=False) # the output array.
# fname = paths.hcat_path(nsim) out = csiborgtools.read.cols_to_structured(ntasks, cols_collect)
# print("Saving results to `{}`.".format(fname)) clumpid2outpos = {indx: i for i, indx in enumerate(clumps_cat["index"])}
# numpy.save(fname, out_collected) for i in range(nproc):
# inp = numpy.load(ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), i))
# comm.Barrier() for j, clumpid in enumerate(inp["index"]):
# k = clumpid2outpos[clumpid]
# if rank == 0: for key in inp.dtype.names:
# print("All finished! See ya!") out[key][k] = inp[key][j]
# If we were analysing main halos, then remove array indices that do
# not correspond to parent halos.
if args.kind == "halos":
out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, "clumps")
print("Saving to `{}`.".format(fout), flush=True)
numpy.save(fout, out)
# We now wait before moving on to another simulation.
comm.Barrier()

View file

@ -12,7 +12,10 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Script to split particles to indivudual files according to their clump.""" """
Script to split particles to individual files according to their clump. This is
useful for calculating the halo properties directly from the particles.
"""
from datetime import datetime from datetime import datetime
from gc import collect from gc import collect
from glob import glob from glob import glob
@ -28,6 +31,7 @@ try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
@ -38,7 +42,7 @@ nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
verbose = nproc == 1 verbose = nproc == 1
partcols = ['x', 'y', 'z', "vx", "vy", "vz", 'M'] partcols = ["x", "y", "z", "vx", "vy", "vz", "M"]
def do_split(nsim): def do_split(nsim):
@ -46,7 +50,7 @@ def do_split(nsim):
reader = csiborgtools.read.ParticleReader(paths) reader = csiborgtools.read.ParticleReader(paths)
ftemp_base = join( ftemp_base = join(
paths.temp_dumpdir, paths.temp_dumpdir,
"split_{}_{}".format(str(nsim).zfill(5), str(nsnap).zfill(5)) "split_{}_{}".format(str(nsim).zfill(5), str(nsnap).zfill(5)),
) )
ftemp = ftemp_base + "_{}.npz" ftemp = ftemp_base + "_{}.npz"
@ -85,7 +89,7 @@ def do_split(nsim):
# Now load back in every temporary file, combine them into a single # Now load back in every temporary file, combine them into a single
# dictionary and save as a single .npz file. # dictionary and save as a single .npz file.
out = {} out = {}
for file in glob(ftemp_base + '*'): for file in glob(ftemp_base + "*"):
inp = numpy.load(file) inp = numpy.load(file)
for key in inp.files: for key in inp.files:
out.update({key: inp[key]}) out.update({key: inp[key]})
@ -107,7 +111,6 @@ if nproc > 1:
worker_process(do_split, comm, verbose=False) worker_process(do_split, comm, verbose=False)
else: else:
tasks = paths.get_ics(tonew=False) tasks = paths.get_ics(tonew=False)
tasks = [tasks[0]] # REMOVE
for task in tasks: for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task)) print("{}: completing task `{}`.".format(datetime.now(), task))
do_split(task) do_split(task)