New filesystem, basic neighbours calculation (#10)

* simplify Planck catalogue

* add MCXC and base survey

* Add 2MPP classes

* move match to MCXC to Planck

* add halo catalogue

* rm comment

* rm unused imports

* Move conversion to box

* add min mass

* Run on all simulations

* rm old function

* add combined catalogue

* add halo positions

* add knn neighbors

* set to 5 sims for testing

* add docstring

* Switch to neighbours in a catalogue

* rename io to read

* fix indentation

* rename to read

* io -> read

* add import

* add RealisationMatcher

* io -> read

* add docstrings

* add search_sim_indices

* update todo

* keep make_cat at 10 for now

* update nb
Richard Stiskalek 2022-11-22 10:00:01 +00:00 committed by GitHub
parent f5bf04c46e
commit c748c87e45
17 changed files with 3515 additions and 549 deletions

View file

@@ -1,10 +1,9 @@
# CSiBORG tools
## :scroll: Short-term TODO
- [x] Add the X-ray clusters
- [x] Match the X-ray clusters to Planck
- [ ] Calculate catalogues for all realisations.
- [ ] Find the distribution of particles in the first snapshot
- [ ] Implement Max's halo matching
- [ ] Implement a custom model for matching galaxies to halos.
## :hourglass: Long-term TODO

View file

@@ -13,4 +13,4 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from csiborgtools import (io, match, utils, units, fits) # noqa
from csiborgtools import (read, match, utils, units, fits) # noqa

View file

@@ -22,7 +22,7 @@ from os import remove
from warnings import warn
from os.path import join
from tqdm import trange
from ..io import nparts_to_start_ind
from ..read import nparts_to_start_ind
def clump_with_particles(particle_clumps, clumps):

View file

@@ -1,263 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Scripts to read in observational catalogues.
"""
import numpy
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy import units as u
from ..utils import (add_columns, cols_to_structured)
F64 = numpy.float64
def read_planck2015(fpath, cosmo, max_comdist=None):
r"""
Read the second Planck Sunyaev-Zeldovich (SZ) source catalogue [1]. The
following is performed:
- removes clusters without a redshift estimate,
- calculates the comoving distance with the provided cosmology,
- converts `MSZ` from units of :math:`10^{14} M_\odot` to :math:`M_\odot`.
Parameters
----------
fpath : str
Path to the source catalogue.
cosmo : `astropy.cosmology` object
The cosmology to calculate cluster comoving distance from redshift and
convert their mass.
max_comdist : float, optional
Maximum comoving distance threshold in units of :math:`\mathrm{Mpc}`.
By default `None` and no threshold is applied.
Returns
-------
out : structured array
The catalogue structured array.
References
----------
[1] https://heasarc.gsfc.nasa.gov/W3Browse/all/plancksz2.html
"""
data = fits.open(fpath)[1].data
hdata = 0.7  # Little h of the catalogue
# Convert FITS to a structured array
out = numpy.full(data.size, numpy.nan, dtype=data.dtype.descr)
for name in out.dtype.names:
out[name] = data[name]
# Take only clusters with redshifts
out = out[out["REDSHIFT"] >= 0]
# Add comoving distance
dist = cosmo.comoving_distance(out["REDSHIFT"]).value
out = add_columns(out, dist, "COMDIST")
# Convert masses
for par in ("MSZ", "MSZ_ERR_UP", "MSZ_ERR_LOW"):
out[par] *= 1e14
out[par] *= (hdata / cosmo.h)**2
# Distance threshold
if max_comdist is not None:
out = out[out["COMDIST"] < max_comdist]
return out
def read_mcxc(fpath, cosmo, max_comdist=None):
r"""
Read the MCXC Meta-Catalog of X-Ray Detected Clusters of Galaxies
catalogue [1], with data description at [2] and download at [3].
Note
----
The exact mass conversion has a non-trivial dependence on :math:`H(z)`;
see [1] for more details. However, this should be negligible.
Parameters
----------
fpath : str
Path to the source catalogue obtained from [3]. Expected to be the fits
file.
cosmo : `astropy.cosmology` object
The cosmology to calculate cluster comoving distance from redshift and
convert their mass.
max_comdist : float, optional
Maximum comoving distance threshold in units of :math:`\mathrm{Mpc}`.
By default `None` and no threshold is applied.
Returns
-------
out : structured array
The catalogue structured array.
References
----------
[1] The MCXC: a meta-catalogue of x-ray detected clusters of galaxies
(2011); Piffaretti, R. ; Arnaud, M. ; Pratt, G. W. ; Pointecouteau,
E. ; Melin, J. -B.
[2] https://heasarc.gsfc.nasa.gov/W3Browse/rosat/mcxc.html
[3] https://cdsarc.cds.unistra.fr/viz-bin/cat/J/A+A/534/A109#/article
"""
data = fits.open(fpath)[1].data
hdata = 0.7 # Little h of the catalogue
cols = [("RAdeg", F64), ("DEdeg", F64), ("z", F64),
("L500", F64), ("M500", F64), ("R500", F64)]
out = cols_to_structured(data.size, cols)
for col in cols:
par = col[0]
out[par] = data[par]
# Get little h units to match the cosmology
out["L500"] *= (hdata / cosmo.h)**2
out["M500"] *= (hdata / cosmo.h)**2
# Get the 10s back in
out["L500"] *= 1e44 # ergs/s
out["M500"] *= 1e14 # Msun
dist = cosmo.comoving_distance(data["z"]).value
out = add_columns(out, dist, "COMDIST")
out = add_columns(out, data["MCXC"], "name")
if max_comdist is not None:
out = out[out["COMDIST"] < max_comdist]
return out
def read_2mpp(fpath, dist_cosmo):
"""
Read in the 2M++ galaxy redshift catalogue [1], with the catalogue at [2].
Removes fake galaxies used to fill the zone of avoidance. Note that in
principle additional care should be taken for calculating the distance
to objects [3]. Currently it is calculated from the CMB redshift, so some
distance estimates may be negative.
Parameters
----------
fpath : str
File path to the catalogue.
dist_cosmo : `astropy.cosmology` object
The cosmology to calculate distance from redshift.
Returns
-------
out : structured array
The catalogue.
References
----------
[1] The 2M++ galaxy redshift catalogue; Lavaux, Guilhem, Hudson, Michael J.
[2] https://cdsarc.cds.unistra.fr/viz-bin/cat/J/MNRAS/416/2840#/article
[3] Improving NASA/IPAC Extragalactic Database Redshift Calculations
(2021); Anthony Carr and Tamara Davis
"""
from scipy.constants import c
# Read the catalogue and select non-fake galaxies
cat = numpy.genfromtxt(fpath, delimiter="|", )
cat = cat[cat[:, 12] == 0, :]
cols = [("RA", F64), ("DEC", F64), ("Ksmag", F64), ("ZCMB", F64),
("DIST", F64)]
out = cols_to_structured(cat.shape[0], cols)
out["RA"] = cat[:, 1]
out["DEC"] = cat[:, 2]
out["Ksmag"] = cat[:, 5]
out["ZCMB"] = cat[:, 7] / (c * 1e-3)
out["DIST"] = cat[:, 7] / dist_cosmo.H0
return out
def read_2mpp_groups(fpath, dist_cosmo):
"""
Read in the 2M++ galaxy group catalogue [1], with the catalogue at [2].
Note that the same caveats apply to the distance calculation as in
:py:func:`read_2mpp`. See that function for more details.
Parameters
----------
fpath : str
File path to the catalogue.
dist_cosmo : `astropy.cosmology` object
The cosmology to calculate distance from redshift.
Returns
-------
out : structured array
The catalogue.
References
----------
[1] The 2M++ galaxy redshift catalogue; Lavaux, Guilhem, Hudson, Michael J.
[2] https://cdsarc.cds.unistra.fr/viz-bin/cat/J/MNRAS/416/2840#/article
[3] Improving NASA/IPAC Extragalactic Database Redshift Calculations
(2021); Anthony Carr and Tamara Davis
"""
cat = numpy.genfromtxt(fpath, delimiter="|", )
cols = [("RA", F64), ("DEC", F64), ("K2mag", F64), ("Rich", numpy.int64),
("dist", F64), ("sigma", F64)]
out = cols_to_structured(cat.shape[0], cols)
out["K2mag"] = cat[:, 3]
out["Rich"] = cat[:, 4]
out["sigma"] = cat[:, 7]
out["dist"] = cat[:, 6] / dist_cosmo.H0
# Convert galactic coordinates to RA, dec
glon = cat[:, 1]
glat = cat[:, 2]
coords = SkyCoord(l=glon*u.degree, b=glat*u.degree, frame='galactic')
coords = coords.transform_to("icrs")
out["RA"] = coords.ra
out["DEC"] = coords.dec
return out
def match_planck_to_mcxc(planck, mcxc):
"""
Return the MCXC catalogue indices of the Planck catalogue detections. Finds
the index of the quoted Planck MCXC counterpart in the MCXC array. If a
quoted counterpart is not found, an error is raised; for this reason it may
be better to ensure the MCXC catalogue reaches at least as far as Planck.
Parameters
----------
planck : structured array
The Planck cluster array.
mcxc : structured array
The MCXC cluster array.
Returns
-------
indxs : 1-dimensional array
The array of MCXC indices to match the Planck array. If no counterpart
is found returns `numpy.nan`.
"""
# Planck MCXC names need to be decoded to str
planck_names = [name.decode() for name in planck["MCXC"]]
mcxc_names = [name for name in mcxc["name"]]
indxs = [numpy.nan] * len(planck_names)
for i, name in enumerate(planck_names):
if name == "":
continue
if name in mcxc_names:
indxs[i] = mcxc_names.index(name)
else:
raise ValueError("Planck MCXC identifies `{}` not found in the "
"MCXC catalogue.".format(name))
return indxs

View file

@@ -13,6 +13,6 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import brute_spatial_separation # noqa
from .match import (brute_spatial_separation, RealisationsMatcher) # noqa
from .num_density import (binned_counts, number_density) # noqa
# from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa

View file

@@ -16,6 +16,7 @@
import numpy
from tqdm import tqdm
from astropy.coordinates import SkyCoord
from ..read import CombinedHaloCatalogue
def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
@@ -65,3 +66,130 @@ def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
sep[i, :] = dist[sort]
return sep, indxs
class RealisationsMatcher:
"""
A tool to match haloes between IC realisations. For each halo, looks for
its 3D spatial neighbours in all remaining IC realisations that are within
some mass range of it.
Parameters
----------
cats : :py:class:`csiborgtools.read.CombinedHaloCatalogue`
Combined halo catalogue to search.
"""
_cats = None
def __init__(self, cats):
self.cats = cats
@property
def cats(self):
"""
Combined catalogues.
Returns
-------
cats : :py:class:`csiborgtools.read.CombinedHaloCatalogue`
Combined halo catalogue.
"""
return self._cats
@cats.setter
def cats(self, cats):
"""
Sets `cats`, ensures that this is a `CombinedHaloCatalogue` object.
"""
if not isinstance(cats, CombinedHaloCatalogue):
raise TypeError("`cats` must be of type `CombinedHaloCatalogue`.")
self._cats = cats
def search_sim_indices(self, n_sim):
"""
Return indices of all other IC realisations but of `n_sim`.
Parameters
----------
n_sim : int
IC realisation index of `self.cats` to be skipped.
Returns
-------
indxs : list of int
The remaining IC indices.
"""
return [i for i in range(self.cats.N) if i != n_sim]
def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=2):
r"""
Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
the `n_sim`th simulation. Also enforces that the neighbours'
:math:`\log M_{200c}` be within `dlogmass` dex.
Parameters
----------
n_sim : int
Index of an IC realisation in `self.cats` whose halos' neighbours
in the remaining simulations to search for.
nmult : float or int, optional
Multiple of :math:`R_{200c}` within which to return neighbours. By
default 5.
dlogmass : float, optional
Tolerance on the logarithmic mass difference. By default 2 dex.
Returns
-------
matches : composite array
Array of shape `(n_sims - 1, 2, n_halos, n_matches)`. Along the 2nd
axis the first entry is the `index` of the neighbouring halo in its
catalogue and the second is `dist`, the 3D distance to the halo whose
neighbours are being searched.
"""
# R200c, M200c and positions of halos in `n_sim` IC realisation
r200 = self.cats[n_sim]["r200"]
logm200 = numpy.log10(self.cats[n_sim]["m200"])
pos = self.cats[n_sim].positions
matches = [None] * (self.cats.N - 1)
# Search for neighbours in the other simulations
for count, i in enumerate(self.search_sim_indices(n_sim)):
dist, indxs = self.cats[i].radius_neighbours(pos, r200 * nmult)
# Discard neighbours whose mass is too different
for j, indx in enumerate(indxs):
match_logm200 = numpy.log10(self.cats[i][indx]["m200"])
mask = numpy.abs(match_logm200 - logm200[j]) < dlogmass
dist[j] = dist[j][mask]
indxs[j] = indx[mask]
# Append as a composite array
matches[count] = numpy.asarray([indxs, dist], dtype=object)
return numpy.asarray(matches, dtype=object)
def cross_knn_position_all(self, nmult=5, dlogmass=2):
r"""
Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
all simulations listed in `self.cats`. Also enforces that the
neighbours' :math:`\log M_{200c}` be within `dlogmass` dex.
Parameters
----------
nmult : float or int, optional
Multiple of :math:`R_{200c}` within which to return neighbours. By
default 5.
dlogmass : float, optional
Tolerance on the logarithmic mass difference. By default 2 dex.
Returns
-------
matches : list of composite arrays
List whose length is `n_sims`. For description of its elements see
`self.cross_knn_position_single(...)`.
"""
N = self.cats.N # Number of catalogues
matches = [None] * N
# Loop over each catalogue
for i in range(N):
matches[i] = self.cross_knn_position_single(i, nmult, dlogmass)
return matches
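
For orientation, a minimal usage sketch of the matcher above. It assumes the default catalogue paths from `read.make_cat`; the mass threshold is arbitrary and the keyword values simply restate the documented defaults:

import csiborgtools

# Combined catalogue over the (currently first 10) IC realisations
cats = csiborgtools.read.CombinedHaloCatalogue(minimum_m500=1e13)
matcher = csiborgtools.match.RealisationsMatcher(cats)
# Neighbours of the 0th realisation's haloes in all remaining realisations,
# within 5 x R200c and 2 dex in log M200c
matches = matcher.cross_knn_position_single(0, nmult=5, dlogmass=2)
indxs, dists = matches[0]  # matches in the first of the other realisations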

View file

@@ -17,7 +17,7 @@ from .readsim import (get_csiborg_ids, get_sim_path, get_snapshots, # noqa
get_snapshot_path, get_maximum_snapshot, read_info, nparts_to_start_ind, # noqa
open_particle, open_unbinding, read_particle, # noqa
drop_zero_indx, # noqa
read_clumpid, read_clumps, read_mmain, # noqa
merge_mmain_to_clumps) # noqa
from .readobs import (read_planck2015, read_mcxc, read_2mpp, read_2mpp_groups, match_planck_to_mcxc) # noqa
read_clumpid, read_clumps, read_mmain) # noqa
from .make_cat import (HaloCatalogue, CombinedHaloCatalogue) # noqa
from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, TwoMPPGroups) # noqa
from .outsim import (dump_split, combine_splits) # noqa

View file

@@ -0,0 +1,331 @@
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Functions to read in the particle and clump files.
"""
import numpy
from os.path import join
from tqdm import trange
from sklearn.neighbors import NearestNeighbors
from .readsim import (get_sim_path, read_mmain, get_csiborg_ids,
get_maximum_snapshot)
from ..utils import (flip_cols, add_columns)
from ..units import (BoxUnits, cartesian_to_radec)
class HaloCatalogue:
r"""
Processed halo catalogue whose data is expected to be calculated by `run_fit_halos`.
Parameters
----------
n_sim : int
Initial condition index.
n_snap : int
Snapshot index.
minimum_m500 : float, optional
The minimum :math:`M_{\rm 500c} / M_\odot` mass. By default no
threshold.
dumpdir : str, optional
Path to where files from `run_fit_halos` are stored. By default
`/mnt/extraspace/rstiskalek/csiborg/`.
mmain_path : str, optional
Path to where mmain files are stored. By default
`/mnt/zfsusers/hdesmond/Mmain`.
"""
_box = None
_n_sim = None
_n_snap = None
_data = None
_knn = None
_positions = None
def __init__(self, n_sim, n_snap, minimum_m500=None,
dumpdir="/mnt/extraspace/rstiskalek/csiborg/",
mmain_path="/mnt/zfsusers/hdesmond/Mmain"):
self._box = BoxUnits(n_snap, get_sim_path(n_sim))
minimum_m500 = 0 if minimum_m500 is None else minimum_m500
self._set_data(n_sim, n_snap, dumpdir, mmain_path, minimum_m500)
self._n_sim = n_sim
self._n_snap = n_snap
# Initialise the KNN
knn = NearestNeighbors()
knn.fit(self.positions)
self._knn = knn
@property
def data(self):
"""
Halo catalogue.
Returns
-------
cat : structured array
Catalogue.
"""
if self._data is None:
raise ValueError("`data` is not set!")
return self._data
@property
def box(self):
"""
Box object, useful for change of units.
Returns
-------
box : :py:class:`csiborgtools.units.BoxUnits`
The box object.
"""
return self._box
@property
def cosmo(self):
"""
The box cosmology.
Returns
-------
cosmo : `astropy` cosmology object
Box cosmology.
"""
return self.box.cosmo
@property
def n_snap(self):
"""
The snapshot ID.
Returns
-------
n_snap : int
Snapshot ID.
"""
return self._n_snap
@property
def n_sim(self):
"""
The initial condition (IC) realisation ID.
Returns
-------
n_sim : int
The IC ID.
"""
return self._n_sim
def _set_data(self, n_sim, n_snap, dumpdir, mmain_path, minimum_m500):
"""
Loads the data, merges with mmain, does various coordinate transforms.
"""
# Load the processed data
fname = "ramses_out_{}_{}.npy".format(
str(n_sim).zfill(5), str(n_snap).zfill(5))
data = numpy.load(join(dumpdir, fname))
# Load the mmain file and add it to the data
mmain = read_mmain(n_sim, mmain_path)
data = self.merge_mmain_to_clumps(data, mmain)
flip_cols(data, "peak_x", "peak_z")
# Cut on number of particles and finite m200
data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
# Unit conversion
convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
"r200", "r500", "Rs", "rho0",
"peak_x", "peak_y", "peak_z"]
data = self.box.convert_from_boxunits(data, convert_cols)
# Cut on mass. Note that this is in Msun
data = data[data["m500"] > minimum_m500]
# Now calculate spherical coordinates
d, ra, dec = cartesian_to_radec(data)
data = add_columns(data, [d, ra, dec], ["dist", "ra", "dec"])
# Pre-allocate the positions array
self._positions = numpy.vstack(
[data["peak_{}".format(p)] for p in ("x", "y", "z")]).T
self._data = data
def merge_mmain_to_clumps(self, clumps, mmain):
"""
Merge columns from the `mmain` files to the `clump` file, matches them
by their halo index while assuming that the indices `index` in both
arrays are sorted.
Parameters
----------
clumps : structured array
Clumps structured array.
mmain : structured array
Parent halo array whose information is to be merged into `clumps`.
Returns
-------
out : structured array
Array with added columns.
"""
X = numpy.full((clumps.size, 2), numpy.nan)
# Mask of which clumps have a mmain index
mask = numpy.isin(clumps["index"], mmain["index"])
X[mask, 0] = mmain["mass_cl"]
X[mask, 1] = mmain["sub_frac"]
return add_columns(clumps, X, ["mass_mmain", "sub_frac"])
@property
def positions(self):
"""
3D positions of halos.
Returns
-------
X : 2-dimensional array
Array of shape `(n_halos, 3)`, where the latter axis represents
`x`, `y` and `z`.
"""
return self._positions
def radius_neighbours(self, X, radius):
"""
Return sorted nearest neighbours within `radius` of `X`.
Parameters
----------
X : 2-dimensional array
Array of shape `(n_queries, 3)`, where the latter axis represents
`x`, `y` and `z`.
radius : float
Limiting distance of neighbours.
Returns
-------
dist : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of distances
to the nearest neighbours.
knns : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of indices of
nearest neighbours in this catalogue.
"""
if not (X.ndim == 2 and X.shape[1] == 3):
raise TypeError("`X` must be an array of shape `(n_samples, 3)`.")
# Query the KNN
return self._knn.radius_neighbors(X, radius, sort_results=True)
@property
def keys(self):
"""Catalogue keys."""
return self.data.dtype.names
def __getitem__(self, key):
return self._data[key]
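
Before the combined catalogue below, a quick sketch of the single-catalogue interface above. The IC and snapshot indices are placeholders and the mass threshold is arbitrary:

import csiborgtools

cat = csiborgtools.read.HaloCatalogue(7468, 951, minimum_m500=1e13)
print(cat.keys)        # available columns of the structured array
X = cat.positions[:2]  # positions of the first two haloes
# Sorted distances to, and indices of, all haloes within 10 Mpc
dists, indxs = cat.radius_neighbours(X, 10.)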
class CombinedHaloCatalogue:
r"""
A combined halo catalogue, containing `HaloCatalogue` for each IC
realisation at the latest redshift.
Parameters
----------
minimum_m500 : float, optional
The minimum :math:`M_{\rm 500c} / M_\odot` mass. By default no
threshold.
dumpdir : str, optional
Path to where files from `run_fit_halos` are stored. By default
`/mnt/extraspace/rstiskalek/csiborg/`.
mmain_path : str, optional
Path to where mmain files are stored. By default
`/mnt/zfsusers/hdesmond/Mmain`.
verbose : bool, optional
Verbosity flag for reading the catalogues.
"""
_n_sims = None
_n_snaps = None
_cats = None
def __init__(self, minimum_m500=None,
dumpdir="/mnt/extraspace/rstiskalek/csiborg/",
mmain_path="/mnt/zfsusers/hdesmond/Mmain", verbose=True):
# Read simulations and their maximum snapshots
# NOTE remove this later and take all cats
self._n_sims = get_csiborg_ids("/mnt/extraspace/hdesmond")[:10]
n_snaps = [get_maximum_snapshot(get_sim_path(i)) for i in self._n_sims]
self._n_snaps = numpy.asanyarray(n_snaps)
cats = [None] * self.N
for i in trange(self.N) if verbose else range(self.N):
cats[i] = HaloCatalogue(self._n_sims[i], self._n_snaps[i],
minimum_m500, dumpdir, mmain_path)
self._cats = cats
@property
def N(self):
"""
Number of IC realisations in this combined catalogue.
Returns
-------
N : int
Number of catalogues.
"""
return len(self.n_sims)
@property
def n_sims(self):
"""
IC realisations CSiBORG identifiers.
Returns
-------
ids : 1-dimensional array
Array of IDs.
"""
return self._n_sims
@property
def n_snaps(self):
"""
Snapshot numbers corresponding to `self.n_sims`.
Returns
-------
n_snaps : 1-dimensional array
Array of snapshot numbers.
"""
return self._n_snaps
@property
def cats(self):
"""
Catalogues associated with this object.
Returns
-------
cats : list of `HaloCatalogue`
Catalogues.
"""
return self._cats
def __getitem__(self, n):
if n >= self.N:
raise ValueError("Catalogue count is {}, requested catalogue {}."
.format(self.N, n))
return self.cats[n]
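
And the combined catalogue in use, a short sketch assuming the default data paths:

import csiborgtools

cats = csiborgtools.read.CombinedHaloCatalogue(verbose=False)
print(cats.N, cats.n_sims, cats.n_snaps)  # realisation count, IDs, snapshots
m200 = cats[0]["m200"]  # column access on a single HaloCatalogue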

View file

@@ -0,0 +1,300 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Scripts to read in observation.
"""
import numpy
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy.cosmology import FlatLambdaCDM
from astropy import units as u
from ..utils import (add_columns, cols_to_structured)
F64 = numpy.float64
class BaseSurvey:
"""
Base survey class with some methods that are common to all survey classes.
"""
_data = None
_cosmo = None
@property
def data(self):
"""
Cluster catalogue.
Returns
-------
cat : structured array
Catalogue.
"""
if self._data is None:
raise ValueError("`data` is not set!")
return self._data
@property
def cosmo(self):
"""Desired cosmology."""
if self._cosmo is None:
raise ValueError("`cosmo` is not set!")
return self._cosmo
@property
def keys(self):
"""Catalogue keys."""
return self.data.dtype.names
def __getitem__(self, key):
return self._data[key]
class PlanckClusters(BaseSurvey):
r"""
Second Planck Sunyaev-Zeldovich (SZ) source catalogue [1]. Automatically removes
clusters without a redshift estimate.
Parameters
----------
fpath : str
Path to the source catalogue.
cosmo : `astropy.cosmology` object, optional
Cosmology to convert masses (particularly :math:`H_0`). By default
`FlatLambdaCDM(H0=70.5, Om0=0.307, Tcmb0=2.728)`.
max_redshift : float, optional
Maximum cluster redshift. By default `None` and no selection is
performed.
References
----------
[1] https://heasarc.gsfc.nasa.gov/W3Browse/all/plancksz2.html
"""
_hdata = 0.7 # little h value of the data
def __init__(self, fpath, cosmo=None, max_redshift=None):
if cosmo is None:
self._cosmo = FlatLambdaCDM(H0=70.5, Om0=0.307, Tcmb0=2.728)
else:
self._cosmo = cosmo
self.set_data(fpath, max_redshift)
def set_data(self, fpath, max_redshift=None):
"""
Set the catalogue, loads it and applies a maximum redshift cut.
"""
cat = fits.open(fpath)[1].data
# Convert FITS to a structured array
data = numpy.full(cat.size, numpy.nan, dtype=cat.dtype.descr)
for name in cat.dtype.names:
data[name] = cat[name]
# Take only clusters with redshifts
data = data[data["REDSHIFT"] >= 0]
# Convert masses
for par in ("MSZ", "MSZ_ERR_UP", "MSZ_ERR_LOW"):
data[par] *= 1e14
data[par] *= (self._hdata / self.cosmo.h)**2
# Redshift cut
if max_redshift is not None:
data = data["REDSHIFT" <= max_redshift]
self._data = data
def match_to_mcxc(self, mcxc):
"""
Return the MCXC catalogue indices of the Planck catalogue detections.
Finds the index of the quoted Planck MCXC counterpart in the MCXC
array. If a quoted counterpart is not found, an error is raised; for
this reason it may be better to ensure the MCXC catalogue reaches at
least as far as the Planck one.
Parameters
----------
mcxc : :py:class:`MCXCClusters`
MCXC cluster object.
Returns
-------
indxs : list of int
List of MCXC indices matching the Planck detections. If no
counterpart is found, the entry is `numpy.nan`.
"""
if not isinstance(mcxc, MCXCClusters):
raise TypeError("`mcxc` must be `MCXCClusters` type.")
# Planck MCXC names need to be decoded to str
planck_names = [name.decode() for name in self["MCXC"]]
mcxc_names = [name for name in mcxc["name"]]
indxs = [numpy.nan] * len(planck_names)
for i, name in enumerate(planck_names):
if name == "":
continue
if name in mcxc_names:
indxs[i] = mcxc_names.index(name)
else:
raise ValueError("Planck MCXC identifier `{}` not found in "
"the MCXC catalogue.".format(name))
return indxs
class MCXCClusters(BaseSurvey):
r"""
MCXC Meta-Catalog of X-Ray Detected Clusters of Galaxies catalogue [1],
with data description at [2] and download at [3].
Note
----
The exact mass conversion has a non-trivial dependence on :math:`H(z)`;
see [1] for more details. However, this should be negligible.
Parameters
----------
fpath : str
Path to the source catalogue obtained from [3]. Expected to be the fits
file.
cosmo : `astropy.cosmology` object, optional
The cosmology used to convert cluster masses (to first order). By default
`FlatLambdaCDM(H0=70.5, Om0=0.307, Tcmb0=2.728)`.
max_redshift : float, optional
Maximum cluster redshift. By default `None` and no selection is
performed.
References
----------
[1] The MCXC: a meta-catalogue of x-ray detected clusters of galaxies
(2011); Piffaretti, R. ; Arnaud, M. ; Pratt, G. W. ; Pointecouteau,
E. ; Melin, J. -B.
[2] https://heasarc.gsfc.nasa.gov/W3Browse/rosat/mcxc.html
[3] https://cdsarc.cds.unistra.fr/viz-bin/cat/J/A+A/534/A109#/article
"""
_hdata = 0.7 # Little h of the catalogue
def __init__(self, fpath, cosmo=None, max_redshift=None):
if cosmo is None:
self._cosmo = FlatLambdaCDM(H0=70.5, Om0=0.307, Tcmb0=2.728)
else:
self._cosmo = cosmo
self._set_data(fpath, max_redshift)
def _set_data(self, fpath, max_redshift):
"""
Set the catalogue, loads it and applies a maximum redshift cut.
"""
cat = fits.open(fpath)[1].data
# Pre-allocate array and extract selected variables
cols = [("RAdeg", F64), ("DEdeg", F64), ("z", F64),
("L500", F64), ("M500", F64), ("R500", F64)]
data = cols_to_structured(cat.size, cols)
for col in cols:
par = col[0]
data[par] = cat[par]
# Add the cluster names
data = add_columns(data, cat["MCXC"], "name")
# Get little h units to match the cosmology
data["L500"] *= (self._hdata / self.cosmo.h)**2
data["M500"] *= (self._hdata / self.cosmo.h)**2
# Get the 10s back in
data["L500"] *= 1e44 # ergs/s
data["M500"] *= 1e14 # Msun
if max_redshift is not None:
data = data["z" <= max_redshift]
self._data = data
class TwoMPPGalaxies(BaseSurvey):
"""
The 2M++ galaxy redshift catalogue [1], with the catalogue at [2].
Removes fake galaxies used to fill the zone of avoidance. Note that the
stated redshift is in the CMB frame.
Parameters
----------
fpath : str
File path to the catalogue.
References
----------
[1] The 2M++ galaxy redshift catalogue; Lavaux, Guilhem, Hudson, Michael J.
[2] https://cdsarc.cds.unistra.fr/viz-bin/cat/J/MNRAS/416/2840#/article
[3] Improving NASA/IPAC Extragalactic Database Redshift Calculations
(2021); Anthony Carr and Tamara Davis
"""
def __init__(self, fpath):
self._set_data(fpath)
def _set_data(self, fpath):
"""
Set the catalogue
"""
from scipy.constants import c
# Read the catalogue and select non-fake galaxies
cat = numpy.genfromtxt(fpath, delimiter="|", )
cat = cat[cat[:, 12] == 0, :]
# Pre-allocate the array and fill it
cols = [("RA", F64), ("DEC", F64), ("Ksmag", F64), ("ZCMB", F64),
("DIST", F64)]
data = cols_to_structured(cat.shape[0], cols)
data["RA"] = cat[:, 1]
data["DEC"] = cat[:, 2]
data["Ksmag"] = cat[:, 5]
data["ZCMB"] = cat[:, 7] / (c * 1e-3)
self._data = data
class TwoMPPGroups(BaseSurvey):
"""
The 2M++ galaxy group catalogue [1], with the catalogue at [2].
Parameters
----------
fpath : str
File path to the catalogue.
References
----------
[1] The 2M++ galaxy redshift catalogue; Lavaux, Guilhem, Hudson, Michael J.
[2] https://cdsarc.cds.unistra.fr/viz-bin/cat/J/MNRAS/416/2840#/article
[3] Improving NASA/IPAC Extragalactic Database Redshift Calculations
(2021); Anthony Carr and Tamara Davis
"""
def __init__(self, fpath):
self._set_data(fpath)
def _set_data(self, fpath):
"""
Set the catalogue
"""
cat = numpy.genfromtxt(fpath, delimiter="|", )
# Pre-allocate and fill the array
cols = [("RA", F64), ("DEC", F64), ("K2mag", F64),
("Rich", numpy.int64), ("sigma", F64)]
data = cols_to_structured(cat.shape[0], cols)
data["K2mag"] = cat[:, 3]
data["Rich"] = cat[:, 4]
data["sigma"] = cat[:, 7]
# Convert galactic coordinates to RA, dec
glon = cat[:, 1]
glat = cat[:, 2]
coords = SkyCoord(l=glon*u.degree, b=glat*u.degree, frame='galactic')
coords = coords.transform_to("icrs")
data["RA"] = coords.ra
data["DEC"] = coords.dec
self._data = data
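
A sketch of the new survey classes in use. The file paths are the ones appearing elsewhere in this repository and the redshift cuts are arbitrary:

from csiborgtools.read import PlanckClusters, MCXCClusters

# Default cosmology is FlatLambdaCDM(H0=70.5, Om0=0.307, Tcmb0=2.728)
planck = PlanckClusters("../data/HFI_PCCS_SZ-union_R2.08.fits",
                        max_redshift=0.1)
mcxc = MCXCClusters("../data/mcxc.fits", max_redshift=0.2)
# MCXC row of each Planck detection (`numpy.nan` where no counterpart is
# quoted); the MCXC cut is deliberately deeper so quoted counterparts exist
indxs = planck.match_to_mcxc(mcxc)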

View file

@@ -22,8 +22,7 @@ from os import listdir
from os.path import (join, isfile)
from glob import glob
from tqdm import tqdm
from ..utils import (cols_to_structured, add_columns)
from ..utils import cols_to_structured
F16 = numpy.float16
@@ -512,30 +511,3 @@ def read_mmain(n, srcdir, fname="Mmain_{}.npy"):
out[name] = arr[:, i]
return out
def merge_mmain_to_clumps(clumps, mmain):
"""
Merge columns from the `mmain` files to the `clump` file, matches them
by their halo index while assuming that the indices `index` in both arrays
are sorted.
Parameters
----------
clumps : structured array
Clumps structured array.
mmain : structured array
Parent halo array whose information is to be merged into `clumps`.
Returns
-------
out : structured array
Array with added columns.
"""
X = numpy.full((clumps.size, 2), numpy.nan)
# Mask of which clumps have a mmain index
mask = numpy.isin(clumps["index"], mmain["index"])
X[mask, 0] = mmain["mass_cl"]
X[mask, 1] = mmain["sub_frac"]
return add_columns(clumps, X, ["mass_mmain", "sub_frac"])

View file

@@ -14,4 +14,4 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .transforms import cartesian_to_radec # noqa
from .box_units import (BoxUnits, convert_from_boxunits) # noqa
from .box_units import (BoxUnits) # noqa

View file

@@ -20,7 +20,7 @@ import numpy
from scipy.interpolate import interp1d
from astropy.cosmology import LambdaCDM
from astropy import (constants, units)
from ..io import read_info
from ..read import read_info
# Map of unit conversions
@@ -329,18 +329,18 @@ class BoxUnits:
return (density / self._unit_d * self._Msuncgs
/ (units.Mpc.to(units.cm))**3)
def convert_from_boxunits(data, names, boxunits):
def convert_from_boxunits(self, data, names):
r"""
Convert columns named `names` in array `data` from box units to physical
units, such that
Convert columns named `names` in array `data` from box units to
physical units, such that
- length -> :math:`Mpc`,
- mass -> :math:`M_\odot`,
- density -> :math:`M_\odot / \mathrm{Mpc}^3`.
Any other conversions are currently not implemented. Note that the array
is passed by reference and directly modified, even though it is also
explicitly returned. Additionally centres the box coordinates on the
observer, if they are being transformed.
Any other conversions are currently not implemented. Note that the
array is passed by reference and directly modified, even though it is
also explicitly returned. Additionally centres the box coordinates on
the observer, if they are being transformed.
Parameters
----------
@@ -348,30 +348,25 @@ def convert_from_boxunits(data, names, boxunits):
Input array.
names : list of str
Columns to be converted.
boxunits : `BoxUnits`
Box units class of the simulation and snapshot.
Returns
-------
data : structured array
Input array with converted columns.
"""
if not isinstance(boxunits, BoxUnits):
raise TypeError("`boxunits` must be of type `{}`. Currently `{}`."
.format(BoxUnits, type(boxunits)))
names = [names] if isinstance(names, str) else names
# Shortcut for the transform functions
transforms = {
"length": boxunits.box2mpc,
"mass": boxunits.box2solarmass,
"density": boxunits.box2dens
"length": self.box2mpc,
"mass": self.box2solarmass,
"density": self.box2dens
}
for name in names:
# Check that the name is even in the array
if name not in data.dtype.names:
raise ValueError("Name `{}` is not in `data` array.".format(name))
raise ValueError("Name `{}` not in `data` array.".format(name))
# Convert
found = False
@@ -388,6 +383,5 @@ def convert_from_boxunits(data, names, boxunits):
# Center at the observer
if name in ["peak_x", "peak_y", "peak_z"]:
data[name] -= transforms["length"](0.5)
data[name] -= (0.5)
return data
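
After this change the conversion is a method of `BoxUnits` rather than a free function, e.g. as in the following sketch, where the IC/snapshot indices and file name are placeholders:

import numpy
import csiborgtools

n_sim, n_snap = 7468, 951  # placeholder IC realisation and snapshot
simpath = csiborgtools.read.get_sim_path(n_sim)
box = csiborgtools.units.BoxUnits(n_snap, simpath)
data = numpy.load("ramses_out_07468_00951.npy")  # output of run_fit_halos
# Converts in place (and returns): masses to Msun, lengths to Mpc,
# with positions re-centred on the observer
data = box.convert_from_boxunits(
    data, ["m200", "r200", "peak_x", "peak_y", "peak_z"])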

File diff suppressed because one or more lines are too long

View file

@@ -32,9 +32,6 @@ import utils
F64 = numpy.float64
I64 = numpy.int64
# Simulations and their snapshot to analyze
Nsims = [9844]
Nsnap = 1016
# Get MPI things
comm = MPI.COMM_WORLD
@@ -50,15 +47,17 @@ cols_collect = [("npart", I64), ("totpartmass", F64), ("Rs", F64),
("rmax", F64), ("r200", F64), ("r500", F64),
("m200", F64), ("m500", F64)]
# NOTE later loop over sims too
Nsim = Nsims[0]
simpath = csiborgtools.io.get_sim_path(Nsim)
Nsims = csiborgtools.read.get_csiborg_ids("/mnt/extraspace/hdesmond")
for i, Nsim in enumerate(Nsims):
if rank == 0:
print("{}: calculating {}th simulation.".format(datetime.now(), i))
simpath = csiborgtools.read.get_sim_path(Nsim)
Nsnap = csiborgtools.read.get_maximum_snapshot(simpath)
box = csiborgtools.units.BoxUnits(Nsnap, simpath)
jobs = csiborgtools.fits.split_jobs(utils.Nsplits, nproc)[rank]
for icount, Nsplit in enumerate(jobs):
print("{}: rank {} working {} / {} jobs.".format(datetime.now(), rank,
icount + 1, len(jobs)))
for Nsplit in jobs:
parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
Nsplit, loaddir, Nsim, Nsnap, remove_split=False)
@@ -73,7 +72,8 @@ for icount, Nsplit in enumerate(jobs):
for n in range(N):
# Pick clump and its particles
xs = csiborgtools.fits.pick_single_clump(n, parts, part_clumps, clumps)
xs = csiborgtools.fits.pick_single_clump(n, parts, part_clumps,
clumps)
clump = csiborgtools.fits.Clump.from_arrays(*xs, rhoc=box.box_rhoc)
out["npart"][n] = clump.Npart
out["rmin"][n] = clump.rmin
@@ -100,18 +100,23 @@ for icount, Nsplit in enumerate(jobs):
out["rho0"][n] = nfwpost.rho0_from_Rs(Rs)
out["conc"][n] = out["r200"][n] / Rs
csiborgtools.io.dump_split(out, Nsplit, Nsim, Nsnap, dumpdir)
csiborgtools.read.dump_split(out, Nsplit, Nsim, Nsnap, dumpdir)
# Force all ranks to wait
# Wait until all jobs finished before moving to another simulation
comm.Barrier()
# Use the rank 0 to combine outputs for this CSiBORG realisation
if rank == 0:
print("Collecting results!")
out_collected = csiborgtools.io.combine_splits(
out_collected = csiborgtools.read.combine_splits(
utils.Nsplits, Nsim, Nsnap, utils.dumpdir, cols_collect,
remove_splits=True, verbose=False)
fname = join(utils.dumpdir, "ramses_out_{}_{}.npy"
.format(str(Nsim).zfill(5), str(Nsnap).zfill(5)))
print("Saving results to `{}`.".format(fname))
numpy.save(fname, out_collected)
comm.Barrier()
if rank == 0:
print("All finished! See ya!")

View file

@@ -34,7 +34,7 @@ comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
Nsims = csiborgtools.io.get_csiborg_ids("/mnt/extraspace/hdesmond")
Nsims = csiborgtools.read.get_csiborg_ids("/mnt/extraspace/hdesmond")
partcols = ["x", "y", "z", "vx", "vy", "vz", "M", "level"]
dumpdir = join(utils.dumpdir, "temp")
@@ -43,16 +43,16 @@ for icount, sim_index in enumerate(jobs):
print("{}: rank {} working {} / {} jobs.".format(datetime.now(), rank,
icount + 1, len(jobs)))
Nsim = Nsims[sim_index]
simpath = csiborgtools.io.get_sim_path(Nsim)
Nsnap = csiborgtools.io.get_maximum_snapshot(simpath)
simpath = csiborgtools.read.get_sim_path(Nsim)
Nsnap = csiborgtools.read.get_maximum_snapshot(simpath)
# Load the clumps, particles' clump IDs and particles.
clumps = csiborgtools.io.read_clumps(Nsnap, simpath)
particle_clumps = csiborgtools.io.read_clumpid(Nsnap, simpath,
clumps = csiborgtools.read.read_clumps(Nsnap, simpath)
particle_clumps = csiborgtools.read.read_clumpid(Nsnap, simpath,
verbose=False)
particles = csiborgtools.io.read_particle(partcols, Nsnap, simpath,
particles = csiborgtools.read.read_particle(partcols, Nsnap, simpath,
verbose=False)
# Drop all particles whose clump index is 0 (not assigned to any halo)
particle_clumps, particles = csiborgtools.io.drop_zero_indx(
particle_clumps, particles = csiborgtools.read.drop_zero_indx(
particle_clumps, particles)
# Dump it!
csiborgtools.fits.dump_split_particles(particles, particle_clumps, clumps,

View file

@@ -16,16 +16,14 @@
Notebook utility functions.
"""
# import numpy
# from os.path import join
import numpy
from os.path import join
from astropy.cosmology import FlatLambdaCDM
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
# try:
# import csiborgtools
# except ModuleNotFoundError:
# import sys
# sys.path.append("../")
Nsplits = 200
@@ -42,52 +40,3 @@ _virgo = {"RA": (12 + 27 / 60) * 15,
"COMDIST": 16.5}
specific_clusters = {"Coma": _coma, "Virgo": _virgo}
def load_processed(Nsim, Nsnap):
simpath = csiborgtools.io.get_sim_path(Nsim)
outfname = join(
dumpdir, "ramses_out_{}_{}.npy".format(str(Nsim).zfill(5),
str(Nsnap).zfill(5)))
data = numpy.load(outfname)
# Add mmain
mmain = csiborgtools.io.read_mmain(Nsim, "/mnt/zfsusers/hdesmond/Mmain")
data = csiborgtools.io.merge_mmain_to_clumps(data, mmain)
csiborgtools.utils.flip_cols(data, "peak_x", "peak_z")
# Cut on number of particles and finite m200
data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
# Do unit conversion
boxunits = csiborgtools.units.BoxUnits(Nsnap, simpath)
convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
"r200", "r500", "Rs", "rho0", "peak_x", "peak_y", "peak_z"]
data = csiborgtools.units.convert_from_boxunits(
data, convert_cols, boxunits)
# Now calculate spherical coordinates
d, ra, dec = csiborgtools.units.cartesian_to_radec(data)
data = csiborgtools.utils.add_columns(
data, [d, ra, dec], ["dist", "ra", "dec"])
return data, boxunits
def load_planck2015(max_comdist=214):
cosmo = FlatLambdaCDM(H0=70.5, Om0=0.307, Tcmb0=2.728)
fpath = "../data/HFI_PCCS_SZ-union_R2.08.fits"
return csiborgtools.io.read_planck2015(fpath, cosmo, max_comdist)
def load_mcxc(max_comdist=214):
cosmo = FlatLambdaCDM(H0=70.5, Om0=0.307, Tcmb0=2.728)
fpath = ("../data/mcxc.fits")
return csiborgtools.io.read_mcxc(fpath, cosmo, max_comdist)
def load_2mpp():
cosmo = FlatLambdaCDM(H0=70.5, Om0=0.307, Tcmb0=2.728)
return csiborgtools.io.read_2mpp("../data/2M++_galaxy_catalog.dat", cosmo)
def load_2mpp_groups():
cosmo = FlatLambdaCDM(H0=70.5, Om0=0.307, Tcmb0=2.728)
return csiborgtools.io.read_2mpp_groups(
"../data/../data/2M++_group_catalog.dat", cosmo)