Cluster rough angular match (#45)

* Remove sim dump

* Remove unused things

* Remove warning

* Fix coordinate transforms

* Add imports

* Update coordinate transfers

* Add angular neighbours calculation

* Update nb
This commit is contained in:
Richard Stiskalek 2023-04-18 18:49:21 +02:00 committed by GitHub
parent fdb0df8d4c
commit c2fde1566b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 276 additions and 260 deletions

View file

@ -21,9 +21,9 @@ from .obs import ( # noqa
TwoMPPGalaxies, TwoMPPGalaxies,
TwoMPPGroups, TwoMPPGroups,
) )
from .outsim import combine_splits, dump_split # noqa
from .overlap_summary import NPairsOverlap, PairOverlap, binned_resample_mean # noqa from .overlap_summary import NPairsOverlap, PairOverlap, binned_resample_mean # noqa
from .paths import CSiBORGPaths # noqa from .paths import CSiBORGPaths # noqa
from .pk_summary import PKReader # noqa from .pk_summary import PKReader # noqa
from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm # noqa from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm # noqa
from .tpcf_summary import TPCFReader # noqa from .tpcf_summary import TPCFReader # noqa
from .utils import cartesian_to_radec, radec_to_cartesian

View file

@ -20,8 +20,8 @@ from sklearn.neighbors import NearestNeighbors
from .box_units import BoxUnits from .box_units import BoxUnits
from .paths import CSiBORGPaths from .paths import CSiBORGPaths
from .readsim import ParticleReader, read_initcm from .readsim import ParticleReader
from .utils import add_columns, cartesian_to_radec, flip_cols from .utils import cartesian_to_radec, flip_cols, radec_to_cartesian
class BaseCatalogue(ABC): class BaseCatalogue(ABC):
@ -47,7 +47,6 @@ class BaseCatalogue(ABC):
@nsim.setter @nsim.setter
def nsim(self, nsim): def nsim(self, nsim):
assert isinstance(nsim, int)
self._nsim = nsim self._nsim = nsim
@property @property
@ -136,11 +135,10 @@ class BaseCatalogue(ABC):
ps = ['x0', 'y0', 'z0'] ps = ['x0', 'y0', 'z0']
else: else:
ps = ['x', 'y', 'z'] ps = ['x', 'y', 'z']
pos = [self[p] for p in ps] pos = numpy.vstack([self[p] for p in ps]).T
if cartesian: if not cartesian:
return numpy.vstack(pos).T pos = cartesian_to_radec(pos)
else: return pos
return numpy.vstack([cartesian_to_radec(*pos)]).T
def velocity(self): def velocity(self):
""" """
@ -207,6 +205,64 @@ class BaseCatalogue(ABC):
knn = self.knn(in_initial) knn = self.knn(in_initial)
return knn.radius_neighbors(X, radius, sort_results=True) return knn.radius_neighbors(X, radius, sort_results=True)
def angular_neighbours(self, X, ang_radius, rad_tolerance=None):
r"""
Find nearest neighbours within `ang_radius` of query points `X`.
Optionally applies radial tolerance, which is expected to be in
:math:`\mathrm{cMpc}`.
Parameters
----------
X : 2-dimensional array of shape `(n_queries, 2)` or `(n_queries, 3)`
Query positions. If 2-dimensional, then RA and DEC in degrees.
If 3-dimensional, then radial distance in :math:`\mathrm{cMpc}`,
RA and DEC in degrees.
ang_radius : float
Angular radius in degrees.
rad_tolerance : float, optional
Radial tolerance in :math:`\mathrm{cMpc}`.
Returns
-------
dist : array of 1-dimensional arrays of shape `(n_neighbours,)`
Distance of each neighbour to the query point.
ind : array of 1-dimensional arrays of shape `(n_neighbours,)`
Indices of each neighbour in this catalogue.
"""
assert X.ndim == 2
# We first get positions of haloes in this catalogue, store their
# radial distance and normalise them to unit vectors.
pos = self.position(in_initial=False, cartesian=True)
raddist = numpy.linalg.norm(pos, axis=1)
pos /= raddist.reshape(-1, 1)
# We convert RAdec query positions to unit vectors. If no radial
# distance provided add it.
if X.shape[1] == 2:
X = numpy.vstack([numpy.ones_like(X[:, 0]), X[:, 0], X[:, 1]]).T
radquery = None
else:
radquery = X[:, 0]
X = radec_to_cartesian(X)
knn = NearestNeighbors(metric="cosine")
knn.fit(pos)
# Convert angular radius to cosine difference.
metric_maxdist = 1 - numpy.cos(numpy.deg2rad(ang_radius))
dist, ind = knn.radius_neighbors(X, radius=metric_maxdist,
sort_results=True)
# And the cosine difference to angular distance.
for i in range(X.shape[0]):
dist[i] = numpy.rad2deg(numpy.arccos(1 - dist[i]))
# Apply the radial tolerance
if rad_tolerance is not None:
assert radquery is not None
for i in range(X.shape[0]):
mask = numpy.abs(raddist[ind[i]] - radquery) < rad_tolerance
dist[i] = dist[i][mask]
ind[i] = ind[i][mask]
return dist, ind
@property @property
def keys(self): def keys(self):
"""Catalogue keys.""" """Catalogue keys."""
@ -240,8 +296,12 @@ class ClumpsCatalogue(BaseCatalogue):
The maximum comoving distance of a halo. By default The maximum comoving distance of a halo. By default
:math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`, :math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`,
which corresponds to the high-resolution region. which corresponds to the high-resolution region.
minmass : len-2 tuple, optional
Minimum mass. The first element is the catalogue key and the second is
the value.
""" """
def __init__(self, nsim, paths, maxdist=155.5 / 0.705): def __init__(self, nsim, paths, maxdist=155.5 / 0.705,
minmass=("mass_cl", 1e12)):
self.nsim = nsim self.nsim = nsim
self.paths = paths self.paths = paths
# Read in the clumps from the final snapshot # Read in the clumps from the final snapshot
@ -259,6 +319,7 @@ class ClumpsCatalogue(BaseCatalogue):
data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', "mass_cl"]) data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', "mass_cl"])
mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist
mask &= data[minmass[0]] > minmass[1]
self._data = data[mask] self._data = data[mask]
@property @property
@ -272,117 +333,6 @@ class ClumpsCatalogue(BaseCatalogue):
""" """
return self["index"] == self["parent"] return self["index"] == self["parent"]
def _set_data(self, min_mass, max_dist, load_init):
"""
TODO: old later remove.
Loads the data, merges with mmain, does various coordinate transforms.
"""
# Load the processed data
data = numpy.load(self.paths.hcat_path(self.nsim))
# Load the mmain file and add it to the data
# TODO: read the mmain here
# mmain = read_mmain(self.nsim, self.paths.mmain_dir)
# data = self.merge_mmain_to_clumps(data, mmain)
flip_cols(data, "peak_x", "peak_z")
# Cut on number of particles and finite m200. Do not change! Hardcoded
data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
# Now also load the initial positions
if load_init:
initcm = read_initcm(self.nsim,
self.paths.initmatch_path(self.nsim, "cm"))
if initcm is not None:
data = self.merge_initmatch_to_clumps(data, initcm)
flip_cols(data, "x0", "z0")
# Unit conversion
convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
"r200", "r500", "Rs", "rho0",
"peak_x", "peak_y", "peak_z"]
data = self.box.convert_from_boxunits(data, convert_cols)
# And do the unit transform
if load_init and initcm is not None:
data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "lagpatch"])
# Convert all that is not an integer to float32
names = list(data.dtype.names)
formats = []
for name in names:
if data[name].dtype.char in numpy.typecodes["AllInteger"]:
formats.append(numpy.int32)
else:
formats.append(numpy.float32)
dtype = numpy.dtype({"names": names, "formats": formats})
# Apply cuts on distance and total particle mass if any
data = data[data["dist"] < max_dist] if max_dist is not None else data
data = (data[data["totpartmass"] > min_mass]
if min_mass is not None else data)
self._data = data.astype(dtype)
def merge_mmain_to_clumps(self, clumps, mmain):
"""
TODO: old, later remove.
Merge columns from the `mmain` files to the `clump` file, matches them
by their halo index while assuming that the indices `index` in both
arrays are sorted.
Parameters
----------
clumps : structured array
Clumps structured array.
mmain : structured array
Parent halo array whose information is to be merged into `clumps`.
Returns
-------
out : structured array
Array with added columns.
"""
X = numpy.full((clumps.size, 2), numpy.nan)
# Mask of which clumps have a mmain index
mask = numpy.isin(clumps["index"], mmain["index"])
X[mask, 0] = mmain["mass_cl"]
X[mask, 1] = mmain["sub_frac"]
return add_columns(clumps, X, ["mass_mmain", "sub_frac"])
def merge_initmatch_to_clumps(self, clumps, initcat):
"""
TODO: old, later remove.
Merge columns from the `init_cm` files to the `clump` file.
Parameters
----------
clumps : structured array
Clumps structured array.
initcat : structured array
Catalog with the clumps initial centre of mass at z = 70.
Returns
-------
out : structured array
"""
# There are more initcat clumps, so check which ones have z = 0
# and then downsample
mask = numpy.isin(initcat["ID"], clumps["index"])
initcat = initcat[mask]
# Now the index ordering should match
if not numpy.alltrue(initcat["ID"] == clumps["index"]):
raise ValueError(
"Ordering of `initcat` and `clumps` is inconsistent.")
X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z', "lagpatch"]):
X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0", "lagpatch"])
class HaloCatalogue(BaseCatalogue): class HaloCatalogue(BaseCatalogue):
r""" r"""
@ -403,8 +353,12 @@ class HaloCatalogue(BaseCatalogue):
The maximum comoving distance of a halo. By default The maximum comoving distance of a halo. By default
:math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`, :math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`,
which corresponds to the high-resolution region. which corresponds to the high-resolution region.
minmass : len-2 tuple
Minimum mass. The first element is the catalogue key and the second is
the value.
""" """
def __init__(self, nsim, paths, maxdist=155.5 / 0.705): def __init__(self, nsim, paths, maxdist=155.5 / 0.705,
minmass=('M', 1e12)):
self.nsim = nsim self.nsim = nsim
self.paths = paths self.paths = paths
# Read in the mmain catalogue of summed substructure # Read in the mmain catalogue of summed substructure
@ -417,4 +371,5 @@ class HaloCatalogue(BaseCatalogue):
data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', 'M']) data = self.box.convert_from_boxunits(data, ['x', 'y', 'z', 'M'])
mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist mask = numpy.sqrt(data['x']**2 + data['y']**2 + data['z']**2) < maxdist
mask &= data[minmass[0]] > minmass[1]
self._data = data[mask] self._data = data[mask]

View file

@ -1,112 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
I/O functions for analysing the CSiBORG realisations.
"""
from os import remove
from os.path import join
import numpy
from tqdm import trange
def dump_split(arr, nsplit, nsnap, nsim, paths):
"""
Dump an array from a split.
Parameters
----------
arr : n-dimensional or structured array
Array to be saved.
nsplit : int
Split index.
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths-handling object with set `n_sim` and `n_snap`.
Returns
-------
None
"""
fname = join(paths.temp_dumpdir, "ramses_out_{}_{}_{}.npy"
.format(str(nsim).zfill(5), str(nsnap).zfill(5), nsplit))
numpy.save(fname, arr)
def combine_splits(nsplits, nsnap, nsim, part_reader, cols_add,
remove_splits=False, verbose=True):
"""
Combine results of many splits saved from `dump_split`. Identifies to which
clump the clumps in the split correspond to by matching their index.
Returns an array that contains the original clump data along with the newly
calculated quantities.
Paramaters
----------
nsplits : int
Total number of clump splits.
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
part_reader : py:class`csiborgtools.read.ParticleReadear`
CSiBORG particle reader.
cols_add : list of `(str, dtype)`
Colums to add. Must be formatted as, for example,
`[("npart", numpy.float64), ("totpartmass", numpy.float64)]`.
remove_splits : bool, optional
Whether to remove the splits files. By default `False`.
verbose : bool, optional
Verbosity flag. By default `True`.
Returns
-------
out : structured array
Clump array with appended results from the splits.
"""
clumps = part_reader.read_clumps(nsnap, nsim, cols=None)
# Get the old + new dtypes and create an empty array
descr = clumps.dtype.descr + cols_add
out = numpy.full(clumps.size, numpy.nan, dtype=descr)
for par in clumps.dtype.names: # Now put the old values into the array
out[par] = clumps[par]
# Filename of splits data
froot = "ramses_out_{}_{}".format(str(nsim).zfill(5), str(nsnap).zfill(5))
fname = join(part_reader.paths.temp_dumpdir, froot + "_{}.npy")
# Iterate over splits and add to the output array
cols_add_names = [col[0] for col in cols_add]
iters = trange(nsplits) if verbose else range(nsplits)
for n in iters:
fnamesplit = fname.format(n)
arr = numpy.load(fnamesplit)
# Check that all halo indices from the split are in the clump file
if not numpy.alltrue(numpy.isin(arr["index"], out["index"])):
raise KeyError("....")
# Mask of where to put the values from the split
mask = numpy.isin(out["index"], arr["index"])
for par in cols_add_names:
out[par][mask] = arr[par]
# Now remove this split
if remove_splits:
remove(fnamesplit)
return out

View file

@ -56,7 +56,7 @@ class ParticleReader:
@paths.setter @paths.setter
def paths(self, paths): def paths(self, paths):
# assert isinstance(paths, CSiBORGPaths) # REMOVE assert isinstance(paths, CSiBORGPaths)
self._paths = paths self._paths = paths
def read_info(self, nsnap, nsim): def read_info(self, nsnap, nsim):
@ -87,7 +87,8 @@ class ParticleReader:
keys = info[eqs - 1] keys = info[eqs - 1]
vals = info[eqs + 1] vals = info[eqs + 1]
return {key: val for key, val in zip(keys, vals, strict=True)} # trunk-ignore(ruff/B905)
return {key: val for key, val in zip(keys, vals)}
def open_particle(self, nsnap, nsim, verbose=True): def open_particle(self, nsnap, nsim, verbose=True):
""" """
@ -245,7 +246,8 @@ class ParticleReader:
for cpu in iters: for cpu in iters:
i = start_ind[cpu] i = start_ind[cpu]
j = nparts[cpu] j = nparts[cpu]
for (fname, fdtype) in zip(fnames, fdtypes, strict=True): # trunk-ignore(ruff/B905)
for (fname, fdtype) in zip(fnames, fdtypes):
if fname in pars_extract: if fname in pars_extract:
out[fname][i:i + j] = self.read_sp(fdtype, partfiles[cpu]) out[fname][i:i + j] = self.read_sp(fdtype, partfiles[cpu])
else: else:

View file

@ -22,53 +22,59 @@ import numpy
############################################################################### ###############################################################################
def cartesian_to_radec(x, y, z): def cartesian_to_radec(X, indeg=True):
""" """
Calculate the radial distance, right ascension in [0, 360) degrees and Calculate the radial distance, RA, dec from Cartesian coordinates. Note,
declination [-90, 90] degrees. Note, the observer should be placed in the RA is in range [0, 360) degrees and dec in range [-90, 90] degrees.
middle of the box.
Parameters Parameters
---------- ----------
x, y, z : 1-dimensional arrays X : 2-dimensional array `(nsamples, 3)`
Cartesian coordinates. Cartesian coordinates.
indeg : bool, optional
Whether to return RA and DEC in degrees.
Returns Returns
------- -------
dist, ra, dec : 1-dimensional arrays out : 2-dimensional array `(nsamples, 3)`
Radial distance, right ascension and declination. Radial distance, RA and dec.
""" """
dist = numpy.sqrt(x**2 + y**2 + z**2) x, y, z = X[:, 0], X[:, 1], X[:, 2]
dec = numpy.rad2deg(numpy.arcsin(z/dist)) dist = numpy.linalg.norm(X, axis=1)
ra = numpy.rad2deg(numpy.arctan2(y, x)) dec = numpy.arcsin(z/dist)
# Make sure RA in the correct range ra = numpy.arctan2(y, x)
ra[ra < 0] += 360 ra[ra < 0] += 2 * numpy.pi # Wrap RA to [0, 2pi)
return dist, ra, dec if indeg:
ra = numpy.rad2deg(ra)
dec = numpy.rad2deg(dec)
return numpy.vstack([dist, ra, dec]).T
def radec_to_cartesian(dist, ra, dec, isdeg=True): def radec_to_cartesian(X, isdeg=True):
""" """
Convert distance, right ascension and declination to Cartesian coordinates. Calculate Cartesian coordinates from radial distance, RA, dec. Note, RA is
expected in range [0, 360) degrees and dec in range [-90, 90] degrees.
Parameters Parameters
---------- ----------
dist, ra, dec : 1-dimensional arrays X : 2-dimensional array `(nsamples, 3)`
Spherical coordinates. Radial distance, RA and dec.
isdeg : bool, optional isdeg : bool, optional
Whether `ra` and `dec` are in degres. By default `True`. Whether to return RA and DEC in degrees.
Returns Returns
------- -------
x, y, z : 1-dimensional arrays out : 2-dimensional array `(nsamples, 3)`
Cartesian coordinates. Cartesian coordinates.
""" """
dist, ra, dec = X[:, 0], X[:, 1], X[:, 2]
if isdeg: if isdeg:
ra = numpy.deg2rad(ra) ra = numpy.deg2rad(ra)
dec = numpy.deg2rad(dec) dec = numpy.deg2rad(dec)
x = dist * numpy.cos(dec) * numpy.cos(ra) x = numpy.cos(dec) * numpy.cos(ra)
y = dist * numpy.cos(dec) * numpy.sin(ra) y = numpy.cos(dec) * numpy.sin(ra)
z = dist * numpy.sin(dec) z = numpy.sin(dec)
return x, y, z return dist * numpy.vstack([x, y, z]).T
############################################################################### ###############################################################################

165
notebooks/perseus.ipynb Normal file

File diff suppressed because one or more lines are too long