Basic matching (#2)

* add recarray manipulations

* add cart to radec

* add behav so x can be a list

* add import

* create empty files

* ignore plots file

* add planck data

* add read_mmain file

* add cols_to_structured import

* use cols_to_structured

* add cols_to_structued

* add read_mmain import

* add reading planck

* add mass conversion

* add brute force separation calculation

* update nb

* rename & int dtype

* add func to get csiborg ids

* add list to nd array conversion

* add utils

* rename file

* add 2M++

* add read 2mpp

* add 2mpp shortcut

* add randoms generator

* Change range of RA [0, 360]

* fix ang wrapping

* add code for sphere 2pcf

* rm wrapping

* optionally load only a few borgs

* update nb
This commit is contained in:
Richard Stiskalek 2022-10-18 19:41:20 +01:00 committed by GitHub
parent d2c1f3294a
commit 53a0629d90
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 74697 additions and 37 deletions

View file

@ -13,4 +13,4 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from galomatch import io
from galomatch import (io, match, utils)

View file

@ -13,6 +13,8 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .readsim import (get_sim_path, open_particle, open_unbinding,
read_particle, read_clumpid, read_clumps,
from .readsim import (get_csiborg_ids, get_sim_path, open_particle,
open_unbinding, read_particle, read_clumpid, read_clumps,
read_mmain,
convert_mass_cols, convert_position_cols, flip_cols)
from .readobs import (read_planck2015, read_2mpp)

95
galomatch/io/readobs.py Normal file
View file

@ -0,0 +1,95 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import numpy
from astropy.io import fits
from ..utils import (add_columns, cols_to_structured)
def read_planck2015(fpath, dist_cosmo, max_comdist=None):
"""
Read the Planck 2nd Sunyaev-Zeldovich source catalogue [1]. The following
is performed:
- removes clusters without a redshift estimate,
- calculates the comoving distance with the provided cosmology.
- Converts `MSZ` from units of :math:`1e14 M_\odot` to :math:`M_\odot`
Parameters
----------
fpath : str
Path to the source catalogue.
dist_cosmo : `astropy.cosmology` object
The cosmology to calculate cluster comoving distance from redshift.
max_comdist : float, optional
Maximum comoving distance threshold in units of :math:`\mathrm{MPc}`.
By default `None` and no threshold is applied.
References
----------
[1] https://heasarc.gsfc.nasa.gov/W3Browse/all/plancksz2.html
Returns
-------
out : `astropy.io.fits.FITS_rec`
The catalogue structured array.
"""
data = fits.open(fpath)[1].data
# Convert FITS to a structured array
out = numpy.full(data.size, numpy.nan, dtype=data.dtype.descr)
for name in out.dtype.names:
out[name] = data[name]
# Take only clusters with redshifts
out = out[out["REDSHIFT"] >= 0]
# Add comoving distance
dist = dist_cosmo.comoving_distance(out["REDSHIFT"]).value
out = add_columns(out, dist, "COMDIST")
# Convert masses
for p in ("MSZ", "MSZ_ERR_UP", "MSZ_ERR_LOW"):
out[p] *= 1e14
# Distance threshold
if max_comdist is not None:
out = out[out["COMDIST"] < max_comdist]
return out
def read_2mpp(fpath):
"""
Read in the 2M++ galaxy redshift catalogue [1], with the catalogue at [2].
Removes fake galaxies used to fill the zone of avoidance.
Parameters
----------
fpath : str
File path to the catalogue.
Returns
-------
out : structured array
The catalogue.
"""
# Read the catalogue and select non-fake galaxies
cat = numpy.genfromtxt(fpath, delimiter="|", )
cat = cat[cat[:, 12] == 0, :]
F64 = numpy.float64
cols = [("RA", F64), ("DEC", F64), ("Ksmag", F64)]
out = cols_to_structured(cat.shape[0], cols)
out["RA"] = cat[:, 1] - 180
out["DEC"] = cat[:, 2]
out["Ksmag"] = cat[:, 5]
return out

View file

@ -19,8 +19,11 @@ import numpy
from scipy.io import FortranFile
from os import listdir
from os.path import (join, isfile)
from glob import glob
from tqdm import tqdm
from ..utils import cols_to_structured
F16 = numpy.float16
F32 = numpy.float32
@ -32,6 +35,37 @@ BOXSIZE = 677.7 / little_h # Mpc. Otherwise positions in [0, 1].
BOXMASS = 3.749e19 # Msun
def get_csiborg_ids(srcdir):
"""
Get CSiBORG simulation IDs from the list of folders in `srcdir`.
Assumes that the folders look like `ramses_out_X` and extract the `X`
integer. Removes `5511` from the list of IDs.
Parameters
----------
srcdir : string
The folder where CSiBORG simulations are stored.
Returns
-------
ids : 1-dimensional array
Array of CSiBORG simulation IDs.
"""
files = glob(join(srcdir, "ramses_out*"))
# Select only file names
files = [f.split("/")[-1] for f in files]
# Remove files with inverted ICs
files = [f for f in files if "_inv" not in f]
# Remove the filename with _old
files = [f for f in files if "OLD" not in f]
ids = [int(f.split("_")[-1]) for f in files]
try:
ids.remove(5511)
except ValueError:
pass
return numpy.sort(ids)
def get_sim_path(n, fname="ramses_out_{}", srcdir="/mnt/extraspace/hdesmond"):
"""
Get a path to a CSiBORG simulation.
@ -309,15 +343,45 @@ def read_clumps(n, simpath):
("peak_x", F64), ("peak_y", F64), ("peak_z", F64),
("rho-", F64), ("rho+", F64), ("rho_av", F64),
("mass_cl", F64), ("relevance", F64)]
# Write to a structured array
dtype = {"names": [col[0] for col in cols],
"formats": [col[1] for col in cols]}
out = numpy.full(arr.shape[0], numpy.nan, dtype=dtype)
for i, name in enumerate(dtype["names"]):
out = cols_to_structured(arr.shape[0], cols)
for i, name in enumerate(out.dtype.names):
out[name] = arr[:, i]
return out
def read_mmain(n, srcdir, fname="Mmain_{}.npy"):
"""
Read `mmain` numpy arrays of central halos whose mass contains their
substracture contribution.
Parameters
----------
n : int
The index of the initial conditions (IC) realisation.
srcdir : str
The path to the folder containing the files.
fname : str, optional
The file name convention. By default `Mmain_{}.npy`, where the
substituted value is `n`.
Returns
-------
out : structured array
Array with the central halo information.
"""
fpath = join(srcdir, fname.format(n))
arr = numpy.load(fpath)
cols = [("index", I64), ("peak_x", F64), ("peak_y", F64),
("peak_z", F64), ("mass_cl", F64), ("sub_frac", F64)]
out = cols_to_structured(arr.shape[0], cols)
for i, name in enumerate(out.dtype.names):
out[name] = arr[:, i]
return out
def convert_mass_cols(arr, cols):
"""
Convert mass columns from box units to :math:`M_{odot}`. `arr` is passed by

View file

@ -0,0 +1,17 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import brute_spatial_separation
from .correlation import (get_randoms_sphere, angular_tpcf)

View file

@ -0,0 +1,131 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import numpy
from Corrfunc.mocks import DDtheta_mocks
from Corrfunc.utils import convert_3d_counts_to_cf
from warnings import warn
def get_randoms_sphere(N, seed=42):
"""
Generate random points on a sphere.
Parameters
----------
N : int
Number of points.
seed : int
Random seed.
Returns
-------
ra : 1-dimensional array
Right ascension in :math:`[0, 360)` degrees.
dec : 1-dimensional array
Declination in :math:`[-90, 90]` degrees.
"""
gen = numpy.random.default_rng(seed)
ra = gen.random(N) * 360
dec = numpy.rad2deg(numpy.arcsin(2 * (gen.random(N) - 0.5)))
return ra, dec
def wrapRA(ra, degrees=True):
"""
Wrap the right ascension from :math:`[-180, 180)` to :math`[0, 360)`
degrees or equivalently if `degrees=False` in radians.
Paramaters
----------
ra : 1-dimensional array
Right ascension values.
degrees : float, optional
Whether the right ascension is in degrees.
Returns
-------
ra : 1-dimensional array
Wrapped around right ascension.
"""
mask = ra < 0
if numpy.sum(mask) == 0:
warn("No negative right ascension found.")
ra[mask] += 360 if degrees else 2 * numpy.pi
return ra
def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
Nmult=5, seed1=42, seed2=666):
"""
Calculate the angular two-point correlation function. The coordinates must
be provided in degrees. With the right ascension and degrees being
in range of :math:`[-180, 180]` and :math:`[-90, 90]` degrees, respectively.
If `RA2` and `DEC2` are provided cross-correlates the first data set with
the second. Creates a uniformly sampled randoms on the surface of a sphere
of size `Nmult` times the corresponding number of data points. Uses the
Landy-Szalay estimator.
Parameters
----------
bins : 1-dimensional array
Angular bins to calculate the angular twop-point correlation function.
RA1 : 1-dimensional array
Right ascension of the 1st data set, in degrees.
DEC1 : 1-dimensional array
Declination of the 1st data set, in degrees.
RA2 : 1-dimensional array, optional
Right ascension of the 2nd data set, in degrees.
DEC2 : 1-dimensional array, optional
Declination of the 2nd data set, in degrees.
nthreads : int, optional
Number of threads, by default 1.
Nmult : int, optional
Relative randoms size with respect to the data set. By default 5.
seed1 : int, optional
Seed to generate the first set of randoms.
seed2 : int, optional
Seed to generate the second set of randoms.
Returns
-------
cf : 1-dimensional array
The angular 2-point correlation function.
"""
# If not provided calculate autocorrelation
if RA2 is None:
RA2 = RA1
DEC2 = DEC1
# Get the array sizes
ND1 = RA1.size
ND2 = RA2.size
NR1 = ND1 * Nmult
NR2 = ND2 * Nmult
# Generate randoms. Note that these are over the sphere!
randRA1, randDEC1 = get_randoms_sphere(NR1, seed1)
randRA2, randDEC2 = get_randoms_sphere(NR2, seed2)
# Wrap RA
RA1 = wrapRA(numpy.copy(RA1))
RA2 = wrapRA(numpy.copy(RA2))
# Calculate pairs
D1D2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1, RA2=RA2, DEC2=DEC2)
D1R2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1,
RA2=randRA2, DEC2=randDEC2)
D2R1 = DDtheta_mocks(0, nthreads, bins, RA2, DEC2,
RA2=randRA1, DEC2=randDEC1)
R1R2 = DDtheta_mocks(0, nthreads, bins, randRA1, randDEC1,
RA2=randRA2, DEC2=randDEC2)
# Convert to the CF
return convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, D1D2, D1R2, D2R1, R1R2)

67
galomatch/match/match.py Normal file
View file

@ -0,0 +1,67 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import numpy
from tqdm import tqdm
from astropy.coordinates import SkyCoord
def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
"""
Calculate for each point in `c1` the `N` closest points in `c2`.
Parameters
----------
c1 : `astropy.coordinates.SkyCoord`
Coordinates of the first set of points.
c2 : `astropy.coordinates.SkyCoord`
Coordinates of the second set of points.
angular : bool, optional
Whether to calculate angular separation or 3D separation. By default
`False` and 3D separation is calculated.
N : int, optional
Number of closest points in `c2` to each object in `c1` to return.
verbose : bool, optional
Verbosity flag. By default `False`.
Returns
-------
sep : 1-dimensional array
Separation of each object in `c1` to `N` closest objects in `c2`. The
array shape is `(c1.size, N)`. Separation is in units of `c1`.
indxs : 1-dimensional array
Indexes of the closest objects in `c2` for each object in `c1`. The
array shape is `(c1.size, N)`.
"""
if not (isinstance(c1, SkyCoord) and isinstance(c2, SkyCoord)):
raise TypeError("`c1` & `c2` must be `astropy.coordinates.SkyCoord`.")
N1 = c1.size
N2 = c2.size if N is None else N
# Pre-allocate arrays
sep = numpy.full((N1, N2), numpy.nan)
indxs = numpy.full((N1, N2), numpy.nan, dtype=int)
iters = tqdm(range(N1)) if verbose else range(N1)
for i in iters:
if angular:
dist = c1[i].separation(c2).value
else:
dist = c1[i].separation_3d(c2).value
# Sort the distances
sort = numpy.argsort(dist)[:N2]
indxs[i, :] = sort
sep[i, :] = dist[sort]
return sep, indxs

View file

@ -0,0 +1,18 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .recarray_manip import (cols_to_structured, add_columns, rm_columns,
list_to_ndarray)
from .transforms import cartesian_to_radec

View file

@ -0,0 +1,155 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Utilility functions for manipulation structured arrays."""
import numpy
def cols_to_structured(N, cols):
"""
Allocate a structured array from `cols`.
Parameters
----------
N : int
Structured array size.
cols: list of tuples
Column names and dtypes. Each tuple must written as `(name, dtype)`.
Returns
-------
out : structured array
Initialised structured array.
"""
if not isinstance(cols, list) and all(isinstance(c, tuple) for c in cols):
raise TypeError("`cols` must be a list of tuples.")
dtype = {"names": [col[0] for col in cols],
"formats": [col[1] for col in cols]}
return numpy.full(N, numpy.nan, dtype=dtype)
def add_columns(arr, X, cols):
"""
Add new columns to a record array `arr`. Creates a new array.
Parameters
----------
arr : record array
The record array to add columns to.
X : (list of) 1-dimensional array(s) or 2-dimensional array
Columns to be added.
cols : str or list of str
Column names to be added.
Returns
-------
out : record array
The new record array with added values.
"""
# Make sure cols is a list of str and X a 2D array
cols = [cols] if isinstance(cols, str) else cols
if isinstance(X, numpy.ndarray) and X.ndim == 1:
X = X.reshape(-1, 1)
if isinstance(X, list) and all(x.ndim == 1 for x in X):
X = numpy.vstack([X]).T
if len(cols) != X.shape[1]:
raise ValueError("Number of columns of `X` does not match `cols`.")
if arr.size != X.shape[0]:
raise ValueError("Number of rows of `X` does not match size of `arr`.")
# Get the new data types
dtype = arr.dtype.descr
for i, col in enumerate(cols):
dtype.append((col, X[i, :].dtype.descr[0][1]))
# Fill in the old array
out = numpy.full(arr.size, numpy.nan, dtype=dtype)
for col in arr.dtype.names:
out[col] = arr[col]
for i, col in enumerate(cols):
out[col] = X[:, i]
return out
def rm_columns(arr, cols):
"""
Remove columns `cols` from a record array `arr`. Creates a new array.
Parameters
----------
arr : record array
The record array to remove columns from.
cols : str or list of str
Column names to be removed.
Returns
-------
out : record array
Record array with removed columns.
"""
# Check columns we wish to delete are in the array
cols = [cols] if isinstance(cols, str) else cols
for col in cols:
if col not in arr.dtype.names:
raise ValueError("Column `{}` not in `arr`.".format(col))
# Get a new dtype without the cols to be deleted
new_dtype = []
for dtype, name in zip(arr.dtype.descr, arr.dtype.names):
if name not in cols:
new_dtype.append(dtype)
# Allocate a new array and fill it in.
out = numpy.full(arr.size, numpy.nan, new_dtype)
for name in out.dtype.names:
out[name] = arr[name]
return out
def list_to_ndarray(arrs, cols):
"""
Convert a list of structured arrays of CSiBORG simulation catalogues to
an 3-dimensional array.
Parameters
----------
arrs : list of structured arrays
List of CSiBORG catalogues.
cols : str or list of str
Columns to be extracted from the CSiBORG catalogues.
Returns
-------
out : 3-dimensional array
Catalogue array of shape `(n_realisations, n_samples, n_cols)`, where
`n_samples` is the maximum number of samples over the CSiBORG
catalogues.
"""
if not isinstance(arrs, list):
raise TypeError("`arrs` must be a list of structured arrays.")
cols = [cols] if isinstance(cols, str) else cols
Narr = len(arrs)
Nobj_max = max([arr.size for arr in arrs])
Ncol = len(cols)
# Preallocate the array and fill it
out = numpy.full((Narr, Nobj_max, Ncol), numpy.nan)
for i in range(Narr):
Nobj = arrs[i].size
for j in range(Ncol):
out[i, :Nobj, j] = arrs[i][cols[j]]
return out

View file

@ -0,0 +1,56 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import numpy
def cartesian_to_radec(arr, xpar="peak_x", ypar="peak_y", zpar="peak_z", degrees=True):
"""
Extract `x`, `y`, and `z` coordinates from a record array `arr` and
calculate their spherical coordinates representation.
Parameters
----------
arr : record array
Record array with the Cartesian coordinates.
xpar : str, optional
Name of the x coordinate in the record array.
ypar : str, optional
Name of the y coordinate in the record array.
zpar : str, optional
Name of the z coordinate in the record array.
degrees : bool, optional
Whether to return angles in degrees. By default `True`.
Returns
-------
dist : 1-dimensional array
Radial distance.
ra : 1-dimensional array
Right ascension.
dec : 1-dimensional array
Declination.
"""
x, y, z = arr[xpar], arr[ypar], arr[zpar]
dist = numpy.sqrt(x**2 + y**2 + z**2)
dec = numpy.arcsin(z / dist)
ra = numpy.arctan2(y, x)
if degrees:
dec = numpy.rad2deg(dec)
ra = numpy.rad2deg(ra)
return dist, ra, dec