Basic matching (#2)

* add recarray manipulations * add cart to radec * add behav so x can be a list * add import * create empty files * ignore plots file * add planck data * add read_mmain file * add cols_to_structured import * use cols_to_structured * add cols_to_structued * add read_mmain import * add reading planck * add mass conversion * add brute force separation calculation * update nb * rename & int dtype * add func to get csiborg ids * add list to nd array conversion * add utils * rename file * add 2M++ * add read 2mpp * add 2mpp shortcut * add randoms generator * Change range of RA [0, 360] * fix ang wrapping * add code for sphere 2pcf * rm wrapping * optionally load only a few borgs * update nb
2025-07-15 18:33:03 +00:00 · 2022-10-18 19:41:20 +01:00 · 2022-10-18 19:41:20 +01:00 · 53a0629d90
commit 53a0629d90
parent d2c1f3294a
17 changed files with 74697 additions and 37 deletions
--- a/galomatch/io/init.py
+++ b/galomatch/io/init.py
@ -13,6 +13,8 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

-from .readsim import (get_sim_path, open_particle, open_unbinding,
-                      read_particle, read_clumpid, read_clumps,
+from .readsim import (get_csiborg_ids, get_sim_path, open_particle,
+                      open_unbinding, read_particle, read_clumpid, read_clumps,
+                      read_mmain,
                      convert_mass_cols, convert_position_cols, flip_cols)
+from .readobs import (read_planck2015, read_2mpp)
--- a/galomatch/io/readobs.py
+++ b/galomatch/io/readobs.py
@ -0,0 +1,95 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+import numpy
+from astropy.io import fits
+
+from ..utils import (add_columns, cols_to_structured)
+
+
+def read_planck2015(fpath, dist_cosmo, max_comdist=None):
+    """
+    Read the Planck 2nd Sunyaev-Zeldovich source catalogue [1]. The following
+    is performed:
+        - removes clusters without a redshift estimate,
+        - calculates the comoving distance with the provided cosmology.
+        - Converts `MSZ` from units of :math:`1e14 M_\odot` to :math:`M_\odot`
+
+    Parameters
+    ----------
+    fpath : str
+        Path to the source catalogue.
+    dist_cosmo : `astropy.cosmology` object
+        The cosmology to calculate cluster comoving distance from redshift.
+    max_comdist : float, optional
+        Maximum comoving distance threshold in units of :math:`\mathrm{MPc}`.
+        By default `None` and no threshold is applied.
+
+    References
+    ----------
+    [1] https://heasarc.gsfc.nasa.gov/W3Browse/all/plancksz2.html
+
+    Returns
+    -------
+    out : `astropy.io.fits.FITS_rec`
+        The catalogue structured array.
+    """
+    data = fits.open(fpath)[1].data
+    # Convert FITS to a structured array
+    out = numpy.full(data.size, numpy.nan, dtype=data.dtype.descr)
+    for name in out.dtype.names:
+        out[name] = data[name]
+    # Take only clusters with redshifts
+    out = out[out["REDSHIFT"] >= 0]
+    # Add comoving distance
+    dist = dist_cosmo.comoving_distance(out["REDSHIFT"]).value
+    out = add_columns(out, dist, "COMDIST")
+    # Convert masses
+    for p in ("MSZ", "MSZ_ERR_UP", "MSZ_ERR_LOW"):
+        out[p] *= 1e14
+    # Distance threshold
+    if max_comdist is not None:
+        out = out[out["COMDIST"] < max_comdist]
+
+    return out
+
+
+def read_2mpp(fpath):
+    """
+    Read in the 2M++ galaxy redshift catalogue [1], with the catalogue at [2].
+    Removes fake galaxies used to fill the zone of avoidance.
+
+    Parameters
+    ----------
+    fpath : str
+        File path to the catalogue.
+
+    Returns
+    -------
+    out : structured array
+        The catalogue.
+    """
+    # Read the catalogue and select non-fake galaxies
+    cat = numpy.genfromtxt(fpath, delimiter="|", )
+    cat = cat[cat[:, 12] == 0, :]
+
+    F64 = numpy.float64
+    cols = [("RA", F64), ("DEC", F64), ("Ksmag", F64)]
+    out = cols_to_structured(cat.shape[0], cols)
+    out["RA"] = cat[:, 1] - 180
+    out["DEC"] = cat[:, 2]
+    out["Ksmag"] = cat[:, 5]
+
+    return out
--- a/galomatch/io/readsim.py
+++ b/galomatch/io/readsim.py
@ -19,8 +19,11 @@ import numpy
 from scipy.io import FortranFile
 from os import listdir
 from os.path import (join, isfile)
+from glob import glob
 from tqdm import tqdm

+from ..utils import cols_to_structured
+

 F16 = numpy.float16
 F32 = numpy.float32
@ -32,6 +35,37 @@ BOXSIZE = 677.7 / little_h  # Mpc. Otherwise positions in [0, 1].
 BOXMASS = 3.749e19  # Msun


+def get_csiborg_ids(srcdir):
+    """
+    Get CSiBORG simulation IDs from the list of folders in `srcdir`.
+    Assumes that the folders look like `ramses_out_X` and extract the `X`
+    integer. Removes `5511` from the list of IDs.
+
+    Parameters
+    ----------
+    srcdir : string
+        The folder where CSiBORG simulations are stored.
+
+    Returns
+    -------
+    ids : 1-dimensional array
+        Array of CSiBORG simulation IDs.
+    """
+    files = glob(join(srcdir, "ramses_out*"))
+    # Select only file names
+    files = [f.split("/")[-1] for f in files]
+    # Remove files with inverted ICs
+    files = [f for f in files if "_inv" not in f]
+    # Remove the filename with _old
+    files = [f for f in files if "OLD" not in f]
+    ids = [int(f.split("_")[-1]) for f in files]
+    try:
+        ids.remove(5511)
+    except ValueError:
+        pass
+    return numpy.sort(ids)
+
+
 def get_sim_path(n, fname="ramses_out_{}", srcdir="/mnt/extraspace/hdesmond"):
    """
    Get a path to a CSiBORG simulation.
@ -309,15 +343,45 @@ def read_clumps(n, simpath):
            ("peak_x", F64), ("peak_y", F64), ("peak_z", F64),
            ("rho-", F64), ("rho+", F64), ("rho_av", F64),
            ("mass_cl", F64), ("relevance", F64)]
-    # Write to a structured array
-    dtype = {"names": [col[0] for col in cols],
-            "formats": [col[1] for col in cols]}
-    out = numpy.full(arr.shape[0], numpy.nan, dtype=dtype)
-    for i, name in enumerate(dtype["names"]):
+    out = cols_to_structured(arr.shape[0], cols)
+    for i, name in enumerate(out.dtype.names):
        out[name] = arr[:, i]
    return out


+def read_mmain(n, srcdir, fname="Mmain_{}.npy"):
+    """
+    Read `mmain` numpy arrays of central halos whose mass contains their
+    substracture contribution.
+
+    Parameters
+    ----------
+    n : int
+        The index of the initial conditions (IC) realisation.
+    srcdir : str
+        The path to the folder containing the files.
+    fname : str, optional
+        The file name convention.  By default `Mmain_{}.npy`, where the
+        substituted value is `n`.
+
+    Returns
+    -------
+    out : structured array
+        Array with the central halo information.
+    """
+    fpath = join(srcdir, fname.format(n))
+    arr = numpy.load(fpath)
+
+    cols = [("index", I64), ("peak_x", F64), ("peak_y", F64),
+            ("peak_z", F64), ("mass_cl", F64), ("sub_frac", F64)]
+    out = cols_to_structured(arr.shape[0], cols)
+    for i, name in enumerate(out.dtype.names):
+        out[name] = arr[:, i]
+
+    return out
+
+
+
 def convert_mass_cols(arr, cols):
    """
    Convert mass columns from box units to :math:`M_{odot}`. `arr` is passed by