Rename repo

2025-07-12 00:53:02 +00:00 · 2022-10-20 23:34:14 +01:00 · 2022-10-20 23:34:14 +01:00 · a9e98f5a2d
commit a9e98f5a2d
parent 942c36b142
13 changed files with 0 additions and 0 deletions
--- a/csiborgtools/init.py
+++ b/csiborgtools/init.py
@ -0,0 +1,16 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+# Expose the subpackages at the top level of the renamed package.
+from csiborgtools import (io, match, utils, units)  # noqa
--- a/csiborgtools/fits/init.py
+++ b/csiborgtools/fits/init.py
@ -0,0 +1,14 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
--- a/csiborgtools/io/init.py
+++ b/csiborgtools/io/init.py
@ -0,0 +1,20 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+from .readsim import (get_csiborg_ids, get_sim_path, get_snapshot_path,  # noqa
+                      read_info,  # noqa
+                      open_particle, open_unbinding, read_particle,  # noqa
+                      read_clumpid, read_clumps, read_mmain)  # noqa
+from .readobs import (read_planck2015, read_2mpp)  # noqa
--- a/csiborgtools/io/readobs.py
+++ b/csiborgtools/io/readobs.py
@ -0,0 +1,106 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Functions to read in observational catalogues.
+"""
+
+import numpy
+from astropy.io import fits
+
+from ..utils import (add_columns, cols_to_structured)
+
+
+def read_planck2015(fpath, dist_cosmo, max_comdist=None):
+    r"""
+    Read the Planck 2nd Sunyaev-Zeldovich source catalogue [1]. The following
+    is performed:
+        - removes clusters without a redshift estimate,
+        - calculates the comoving distance with the provided cosmology,
+        - converts `MSZ` and its errors from units of :math:`1e14 M_\odot`
+          to :math:`M_\odot`.
+
+    Parameters
+    ----------
+    fpath : str
+        Path to the source catalogue.
+    dist_cosmo : `astropy.cosmology` object
+        The cosmology to calculate cluster comoving distance from redshift.
+    max_comdist : float, optional
+        Maximum comoving distance threshold in units of :math:`\mathrm{Mpc}`.
+        By default `None` and no threshold is applied.
+
+    References
+    ----------
+    [1] https://heasarc.gsfc.nasa.gov/W3Browse/all/plancksz2.html
+
+    Returns
+    -------
+    out : structured array
+        The catalogue with an additional `COMDIST` column.
+    """
+    # Copy the table into a plain structured array while the file is still
+    # open, so that the returned data does not depend on the closed file.
+    with fits.open(fpath) as hdul:
+        data = hdul[1].data
+        out = numpy.full(data.size, numpy.nan, dtype=data.dtype.descr)
+        for name in out.dtype.names:
+            out[name] = data[name]
+    # Keep only the entries with a non-negative redshift
+    out = out[out["REDSHIFT"] >= 0]
+    # Add comoving distance
+    dist = dist_cosmo.comoving_distance(out["REDSHIFT"]).value
+    out = add_columns(out, dist, "COMDIST")
+    # Convert masses
+    for par in ("MSZ", "MSZ_ERR_UP", "MSZ_ERR_LOW"):
+        out[par] *= 1e14
+    # Distance threshold
+    if max_comdist is not None:
+        out = out[out["COMDIST"] < max_comdist]
+
+    return out
+
+
+def read_2mpp(fpath, dist_cosmo):
+    """
+    Load the 2M++ galaxy redshift catalogue [1] from the table available
+    at [2], dropping the fake galaxies used to fill the zone of avoidance.
+
+    Parameters
+    ----------
+    fpath : str
+        File path to the catalogue.
+    dist_cosmo : `astropy.cosmology` object
+        The cosmology used to convert `ZCMB` into a comoving distance.
+
+    Returns
+    -------
+    out : structured array
+        The catalogue with fields `RA`, `DEC`, `Ksmag`, `ZCMB` and
+        `CDIST_CMB`.
+
+    References
+    ----------
+    [1] The 2M++ galaxy redshift catalogue; Lavaux, Guilhem, Hudson, Michael J.
+    [2] https://cdsarc.cds.unistra.fr/viz-bin/cat/J/MNRAS/416/2840#/article
+    """
+    from scipy.constants import c
+
+    table = numpy.genfromtxt(fpath, delimiter="|")
+    # Only the rows whose column 12 is zero are kept (non-fake galaxies)
+    is_real = table[:, 12] == 0
+    table = table[is_real, :]
+
+    dtype = numpy.float64
+    names = ("RA", "DEC", "Ksmag", "ZCMB", "CDIST_CMB")
+    out = cols_to_structured(table.shape[0],
+                             [(name, dtype) for name in names])
+    # Columns that are copied over without any conversion
+    for name, column in (("RA", 1), ("DEC", 2), ("Ksmag", 5)):
+        out[name] = table[:, column]
+    # Column 7 is divided by the speed of light in km/s (`c` is in m/s)
+    out["ZCMB"] = table[:, 7] / (c * 1e-3)
+    out["CDIST_CMB"] = dist_cosmo.comoving_distance(out["ZCMB"]).value
+    return out
--- a/csiborgtools/io/readsim.py
+++ b/csiborgtools/io/readsim.py
@ -0,0 +1,431 @@
+# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+"""Functions to read in the particle and clump files."""
+
+import numpy
+from scipy.io import FortranFile
+from os import listdir
+from os.path import (join, isfile)
+from glob import glob
+from tqdm import tqdm
+
+from ..utils import cols_to_structured
+
+
+# Short aliases of the numpy scalar types used to declare column dtypes.
+F16 = numpy.float16
+F32 = numpy.float32
+F64 = numpy.float64
+I32 = numpy.int32
+I64 = numpy.int64
+
+
+def get_csiborg_ids(srcdir):
+    """
+    Get CSiBORG simulation IDs from the list of folders in `srcdir`.
+    Assumes that the folders look like `ramses_out_X` and extract the `X`
+    integer. Folders whose name contains `_inv` (inverted ICs) or `OLD` are
+    skipped and `5511` is removed from the list of IDs.
+
+    Parameters
+    ----------
+    srcdir : string
+        The folder where CSiBORG simulations are stored.
+
+    Returns
+    -------
+    ids : 1-dimensional array
+        Sorted integer array of CSiBORG simulation IDs.
+
+    Raises
+    ------
+    ValueError
+        If the part after the last underscore of a matched folder name is not
+        an integer.
+    """
+    from os.path import basename
+
+    ids = []
+    for path in glob(join(srcdir, "ramses_out*")):
+        # `basename` instead of splitting on "/" keeps this portable
+        name = basename(path)
+        # Skip the inverted ICs and the folders flagged as old
+        if "_inv" in name or "OLD" in name:
+            continue
+        ids.append(int(name.split("_")[-1]))
+    # Realisation 5511 is always excluded
+    ids = [i for i in ids if i != 5511]
+    # Explicit integer dtype so that an empty result is not a float array
+    return numpy.sort(numpy.asarray(ids, dtype=int))
+
+
+def get_sim_path(n, fname="ramses_out_{}", srcdir="/mnt/extraspace/hdesmond"):
+    """
+    Build the path to a single CSiBORG simulation.
+
+    Parameters
+    ----------
+    n : int
+        The index of the initial conditions (IC) realisation.
+    fname : str, optional
+        Folder name template into which `n` is substituted. By default
+        `ramses_out_{}`.
+    srcdir : str, optional
+        The folder where the IC realisations are stored.
+
+    Returns
+    -------
+    path : str
+        Path to the `n`th CSiBORG simulation.
+    """
+    folder = fname.format(n)
+    return join(srcdir, folder)
+
+
+def get_snapshot_path(Nsnap, simpath):
+    """
+    Build the path to a snapshot folder of a CSiBORG IC realisation.
+
+    Parameters
+    ----------
+    Nsnap : int
+        Snapshot index. It is zero-padded to five characters.
+    simpath : str
+        Path to the CSiBORG IC realisation.
+
+    Returns
+    -------
+    snappath : str
+        Path to the CSiBORG IC realisation snapshot.
+    """
+    nout = str(Nsnap).zfill(5)
+    folder = "output_{}".format(nout)
+    return join(simpath, folder)
+
+
+def read_info(Nsnap, simpath):
+    """
+    Read CSiBORG simulation snapshot info.
+
+    Parameters
+    ----------
+    Nsnap : int
+        Snapshot index.
+    simpath : str
+        Path to the CSiBORG IC realisation.
+
+    Returns
+    -------
+    info : dict
+        Dictionary of info parameters. Note that both keys and values are
+        strings.
+
+    Raises
+    ------
+    ValueError
+        If the info file contains no `ordering` token.
+    """
+    # Open the info file
+    snappath = get_snapshot_path(Nsnap, simpath)
+    filename = join(snappath, "info_{}.txt".format(str(Nsnap).zfill(5)))
+    with open(filename, "r") as f:
+        info = f.read().split()
+    # Throw anything from the `ordering` token onwards out. The explicit
+    # string dtype keeps the comparison below valid for an empty array.
+    info = numpy.asarray(info[:info.index("ordering")], dtype=str)
+    # Positions of the `=` tokens. The token before each one is the key and
+    # the token after it is the value.
+    eqindxs = numpy.flatnonzero(info == "=")
+
+    keys = info[eqindxs - 1]
+    vals = info[eqindxs + 1]
+    return dict(zip(keys, vals))
+
+
+def open_particle(n, simpath, verbose=True):
+    """
+    Open particle files to a given CSiBORG simulation and read their headers.
+    The returned files are left open, positioned just after the header
+    records, and it is up to the caller to close them.
+
+    Parameters
+    ----------
+    n : int
+        The index of a redshift snapshot.
+    simpath : str
+        The complete path to the CSiBORG simulation.
+    verbose : bool, optional
+        Verbosity flag.
+
+    Returns
+    -------
+    nparts : 1-dimensional array
+        Number of parts associated with each CPU.
+    partfiles : list of `scipy.io.FortranFile`
+        Opened part files.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the first unbinding file is missing from the snapshot folder.
+    ValueError
+        If the number of CPUs in a part file disagrees with the info file.
+    """
+    # Zeros filled snapshot number and the snapshot path
+    nout = str(n).zfill(5)
+    snappath = get_snapshot_path(n, simpath)
+    ncpu = int(read_info(n, simpath)["ncpu"])
+
+    if verbose:
+        print("Reading in output `{}` with ncpu = `{}`.".format(nout, ncpu))
+
+    # Check whether the unbinding file exists.
+    snapdirlist = listdir(snappath)
+    unbinding_file = "unbinding_{}.out00001".format(nout)
+    if unbinding_file not in snapdirlist:
+        raise FileNotFoundError(
+            "Couldn't find `{}` in `{}`. Use mergertreeplot.py -h or --help "
+            "to print help message.".format(unbinding_file, snappath))
+
+    # First read the headers. Reallocate arrays and fill them.
+    nparts = numpy.zeros(ncpu, dtype=int)
+    partfiles = [None] * ncpu
+    try:
+        for cpu in range(ncpu):
+            cpu_str = str(cpu + 1).zfill(5)
+            fpath = join(snappath, "part_{}.out{}".format(nout, cpu_str))
+
+            f = FortranFile(fpath)
+            # Register the file straight away so it is closed on any failure
+            partfiles[cpu] = f
+            # The header records must be read in exactly this order
+            ncpuloc = f.read_ints()
+            if ncpuloc != ncpu:
+                infopath = join(snappath, "info_{}.txt".format(nout))
+                raise ValueError(
+                    "`ncpu = {}` of `{}` disagrees with `ncpu = {}` "
+                    "of `{}`.".format(ncpu, infopath, ncpuloc, fpath))
+            f.read_ints()       # ndim, not used
+            nparts[cpu] = f.read_ints()
+            f.read_ints()       # localseed, not used
+            f.read_ints()       # nstar_tot, not used
+            f.read_reals('d')   # mstar_tot, not used
+            f.read_reals('d')   # mstar_lost, not used
+            f.read_ints()       # nsink, not used
+    except Exception:
+        # Do not leak the files opened so far if anything went wrong
+        for f in partfiles:
+            if f is not None:
+                f.close()
+        raise
+
+    return nparts, partfiles
+
+
+def read_sp(dtype, partfile):
+    """
+    Read the next record of an opened particle file, with the reader chosen
+    according to the requested dtype.
+
+    Parameters
+    ----------
+    dtype : numpy scalar type
+        Either one of `F16`, `F32`, `F64`, in which case the record is read
+        as double precision reals, or `I32`, in which case it is read as
+        integers.
+    partfile : `scipy.io.FortranFile`
+        Part file to read from.
+
+    Returns
+    -------
+    out : 1-dimensional array
+        The data read from the part file.
+
+    Raises
+    ------
+    TypeError
+        If `dtype` is none of the supported types.
+    """
+    is_real = dtype in (F16, F32, F64)
+    if not is_real and dtype not in (I32,):
+        raise TypeError("Unexpected dtype `{}`.".format(dtype))
+
+    if is_real:
+        return partfile.read_reals('d')
+    return partfile.read_ints()
+
+
+def nparts_to_start_ind(nparts):
+    """
+    Turn the per-CPU particle counts into the offsets at which each CPU's
+    particles start in a single pre-allocated array.
+
+    Parameters
+    ----------
+    nparts : 1-dimensional array
+        Number of parts associated with each CPU.
+
+    Returns
+    -------
+    start_ind : 1-dimensional array
+        The starting indices calculated as a cumulative sum starting at 0.
+    """
+    # The last count is dropped since nothing starts after the final CPU
+    offsets = numpy.cumsum(nparts[:-1])
+    return numpy.concatenate(([0], offsets))
+
+
+def read_particle(pars_extract, n, simpath, verbose=True):
+    """
+    Read particle files of a simulation at a given snapshot and return
+    values of `pars_extract`.
+
+    Parameters
+    ----------
+    pars_extract : list of str
+        Parameters to be extracted. Each must be one of `x`, `y`, `z`, `vx`,
+        `vy`, `vz`, `M`, `ID` and `level`.
+    n : int
+        The index of the redshift snapshot.
+    simpath : str
+        The complete path to the CSiBORG simulation.
+    verbose : bool, optional
+        Verbosity flag while for reading the CPU outputs.
+
+    Returns
+    -------
+    out : structured array
+        The data read from the particle file.
+
+    Raises
+    ------
+    ValueError
+        If `pars_extract` contains an unknown parameter.
+    """
+    # Order in which the particles are written in the FortranFile
+    forder = [("x", F16), ("y", F16), ("z", F16),
+              ("vx", F16), ("vy", F16), ("vz", F16),
+              ("M", F32), ("ID", I32), ("level", I32)]
+    fnames = [fp[0] for fp in forder]
+    # Check there are no strange parameters before any file is opened
+    for p in pars_extract:
+        if p not in fnames:
+            raise ValueError("Undefined parameter `{}`. Must be one of `{}`."
+                             .format(p, fnames))
+
+    # Open the particle files, respecting the verbosity flag
+    nparts, partfiles = open_particle(n, simpath, verbose)
+    try:
+        if verbose:
+            print("Opened {} particle files.".format(nparts.size))
+        ncpu = nparts.size
+
+        npart_tot = numpy.sum(nparts)
+        # These are the data we read along with types
+        dtype = {"names": pars_extract,
+                 "formats": [forder[fnames.index(p)][1]
+                             for p in pars_extract]}
+        # Allocate the output structured array
+        out = numpy.full(npart_tot, numpy.nan, dtype)
+        start_ind = nparts_to_start_ind(nparts)
+        iters = tqdm(range(ncpu)) if verbose else range(ncpu)
+        for cpu in iters:
+            i = start_ind[cpu]
+            j = nparts[cpu]
+            for (fname, fdtype) in forder:
+                # Every record has to be read to advance through the file,
+                # the ones that were not requested are simply discarded.
+                values = read_sp(fdtype, partfiles[cpu])
+                if fname in pars_extract:
+                    out[fname][i:i + j] = values
+    finally:
+        # The part files are not needed once their content has been read
+        for partfile in partfiles:
+            partfile.close()
+
+    return out
+
+
+def open_unbinding(cpu, n, simpath):
+    """
+    Open the unbinding file written by a single CPU for a given CSiBORG
+    snapshot. Note that to be consistent with the file names the CPU index
+    is incremented by 1.
+
+    Parameters
+    ----------
+    cpu : int
+        The CPU index.
+    n : int
+        The index of a redshift snapshot.
+    simpath : str
+        The complete path to the CSiBORG simulation.
+
+    Returns
+    -------
+    unbinding : `scipy.io.FortranFile`
+        The opened unbinding FortranFile.
+    """
+    snap_tag = str(n).zfill(5)
+    cpu_tag = str(cpu + 1).zfill(5)
+    folder = "output_{}".format(snap_tag)
+    filename = "unbinding_{}.out{}".format(snap_tag, cpu_tag)
+    return FortranFile(join(simpath, folder, filename))
+
+
+def read_clumpid(n, simpath, verbose=True):
+    """
+    Read clump IDs from unbinding files.
+
+    Parameters
+    ----------
+    n : int
+        The index of a redshift snapshot.
+    simpath : str
+        The complete path to the CSiBORG simulation.
+    verbose : bool, optional
+        Verbosity flag while for reading the CPU outputs.
+
+    Returns
+    -------
+    clumpid : 1-dimensional array
+        The array of clump IDs. Note that it is a float array.
+    """
+    nparts, partfiles = open_particle(n, simpath, verbose)
+    # Only the particle counts are needed, so close the part files again
+    for partfile in partfiles:
+        partfile.close()
+    start_ind = nparts_to_start_ind(nparts)
+    ncpu = nparts.size
+
+    clumpid = numpy.full(numpy.sum(nparts), numpy.nan)
+    iters = tqdm(range(ncpu)) if verbose else range(ncpu)
+    for cpu in iters:
+        i = start_ind[cpu]
+        j = nparts[cpu]
+        ff = open_unbinding(cpu, n, simpath)
+        try:
+            clumpid[i:i + j] = ff.read_ints()
+        finally:
+            # Do not keep one open file handle per CPU
+            ff.close()
+
+    return clumpid
+
+
+def read_clumps(n, simpath):
+    """
+    Read in a precomputed clump file `clump_N.dat`.
+
+    Parameters
+    ----------
+    n : int
+        The index of a redshift snapshot.
+    simpath : str
+        The complete path to the CSiBORG simulation.
+
+    Returns
+    -------
+    out : structured array
+        Structured array of the clumps.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the clump file does not exist.
+    """
+    n = str(n).zfill(5)
+    fname = join(simpath, "output_{}".format(n), "clump_{}.dat".format(n))
+    # Check the file exists.
+    if not isfile(fname):
+        raise FileNotFoundError(
+            "Clump file `{}` does not exist.".format(fname))
+
+    # Read in the clump array. This is how the columns must be written!
+    # A file with a single row is read as a 1-dimensional array, hence make
+    # sure that the columns can always be indexed along the second axis.
+    arr = numpy.atleast_2d(numpy.genfromtxt(fname))
+    cols = [("index", I64), ("level", I64), ("parent", I64), ("ncell", F64),
+            ("peak_x", F64), ("peak_y", F64), ("peak_z", F64),
+            ("rho-", F64), ("rho+", F64), ("rho_av", F64),
+            ("mass_cl", F64), ("relevance", F64)]
+    out = cols_to_structured(arr.shape[0], cols)
+    for i, name in enumerate(out.dtype.names):
+        out[name] = arr[:, i]
+    return out
+
+
+def read_mmain(n, srcdir, fname="Mmain_{}.npy"):
+    """
+    Load the `mmain` numpy array of central halos, whose mass includes the
+    contribution of their substructure, into a structured array.
+
+    Parameters
+    ----------
+    n : int
+        The index of the initial conditions (IC) realisation.
+    srcdir : str
+        The path to the folder containing the files.
+    fname : str, optional
+        The file name convention.  By default `Mmain_{}.npy`, where the
+        substituted value is `n`.
+
+    Returns
+    -------
+    out : structured array
+        Array with the central halo information.
+    """
+    data = numpy.load(join(srcdir, fname.format(n)))
+
+    # The order of the fields follows the order of the columns in the file
+    cols = [("index", I64), ("peak_x", F64), ("peak_y", F64),
+            ("peak_z", F64), ("mass_cl", F64), ("sub_frac", F64)]
+    out = cols_to_structured(data.shape[0], cols)
+    for column, field in enumerate(out.dtype.names):
+        out[field] = data[:, column]
+
+    return out
--- a/csiborgtools/match/init.py
+++ b/csiborgtools/match/init.py
@ -0,0 +1,17 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+from .match import brute_spatial_separation  # noqa
+from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa
--- a/csiborgtools/match/correlation.py
+++ b/csiborgtools/match/correlation.py
@ -0,0 +1,131 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+import numpy
+from Corrfunc.mocks import DDtheta_mocks
+from Corrfunc.utils import convert_3d_counts_to_cf
+from warnings import warn
+
+
+def get_randoms_sphere(N, seed=42):
+    """
+    Draw random points on the surface of a sphere.
+
+    Parameters
+    ----------
+    N : int
+        Number of points.
+    seed : int
+        Random seed.
+
+    Returns
+    -------
+    ra : 1-dimensional array
+        Right ascension in :math:`[0, 360)` degrees.
+    dec : 1-dimensional array
+        Declination in :math:`[-90, 90]` degrees.
+    """
+    rng = numpy.random.default_rng(seed)
+    # The right ascension is drawn first, then the declination
+    ra = rng.random(N) * 360
+    # Uniform in the sine of the declination
+    sin_dec = 2 * (rng.random(N) - 0.5)
+    dec = numpy.rad2deg(numpy.arcsin(sin_dec))
+    return ra, dec
+
+
+def wrapRA(ra, degrees=True):
+    """
+    Wrap the right ascension from :math:`[-180, 180)` to :math:`[0, 360)`
+    degrees or equivalently if `degrees=False` in radians. The input array
+    is modified in place and a warning is issued if it contains no negative
+    value.
+
+    Parameters
+    ----------
+    ra : 1-dimensional array
+        Right ascension values.
+    degrees : bool, optional
+        Whether the right ascension is in degrees.
+
+    Returns
+    -------
+    ra : 1-dimensional array
+        Wrapped around right ascension, the same object as the input.
+    """
+    full_turn = 360 if degrees else 2 * numpy.pi
+    negative = ra < 0
+    if not numpy.any(negative):
+        warn("No negative right ascension found.")
+    ra[negative] += full_turn
+    return ra
+
+
+def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
+                        Nmult=5, seed1=42, seed2=666):
+    """
+    Calculate the angular two-point correlation function with the
+    Landy-Szalay estimator. The coordinates must be provided in degrees, with
+    the right ascension and declination being in range of
+    :math:`[-180, 180]` and :math:`[-90, 90]` degrees, respectively.
+    If `RA2` is provided cross-correlates the first data set with the second,
+    otherwise the first data set is correlated with itself. The randoms are
+    sampled uniformly on the surface of a sphere and each set is `Nmult`
+    times larger than the corresponding data set.
+
+    Parameters
+    ----------
+    bins : 1-dimensional array
+        Angular bins to calculate the angular two-point correlation function.
+    RA1 : 1-dimensional array
+        Right ascension of the 1st data set, in degrees.
+    DEC1 : 1-dimensional array
+        Declination of the 1st data set, in degrees.
+    RA2 : 1-dimensional array, optional
+        Right ascension of the 2nd data set, in degrees.
+    DEC2 : 1-dimensional array, optional
+        Declination of the 2nd data set, in degrees.
+    nthreads : int, optional
+        Number of threads, by default 1.
+    Nmult : int, optional
+        Relative randoms size with respect to the data set. By default 5.
+    seed1 : int, optional
+        Seed to generate the first set of randoms.
+    seed2 : int, optional
+        Seed to generate the second set of randoms.
+
+    Returns
+    -------
+    cf : 1-dimensional array
+        The angular 2-point correlation function.
+    """
+    # Without a second data set fall back to the first one
+    if RA2 is None:
+        RA2, DEC2 = RA1, DEC1
+
+    ndata1, ndata2 = RA1.size, RA2.size
+    nrand1, nrand2 = ndata1 * Nmult, ndata2 * Nmult
+    # The randoms cover the whole sphere
+    rand_ra1, rand_dec1 = get_randoms_sphere(nrand1, seed1)
+    rand_ra2, rand_dec2 = get_randoms_sphere(nrand2, seed2)
+    # Wrap copies of the RA so that the inputs are left untouched
+    RA1 = wrapRA(numpy.copy(RA1))
+    RA2 = wrapRA(numpy.copy(RA2))
+
+    def count_pairs(ra_a, dec_a, ra_b, dec_b):
+        # Pair counts between the sets `a` and `b`
+        return DDtheta_mocks(0, nthreads, bins, ra_a, dec_a,
+                             RA2=ra_b, DEC2=dec_b)
+
+    d1d2 = count_pairs(RA1, DEC1, RA2, DEC2)
+    d1r2 = count_pairs(RA1, DEC1, rand_ra2, rand_dec2)
+    d2r1 = count_pairs(RA2, DEC2, rand_ra1, rand_dec1)
+    r1r2 = count_pairs(rand_ra1, rand_dec1, rand_ra2, rand_dec2)
+    # Combine the pair counts into the correlation function
+    return convert_3d_counts_to_cf(ndata1, ndata2, nrand1, nrand2,
+                                   d1d2, d1r2, d2r1, r1r2)
--- a/csiborgtools/match/match.py
+++ b/csiborgtools/match/match.py
@ -0,0 +1,67 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+import numpy
+from tqdm import tqdm
+from astropy.coordinates import SkyCoord
+
+
+def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
+    """
+    Calculate for each point in `c1` the `N` closest points in `c2`.
+
+    Parameters
+    ----------
+    c1 : `astropy.coordinates.SkyCoord`
+        Coordinates of the first set of points.
+    c2 : `astropy.coordinates.SkyCoord`
+        Coordinates of the second set of points.
+    angular : bool, optional
+        Whether to calculate angular separation or 3D separation. By default
+        `False` and 3D separation is calculated.
+    N : int, optional
+        Number of closest points in `c2` to each object in `c1` to return.
+        By default `None` and all points of `c2` are returned.
+    verbose : bool, optional
+        Verbosity flag. By default `False`.
+
+    Returns
+    -------
+    sep : 2-dimensional array
+        Separation of each object in `c1` to `N` closest objects in `c2`. The
+        array shape is `(c1.size, N)`. Separation is in units of `c1`.
+    indxs : 2-dimensional array
+        Indexes of the closest objects in `c2` for each object in `c1`. The
+        array shape is `(c1.size, N)`.
+
+    Raises
+    ------
+    TypeError
+        If `c1` or `c2` is not a `SkyCoord`.
+    ValueError
+        If `N` is larger than the number of points in `c2`.
+    """
+    if not (isinstance(c1, SkyCoord) and isinstance(c2, SkyCoord)):
+        raise TypeError("`c1` & `c2` must be `astropy.coordinates.SkyCoord`.")
+    N1 = c1.size
+    N2 = c2.size if N is None else N
+    # Asking for more neighbours than there are points cannot be satisfied
+    if N2 > c2.size:
+        raise ValueError("`N = {}` is larger than the number of points in "
+                         "`c2`, which is {}.".format(N2, c2.size))
+
+    # Pre-allocate arrays. An integer array cannot hold NaN, so the indices
+    # start from zeros and every row is overwritten in the loop below.
+    sep = numpy.full((N1, N2), numpy.nan)
+    indxs = numpy.zeros((N1, N2), dtype=int)
+    iters = tqdm(range(N1)) if verbose else range(N1)
+    for i in iters:
+        if angular:
+            dist = c1[i].separation(c2).value
+        else:
+            dist = c1[i].separation_3d(c2).value
+        # Sort the distances and keep the closest ones
+        sort = numpy.argsort(dist)[:N2]
+        indxs[i, :] = sort
+        sep[i, :] = dist[sort]
+
+    return sep, indxs
--- a/csiborgtools/units/init.py
+++ b/csiborgtools/units/init.py
@ -0,0 +1,18 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+from .transforms import (cartesian_to_radec, convert_mass_cols,  # noqa
+                         convert_position_cols)  # noqa
+from .box_units import BoxUnits  # noqa
--- a/csiborgtools/units/box_units.py
+++ b/csiborgtools/units/box_units.py
@ -0,0 +1,166 @@
+# Copyright (C) 2022 Richard Stiskalek, Deaglan Bartlett
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Simulation box unit transformations.
+"""
+
+
+from astropy.cosmology import LambdaCDM
+from astropy import (constants, units)
+from ..io import read_info
+
+
+# Conversion factors
+MSUNCGS = constants.M_sun.cgs.value  # Solar mass as a plain cgs value
+KPC_TO_CM = 3.08567758149137e21  # Centimetres per kiloparsec
+PI = 3.1415926535897932384626433  # Hard-coded value of pi
+
+
+class BoxUnits:
+    r"""
+    Converter between simulation box units and physical units.
+
+    Parameters
+    ----------
+    Nsnap : int
+        Snapshot index.
+    simpath : str
+        Path to the simulation where its snapshot index folders are stored.
+    """
+
+    def __init__(self, Nsnap, simpath):
+        """
+        Load the snapshot info file and derive the unit system from it.
+        """
+        snapshot_info = read_info(Nsnap, simpath)
+        # Every listed info entry becomes a float attribute of the same name.
+        for key in ("boxlen", "time", "aexp", "H0",
+                    "omega_m", "omega_l", "omega_k", "omega_b",
+                    "unit_l", "unit_d", "unit_t"):
+            setattr(self, key, float(snapshot_info[key]))
+
+        self.h = self.H0 / 100
+        # The cosmology is built while `H0` still holds the info-file value
+        # (presumably km/s/Mpc -- TODO confirm); `H0` is overwritten with its
+        # box-unit value a few lines below.
+        self.cosmo = LambdaCDM(H0=self.H0, Om0=self.omega_m, Ode0=self.omega_l,
+                               Tcmb0=2.725 * units.K, Ob0=self.omega_b)
+        # Constants in box units
+        self.G = constants.G.cgs.value * (self.unit_d * self.unit_t ** 2)
+        self.H0 = self.H0 * 1e5 / (1e3 * KPC_TO_CM) * self.unit_t
+        self.c = constants.c.cgs.value * self.unit_t / self.unit_l
+        self.rho_crit = 3 * self.H0 ** 2 / (8 * PI * self.G)
+
+    def box2kpc(self, length):
+        r"""
+        Convert length from box units to :math:`\mathrm{kpc}`.
+
+        Parameters
+        ----------
+        length : float
+            Length in box units.
+
+        Returns
+        -------
+        length : float
+            Length in :math:`\mathrm{kpc}`
+        """
+        length_cgs = length * self.unit_l
+        return length_cgs / KPC_TO_CM
+
+    def kpc2box(self, length):
+        r"""
+        Convert length from :math:`\mathrm{kpc}` to box units.
+
+        Parameters
+        ----------
+        length : float
+            Length in :math:`\mathrm{kpc}`
+
+        Returns
+        -------
+        length : float
+            Length in box units.
+        """
+        return (length / self.unit_l) * KPC_TO_CM
+
+    def solarmass2box(self, mass):
+        r"""
+        Convert mass from :math:`M_\odot` to box units.
+
+        Parameters
+        ----------
+        mass : float
+            Mass in :math:`M_\odot`.
+
+        Returns
+        -------
+        mass : float
+            Mass in box units.
+        """
+        # Box mass unit is density unit times length unit cubed.
+        box_mass_unit = self.unit_d * self.unit_l ** 3
+        return (mass * MSUNCGS) / box_mass_unit
+
+    def box2solarmass(self, mass):
+        r"""
+        Convert mass from box units to :math:`M_\odot`.
+
+        TODO: check this.
+
+        Parameters
+        ----------
+        mass : float
+            Mass in box units.
+
+        Returns
+        -------
+        mass : float
+            Mass in :math:`M_\odot`.
+        """
+        box_mass_unit = self.unit_d * self.unit_l**3
+        mass_cgs = mass * box_mass_unit
+        return mass_cgs / MSUNCGS
+
+    def box2dens(self, density):
+        r"""
+        Convert density from box units to :math:`M_\odot / \mathrm{pc}^3`.
+
+        TODO: check this.
+
+        Parameters
+        ----------
+        density : float
+            Density in box units.
+
+        Returns
+        -------
+        density : float
+            Density in :math:`M_\odot / \mathrm{pc}^3`.
+        """
+        cm_per_pc = KPC_TO_CM * 1e-3
+        # cgs density -> g/pc^3 -> Msun/pc^3
+        return density * self.unit_d * cm_per_pc**3 / MSUNCGS
+
+    def dens2box(self, density):
+        r"""
+        Convert density from :math:`M_\odot / \mathrm{pc}^3` to box units.
+
+        TODO: check this.
+
+        Parameters
+        ----------
+        density : float
+            Density in :math:`M_\odot / \mathrm{pc}^3`.
+
+        Returns
+        -------
+        density : float
+            Density in box units.
+        """
+        cm_per_pc = KPC_TO_CM * 1e-3
+        # Msun/pc^3 -> g/pc^3 -> g/cm^3 -> box units
+        return density * MSUNCGS / cm_per_pc**3 / self.unit_d
--- a/csiborgtools/units/transforms.py
+++ b/csiborgtools/units/transforms.py
@ -0,0 +1,110 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Various coordinate transformations.
+"""
+
+
+import numpy
+
+
+little_h = 0.705  # presumably the dimensionless Hubble parameter h -- confirm
+BOXSIZE = 677.7 / little_h  # Mpc. Otherwise positions in [0, 1].
+BOXMASS = 3.749e19  # Msun
+
+
+def cartesian_to_radec(arr, xpar="peak_x", ypar="peak_y", zpar="peak_z"):
+    r"""
+    Read the `x`, `y` and `z` fields of the record array `arr` and convert
+    them to the radial distance :math:`r` in coordinate units, right
+    ascension :math:`\mathrm{RA} \in [0, 360)` degrees and declination
+    :math:`\delta \in [-90, 90]` degrees.
+
+    Parameters
+    ----------
+    arr : record array
+        Record array with the Cartesian coordinates.
+    xpar : str, optional
+        Name of the x coordinate in the record array.
+    ypar : str, optional
+        Name of the y coordinate in the record array.
+    zpar : str, optional
+        Name of the z coordinate in the record array.
+
+    Returns
+    -------
+    dist : 1-dimensional array
+        Radial distance.
+    ra : 1-dimensional array
+        Right ascension.
+    dec : 1-dimensional array
+        Declination.
+    """
+    xs = arr[xpar]
+    ys = arr[ypar]
+    zs = arr[zpar]
+
+    dist = numpy.sqrt(xs**2 + ys**2 + zs**2)
+    ra = numpy.rad2deg(numpy.arctan2(ys, xs))
+    dec = numpy.rad2deg(numpy.arcsin(zs/dist))
+    # `arctan2` returns angles in (-180, 180]; wrap the negative ones.
+    wrap = ra < 0
+    ra[wrap] += 360
+
+    return dist, ra, dec
+
+
+def convert_mass_cols(arr, cols):
+    r"""
+    Scale mass columns from box units to :math:`M_{\odot}` in place. `arr`
+    is modified directly and nothing is returned.
+
+    Parameters
+    ----------
+    arr : structured array
+        The array whose columns are to be converted.
+    cols : str or list of str
+        The mass columns to be converted.
+
+    Returns
+    -------
+    None
+    """
+    # A single column name is treated as a one-element list.
+    names = [cols] if isinstance(cols, str) else cols
+    for name in names:
+        arr[name] *= BOXMASS
+
+
+def convert_position_cols(arr, cols, zero_centered=True):
+    r"""
+    Scale position columns from box units to :math:`\mathrm{Mpc}` in place.
+    `arr` is modified directly and nothing is returned.
+
+    Parameters
+    ----------
+    arr : structured array
+        The array whose columns are to be converted.
+    cols : str or list of str
+        The position columns to be converted.
+    zero_centered : bool, optional
+        Whether to translate the well-resolved origin in the centre of the
+        simulation to the :math:`(0, 0 , 0)` point. By default `True`.
+
+    Returns
+    -------
+    None
+    """
+    names = [cols] if isinstance(cols, str) else cols
+    half_box = BOXSIZE / 2
+    for name in names:
+        arr[name] *= BOXSIZE
+        if not zero_centered:
+            continue
+        # Shift so that the box centre sits at the origin.
+        arr[name] -= half_box
--- a/csiborgtools/utils/init.py
+++ b/csiborgtools/utils/init.py
@ -0,0 +1,18 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+from .recarray_manip import (cols_to_structured, add_columns, rm_columns,  # noqa
+                             list_to_ndarray, array_to_structured,  # noqa
+                             flip_cols)  # noqa
--- a/csiborgtools/utils/recarray_manip.py
+++ b/csiborgtools/utils/recarray_manip.py
@ -0,0 +1,211 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Utility functions for manipulating structured arrays.
+"""
+
+
+import numpy
+
+
+def cols_to_structured(N, cols):
+    """
+    Allocate a structured array whose fields are described by `cols`.
+
+    Parameters
+    ----------
+    N : int
+        Structured array size.
+    cols: list of tuples
+        Column names and dtypes. Each tuple must be written as
+        `(name, dtype)`.
+
+    Returns
+    -------
+    out : structured array
+        Structured array of size `N`, initialised by filling with NaN.
+
+    Raises
+    ------
+    TypeError
+        If `cols` is not a list or any of its elements is not a tuple.
+    """
+    # The negation has to wrap both conditions; otherwise a list holding
+    # non-tuple elements slips through the check.
+    if not (isinstance(cols, list)
+            and all(isinstance(c, tuple) for c in cols)):
+        raise TypeError("`cols` must be a list of tuples.")
+
+    dtype = {"names": [col[0] for col in cols],
+             "formats": [col[1] for col in cols]}
+    return numpy.full(N, numpy.nan, dtype=dtype)
+
+
+def add_columns(arr, X, cols):
+    """
+    Add new columns to a record array `arr`. Creates a new array.
+
+    Parameters
+    ----------
+    arr : record array
+        The record array to add columns to.
+    X : (list of) 1-dimensional array(s) or 2-dimensional array
+        Columns to be added. A 2-dimensional array must be of shape
+        `(arr.size, len(cols))`.
+    cols : str or list of str
+        Column names to be added.
+
+    Returns
+    -------
+    out : record array
+        The new record array with added values.
+
+    Raises
+    ------
+    ValueError
+        If the number of columns of `X` does not match `cols` or the number
+        of rows of `X` does not match the size of `arr`.
+    """
+    # Make sure cols is a list of str and X a 2D array
+    cols = [cols] if isinstance(cols, str) else cols
+    if isinstance(X, numpy.ndarray) and X.ndim == 1:
+        X = X.reshape(-1, 1)
+    if isinstance(X, list) and all(x.ndim == 1 for x in X):
+        X = numpy.vstack([X]).T
+    if len(cols) != X.shape[1]:
+        raise ValueError("Number of columns of `X` does not match `cols`.")
+    if arr.size != X.shape[0]:
+        raise ValueError("Number of rows of `X` does not match size of `arr`.")
+
+    # Get the new data types. The i-th new column is `X[:, i]`; indexing the
+    # i-th row instead fails whenever there are more columns than rows.
+    dtype = arr.dtype.descr
+    for i, col in enumerate(cols):
+        dtype.append((col, X[:, i].dtype.descr[0][1]))
+
+    # Copy over the old columns, then the new ones
+    out = numpy.full(arr.size, numpy.nan, dtype=dtype)
+    for col in arr.dtype.names:
+        out[col] = arr[col]
+    for i, col in enumerate(cols):
+        out[col] = X[:, i]
+
+    return out
+
+
+def rm_columns(arr, cols):
+    """
+    Drop the columns `cols` from a record array `arr`. The input is left
+    untouched and a new array is returned.
+
+    Parameters
+    ----------
+    arr : record array
+        The record array to remove columns from.
+    cols : str or list of str
+        Column names to be removed.
+
+    Returns
+    -------
+    out : record array
+        Record array with removed columns.
+
+    Raises
+    ------
+    ValueError
+        If any of `cols` is not a column of `arr`.
+    """
+    names = arr.dtype.names
+    to_drop = [cols] if isinstance(cols, str) else cols
+
+    # Report the first requested column that the array does not have
+    missing = [col for col in to_drop if col not in names]
+    if missing:
+        raise ValueError("Column `{}` not in `arr`.".format(missing[0]))
+
+    # Dtype description of the columns that are kept
+    kept = [descr for descr, name in zip(arr.dtype.descr, names)
+            if name not in to_drop]
+
+    # Allocate the output and copy the surviving columns over
+    out = numpy.full(arr.size, numpy.nan, kept)
+    for name in out.dtype.names:
+        out[name] = arr[name]
+
+    return out
+
+
+def list_to_ndarray(arrs, cols):
+    """
+    Stack selected columns of a list of structured arrays of CSiBORG
+    simulation catalogues into a single 3-dimensional array.
+
+    Parameters
+    ----------
+    arrs : list of structured arrays
+        List of CSiBORG catalogues.
+    cols : str or list of str
+        Columns to be extracted from the CSiBORG catalogues.
+
+    Returns
+    -------
+    out : 3-dimensional array
+        Catalogue array of shape `(n_realisations, n_samples, n_cols)`, where
+        `n_samples` is the maximum number of samples over the CSiBORG
+        catalogues. Entries beyond the size of a shorter catalogue are NaN.
+
+    Raises
+    ------
+    TypeError
+        If `arrs` is not a list.
+    """
+    if not isinstance(arrs, list):
+        raise TypeError("`arrs` must be a list of structured arrays.")
+    names = [cols] if isinstance(cols, str) else cols
+
+    # NaN-filled output, sized by the largest catalogue
+    shape = (len(arrs), max(cat.size for cat in arrs), len(names))
+    out = numpy.full(shape, numpy.nan)
+    for i, cat in enumerate(arrs):
+        for j, name in enumerate(names):
+            out[i, :cat.size, j] = cat[name]
+    return out
+
+
+def array_to_structured(arr, cols):
+    """
+    Create a structured array from a 2-dimensional array.
+
+    Parameters
+    ----------
+    arr : 2-dimensional array
+        Original array of shape `(n_samples, n_cols)`.
+    cols : str or list of str
+        Columns of the structured array, one name per column of `arr`.
+
+    Returns
+    -------
+    out : structured array
+        The output structured array. Every field has the dtype of `arr`.
+
+    Raises
+    ------
+    TypeError
+        If `arr` is not 2-dimensional or its number of columns does not
+        match the number of names in `cols`.
+    """
+    cols = [cols] if isinstance(cols, str) else cols
+    # Either failure must be rejected, hence `or`. The short-circuit also
+    # keeps `arr.shape[1]` from being evaluated on non-2D input.
+    if arr.ndim != 2 or arr.shape[1] != len(cols):
+        raise TypeError("`arr` must be a 2-dimensional array of "
+                        "shape `(n_samples, n_cols)`.")
+
+    dtype = {"names": cols, "formats": [arr.dtype] * len(cols)}
+    out = numpy.full(arr.shape[0], numpy.nan, dtype=dtype)
+    for i, col in enumerate(cols):
+        out[col] = arr[:, i]
+
+    return out
+
+
+def flip_cols(arr, col1, col2):
+    """
+    Swap the values stored in columns `col1` and `col2`. `arr` is modified
+    in place and nothing is returned.
+
+    Parameters
+    ----------
+    arr : structured array
+        The array whose columns are to be swapped.
+    col1 : str
+        The first column name.
+    col2 : str
+        The second column name.
+
+    Returns
+    -------
+    None
+    """
+    # `arr[col1]` is copied before it is overwritten; the right-hand side is
+    # evaluated in full before either assignment happens.
+    arr[col1], arr[col2] = arr[col2], numpy.copy(arr[col1])