Within halo work and NFW fit (#4)

* add listing of snapshots * change distance to comoving * ignore cp files * rename nb * add str to list * add NFW profile shapes * add fits imports * Rename to Nsnap * in clumps_read only select props * make clumpid int * expand doc * add import * edit readme * distribute halos * add profile & posterior * add import * add import * add documentation * add rvs and init guess * update todo * update nb * add file * return end index too * change clump_ids format to int32 * skeleton of dump particle * update nb * add func to drop 0 clump indxs parts * add import * add halo dump * switch to float32 * Update TODO * update TODO * add func that loads a split * add halo object * Rename to clump * make post work with a clump * add optimiser * add Nsplits * ignore submission scripts * ignore .out * add dumppath * add job splitting * add split halos script * rename file * renaem files * rm file * rename imports * edit desc * add pick clump * add number of particles * update TODO * update todo * add script * add dumping * change dumpdir structure * change dumpdir * add import * Remove tqdm * Increase the number of splits * rm shuffle option * Change to remove split * add emojis * fix part counts in splits * change num of splits * rm with particle cut * keep splits * fit only if 10 part and more * add min distance * rm warning about not set vels * update TODO * calculate rho0 too * add results collection * add import * add func to combine splits * update TODO * add extract cols * update nb * update TODO
2025-06-08 18:01:11 +00:00 · 2022-10-30 20:16:56 +00:00 · 2022-10-30 20:16:56 +00:00 · 8a56c22813
commit 8a56c22813
parent 85a6a6d58a
15 changed files with 3815 additions and 397 deletions
--- a/csiborgtools/io/init.py
+++ b/csiborgtools/io/init.py
@ -13,8 +13,10 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

-from .readsim import (get_csiborg_ids, get_sim_path, get_snapshot_path,  # noqa
-                      read_info,  # noqa
+from .readsim import (get_csiborg_ids, get_sim_path, get_snapshots,  # noqa
+                      get_snapshot_path, read_info, nparts_to_start_ind,  # noqa
                      open_particle, open_unbinding, read_particle,  # noqa
+                      drop_zero_indx,  # noqa
                      read_clumpid, read_clumps, read_mmain)  # noqa
 from .readobs import (read_planck2015, read_2mpp)  # noqa
+from .outsim import (dump_split, combine_splits)  # noqa
--- a/csiborgtools/io/outsim.py
+++ b/csiborgtools/io/outsim.py
@ -0,0 +1,124 @@
+# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+I/O functions for analysing the CSiBORG realisations.
+"""
+
+
+import numpy
+from os.path import join
+from os import remove
+from tqdm import trange
+from .readsim import (get_sim_path, read_clumps)
+
+I64 = numpy.int64
+F64 = numpy.float64
+
+
+def dump_split(arr, Nsplit, Nsim, Nsnap, outdir):
+    """
+    Dump an array from a split.
+
+    Parameters
+    ----------
+    arr : n-dimensional or structured array
+        Array to be saved.
+    Nsplit : int
+        The split index.
+    Nsim : int
+        The CSiBORG realisation index.
+    Nsnap : int
+        The index of a redshift snapshot.
+    outdir : string
+        Directory where to save the temporary files.
+
+    Returns
+    -------
+    None
+    """
+    Nsim = str(Nsim).zfill(5)
+    Nsnap = str(Nsnap).zfill(5)
+    fname = join(outdir, "ramses_out_{}_{}_{}.npy".format(Nsim, Nsnap, Nsplit))
+    numpy.save(fname, arr)
+
+
+def combine_splits(Nsplits, Nsim, Nsnap, outdir, cols_add, remove_splits=False,
+                   verbose=True):
+    """
+    Combine results of many splits saved from `dump_split`. Identifies to which
+    clump the clumps in the split correspond to by matching their index.
+    Returns an array that contains the original clump data along with the newly
+    calculated quantities.
+
+    Paramaters
+    ----------
+    Nsplits : int
+        The total number of clump splits.
+    Nsim : int
+        The CSiBORG realisation index.
+    Nsnap : int
+        The index of a redshift snapshot.
+    outdir : str
+        Directory where to save the new array.
+    cols_add : list of `(str, dtype)`
+        Colums to add. Must be formatted as, for example,
+        `[("npart", numpy.float64), ("totpartmass", numpy.float64)]`.
+    remove_splits : bool, optional
+        Whether to remove the splits files. By default `False`.
+    verbose : bool, optional
+        Verbosity flag. By default `True`.
+
+    Returns
+    -------
+    out : structured array
+        Clump array with appended results from the splits.
+    """
+    # Will be grabbing these columns from each split
+    cols_add = [("npart", I64), ("totpartmass", F64), ("logRs", F64),
+                ("rho0", F64)]
+    # Load clumps to see how many there are and will add to this array
+    simpath = get_sim_path(Nsim)
+    clumps = read_clumps(Nsnap, simpath, cols=None)
+    # Get the old + new dtypes and create an empty array
+    descr = clumps.dtype.descr + cols_add
+    out = numpy.full(clumps.size, numpy.nan, dtype=descr)
+    # Now put the old values into the array
+    for par in clumps.dtype.names:
+        out[par] = clumps[par]
+
+    # Filename of splits data
+    froot = "ramses_out_{}_{}".format(str(Nsim).zfill(5), str(Nsnap).zfill(5))
+    fname = join(outdir, froot + "_{}.npy")
+
+    # Iterate over splits and add to the output array
+    cols_add_names = [col[0] for col in cols_add]
+    iters = trange(Nsplits) if verbose else range(Nsplits)
+    for n in iters:
+        fnamesplit = fname.format(n)
+        arr = numpy.load(fnamesplit)
+
+        # Check that all halo indices from the split are in the clump file
+        if not numpy.alltrue(numpy.isin(arr["index"], out["index"])):
+            raise KeyError("....")
+        # Mask of where to put the values from the split
+        mask = numpy.isin(out["index"], arr["index"])
+        for par in cols_add_names:
+            out[par][mask] = arr[par]
+
+        # Now remove this split
+        if remove_splits:
+            remove(fnamesplit)
+
+    return out
--- a/csiborgtools/io/readsim.py
+++ b/csiborgtools/io/readsim.py
@ -12,8 +12,9 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-
-"""Functions to read in the particle and clump files."""
+"""
+Functions to read in the particle and clump files.
+"""

 import numpy
 from scipy.io import FortranFile
@ -84,6 +85,27 @@ def get_sim_path(n, fname="ramses_out_{}", srcdir="/mnt/extraspace/hdesmond"):
    return join(srcdir, fname.format(n))


+def get_snapshots(simpath):
+    """
+    Get the list of snapshots for the given IC realisation.
+
+    Parameters
+    ----------
+    simpath : str
+        Path to the CSiBORG IC realisation.
+
+    Returns
+    -------
+    snapshots : 1-dimensional array
+        Array of snapshot IDs.
+    """
+    # Get all files in simpath that start with output_
+    snaps = glob(join(simpath, "output_*"))
+    # Take just the last _00XXXX from each file  and strip zeros
+    snaps = [int(snap.split('_')[-1].lstrip('0')) for snap in snaps]
+    return numpy.sort(snaps)
+
+
 def get_snapshot_path(Nsnap, simpath):
    """
    Get a path to a CSiBORG IC realisation snapshot.
@ -135,13 +157,13 @@ def read_info(Nsnap, simpath):
    return {key: val for key, val in zip(keys, vals)}


-def open_particle(n, simpath, verbose=True):
+def open_particle(Nsnap, simpath, verbose=True):
    """
    Open particle files to a given CSiBORG simulation.

    Parameters
    ----------
-    n : int
+    Nsnap : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
@ -156,9 +178,9 @@ def open_particle(n, simpath, verbose=True):
        Opened part files.
    """
    # Zeros filled snapshot number and the snapshot path
-    nout = str(n).zfill(5)
-    snappath = get_snapshot_path(n, simpath)
-    ncpu = int(read_info(n, simpath)["ncpu"])
+    nout = str(Nsnap).zfill(5)
+    snappath = get_snapshot_path(Nsnap, simpath)
+    ncpu = int(read_info(Nsnap, simpath)["ncpu"])

    if verbose:
        print("Reading in output `{}` with ncpu = `{}`.".format(nout, ncpu))
@ -245,7 +267,7 @@ def nparts_to_start_ind(nparts):
    return numpy.hstack([[0], numpy.cumsum(nparts[:-1])])


-def read_particle(pars_extract, n, simpath, verbose=True):
+def read_particle(pars_extract, Nsnap, simpath, verbose=True):
    """
    Read particle files of a simulation at a given snapshot and return
    values of `pars_extract`.
@ -254,7 +276,7 @@ def read_particle(pars_extract, n, simpath, verbose=True):
    ----------
    pars_extract : list of str
        Parameters to be extacted.
-    n : int
+    Nsnap : int
        The index of the redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
@ -267,17 +289,19 @@ def read_particle(pars_extract, n, simpath, verbose=True):
        The data read from the particle file.
    """
    # Open the particle files
-    nparts, partfiles = open_particle(n, simpath)
+    nparts, partfiles = open_particle(Nsnap, simpath)
    if verbose:
        print("Opened {} particle files.".format(nparts.size))
    ncpu = nparts.size
    # Order in which the particles are written in the FortranFile
-    forder = [("x", F16), ("y", F16), ("z", F16),
-              ("vx", F16), ("vy", F16), ("vz", F16),
+    forder = [("x", F32), ("y", F32), ("z", F32),
+              ("vx", F32), ("vy", F32), ("vz", F32),
              ("M", F32), ("ID", I32), ("level", I32)]
    fnames = [fp[0] for fp in forder]
    fdtypes = [fp[1] for fp in forder]
    # Check there are no strange parameters
+    if isinstance(pars_extract, str):
+        pars_extract = [pars_extract]
    for p in pars_extract:
        if p not in fnames:
            raise ValueError("Undefined parameter `{}`. Must be one of `{}`."
@ -305,7 +329,7 @@ def read_particle(pars_extract, n, simpath, verbose=True):
    return out


-def open_unbinding(cpu, n, simpath):
+def open_unbinding(cpu, Nsnap, simpath):
    """
    Open particle files to a given CSiBORG simulation. Note that to be
    consistent CPU is incremented by 1.
@ -314,7 +338,7 @@ def open_unbinding(cpu, n, simpath):
    ----------
    cpu : int
        The CPU index.
-    n : int
+    Nsnap : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
@ -324,7 +348,7 @@ def open_unbinding(cpu, n, simpath):
    unbinding : `scipy.io.FortranFile`
        The opened unbinding FortranFile.
    """
-    nout = str(n).zfill(5)
+    nout = str(Nsnap).zfill(5)
    cpu = str(cpu + 1).zfill(5)
    fpath = join(simpath, "output_{}".format(nout),
                 "unbinding_{}.out{}".format(nout, cpu))
@ -332,13 +356,13 @@ def open_unbinding(cpu, n, simpath):
    return FortranFile(fpath)


-def read_clumpid(n, simpath, verbose=True):
+def read_clumpid(Nsnap, simpath, verbose=True):
    """
-    Read clump IDs from unbinding files.
+    Read clump IDs of halos from unbinding files.

    Parameters
    ----------
-    n : int
+    Nsnap : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
@ -350,52 +374,94 @@ def read_clumpid(n, simpath, verbose=True):
    clumpid : 1-dimensional array
        The array of clump IDs.
    """
-    nparts, __ = open_particle(n, simpath, verbose)
+    nparts, __ = open_particle(Nsnap, simpath, verbose)
    start_ind = nparts_to_start_ind(nparts)
    ncpu = nparts.size

-    clumpid = numpy.full(numpy.sum(nparts), numpy.nan)
+    clumpid = numpy.full(numpy.sum(nparts), numpy.nan, dtype=I32)
    iters = tqdm(range(ncpu)) if verbose else range(ncpu)
    for cpu in iters:
        i = start_ind[cpu]
        j = nparts[cpu]
-        ff = open_unbinding(cpu, n, simpath)
+        ff = open_unbinding(cpu, Nsnap, simpath)
        clumpid[i:i + j] = ff.read_ints()

    return clumpid


-def read_clumps(n, simpath):
+def drop_zero_indx(clump_ids, particles):
    """
-    Read in a precomputed clump file `clump_N.dat`.
+    Drop from `clump_ids` and `particles` entries whose clump index is 0.

    Parameters
    ----------
-    n : int
+    clump_ids : 1-dimensional array
+        Array of clump IDs.
+    particles : structured array
+        Array of the particle data.
+
+    Returns
+    -------
+    clump_ids : 1-dimensional array
+        The array of clump IDs after removing zero clump ID entries.
+    particles : structured array
+        The particle data after removing zero clump ID entries.
+    """
+    mask = clump_ids != 0
+    return clump_ids[mask], particles[mask]
+
+
+def read_clumps(Nsnap, simpath, cols=None):
+    """
+    Read in a clump file `clump_Nsnap.dat`.
+
+    Parameters
+    ----------
+    Nsnap : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
+    cols : list of str, optional.
+        Columns to extract. By default `None` and all columns are extracted.

    Returns
    -------
    out : structured array
        Structured array of the clumps.
    """
-    n = str(n).zfill(5)
-    fname = join(simpath, "output_{}".format(n), "clump_{}.dat".format(n))
+    Nsnap = str(Nsnap).zfill(5)
+    fname = join(simpath, "output_{}".format(Nsnap),
+                 "clump_{}.dat".format(Nsnap))
    # Check the file exists.
    if not isfile(fname):
        raise FileExistsError("Clump file `{}` does not exist.".format(fname))

    # Read in the clump array. This is how the columns must be written!
-    arr = numpy.genfromtxt(fname)
-    cols = [("index", I64), ("level", I64), ("parent", I64), ("ncell", F64),
-            ("peak_x", F64), ("peak_y", F64), ("peak_z", F64),
-            ("rho-", F64), ("rho+", F64), ("rho_av", F64),
-            ("mass_cl", F64), ("relevance", F64)]
-    out = cols_to_structured(arr.shape[0], cols)
-    for i, name in enumerate(out.dtype.names):
-        out[name] = arr[:, i]
+    data = numpy.genfromtxt(fname)
+    clump_cols = [("index", I64), ("level", I64), ("parent", I64),
+                  ("ncell", F64), ("peak_x", F64), ("peak_y", F64),
+                  ("peak_z", F64), ("rho-", F64), ("rho+", F64),
+                  ("rho_av", F64), ("mass_cl", F64), ("relevance", F64)]
+    out0 = cols_to_structured(data.shape[0], clump_cols)
+    for i, name in enumerate(out0.dtype.names):
+        out0[name] = data[:, i]
+    # If take all cols then return
+    if cols is None:
+        return out0
+    # Make sure we have a list
+    cols = [cols] if isinstance(cols, str) else cols
+    # Get the indxs of clump_cols to output
+    clump_names = [col[0] for col in clump_cols]
+    indxs = [None] * len(cols)
+    for i, col in enumerate(cols):
+        if col not in clump_names:
+            raise KeyError("...")
+        indxs[i] = clump_names.index(col)
+    # Make an array and fill it
+    out = cols_to_structured(out0.size, [clump_cols[i] for i in indxs])
+    for name in out.dtype.names:
+        out[name] = out0[name]
+
    return out