csiborgtools/scripts/pre_dumppart.py
Richard Stiskalek 1a9e6177d7
Switch to h5py format (#52)
* Edit the particle paths

* Remove script

* Add h5py to dumping

* Minor adjustments

* add h5py support

* remove split path

* h5py support

* Type

* Edit initmatch paths

* Shorten func

* dist_centmass to work with 2D arrays

* Forgot to return the centre of mass

* Fixed code

* Fix halo bug

* Start MPI broadcasting

* Mini bug

* Remove commenting

* Remove test statement

* Fix index

* Printing from rank 0 only

* Move where clump index stored

* Add dtype options

* Add dtype options
2023-05-02 13:57:13 +01:00

119 lines
4.2 KiB
Python

# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to load in the simulation particles and dump them to a HDF5 file.
Creates a mapping to access directly particles of a single clump.
"""
from datetime import datetime
from distutils.util import strtobool
from gc import collect
import h5py
import numpy
from mpi4py import MPI
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
from argparse import ArgumentParser
# We set up the MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
# And next parse all the arguments and set up CSiBORG objects
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
parser.add_argument("--pos_only", type=lambda x: bool(strtobool(x)),
help="Do we only dump positions?")
parser.add_argument("--dtype", type=str, choices=["float32", "float64"],
default="float32",)
args = parser.parse_args()
verbose = nproc == 1
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
if args.pos_only:
pars_extract = ['x', 'y', 'z', 'M']
else:
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=False)
else:
ics = args.ics
# MPI loop over individual simulations. We read in the particles from RAMSES
# files and dump them to a HDF5 file.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
nsim = ics[i]
nsnap = max(paths.get_snapshots(nsim))
print(f"{datetime.now()}: Rank {rank} loading particles {nsim}.",
flush=True)
parts = partreader.read_particle(nsnap, nsim, pars_extract,
return_structured=False, verbose=verbose)
if args.dtype == "float64":
parts = parts.astype(numpy.float64)
kind = "pos" if args.pos_only else None
print(f"{datetime.now()}: Rank {rank} dumping particles from {nsim}.",
flush=True)
with h5py.File(paths.particle_h5py_path(nsim, kind, args.dtype), "w") as f:
f.create_dataset("particles", data=parts)
del parts
collect()
print(f"{datetime.now()}: Rank {rank} finished dumping of {nsim}.",
flush=True)
# If we are dumping only particle positions, then we are done.
if args.pos_only:
continue
print(f"{datetime.now()}: Rank {rank} mapping particles from {nsim}.",
flush=True)
# If not, then load the clump IDs and prepare the memory mapping. We find
# which array positions correspond to which clump IDs and save it. With
# this we can then lazily load into memory the particles for each clump.
part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
rawdata=True)
clumpinds = cat["index"]
# Some of the clumps have no particles, so we do not loop over them
clumpinds = clumpinds[numpy.isin(clumpinds, part_cids)]
out = {}
for i, cid in enumerate(tqdm(clumpinds) if verbose else clumpinds):
out.update({str(cid): numpy.where(part_cids == cid)[0]})
# We save the mapping to a HDF5 file
with h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), "w") as f:
for cid, indxs in out.items():
f.create_dataset(cid, data=indxs)
del part_cids, cat, clumpinds, out
collect()