Switch to h5py format (#52)

* Edit the particle paths

* Remove script

* Add h5py to dumping

* Minor adjustments

* add h5py support

* remove split path

* h5py support

* Type

* Edit initmatch paths

* Shorten func

* dist_centmass to work with 2D arrays

* Forgot to return the centre of mass

* Fixed code

* Fix halo bug

* Start MPI broadcasting

* Mini bug

* Remove commenting

* Remove test statement

* Fix index

* Printing from rank 0 only

* Move where clump index stored

* Add dtype options

* Add dtype options
Author: Richard Stiskalek
Date:   2023-05-02 13:57:13 +01:00 (committed by GitHub)
Parent: 553eec8228
Commit: 1a9e6177d7
8 changed files with 236 additions and 323 deletions


@@ -12,16 +12,18 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 """
-Script to load in the simulation particles and dump them to a HDF5 file for the
-SPH density field calculation.
+Script to load in the simulation particles and dump them to a HDF5 file.
+Creates a mapping to access directly particles of a single clump.
 """
 from datetime import datetime
-from gc import collect
 from distutils.util import strtobool
+from gc import collect
 import h5py
 import numpy
 from mpi4py import MPI
+from tqdm import tqdm
 try:
     import csiborgtools
@@ -41,17 +43,23 @@ nproc = comm.Get_size()
 # And next parse all the arguments and set up CSiBORG objects
 parser = ArgumentParser()
 parser.add_argument("--ics", type=int, nargs="+", default=None,
-                    help="IC realisatiosn. If `-1` processes all simulations.")
-parser.add_argument("--with_vel", type=lambda x: bool(strtobool(x)),
-                    help="Whether to include velocities in the particle file.")
+                    help="IC realisations. If `-1` processes all simulations.")
+parser.add_argument("--pos_only", type=lambda x: bool(strtobool(x)),
+                    help="Do we only dump positions?")
+parser.add_argument("--dtype", type=str, choices=["float32", "float64"],
+                    default="float32",)
 args = parser.parse_args()
+verbose = nproc == 1
 paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
 partreader = csiborgtools.read.ParticleReader(paths)
-if args.with_vel:
-    pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
-else:
+if args.pos_only:
     pars_extract = ['x', 'y', 'z', 'M']
-if args.ics is None or args.ics == -1:
+else:
+    pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
+if args.ics is None or args.ics[0] == -1:
     ics = paths.get_ics(tonew=False)
 else:
     ics = args.ics
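The next hunk distributes these IC realisations across MPI ranks via `csiborgtools.fits.split_jobs(len(ics), nproc)[rank]`, so that each rank loops only over its own share of simulations. A minimal sketch of that pattern, assuming a simple round-robin split and made-up IC numbers (the actual `split_jobs` implementation may differ):

```python
# Illustrative only: a round-robin split of simulations across MPI ranks.
# The IC numbers below are hypothetical placeholders.
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

ics = [7444, 7468, 7492, 7516]
jobs = [i for i in range(len(ics)) if i % nproc == rank]

for i in jobs:
    print(f"Rank {rank} processes simulation {ics[i]}.", flush=True)
```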
@@ -62,14 +70,49 @@ jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
 for i in jobs:
     nsim = ics[i]
     nsnap = max(paths.get_snapshots(nsim))
-    print(f"{datetime.now()}: Rank {rank} completing simulation {nsim}.",
+    print(f"{datetime.now()}: Rank {rank} loading particles {nsim}.",
           flush=True)
-    out = partreader.read_particle(
-        nsnap, nsim, pars_extract, return_structured=False, verbose=nproc == 1)
+    parts = partreader.read_particle(nsnap, nsim, pars_extract,
+                                     return_structured=False, verbose=verbose)
+    if args.dtype == "float64":
+        parts = parts.astype(numpy.float64)
-    with h5py.File(paths.particle_h5py_path(nsim), "w") as f:
-        dset = f.create_dataset("particles", data=out)
+    kind = "pos" if args.pos_only else None
-    del out
+    print(f"{datetime.now()}: Rank {rank} dumping particles from {nsim}.",
+          flush=True)
+    with h5py.File(paths.particle_h5py_path(nsim, kind, args.dtype), "w") as f:
+        f.create_dataset("particles", data=parts)
+    del parts
     collect()
+    print(f"{datetime.now()}: Rank {rank} finished dumping of {nsim}.",
+          flush=True)
+    # If we are dumping only particle positions, then we are done.
+    if args.pos_only:
+        continue
+    print(f"{datetime.now()}: Rank {rank} mapping particles from {nsim}.",
+          flush=True)
+    # If not, then load the clump IDs and prepare the memory mapping. We find
+    # which array positions correspond to which clump IDs and save it. With
+    # this we can then lazily load into memory the particles for each clump.
+    part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
+    cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
+                                            rawdata=True)
+    clumpinds = cat["index"]
+    # Some of the clumps have no particles, so we do not loop over them
+    clumpinds = clumpinds[numpy.isin(clumpinds, part_cids)]
+    out = {}
+    for i, cid in enumerate(tqdm(clumpinds) if verbose else clumpinds):
+        out.update({str(cid): numpy.where(part_cids == cid)[0]})
+    # We save the mapping to a HDF5 file
+    with h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), "w") as f:
+        for cid, indxs in out.items():
+            f.create_dataset(cid, data=indxs)
+    del part_cids, cat, clumpinds, out
+    collect()
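As a usage note, the clump map written above is what allows the particles of a single clump to be loaded lazily. A minimal sketch of reading them back with `h5py`, assuming placeholder file names and a made-up clump ID (in the pipeline the paths come from `paths.particle_h5py_path`); the layout, a `particles` dataset plus one index array per clump ID, follows the script above:

```python
import h5py
import numpy

# Placeholder file names; in practice these are given by paths.particle_h5py_path.
particles_file = "particles_7444.h5"
clumpmap_file = "clumpmap_7444.h5"
clump_id = 1234  # hypothetical clump ID

with h5py.File(clumpmap_file, "r") as f:
    indxs = numpy.sort(f[str(clump_id)][:])  # row positions of this clump's particles

with h5py.File(particles_file, "r") as f:
    # Only the selected rows are read from disk; h5py fancy indexing
    # requires the indices to be in increasing order, hence the sort.
    clump_parts = f["particles"][indxs, :]

print(clump_parts.shape)
```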