CSiBORG FoF switch (#75)

* Add moving FoF membership files

* add FoF membership path

* Add notes where it's PHEW

* Add FoF catalogue path

* Correct typo

* Add more functionalities

* Make work with halo IDs from FoF

* Edit print statement

* Fix copy bug

* copy

* Add FoF catalogue reading

* Clean up script

* Fix typo

* Little edits

* Fix naming convention

* Rename key

* Remove loading substructure particles

* Rename CSiBORG Cat

* Rename clumps cat

* Rename cat

* Remove misplaced import

* Switch to halos

* rm import

* structfit of only halos

* Add FoF halo reading

* Add a short comment

* Fix __getitem__ to work with int

* Fix problems

* Improve __getitem__

* Add more conversion

* Fix indexing

* Fix __getitem__ assertion

* Fix numbers

* Rename

* Fix verbosity flags

* Add full Quijote HMF option

* Add plot of Quijote only

* Add quijote full paths

* Fix the fit_init script

* Rename arg

* Update .gitignore

* add default argument name

* Change default verbosity flag

* Modernise script structure

* Fix dictionary

* Fix reading to include m200c

* Modernise script

* Add args
Richard Stiskalek, 2023-07-24 14:10:21 +02:00 (committed by GitHub)
parent fcd1a6b321
commit eb8d070fff
19 changed files with 659 additions and 466 deletions


@@ -12,12 +12,12 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 """
-Script to load in the simulation particles, load them by their clump ID and
-dump into a HDF5 file. Stores the first and last index of each clump in the
+Script to load in the simulation particles, sort them by their FoF halo ID and
+dump into a HDF5 file. Stores the first and last index of each halo in the
 particle array. This can be used for fast slicing of the array to acces
 particles of a single clump.
 """
 from argparse import ArgumentParser
 from datetime import datetime
 from gc import collect
@@ -25,8 +25,11 @@ import h5py
 import numba
 import numpy
 from mpi4py import MPI
+from taskmaster import work_delegation
 from tqdm import trange
+from utils import get_nsims
 try:
     import csiborgtools
 except ModuleNotFoundError:
@@ -35,80 +38,79 @@ except ModuleNotFoundError:
     sys.path.append("../")
     import csiborgtools
-from argparse import ArgumentParser
-# We set up the MPI
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-nproc = comm.Get_size()
-# And next parse all the arguments and set up CSiBORG objects
-parser = ArgumentParser()
-parser.add_argument("--ics", type=int, nargs="+", default=None,
-                    help="IC realisations. If `-1` processes all simulations.")
-args = parser.parse_args()
-verbose = nproc == 1
-paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-partreader = csiborgtools.read.ParticleReader(paths)
-# Keep "ID" as the last column!
-pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
-if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics("csiborg")
-else:
-    ics = args.ics
 @numba.jit(nopython=True)
-def minmax_clump(clid, clump_ids, start_loop=0):
+def minmax_halo(hid, halo_ids, start_loop=0):
     """
-    Find the start and end index of a clump in a sorted array of clump IDs.
+    Find the start and end index of a halo in a sorted array of halo IDs.
     This is much faster than using `numpy.where` and then `numpy.min` and
     `numpy.max`.
     """
     start = None
     end = None
-    for i in range(start_loop, clump_ids.size):
-        n = clump_ids[i]
-        if n == clid:
+    for i in range(start_loop, halo_ids.size):
+        n = halo_ids[i]
+        if n == hid:
             if start is None:
                 start = i
             end = i
-        elif n > clid:
+        elif n > hid:
             break
     return start, end
-# MPI loop over individual simulations. We read in the particles from RAMSES
-# files and dump them to a HDF5 file.
-jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
-for i in jobs:
-    nsim = ics[i]
+def main(nsim, simname, verbose):
+    """
+    Read in the snapshot particles, sort them by their FoF halo ID and dump
+    into a HDF5 file. Stores the first and last index of each halo in the
+    particle array for fast slicing of the array to acces particles of a
+    single halo.
+
+    Parameters
+    ----------
+    nsim : int
+        IC realisation index.
+    simname : str
+        Simulation name.
+    verbose : bool
+        Verbosity flag.
+
+    Returns
+    -------
+    None
+    """
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    partreader = csiborgtools.read.ParticleReader(paths)
+    if simname == "quijote":
+        raise NotImplementedError("Not implemented for Quijote yet.")
+    # Keep "ID" as the last column!
+    pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
     nsnap = max(paths.get_snapshots(nsim))
     fname = paths.particles(nsim)
-    # We first read in the clump IDs of the particles and infer the sorting.
-    # Right away we dump the clump IDs to a HDF5 file and clear up memory.
-    print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
-          flush=True)
-    part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
-    sort_indxs = numpy.argsort(part_cids).astype(numpy.int32)
-    part_cids = part_cids[sort_indxs]
+    # We first read in the halo IDs of the particles and infer the sorting.
+    # Right away we dump the halo IDs to a HDF5 file and clear up memory.
+    if verbose:
+        print(f"{datetime.now()}: loading particles {nsim}.", flush=True)
+    part_hids = partreader.read_fof_hids(nsim)
+    sort_indxs = numpy.argsort(part_hids).astype(numpy.int32)
+    part_hids = part_hids[sort_indxs]
     with h5py.File(fname, "w") as f:
-        f.create_dataset("clump_ids", data=part_cids)
+        f.create_dataset("halo_ids", data=part_hids)
         f.close()
-    del part_cids
+    del part_hids
     collect()
-    # Next we read in the particles and sort them by their clump ID.
+    # Next we read in the particles and sort them by their halo ID.
     # We cannot directly read this as an unstructured array because the float32
-    # precision is insufficient to capture the clump IDs.
+    # precision is insufficient to capture the halo IDs.
     parts, pids = partreader.read_particle(
         nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
     # Now we in two steps save the particles and particle IDs.
-    print(f"{datetime.now()}: rank {rank} dumping particles from {nsim}.",
-          flush=True)
+    if verbose:
+        print(f"{datetime.now()}: dumping particles from {nsim}.", flush=True)
     parts = parts[sort_indxs]
     pids = pids[sort_indxs]
     del sort_indxs
@@ -126,29 +128,48 @@ for i in jobs:
     del parts
     collect()
-    print(f"{datetime.now()}: rank {rank} creating clump mapping for {nsim}.",
-          flush=True)
+    if verbose:
+        print(f"{datetime.now()}: creating halo map for {nsim}.", flush=True)
     # Load clump IDs back to memory
     with h5py.File(fname, "r") as f:
-        part_cids = f["clump_ids"][:]
+        part_hids = f["halo_ids"][:]
     # We loop over the unique clump IDs.
-    unique_clump_ids = numpy.unique(part_cids)
-    clump_map = numpy.full((unique_clump_ids.size, 3), numpy.nan,
-                           dtype=numpy.int32)
+    unique_halo_ids = numpy.unique(part_hids)
+    halo_map = numpy.full((unique_halo_ids.size, 3), numpy.nan,
+                          dtype=numpy.int32)
     start_loop = 0
-    niters = unique_clump_ids.size
+    niters = unique_halo_ids.size
     for i in trange(niters) if verbose else range(niters):
-        clid = unique_clump_ids[i]
-        k0, kf = minmax_clump(clid, part_cids, start_loop=start_loop)
-        clump_map[i, 0] = clid
-        clump_map[i, 1] = k0
-        clump_map[i, 2] = kf
+        hid = unique_halo_ids[i]
+        k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
+        halo_map[i, 0] = hid
+        halo_map[i, 1] = k0
+        halo_map[i, 2] = kf
         start_loop = kf
     # We save the mapping to a HDF5 file
     with h5py.File(paths.particles(nsim), "r+") as f:
-        f.create_dataset("clumpmap", data=clump_map)
+        f.create_dataset("halomap", data=halo_map)
         f.close()
-    del part_cids
+    del part_hids
     collect()
+if __name__ == "__main__":
+    # And next parse all the arguments and set up CSiBORG objects
+    parser = ArgumentParser()
+    parser.add_argument("--simname", type=str, default="csiborg",
+                        choices=["csiborg", "quijote"],
+                        help="Simulation name")
+    parser.add_argument("--nsims", type=int, nargs="+", default=None,
+                        help="IC realisations. If `-1` processes all.")
+    args = parser.parse_args()
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    nsims = get_nsims(args, paths)
+
+    def _main(nsim, verbose=MPI.COMM_WORLD.Get_size() == 1):
+        main(nsim, args.simname, verbose=verbose)
+
+    work_delegation(_main, nsims, MPI.COMM_WORLD)
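
The docstring above notes that the stored first/last indices allow fast slicing of a single halo's particles. Below is a minimal usage sketch of that idea, not part of this commit: only the `halomap` dataset name and its `[halo ID, first index, last index]` column layout come from the script; the particle dataset name (`"particles"`), the helper function, and the example file name are hypothetical.

```python
# Sketch of reading back one halo's particles from the HDF5 file written by
# the script. Assumes the particle array was stored under "particles"
# (hypothetical name) alongside the "halomap" dataset created above.
import h5py
import numpy


def load_halo_particles(hid, fname, parts_dset="particles"):
    """Return the rows of the particle array belonging to FoF halo `hid`."""
    with h5py.File(fname, "r") as f:
        # Columns of `halomap`: [halo ID, first index, last index] (inclusive).
        halo_map = f["halomap"][:]
        k = numpy.flatnonzero(halo_map[:, 0] == hid)
        if k.size == 0:
            raise KeyError(f"Halo {hid} not found in `halomap`.")
        k0, kf = halo_map[k[0], 1], halo_map[k[0], 2]
        # Because particles were sorted by halo ID, a contiguous slice suffices
        # and no boolean mask over the full particle array is needed.
        return f[parts_dset][k0:kf + 1]


# Example (hypothetical output file name):
# parts = load_halo_particles(123, "ramses_out_7444_particles.h5")
```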