CSiBORG FoF switch (#75)

* Add moving FoF membership files

* add FoF membership path

* Add notes where its PHEW

* Add FoF catalogue path

* Correct typo

* Add more functionalities

* Make work with halo IDs from FoF

* Edit print statement

* Fix copy bug

* copy

* Add FoF catalogue reading

* Clean up script

* Fix typo

* Little edits

* Fix naming convention

* Rename key

* Remove loading substructure particles

* Rename CSiBORG Cat

* Rename clumps cat

* Rename cat

* Remove misplaced import

* Switch to halos

* rm import

* structfit of only halos

* Add FoF halo reading

* Add a short comment

* Fix __getitem__ to work with int

* Fix problems

* Improve __getitem__

* Add more conversion

* Fix indexing

* Fix __getitem__ assertion

* Fix numbers

* Rename

* Fix verbosity flags

* Add full Quijote HMF option

* Add plot of Quijote only

* Add quijote full paths

* Fix the fit_init script

* Renam arg

* Update .gitignore

* add default argument name

* Change default verbosity flag

* Modernise script structure

* Fix dictionary

* Fix reading to include m200c

* Modernise script

* Add args
This commit is contained in:
Richard Stiskalek 2023-07-24 14:10:21 +02:00 committed by GitHub
parent fcd1a6b321
commit eb8d070fff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 659 additions and 466 deletions

View file

@ -13,15 +13,17 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to fit halos (concentration, ...). The particle array of each CSiBORG
realisation must have been split in advance by `runsplit_halos`.
A script to fit FoF halos (concentration, ...). The particle array of each
CSiBORG realisation must have been processed in advance by `pre_dumppart.py`.
"""
from argparse import ArgumentParser
from datetime import datetime
import numpy
from mpi4py import MPI
from tqdm import tqdm
from tqdm import trange
from utils import get_nsims
try:
import csiborgtools
@ -38,18 +40,13 @@ nproc = comm.Get_size()
verbose = nproc == 1
parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
parser.add_argument("--ics", type=int, nargs="+", default=None,
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
nfwpost = csiborgtools.fits.NFWPosterior()
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics("csiborg")
else:
ics = args.ics
nsims = get_nsims(args, paths)
cols_collect = [
("index", numpy.int32),
@ -67,13 +64,12 @@ cols_collect = [
("lambda200c", numpy.float32),
("r200m", numpy.float32),
("m200m", numpy.float32),
("r500m", numpy.float32),
("m500m", numpy.float32),
]
def fit_clump(particles, clump_info, box):
"""
Fit an object. Can be eithe a clump or a parent halo.
"""
def fit_halo(particles, clump_info, box):
obj = csiborgtools.fits.Clump(particles, clump_info, box)
out = {}
@ -82,16 +78,15 @@ def fit_clump(particles, clump_info, box):
for i, v in enumerate(["vx", "vy", "vz"]):
out[v] = numpy.average(obj.vel[:, i], weights=obj["M"])
# Overdensity masses
out["r200c"], out["m200c"] = obj.spherical_overdensity_mass(200,
kind="crit")
out["r500c"], out["m500c"] = obj.spherical_overdensity_mass(500,
kind="crit")
out["r200m"], out["m200m"] = obj.spherical_overdensity_mass(200,
kind="matter")
for n in [200, 500]:
out[f"r{n}c"], out[f"m{n}c"] = obj.spherical_overdensity_mass(
n, kind="crit", npart_min=10)
out[f"r{n}m"], out[f"m{n}m"] = obj.spherical_overdensity_mass(
n, kind="matter", npart_min=10)
# NFW fit
if out["npart"] > 10 and numpy.isfinite(out["r200c"]):
Rs, rho0 = nfwpost.fit(obj)
out["conc"] = Rs / out["r200c"]
out["conc"] = out["r200c"] / Rs
out["rho0"] = rho0
# Spin within R200c
if numpy.isfinite(out["r200c"]):
@ -100,8 +95,8 @@ def fit_clump(particles, clump_info, box):
# We MPI loop over all simulations.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for nsim in [ics[i] for i in jobs]:
jobs = csiborgtools.fits.split_jobs(len(nsims), nproc)[rank]
for nsim in [nsims[i] for i in jobs]:
print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
flush=True)
nsnap = max(paths.get_snapshots(nsim))
@ -110,49 +105,30 @@ for nsim in [ics[i] for i in jobs]:
# Particle archive
f = csiborgtools.read.read_h5(paths.particles(nsim))
particles = f["particles"]
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
# We check whether we fit halos or clumps, will be indexing over different
# iterators.
if args.kind == "halos":
ismain = clumps_cat.ismain
else:
ismain = numpy.ones(len(clumps_cat), dtype=bool)
halo_map = f["halomap"]
hid2map = {clid: i for i, clid in enumerate(halo_map[:, 0])}
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, with_lagpatch=False, load_initial=False, rawdata=True,
load_fitted=False)
# Even if we are calculating parent halo this index runs over all clumps.
out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
indxs = clumps_cat["index"]
for i, clid in enumerate(tqdm(indxs)) if verbose else enumerate(indxs):
clid = clumps_cat["index"][i]
out["index"][i] = clid
# If we are fitting halos and this clump is not a main, then continue.
if args.kind == "halos" and not ismain[i]:
continue
if args.kind == "halos":
part = csiborgtools.read.load_parent_particles(
clid, particles, clump_map, clid2map, clumps_cat)
else:
part = csiborgtools.read.load_clump_particles(clid, particles,
clump_map, clid2map)
out = csiborgtools.read.cols_to_structured(len(cat), cols_collect)
indxs = cat["index"]
for i in trange(len(cat)) if verbose else range(len(cat)):
hid = cat["index"][i]
out["index"][i] = hid
part = csiborgtools.read.load_halo_particles(hid, particles, halo_map,
hid2map)
# We fit the particles if there are any. If not we assign the index,
# otherwise it would be NaN converted to integers (-2147483648) and
# yield an error further down.
if part is None:
continue
_out = fit_clump(part, clumps_cat[i], box)
_out = fit_halo(part, cat[i], box)
for key in _out.keys():
out[key][i] = _out[key]
# Finally, we save the results. If we were analysing main halos, then
# remove array indices that do not correspond to parent halos.
if args.kind == "halos":
out = out[ismain]
fout = paths.structfit(nsnap, nsim, args.kind)
fout = paths.structfit(nsnap, nsim)
print(f"Saving to `{fout}`.", flush=True)
numpy.save(fout, out)

View file

@ -58,7 +58,9 @@ def get_counts(nsim, bins, paths, parser_args):
bounds = {"dist": (0, parser_args.Rmax)}
if simname == "csiborg":
cat = csiborgtools.read.HaloCatalogue(nsim, paths, bounds=bounds)
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, bounds=bounds, with_lagpatch=False,
load_initial=False)
logmass = numpy.log10(cat["totpartmass"])
counts = csiborgtools.fits.number_counts(logmass, bins)
elif simname == "quijote":
@ -71,6 +73,12 @@ def get_counts(nsim, bins, paths, parser_args):
cat = cat0.pick_fiducial_observer(nobs, rmax=parser_args.Rmax)
logmass = numpy.log10(cat["group_mass"])
counts[nobs, :] = csiborgtools.fits.number_counts(logmass, bins)
elif simname == "quijote_full":
cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
logmass = numpy.log10(cat["group_mass"])
counts = csiborgtools.fits.number_counts(logmass, bins)
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
fout = paths.halo_counts(simname, nsim)
if parser_args.verbose:
@ -80,12 +88,15 @@ def get_counts(nsim, bins, paths, parser_args):
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
parser.add_argument("--simname", type=str,
choices=["csiborg", "quijote", "quijote_full"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
parser.add_argument("--Rmax", type=float, default=155/0.705,
help="High-resolution region radius")
parser.add_argument("--bw", type=float, default=0.2,
help="Bin width in dex")
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
@ -96,7 +107,7 @@ if __name__ == "__main__":
verbose = nproc == 1
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(parser_args, paths)
bins = numpy.arange(11., 16., 0.2, dtype=numpy.float32)
bins = numpy.arange(11., 16., parser_args.bw, dtype=numpy.float32)
def do_work(nsim):
get_counts(nsim, bins, paths, parser_args)

View file

@ -24,6 +24,8 @@ import numpy
from mpi4py import MPI
from tqdm import tqdm
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
@ -41,16 +43,16 @@ verbose = nproc == 1
# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics("csiborg")
else:
ics = args.ics
nsims = get_nsims(args, paths)
cols_collect = [("index", numpy.int32),
("x", numpy.float32),
@ -61,8 +63,8 @@ cols_collect = [("index", numpy.int32),
# MPI loop over simulations
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for nsim in [ics[i] for i in jobs]:
jobs = csiborgtools.fits.split_jobs(len(nsims), nproc)[rank]
for nsim in [nsims[i] for i in jobs]:
nsnap = max(paths.get_snapshots(nsim))
overlapper = csiborgtools.match.ParticleOverlap()
print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
@ -70,22 +72,18 @@ for nsim in [ics[i] for i in jobs]:
parts = csiborgtools.read.read_h5(paths.initmatch(nsim, "particles"))
parts = parts['particles']
clump_map = csiborgtools.read.read_h5(paths.particles(nsim))
clump_map = clump_map["clumpmap"]
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
ismain = clumps_cat.ismain
halo_map = csiborgtools.read.read_h5(paths.particles(nsim))
halo_map = halo_map["halomap"]
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, rawdata=True, load_fitted=False, load_initial=False)
hid2map = {hid: i for i, hid in enumerate(halo_map[:, 0])}
out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
indxs = clumps_cat["index"]
for i, hid in enumerate(tqdm(indxs) if verbose else indxs):
out = csiborgtools.read.cols_to_structured(len(cat), cols_collect)
for i, hid in enumerate(tqdm(cat["index"]) if verbose else cat["index"]):
out["index"][i] = hid
if not ismain[i]:
continue
part = csiborgtools.read.load_halo_particles(hid, parts, halo_map,
hid2map)
part = csiborgtools.read.load_parent_particles(hid, parts, clump_map,
clid2map, clumps_cat)
# Skip if the halo is too small.
if part is None or part.size < 100:
continue
@ -101,7 +99,6 @@ for nsim in [ics[i] for i in jobs]:
delta = overlapper.make_delta(part[:, :3], part[:, 3], subbox=True)
out["lagpatch_ncells"][i] = csiborgtools.fits.delta2ncells(delta)
out = out[ismain]
# Now save it
fout = paths.initmatch(nsim, "fit")
print(f"{datetime.now()}: dumping fits to .. `{fout}`.",

View file

@ -30,7 +30,7 @@ except ModuleNotFoundError:
def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
from csiborgtools.read import HaloCatalogue, read_h5
from csiborgtools.read import CSiBORGHaloCatalogue, read_h5
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
smooth_kwargs = {"sigma": sigma, "mode": "constant", "cval": 0.0}
@ -40,10 +40,10 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
# Load the raw catalogues (i.e. no selection) including the initial CM
# positions and the particle archives.
bounds = {"totpartmass": (1e12, None)}
cat0 = HaloCatalogue(nsim0, paths, load_initial=True, bounds=bounds,
with_lagpatch=True, load_clumps_cat=True)
catx = HaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
with_lagpatch=True, load_clumps_cat=True)
cat0 = CSiBORGHaloCatalogue(nsim0, paths, load_initial=True, bounds=bounds,
with_lagpatch=True, load_clumps_cat=True)
catx = CSiBORGHaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
with_lagpatch=True, load_clumps_cat=True)
clumpmap0 = read_h5(paths.particles(nsim0))["clumpmap"]
parts0 = read_h5(paths.initmatch(nsim0, "particles"))["particles"]

151
scripts/mv_fofmembership.py Normal file
View file

@ -0,0 +1,151 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Short script to move and change format of the CSiBORG FoF membership files
calculated by Julien. Additionally, also orders the particles in the same way
as the PHEW halo finder output.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from os.path import join
from shutil import copy
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def copy_membership(nsim, verbose=True):
"""
Copy the FoF particle halo membership to the CSiBORG directory and write it
as a NumPy array instead of a text file.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fpath = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
f"sim_{nsim}/particle_membership_{nsim}_FOF.txt")
if verbose:
print(f"Loading from ... `{fpath}`.")
data = numpy.genfromtxt(fpath, dtype=int)
fout = paths.fof_membership(nsim)
if verbose:
print(f"Saving to ... `{fout}`.")
numpy.save(fout, data)
def copy_catalogue(nsim, verbose=True):
"""
Move the FoF catalogue to the CSiBORG directory.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
source = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
f"sim_{nsim}/halo_catalog_{nsim}_FOF.txt")
dest = paths.fof_cat(nsim)
if verbose:
print("Copying`{}` to `{}`.".format(source, dest))
copy(source, dest)
def sort_fofid(nsim, verbose=True):
"""
Read the FoF particle halo membership and sort the halo IDs to the ordering
of particles in the PHEW clump IDs.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim))
fpath = paths.fof_membership(nsim)
if verbose:
print(f"{datetime.now()}: loading from ... `{fpath}`.")
# Columns are halo ID, particle ID.
fof = numpy.load(fpath)
reader = csiborgtools.read.ParticleReader(paths)
pars_extract = ["x"] # Dummy variable
__, pids = reader.read_particle(nsnap, nsim, pars_extract,
return_structured=False, verbose=verbose)
del __
collect()
# Map the particle IDs in pids to their corresponding PHEW array index
if verbose:
print(f"{datetime.now()}: mapping particle IDs to their indices.")
pids_idx = {pid: i for i, pid in enumerate(pids)}
if verbose:
print(f"{datetime.now()}: mapping FoF HIDs to their array indices.")
# Unassigned particle IDs are assigned a halo ID of 0. Same as PHEW.
fof_hids = numpy.zeros(pids.size, dtype=numpy.int32)
for i in trange(fof.shape[0]) if verbose else range(fof.shape[0]):
hid, pid = fof[i]
fof_hids[pids_idx[pid]] = hid
fout = paths.fof_membership(nsim, sorted=True)
if verbose:
print(f"Saving the sorted data to ... `{fout}`")
numpy.save(fout, fof_hids)
def main(nsim, verbose=True):
copy_membership(nsim, verbose=verbose)
copy_catalogue(nsim, verbose=verbose)
sort_fofid(nsim, verbose=verbose)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
comm = MPI.COMM_WORLD
work_delegation(main, nsims, comm)

View file

@ -12,12 +12,12 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to load in the simulation particles, load them by their clump ID and
dump into a HDF5 file. Stores the first and last index of each clump in the
Script to load in the simulation particles, sort them by their FoF halo ID and
dump into a HDF5 file. Stores the first and last index of each halo in the
particle array. This can be used for fast slicing of the array to acces
particles of a single clump.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
@ -25,8 +25,11 @@ import h5py
import numba
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
@ -35,80 +38,79 @@ except ModuleNotFoundError:
sys.path.append("../")
import csiborgtools
from argparse import ArgumentParser
# We set up the MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
# And next parse all the arguments and set up CSiBORG objects
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
verbose = nproc == 1
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
# Keep "ID" as the last column!
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics("csiborg")
else:
ics = args.ics
@numba.jit(nopython=True)
def minmax_clump(clid, clump_ids, start_loop=0):
def minmax_halo(hid, halo_ids, start_loop=0):
"""
Find the start and end index of a clump in a sorted array of clump IDs.
Find the start and end index of a halo in a sorted array of halo IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, clump_ids.size):
n = clump_ids[i]
if n == clid:
for i in range(start_loop, halo_ids.size):
n = halo_ids[i]
if n == hid:
if start is None:
start = i
end = i
elif n > clid:
elif n > hid:
break
return start, end
# MPI loop over individual simulations. We read in the particles from RAMSES
# files and dump them to a HDF5 file.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
nsim = ics[i]
def main(nsim, simname, verbose):
"""
Read in the snapshot particles, sort them by their FoF halo ID and dump
into a HDF5 file. Stores the first and last index of each halo in the
particle array for fast slicing of the array to acces particles of a single
halo.
Parameters
----------
nsim : int
IC realisation index.
simname : str
Simulation name.
verbose : bool
Verbosity flag.
Returns
-------
None
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
if simname == "quijote":
raise NotImplementedError("Not implemented for Quijote yet.")
# Keep "ID" as the last column!
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
nsnap = max(paths.get_snapshots(nsim))
fname = paths.particles(nsim)
# We first read in the clump IDs of the particles and infer the sorting.
# Right away we dump the clump IDs to a HDF5 file and clear up memory.
print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
flush=True)
part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
sort_indxs = numpy.argsort(part_cids).astype(numpy.int32)
part_cids = part_cids[sort_indxs]
# We first read in the halo IDs of the particles and infer the sorting.
# Right away we dump the halo IDs to a HDF5 file and clear up memory.
if verbose:
print(f"{datetime.now()}: loading particles {nsim}.", flush=True)
part_hids = partreader.read_fof_hids(nsim)
sort_indxs = numpy.argsort(part_hids).astype(numpy.int32)
part_hids = part_hids[sort_indxs]
with h5py.File(fname, "w") as f:
f.create_dataset("clump_ids", data=part_cids)
f.create_dataset("halo_ids", data=part_hids)
f.close()
del part_cids
del part_hids
collect()
# Next we read in the particles and sort them by their clump ID.
# Next we read in the particles and sort them by their halo ID.
# We cannot directly read this as an unstructured array because the float32
# precision is insufficient to capture the clump IDs.
# precision is insufficient to capture the halo IDs.
parts, pids = partreader.read_particle(
nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
# Now we in two steps save the particles and particle IDs.
print(f"{datetime.now()}: rank {rank} dumping particles from {nsim}.",
flush=True)
if verbose:
print(f"{datetime.now()}: dumping particles from {nsim}.", flush=True)
parts = parts[sort_indxs]
pids = pids[sort_indxs]
del sort_indxs
@ -126,29 +128,48 @@ for i in jobs:
del parts
collect()
print(f"{datetime.now()}: rank {rank} creating clump mapping for {nsim}.",
flush=True)
if verbose:
print(f"{datetime.now()}: creating halo map for {nsim}.", flush=True)
# Load clump IDs back to memory
with h5py.File(fname, "r") as f:
part_cids = f["clump_ids"][:]
part_hids = f["halo_ids"][:]
# We loop over the unique clump IDs.
unique_clump_ids = numpy.unique(part_cids)
clump_map = numpy.full((unique_clump_ids.size, 3), numpy.nan,
dtype=numpy.int32)
unique_halo_ids = numpy.unique(part_hids)
halo_map = numpy.full((unique_halo_ids.size, 3), numpy.nan,
dtype=numpy.int32)
start_loop = 0
niters = unique_clump_ids.size
niters = unique_halo_ids.size
for i in trange(niters) if verbose else range(niters):
clid = unique_clump_ids[i]
k0, kf = minmax_clump(clid, part_cids, start_loop=start_loop)
clump_map[i, 0] = clid
clump_map[i, 1] = k0
clump_map[i, 2] = kf
hid = unique_halo_ids[i]
k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
halo_map[i, 0] = hid
halo_map[i, 1] = k0
halo_map[i, 2] = kf
start_loop = kf
# We save the mapping to a HDF5 file
with h5py.File(paths.particles(nsim), "r+") as f:
f.create_dataset("clumpmap", data=clump_map)
f.create_dataset("halomap", data=halo_map)
f.close()
del part_cids
del part_hids
collect()
if __name__ == "__main__":
# And next parse all the arguments and set up CSiBORG objects
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all .")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def _main(nsim, verbose=MPI.COMM_WORLD.nproc == 1):
main(nsim, args.simname, verbose=verbose)
work_delegation(_main, nsims, MPI.COMM_WORLD)

View file

@ -14,7 +14,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to sort the initial snapshot particles according to their final
snapshot ordering, which is sorted by the clump IDs.
snapshot ordering, which is sorted by the halo IDs.
"""
from argparse import ArgumentParser
from datetime import datetime
@ -23,6 +23,9 @@ from gc import collect
import h5py
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from utils import get_nsims
try:
import csiborgtools
@ -33,42 +36,37 @@ except ModuleNotFoundError:
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
def _main(nsim, simname, verbose):
"""
Sort the initial snapshot particles according to their final snapshot
ordering and dump them into a HDF5 file.
# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
# NOTE: ID has to be the last column.
pars_extract = ["x", "y", "z", "M", "ID"]
Parameters
----------
nsim : int
IC realisation index.
simname : str
Simulation name.
verbose : bool
Verbosity flag.
"""
if simname == "quijote":
raise NotImplementedError("Quijote not implemented yet.")
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics("csiborg")
else:
ics = args.ics
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
# We loop over simulations. Each simulation is then procesed with MPI, rank 0
# loads the data and broadcasts it to other ranks.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
nsim = ics[i]
nsnap = max(paths.get_snapshots(nsim))
print(f"{datetime.now()}: reading and processing simulation {nsim}.",
flush=True)
if verbose:
print(f"{datetime.now()}: reading and processing simulation {nsim}.",
flush=True)
# We first load the particle IDs in the final snapshot.
pidf = csiborgtools.read.read_h5(paths.particles(nsim))
pidf = pidf["particle_ids"]
# Then we load the particles in the initil snapshot and make sure that
# their particle IDs are sorted as in the final snapshot.
# Again, because of precision this must be read as structured.
# their particle IDs are sorted as in the final snapshot. Again, because of
# precision this must be read as structured.
# NOTE: ID has to be the last column.
pars_extract = ["x", "y", "z", "M", "ID"]
part0, pid0 = partreader.read_particle(
1, nsim, pars_extract, return_structured=False, verbose=verbose)
# First enforce them to already be sorted and then apply reverse
@ -77,6 +75,26 @@ for i in jobs:
del pid0
collect()
part0 = part0[numpy.argsort(numpy.argsort(pidf))]
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
if verbose:
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
with h5py.File(paths.initmatch(nsim, "particles"), "w") as f:
f.create_dataset("particles", data=part0)
if __name__ == "__main__":
# Argument parser
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def main(nsim):
_main(nsim, args.simname, MPI.COMM_WORLD.size == 1)
work_delegation(main, nsims, MPI.COMM_WORLD)

View file

@ -81,7 +81,7 @@ def read_single_catalogue(args, config, nsim, run, rmax, paths, nobs=None):
Returns
-------
cat : csiborgtools.read.HaloCatalogue or csiborgtools.read.QuijoteHaloCatalogue # noqa
cat : csiborgtools.read.CSiBORGHaloCatalogue or csiborgtools.read.QuijoteHaloCatalogue # noqa
Halo catalogue with selection criteria applied.
"""
selection = config.get(run, None)
@ -89,7 +89,7 @@ def read_single_catalogue(args, config, nsim, run, rmax, paths, nobs=None):
raise KeyError(f"No configuration for run {run}.")
# We first read the full catalogue without applying any bounds.
if args.simname == "csiborg":
cat = csiborgtools.read.HaloCatalogue(nsim, paths)
cat = csiborgtools.read.CSiBORGHaloCatalogue(nsim, paths)
else:
cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
if nobs is not None: