mirror of https://github.com/Richard-Sti/csiborgtools_public.git
synced 2025-05-14 06:31:11 +00:00
Overlapper improvements (#53)
* Store indices as f32
* Fix init sorting
* Organise imports
* Rename pathing
* Add particle loading
* Improve particle reading
* Add h5py reader
* Edit particle path
* Update particles loading
* Update particles loading
* Fix particle dumping
* Add init fitting
* Fix bug due to insufficient precision
* Add comment
* Add comment
* Add clumps catalogue to halo cat
* Add comment
* Make sure PIDs never forced to float32
* Fix PID reading
* Fix PID reading
* Update matching to work with new arrays
* Stop using cubical sub boxes, turn off nshift if no smoothing
* Improve caching
* Move function definitions
* Simplify calculation
* Add import
* Small updates to the halo
* Simplify calculation
* Simplify looping calculation
* Fix `tonew`
* Add initial data
* Add skip condition
* Add unit conversion
* Add loading background in batches
* Rename mmain index
* Switch overlaps to h5
* Add finite lagpatch check
* Fix column name
* Add verbosity flags
* Save halo IDs instead
* Switch back to npz
* Delete nbs
* Reduce size of the box
* Load correct bckg of halos being matched
* Remove verbosity
* Verbosity edits
* Change lower thresholds
This commit is contained in:
parent 1c9dacfde5
commit 56e39a8b1d
20 changed files with 864 additions and 3816 deletions
@@ -18,9 +18,7 @@ realisation must have been split in advance by `runsplit_halos`.
"""
from argparse import ArgumentParser
from datetime import datetime
from os.path import join

import h5py
import numpy
from mpi4py import MPI
from tqdm import tqdm

@@ -33,20 +31,26 @@ except ModuleNotFoundError:
    sys.path.append("../")
    import csiborgtools

parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
args = parser.parse_args()


# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1

parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
parser.add_argument("--ics", type=int, nargs="+", default=None,
                    help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
nfwpost = csiborgtools.fits.NFWPosterior()
ftemp = join(paths.temp_dumpdir, "fit_clump_{}_{}_{}.npy")

if args.ics is None or args.ics[0] == -1:
    ics = paths.get_ics(tonew=False)
else:
    ics = args.ics

cols_collect = [
    ("index", numpy.int32),
    ("npart", numpy.int32),
@@ -63,7 +67,7 @@ cols_collect = [
    ("lambda200c", numpy.float32),
    ("r200m", numpy.float32),
    ("m200m", numpy.float32),
]
]


def fit_clump(particles, clump_info, box):
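Note: `cols_to_structured`, used throughout this commit to pre-allocate outputs from the `(name, dtype)` pairs in `cols_collect`, lives in `csiborgtools.read`. A minimal sketch of the assumed behaviour — a NaN-filled NumPy structured array — would be:

import numpy

def cols_to_structured_sketch(size, cols):
    # Hypothetical stand-in for `csiborgtools.read.cols_to_structured`:
    # build a structured dtype from (name, dtype) pairs and fill with NaN.
    # NaN cast into an int32 field becomes -2147483648, which is exactly the
    # failure mode the comment in the fitting loop below guards against.
    dtype = {"names": [col[0] for col in cols],
             "formats": [col[1] for col in cols]}
    return numpy.full(size, numpy.nan, dtype=dtype)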
@@ -95,46 +99,19 @@ def fit_clump(particles, clump_info, box):
    return out


def load_clump_particles(clumpid, particles, clump_map):
    """
    Load a clump's particles. If it is not there, i.e. the clump has no
    associated particles, return `None`.
    """
    try:
        return particles[clump_map[clumpid], :]
    except KeyError:
        return None


def load_parent_particles(clumpid, particles, clump_map, clumps_cat):
    """
    Load a parent halo's particles.
    """
    indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
    # We first load the particles of each clump belonging to this parent
    # and then concatenate them for further analysis.
    clumps = []
    for ind in indxs:
        parts = load_clump_particles(ind, particles, clump_map)
        if parts is not None:
            clumps.append(parts)

    if len(clumps) == 0:
        return None
    return numpy.concatenate(clumps)


# We now start looping over all simulations
for i, nsim in enumerate(paths.get_ics(tonew=False)):
    if rank == 0:
        print(f"{datetime.now()}: calculating {i}th simulation `{nsim}`.",
              flush=True)
# We MPI loop over all simulations.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for nsim in [ics[i] for i in jobs]:
    print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
          flush=True)
    nsnap = max(paths.get_snapshots(nsim))
    box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)

    # Particle archive
    particles = h5py.File(paths.particle_h5py_path(nsim), 'r')["particles"]
    clump_map = h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), 'r')
    f = csiborgtools.read.read_h5(paths.particles_path(nsim))
    particles = f["particles"]
    clump_map = f["clumpmap"]
    clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
    clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
                                                   load_fitted=False)
    # We check whether we fit halos or clumps, will be indexing over different
@@ -143,66 +120,39 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
        ismain = clumps_cat.ismain
    else:
        ismain = numpy.ones(len(clumps_cat), dtype=bool)
    ntasks = len(clumps_cat)
    # We split the clumps among the processes. Each CPU calculates a fraction
    # of them and dumps the results in a structured array. Even if we are
    # calculating parent halo this index runs over all clumps.
    jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]
    out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect)
    for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs):
        clumpid = clumps_cat["index"][j]
        out["index"][i] = clumpid

    # Even if we are calculating parent halo this index runs over all clumps.
    out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
    indxs = clumps_cat["index"]
    for i, clid in enumerate(tqdm(indxs)) if verbose else enumerate(indxs):
        clid = clumps_cat["index"][i]
        out["index"][i] = clid
        # If we are fitting halos and this clump is not a main, then continue.
        if args.kind == "halos" and not ismain[j]:
        if args.kind == "halos" and not ismain[i]:
            continue

        if args.kind == "halos":
            part = load_parent_particles(clumpid, particles, clump_map,
                                         clumps_cat)
            part = csiborgtools.read.load_parent_particles(
                clid, particles, clump_map, clid2map, clumps_cat)
        else:
            part = load_clump_particles(clumpid, particles, clump_map)
            part = csiborgtools.read.load_clump_particles(clid, particles,
                                                          clump_map, clid2map)

        # We fit the particles if there are any. If not we assign the index,
        # otherwise it would be NaN converted to integers (-2147483648) and
        # yield an error further down.
        if part is not None:
            _out = fit_clump(part, clumps_cat[j], box)
            for key in _out.keys():
                out[key][i] = _out[key]
        if part is None:
            continue

        fout = ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), rank)
        if nproc == 0:
            print(f"{datetime.now()}: rank {rank} saving to `{fout}`.", flush=True)
        _out = fit_clump(part, clumps_cat[i], box)
        for key in _out.keys():
            out[key][i] = _out[key]

    # Finally, we save the results. If we were analysing main halos, then
    # remove array indices that do not correspond to parent halos.
    if args.kind == "halos":
        out = out[ismain]

    fout = paths.structfit_path(nsnap, nsim, args.kind)
    print(f"Saving to `{fout}`.", flush=True)
    numpy.save(fout, out)
    # We saved this CPU's results in a temporary file. Wait now for the other
    # CPUs and then collect results from the 0th rank and save them.
    comm.Barrier()

    if rank == 0:
        print(f"{datetime.now()}: collecting results for simulation `{nsim}`.",
              flush=True)
        # We write to the output array. Load data from each CPU and append to
        # the output array.
        out = csiborgtools.read.cols_to_structured(ntasks, cols_collect)
        clumpid2outpos = {indx: i
                          for i, indx in enumerate(clumps_cat["index"])}
        for i in range(nproc):
            inp = numpy.load(ftemp.format(str(nsim).zfill(5),
                                          str(nsnap).zfill(5), i))
            for j, clumpid in enumerate(inp["index"]):
                k = clumpid2outpos[clumpid]
                for key in inp.dtype.names:
                    out[key][k] = inp[key][j]

        # If we were analysing main halos, then remove array indices that do
        # not correspond to parent halos.
        if args.kind == "halos":
            out = out[ismain]

        fout = paths.structfit_path(nsnap, nsim, args.kind)
        print(f"Saving to `{fout}`.", flush=True)
        numpy.save(fout, out)

    # We now wait before moving on to another simulation.
    comm.Barrier()
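Note: every script touched by this commit parallelises with the same `jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]` pattern. `split_jobs` is a csiborgtools helper whose internals are not shown here; a sketch of the assumed behaviour (a static partition of task indices into one chunk per MPI rank) is:

def split_jobs_sketch(njobs, nproc):
    # Assumed behaviour: deal task indices out round-robin so that each of
    # the `nproc` MPI ranks receives a near-equal share of the `njobs` tasks.
    chunks = [[] for _ in range(nproc)]
    for i in range(njobs):
        chunks[i % nproc].append(i)
    return chunks

# Usage mirroring the scripts: each rank keeps only its own chunk.
# jobs = split_jobs_sketch(len(ics), nproc)[rank]
# for nsim in [ics[i] for i in jobs]:
#     ...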
scripts/fit_init.py (new file, 104 lines)
@@ -0,0 +1,104 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass, Lagrangian patch size in the
initial snapshot. The initial snapshot particles are read from the sorted
files.
"""
from argparse import ArgumentParser
from datetime import datetime

import numpy
from mpi4py import MPI

from tqdm import tqdm

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1

# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
                    help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)

if args.ics is None or args.ics[0] == -1:
    ics = paths.get_ics(tonew=True)
else:
    ics = args.ics

cols_collect = [("index", numpy.int32),
                ("x", numpy.float32),
                ("y", numpy.float32),
                ("z", numpy.float32),
                ("lagpatch", numpy.float32),]


# MPI loop over simulations
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for nsim in [ics[i] for i in jobs]:
    nsnap = max(paths.get_snapshots(nsim))
    print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
          flush=True)

    parts = csiborgtools.read.read_h5(paths.initmatch_path(nsim, "particles"))
    parts = parts['particles']
    clump_map = csiborgtools.read.read_h5(paths.particles_path(nsim))
    clump_map = clump_map["clumpmap"]
    clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
                                                   load_fitted=False)
    clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
    ismain = clumps_cat.ismain

    out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
    indxs = clumps_cat["index"]
    for i, hid in enumerate(tqdm(indxs) if verbose else indxs):
        out["index"][i] = hid
        if not ismain[i]:
            continue

        part = csiborgtools.read.load_parent_particles(hid, parts, clump_map,
                                                       clid2map, clumps_cat)
        # Skip if the halo is too small.
        if part is None or part.size < 100:
            continue

        dist, cm = csiborgtools.fits.dist_centmass(part)
        # We enforce a maximum patchsize of 0.075 in box coordinates.
        patchsize = min(numpy.percentile(dist, 99), 0.075)
        out["x"][i], out["y"][i], out["z"][i] = cm
        out["lagpatch"][i] = patchsize

    out = out[ismain]
    # Now save it
    fout = paths.initmatch_path(nsim, "fit")
    print(f"{datetime.now()}: dumping fits to .. `{fout}`.", flush=True)
    with open(fout, "wb") as f:
        numpy.save(f, out)
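Note: the patch-size step above reduces each halo's initial-snapshot particles to a mass-weighted centre of mass and a 99th-percentile radius, capped at 0.075 box units. `dist_centmass` is the csiborgtools implementation; an illustrative equivalent, assuming `part` columns are x, y, z, M as elsewhere in this commit:

import numpy

def dist_centmass_sketch(part):
    # Mass-weighted centre of mass of an (N, 4) array with columns x, y, z, M,
    # plus each particle's distance from that centre.
    pos, mass = part[:, :3], part[:, 3]
    cm = numpy.average(pos, axis=0, weights=mass)
    dist = numpy.linalg.norm(pos - cm, axis=1)
    return dist, cm

# dist, cm = dist_centmass_sketch(part)
# patchsize = min(numpy.percentile(dist, 99), 0.075)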
@@ -54,35 +54,6 @@ else:
    nsims = args.ics


def load_clump_particles(clumpid, particles, clump_map):
    """
    Load a clump's particles. If it is not there, i.e. the clump has no
    associated particles, return `None`.
    """
    try:
        return particles[clump_map[clumpid], :]
    except KeyError:
        return None


def load_parent_particles(clumpid, particles, clump_map, clumps_cat):
    """
    Load a parent halo's particles.
    """
    indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
    # We first load the particles of each clump belonging to this parent
    # and then concatenate them for further analysis.
    clumps = []
    for ind in indxs:
        parts = load_clump_particles(ind, particles, clump_map)
        if parts is not None:
            clumps.append(parts)

    if len(clumps) == 0:
        return None
    return numpy.concatenate(clumps)


# We loop over simulations. Here later optionally add MPI.
for i, nsim in enumerate(nsims):
    if rank == 0:

@@ -91,10 +62,11 @@ for i, nsim in enumerate(nsims):
    nsnap = max(paths.get_snapshots(nsim))
    box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)

    particles = h5py.File(paths.particle_h5py_path(nsim), 'r')["particles"]
    clump_map = h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), 'r')
    clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, maxdist=None,
                                                   minmass=None, rawdata=True,
    f = csiborgtools.read.read_h5(paths.particles_path(nsim))
    particles = f["particles"]
    clump_map = f["clumpmap"]
    clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
    clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
                                                   load_fitted=False)
    ismain = clumps_cat.ismain
    ntasks = len(clumps_cat)

@@ -108,8 +80,8 @@ for i, nsim in enumerate(nsims):
            continue

        clumpid = clumps_cat["index"][j]
        parts = load_parent_particles(clumpid, particles, clump_map,
                                      clumps_cat)
        parts = csiborgtools.read.load_parent_particles(
            clumpid, particles, clump_map, clid2map, clumps_cat)
        # If we have no particles, then do not save anything.
        if parts is None:
            continue

@@ -124,8 +96,7 @@ for i, nsim in enumerate(nsims):
        _out["r"] = r[mask]
        _out["M"] = obj["M"][mask]

        out[str(clumps_cat["index"][j])] = _out
        out[str(clumpid)] = _out

    # Finished, so we save everything.
    fout = paths.radpos_path(nsnap, nsim)
@@ -13,6 +13,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate overlap between two CSiBORG realisations."""
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from distutils.util import strtobool

@@ -26,13 +27,16 @@ except ModuleNotFoundError:
    sys.path.append("../")
    import csiborgtools
from csiborgtools.read import HaloCatalogue, read_h5

# Argument parser
parser = ArgumentParser()
parser.add_argument("--nsim0", type=int)
parser.add_argument("--nsimx", type=int)
parser.add_argument("--nmult", type=float)
parser.add_argument("--sigma", type=float)
parser.add_argument("--sigma", type=float, default=None)
parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
                    default=None)
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                    default=False)
args = parser.parse_args()

@@ -43,27 +47,52 @@ matcher = csiborgtools.match.RealisationsMatcher()

# Load the raw catalogues (i.e. no selection) including the initial CM
# positions and the particle archives.
cat0 = csiborgtools.read.HaloCatalogue(args.nsim0, paths, load_initial=True,
                                       rawdata=True)
catx = csiborgtools.read.HaloCatalogue(args.nsimx, paths, load_initial=True,
                                       rawdata=True)
halos0_archive = paths.initmatch_path(args.nsim0, "particles")
halosx_archive = paths.initmatch_path(args.nsimx, "particles")
cat0 = HaloCatalogue(args.nsim0, paths, load_initial=True,
                     minmass=("totpartmass", 1e12), with_lagpatch=True)
catx = HaloCatalogue(args.nsimx, paths, load_initial=True,
                     minmass=("totpartmass", 1e12), with_lagpatch=True)

clumpmap0 = read_h5(paths.particles_path(args.nsim0))["clumpmap"]
parts0 = read_h5(paths.initmatch_path(args.nsim0, "particles"))["particles"]
clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}

clumpmapx = read_h5(paths.particles_path(args.nsimx))["clumpmap"]
partsx = read_h5(paths.initmatch_path(args.nsimx, "particles"))["particles"]
clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}


# We generate the background density fields. Loads each halo's particles one
# by one from the archive, concatenates them and calculates the NGP density
# field.
if args.verbose:
    print(f"{datetime.now()}: generating the background density fields.",
          flush=True)
delta_bckg = overlapper.make_bckg_delta(halos0_archive, verbose=args.verbose)
delta_bckg = overlapper.make_bckg_delta(halosx_archive, delta=delta_bckg,
delta_bckg = overlapper.make_bckg_delta(parts0, clumpmap0, clid2map0, cat0,
                                        verbose=args.verbose)
delta_bckg = overlapper.make_bckg_delta(partsx, clumpmapx, clid2mapx, catx,
                                        delta=delta_bckg, verbose=args.verbose)

# We calculate the overlap between the NGP fields.
if args.verbose:
    print(f"{datetime.now()}: crossing the simulations.", flush=True)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, halos0_archive,
                                         halosx_archive, delta_bckg)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx, clumpmap0,
                                         clumpmapx, delta_bckg,
                                         verbose=args.verbose)
# We wish to store the halo IDs of the matches, not their array positions in
# the catalogues.
match_hids = deepcopy(match_indxs)
for i, matches in enumerate(match_indxs):
    for j, match in enumerate(matches):
        match_hids[i][j] = catx["index"][match]

fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=False)
numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
            ngp_overlap=ngp_overlap)
if args.verbose:
    print(f"{datetime.now()}: calculated NGP overlap, saved to {fout}.",
          flush=True)

if not args.smoothen:
    quit()

# We now smoothen up the background density field for the smoothed overlap
# calculation.

@@ -72,16 +101,12 @@ if args.verbose:
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)

# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
if args.verbose:
    print(f"{datetime.now()}: calculating smoothed overlaps.", flush=True)
smoothed_overlap = matcher.smoothed_cross(cat0, catx, halos0_archive,
                                          halosx_archive, delta_bckg,
smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
                                          clumpmap0, clumpmapx, delta_bckg,
                                          match_indxs, smooth_kwargs)

# We save the results at long last.
fout = paths.overlap_path(args.nsim0, args.nsimx)
fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=True)
numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=args.sigma)
if args.verbose:
    print(f"{datetime.now()}: saving results to `{fout}`.", flush=True)
numpy.savez(fout, match_indxs=match_indxs, ngp_overlap=ngp_overlap,
            smoothed_overlap=smoothed_overlap, sigma=args.sigma)
print(f"{datetime.now()}: all finished.", flush=True)
print(f"{datetime.now()}: calculated smoothed overlap, saved to {fout}.",
      flush=True)
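Note: `make_bckg_delta` above accumulates the particles of every halo in a catalogue onto a grid with nearest-grid-point (NGP) assignment; passing `delta=` lets the second call add to the field from the first. A minimal sketch of NGP mass deposition, assuming positions come as x, y, z, M in box units on [0, 1) (an illustration, not the csiborgtools implementation):

import numpy

def ngp_delta_sketch(parts, ngrid, delta=None):
    # Deposit particle masses onto an ngrid^3 field, one cell per particle.
    if delta is None:
        delta = numpy.zeros((ngrid, ngrid, ngrid), dtype=numpy.float32)
    cells = numpy.clip((parts[:, :3] * ngrid).astype(numpy.int64),
                       0, ngrid - 1)
    numpy.add.at(delta, (cells[:, 0], cells[:, 1], cells[:, 2]), parts[:, 3])
    return delta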
@@ -12,18 +12,20 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to load in the simulation particles and dump them to a HDF5 file.
Creates a mapping to access directly particles of a single clump.
Script to load in the simulation particles, sort them by their clump ID and
dump into a HDF5 file. Stores the first and last index of each clump in the
particle array. This can be used for fast slicing of the array to access
particles of a single clump.
"""
from datetime import datetime
from distutils.util import strtobool
from gc import collect

import h5py
import numba
import numpy
from mpi4py import MPI
from tqdm import tqdm
from tqdm import trange

try:
    import csiborgtools

@@ -44,75 +46,109 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
                    help="IC realisations. If `-1` processes all simulations.")
parser.add_argument("--pos_only", type=lambda x: bool(strtobool(x)),
                    help="Do we only dump positions?")
parser.add_argument("--dtype", type=str, choices=["float32", "float64"],
                    default="float32",)
args = parser.parse_args()

verbose = nproc == 1
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)

if args.pos_only:
    pars_extract = ['x', 'y', 'z', 'M']
else:
    pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
# Keep "ID" as the last column!
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]

if args.ics is None or args.ics[0] == -1:
    ics = paths.get_ics(tonew=False)
else:
    ics = args.ics


@numba.jit(nopython=True)
def minmax_clump(clid, clump_ids, start_loop=0):
    """
    Find the start and end index of a clump in a sorted array of clump IDs.
    This is much faster than using `numpy.where` and then `numpy.min` and
    `numpy.max`.
    """
    start = None
    end = None

    for i in range(start_loop, clump_ids.size):
        n = clump_ids[i]
        if n == clid:
            if start is None:
                start = i
            end = i
        elif n > clid:
            break
    return start, end


# MPI loop over individual simulations. We read in the particles from RAMSES
# files and dump them to a HDF5 file.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
    nsim = ics[i]
    nsnap = max(paths.get_snapshots(nsim))
    print(f"{datetime.now()}: Rank {rank} loading particles {nsim}.",
    fname = paths.particles_path(nsim)
    # We first read in the clump IDs of the particles and infer the sorting.
    # Right away we dump the clump IDs to a HDF5 file and clear up memory.
    print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
          flush=True)
    part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
    sort_indxs = numpy.argsort(part_cids).astype(numpy.int32)
    part_cids = part_cids[sort_indxs]
    with h5py.File(fname, "w") as f:
        f.create_dataset("clump_ids", data=part_cids)
        f.close()
    del part_cids
    collect()

    parts = partreader.read_particle(nsnap, nsim, pars_extract,
                                     return_structured=False, verbose=verbose)
    if args.dtype == "float64":
        parts = parts.astype(numpy.float64)

    kind = "pos" if args.pos_only else None

    print(f"{datetime.now()}: Rank {rank} dumping particles from {nsim}.",
    # Next we read in the particles and sort them by their clump ID.
    # We cannot directly read this as an unstructured array because the
    # float32 precision is insufficient to capture the clump IDs.
    parts, pids = partreader.read_particle(
        nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
    # Now we in two steps save the particles and particle IDs.
    print(f"{datetime.now()}: rank {rank} dumping particles from {nsim}.",
          flush=True)
    parts = parts[sort_indxs]
    pids = pids[sort_indxs]
    del sort_indxs
    collect()

    with h5py.File(paths.particle_h5py_path(nsim, kind, args.dtype), "w") as f:
    with h5py.File(fname, "r+") as f:
        f.create_dataset("particle_ids", data=pids)
        f.close()
    del pids
    collect()

    with h5py.File(fname, "r+") as f:
        f.create_dataset("particles", data=parts)
        f.close()
    del parts
    collect()
    print(f"{datetime.now()}: Rank {rank} finished dumping of {nsim}.",
          flush=True)
    # If we are dumping only particle positions, then we are done.
    if args.pos_only:
        continue

    print(f"{datetime.now()}: Rank {rank} mapping particles from {nsim}.",
    print(f"{datetime.now()}: rank {rank} creating clump mapping for {nsim}.",
          flush=True)
    # If not, then load the clump IDs and prepare the memory mapping. We find
    # which array positions correspond to which clump IDs and save it. With
    # this we can then lazily load into memory the particles for each clump.
    part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
    cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
                                            rawdata=True)
    clumpinds = cat["index"]
    # Some of the clumps have no particles, so we do not loop over them.
    clumpinds = clumpinds[numpy.isin(clumpinds, part_cids)]

    out = {}
    for i, cid in enumerate(tqdm(clumpinds) if verbose else clumpinds):
        out.update({str(cid): numpy.where(part_cids == cid)[0]})
    # Load clump IDs back to memory.
    with h5py.File(fname, "r") as f:
        part_cids = f["clump_ids"][:]
    # We loop over the unique clump IDs.
    unique_clump_ids = numpy.unique(part_cids)
    clump_map = numpy.full((unique_clump_ids.size, 3), numpy.nan,
                           dtype=numpy.int32)
    start_loop = 0
    niters = unique_clump_ids.size
    for i in trange(niters) if verbose else range(niters):
        clid = unique_clump_ids[i]
        k0, kf = minmax_clump(clid, part_cids, start_loop=start_loop)
        clump_map[i, 0] = clid
        clump_map[i, 1] = k0
        clump_map[i, 2] = kf
        start_loop = kf

    # We save the mapping to a HDF5 file.
    with h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), "w") as f:
        for cid, indxs in out.items():
            f.create_dataset(cid, data=indxs)
    with h5py.File(paths.particles_path(nsim), "r+") as f:
        f.create_dataset("clumpmap", data=clump_map)
        f.close()

    del part_cids, cat, clumpinds, out
    del part_cids
    collect()
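Note: each `clump_map` row written above stores `(clid, k0, kf)`, so once the particle array is sorted by clump ID, one clump is a single contiguous slice. A sketch of how the readers in this commit consume it (`load_clump_particles` in `csiborgtools.read` is the real implementation; `clid2map` is the dictionary built in the fitting scripts above):

def load_clump_particles_sketch(clid, particles, clump_map, clid2map):
    # `clid2map` maps a clump ID to its row in `clump_map`; the row holds the
    # first and last (inclusive) index of that clump in the sorted array.
    try:
        k0, kf = clump_map[clid2map[clid], 1:]
    except KeyError:
        return None  # the clump has no associated particles
    return particles[k0:kf + 1, :]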
@@ -1,199 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass, Lagrangian patch size in the
initial snapshot and the particle mapping.
"""
from argparse import ArgumentParser
from os.path import join
from datetime import datetime
from gc import collect
import joblib
from os import remove

import h5py
import numpy
from mpi4py import MPI
from tqdm import trange

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1

# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
                    help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
ftemp = lambda kind, nsim, rank: join(paths.temp_dumpdir, f"{kind}_{nsim}_{rank}.p")  # noqa

if args.ics is None or args.ics[0] == -1:
    ics = paths.get_ics(tonew=True)
else:
    ics = args.ics

# We loop over simulations. Each simulation is then processed with MPI, rank 0
# loads the data and broadcasts it to other ranks.
for nsim in ics:
    nsnap = max(paths.get_snapshots(nsim))
    if rank == 0:
        print(f"{datetime.now()}: reading simulation {nsim}.", flush=True)

        # We first load particles in the initial and final snapshots and sort
        # them by their particle IDs so that we can match them by array
        # position. `clump_ids` are the clump IDs of particles.
        part0 = partreader.read_particle(1, nsim, ["x", "y", "z", "M", "ID"],
                                         verbose=True,
                                         return_structured=False)
        part0 = part0[numpy.argsort(part0[:, -1])]
        part0 = part0[:, :-1]  # Now we no longer need the particle IDs

        pid = partreader.read_particle(nsnap, nsim, ["ID"], verbose=True,
                                       return_structured=False).reshape(-1, )
        clump_ids = partreader.read_clumpid(nsnap, nsim, verbose=True)
        clump_ids = clump_ids[numpy.argsort(pid)]
        # Release the particle IDs, we will not need them anymore now that
        # both particle arrays are matched in ordering.
        del pid
        collect()

        # Particles whose clump ID is 0 are unassigned to a clump, so we can
        # get rid of them to speed up subsequent operations. We will not need
        # these. Again we release the mask.
        mask = clump_ids > 0
        clump_ids = clump_ids[mask]
        part0 = part0[mask, :]
        del mask
        collect()

        print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
        with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
            f.create_dataset("particles", data=part0)

        print(f"{datetime.now()}: broadcasting simulation {nsim}.", flush=True)
    # Stop all ranks and figure out array shapes from the 0th rank
    comm.Barrier()
    if rank == 0:
        shape = numpy.array([*part0.shape], dtype=numpy.int32)
    else:
        shape = numpy.empty(2, dtype=numpy.int32)
    comm.Bcast(shape, root=0)

    # Now broadcast the particle arrays to all ranks
    if rank > 0:
        part0 = numpy.empty(shape, dtype=numpy.float32)
        clump_ids = numpy.empty(shape[0], dtype=numpy.int32)

    comm.Bcast(part0, root=0)
    comm.Bcast(clump_ids, root=0)
    if rank == 0:
        print(f"{datetime.now()}: simulation {nsim} broadcasted.", flush=True)

    # Calculate the centre of mass of each parent halo, the Lagrangian patch
    # size and optionally the initial snapshot particles belonging to this
    # parent halo. Dumping the particles will take majority of time.
    if rank == 0:
        print(f"{datetime.now()}: calculating simulation {nsim}.", flush=True)
    # We load up the clump catalogue which contains information about the
    # ultimate parent halos of each clump. We will loop only over the clump
    # IDs of ultimate parent halos and add their substructure particles and at
    # the end save these.
    cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
                                            rawdata=True)
    parent_ids = cat["index"][cat.ismain]
    parent_ids = parent_ids
    hid2arrpos = {indx: j for j, indx in enumerate(parent_ids)}
    # And we pre-allocate the output array for this simulation.
    dtype = {"names": ["index", "x", "y", "z", "lagpatch"],
             "formats": [numpy.int32] + [numpy.float32] * 4}
    # We MPI loop over the individual halos
    jobs = csiborgtools.fits.split_jobs(parent_ids.size, nproc)[rank]
    _out_fits = numpy.full(len(jobs), numpy.nan, dtype=dtype)
    _out_map = {}
    for i in trange(len(jobs)) if verbose else range(len(jobs)):
        clid = parent_ids[jobs[i]]
        _out_fits["index"][i] = clid
        mmain_indxs = cat["index"][cat["parent"] == clid]

        mmain_mask = numpy.isin(clump_ids, mmain_indxs, assume_unique=True)
        mmain_particles = part0[mmain_mask, :]
        # If the number of particles is too small, we skip this halo.
        if mmain_particles.size < 100:
            continue

        raddist, cmpos = csiborgtools.match.dist_centmass(mmain_particles)
        patchsize = csiborgtools.match.dist_percentile(raddist, [99],
                                                       distmax=0.075)
        # Write the temporary results
        _out_fits["x"][i], _out_fits["y"][i], _out_fits["z"][i] = cmpos
        _out_fits["lagpatch"][i] = patchsize
        _out_map.update({str(clid): numpy.where(mmain_mask)[0]})

    # Dump the results of this rank to a temporary file.
    joblib.dump(_out_fits, ftemp("fits", nsim, rank))
    joblib.dump(_out_map, ftemp("map", nsim, rank))

    del part0, clump_ids,
    collect()

    # Now we wait for all ranks, then collect the results and save it.
    comm.Barrier()
    if rank == 0:
        print(f"{datetime.now()}: collecting results for {nsim}.", flush=True)
        out_fits = numpy.full(parent_ids.size, numpy.nan, dtype=dtype)
        out_map = {}
        for i in range(nproc):
            # Merge the map dictionaries
            out_map = out_map | joblib.load(ftemp("map", nsim, i))
            # Now merge the structured arrays
            _out_fits = joblib.load(ftemp("fits", nsim, i))
            for j in range(_out_fits.size):
                k = hid2arrpos[_out_fits["index"][j]]
                for par in dtype["names"]:
                    out_fits[par][k] = _out_fits[par][j]

            remove(ftemp("fits", nsim, i))
            remove(ftemp("map", nsim, i))

        # Now save it
        fout_fit = paths.initmatch_path(nsim, "fit")
        print(f"{datetime.now()}: dumping fits to .. `{fout_fit}`.",
              flush=True)
        with open(fout_fit, "wb") as f:
            numpy.save(f, out_fits)

        fout_map = paths.initmatch_path(nsim, "halomap")
        print(f"{datetime.now()}: dumping mapping to .. `{fout_map}`.",
              flush=True)
        with h5py.File(fout_map, "w") as f:
            for hid, indxs in out_map.items():
                f.create_dataset(hid, data=indxs)

        # We force clean up the memory before continuing.
        del out_map, out_fits
        collect()
scripts/pre_sortinit.py (new file, 82 lines)
@@ -0,0 +1,82 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to sort the initial snapshot particles according to their final
snapshot ordering, which is sorted by the clump IDs.
"""
from argparse import ArgumentParser
from datetime import datetime

import h5py
from gc import collect
import numpy
from mpi4py import MPI

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1

# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
                    help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
# NOTE: ID has to be the last column.
pars_extract = ["x", "y", "z", "M", "ID"]

if args.ics is None or args.ics[0] == -1:
    ics = paths.get_ics(tonew=True)
else:
    ics = args.ics

# MPI loop over simulations: each rank processes its assigned IC realisations.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
    nsim = ics[i]
    nsnap = max(paths.get_snapshots(nsim))

    print(f"{datetime.now()}: reading and processing simulation {nsim}.",
          flush=True)
    # We first load the particle IDs in the final snapshot.
    pidf = csiborgtools.read.read_h5(paths.particles_path(nsim))
    pidf = pidf["particle_ids"]
    # Then we load the particles in the initial snapshot and make sure that
    # their particle IDs are sorted as in the final snapshot.
    # Again, because of precision this must be read as structured.
    part0, pid0 = partreader.read_particle(
        1, nsim, pars_extract, return_structured=False, verbose=verbose)
    # First enforce them to already be sorted and then apply reverse
    # sorting from the final snapshot.
    part0 = part0[numpy.argsort(pid0)]
    del pid0
    collect()
    part0 = part0[numpy.argsort(numpy.argsort(pidf))]
    print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
    with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
        f.create_dataset("particles", data=part0)
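Note: the `argsort(argsort(pidf))` line is the inverse-permutation trick. The inner argsort gives the permutation that sorts `pidf`; the outer one inverts it, i.e. yields each ID's rank. Since `part0` was first sorted by its own particle IDs, indexing it with those ranks leaves row i holding the particle whose ID is `pidf[i]`. A tiny demonstration:

import numpy

pidf = numpy.array([42, 7, 19])              # final-snapshot particle IDs
part0 = numpy.array([[.7], [1.9], [4.2]])    # initial data, sorted by ID (7, 19, 42)

aligned = part0[numpy.argsort(numpy.argsort(pidf))]
print(aligned.ravel())                       # [4.2 0.7 1.9]; row i matches pidf[i]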