Speed up overlap (#27)

* Edit import

* Simplify patch size calculation

* Add patch size percentiles

* Add various percentiles

* Remove comment

* Update TODO

* Change to 95th percentile

* Add import

* Add KNN properties

* Add new matching initial condition

* Add import

* Remove import

* Add fast neighbours option

* Further edits to fast neighbours

* add imports

* add new overlap calculation and non-zero things

* Remove print

* Clean up code

* Fix small bug

* Remove comment

* Add run single cross match

* change values

* Edit hyperparams

* Add comment

* Add the argument parser

* Add new lagpatch calc

* New lagpatch calc

* Delete old patch definitions

* Make clump dumping once again optional

* Add lagpatch to the catalogue

* Edit print statement

* Fix small bug

* Remove init radius

* Change to lagpatch key

* Fix a small bug

* Fix little bug
This commit is contained in:
Richard Stiskalek 2023-02-05 11:46:19 +00:00 committed by GitHub
parent beb811e84c
commit 8dea3da4de
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 418 additions and 193 deletions

View file

@ -14,6 +14,10 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
MPI script to run the CSiBORG realisations matcher.
TODO
----
- [ ] Update this script
"""
import numpy
from datetime import datetime

View file

@ -20,6 +20,8 @@ Optionally also dumps the clumps information, however watch out as this will
eat up a lot of memory.
"""
import numpy
from argparse import ArgumentParser
from distutils.util import strtobool
from datetime import datetime
from mpi4py import MPI
from os.path import join
@ -37,6 +39,11 @@ comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
# Argument parser
parser = ArgumentParser()
parser.add_argument("--dump_clumps", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()
init_paths = csiborgtools.read.CSiBORGPaths(to_new=True)
fin_paths = csiborgtools.read.CSiBORGPaths(to_new=False)
nsims = init_paths.ic_ids
@ -80,7 +87,7 @@ for nsim in nsims:
collect()
if rank == 0:
print("{}: dumping clumps for simulation.".format(datetime.now()),
print("{}: dumping intermediate files.".format(datetime.now()),
flush=True)
# Grab unique clump IDs and loop over them
@ -93,29 +100,29 @@ for nsim in nsims:
x0 = part0[clump_ids == n]
# Center of mass and Lagrangian patch size
pos = numpy.vstack([x0[p] for p in ('x', 'y', 'z')]).T
cm = numpy.average(pos, axis=0, weights=x0['M'])
patch_size = csiborgtools.match.lagpatch_size(
*(x0[p] for p in ('x', 'y', 'z', 'M')))
dist, cm = csiborgtools.match.dist_centmass(x0)
patch = csiborgtools.match.dist_percentile(dist, [99], distmax=0.075)
# Dump the center of mass
with open(ftemp.format(nsim, n, "cm"), 'wb') as f:
numpy.save(f, cm)
# Dump the Lagrangian patch size
with open(ftemp.format(nsim, n, "patch_size"), 'wb') as f:
numpy.save(f, patch_size)
with open(ftemp.format(nsim, n, "lagpatch"), 'wb') as f:
numpy.save(f, patch)
# Dump the entire clump
with open(ftemp.format(nsim, n, "clump"), "wb") as f:
numpy.save(f, x0)
if args.dump_clumps:
with open(ftemp.format(nsim, n, "clump"), "wb") as f:
numpy.save(f, x0)
del part0, clump_ids
collect()
comm.Barrier()
if rank == 0:
print("Collecting CM files...", flush=True)
print("{}: collecting summary files...".format(datetime.now()),
flush=True)
# Collect the centre of masses, patch size, etc. and dump them
dtype = {"names": ['x', 'y', 'z', "patch_size", "ID"],
dtype = {"names": ['x', 'y', 'z', "lagpatch", "ID"],
"formats": [numpy.float32] * 4 + [numpy.int32]}
out = numpy.full(njobs, numpy.nan, dtype=dtype)
@ -130,34 +137,37 @@ for nsim in nsims:
remove(fpath)
# Load in the patch size
fpath = ftemp.format(nsim, n, "patch_size")
fpath = ftemp.format(nsim, n, "lagpatch")
with open(fpath, "rb") as f:
out["patch_size"][i] = numpy.load(f)
out["lagpatch"][i] = numpy.load(f)
remove(fpath)
# Store the halo ID
out["ID"][i] = n
print("Dumping CM files to .. `{}`.".format(fpermcm.format(nsim)),
flush=True)
print("{}: dumping to .. `{}`.".format(
datetime.now(), fpermcm.format(nsim)), flush=True)
with open(fpermcm.format(nsim), 'wb') as f:
numpy.save(f, out)
print("Collecting clump files...", flush=True)
out = [None] * unique_clumpids.size
dtype = {"names": ["clump", "ID"], "formats": [object, numpy.int32]}
out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype)
for i, n in enumerate(unique_clumpids):
fpath = ftemp.format(nsim, n, "clump")
with open(fpath, 'rb') as f:
fin = numpy.load(f)
out["clump"][i] = fin
out["ID"][i] = n
remove(fpath)
print("Dumping clump files to .. `{}`.".format(fpermpart.format(nsim)),
flush=True)
with open(fpermpart.format(nsim), "wb") as f:
numpy.save(f, out)
if args.dump_clumps:
print("{}: collecting particle files...".format(datetime.now()),
flush=True)
out = [None] * unique_clumpids.size
dtype = {"names": ["clump", "ID"],
"formats": [object, numpy.int32]}
out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype)
for i, n in enumerate(unique_clumpids):
fpath = ftemp.format(nsim, n, "clump")
with open(fpath, 'rb') as f:
fin = numpy.load(f)
out["clump"][i] = fin
out["ID"][i] = n
remove(fpath)
print("{}: dumping to .. `{}`.".format(
datetime.now(), fpermpart.format(nsim)), flush=True)
with open(fpermpart.format(nsim), "wb") as f:
numpy.save(f, out)
del out
collect()
del out
collect()

View file

@ -0,0 +1,66 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to test running the CSiBORG realisations matcher.
"""
import numpy
from argparse import ArgumentParser
from distutils.util import strtobool
from datetime import datetime
from os.path import join
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Argument parser. The boolean options are passed on the command line as
# strings and converted with `strtobool`. None of the options has an explicit
# default, so any option that is omitted is forwarded to the matcher as `None`.
parser = ArgumentParser()
parser.add_argument("--nmult", type=float)
parser.add_argument("--overlap", type=lambda x: bool(strtobool(x)))
parser.add_argument("--select_initial", type=lambda x: bool(strtobool(x)))
parser.add_argument("--fast_neighbours", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()

# File paths. `ic` is the hard-coded IC realisation this test script is run
# for; the output file name is later formatted with the reference simulation.
ic = 7468
fperm = join(utils.dumpdir, "overlap", "cross_{}.npy")

paths = csiborgtools.read.CSiBORGPaths(to_new=False)
paths.set_info(ic, paths.get_maximum_snapshot(ic))

print("{}: loading catalogues.".format(datetime.now()), flush=True)
cat = csiborgtools.read.CombinedHaloCatalogue(paths)
matcher = csiborgtools.match.RealisationsMatcher(cat)

# First two simulations of the combined catalogue. Only `nsim0` is used below
# (to name the output file); `nsimx` is kept for parity with the original
# script.
nsim0 = cat.n_sims[0]
nsimx = cat.n_sims[1]

print("{}: crossing the simulations.".format(datetime.now()), flush=True)
out = matcher.cross_knn_position_single(
    0, nmult=args.nmult, dlogmass=2., overlap=args.overlap,
    select_initial=args.select_initial, fast_neighbours=args.fast_neighbours)

# Dump the result
fout = fperm.format(nsim0)
print("Saving results to `{}`.".format(fout), flush=True)
with open(fout, "wb") as f:
    # Write through the already opened handle. Passing the path `fout` here
    # would make NumPy open the same file a second time and leave `f` unused.
    numpy.save(f, out)

print("All finished.", flush=True)