Add mmain and other major updates (#44)

* Move paths to a separate file

* Add mmain reader

* Add a verbosity flag

* Fix imports

* Fix bug

* Rename files

* Return ultimate parents

* Add script to generate mmain

* Remove mmain path

* edit path

* Add mmain path

* Change function name

* Rename function

* Turn off verbose

* Fix list requirement

* Edit init match paths

* Fix init pathing

* Edit paths docs

* Edit dumpdir name

* Rename path

* Fix split paths

* Remove unused import

* Add comment

* Update readme

* remove read mmain

* Rename halo catalogue

* Fix minor bugs

* Update nbs

* Add create directory option

* Move split jobs

* Move split jobs

* Remove splitting

* Add import

* Edit script

* Deeper split folder

* Fix paths bug

* Rename catalogue

* Rename Catalogue

* Add new clumpread

* Edit paths

* add knn paths

* Update commenting

* Update imports

* Add more conversions

* Update temp file

* Add a note

* Add catalogue

* Comment

* Update TODO

* Update script

* add nb

* Update

* pep8

* edit paths & pep8

* Fix knn auto paths

* add paths docs

* Add auto and cross knn paths

* Add new paths

* Simplify tpcf reading

* pep8 patch

* update readme

* Update progress

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* pep8

* Pep 8 and restructure

* add lambda spin

* add clump and halo

* add checks

* Edit halo profile fit

* Update gitignore

* backup script
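
For reference, the net effect of the mmain-related changes: catalogue construction now goes through the centralized `CSiBORGPaths` object and the new `MmainReader`. A minimal usage sketch distilled from `scripts/pre_mmain.py` below (all names come from this PR; exact signatures may differ):

```python
import numpy
import csiborgtools

# Centralized path handling replaces the ad-hoc join(dumpdir, ...) calls.
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
mmain_reader = csiborgtools.read.MmainReader(paths)

# IC realisation IDs are now fetched via `get_ics` (formerly `ic_ids`).
for nsim in paths.get_ics(tonew=False):
    nsnap = max(paths.get_snapshots(nsim))
    # Sum up the substructure of children, record each clump's ultimate
    # parent, and dump both to the canonical mmain path.
    mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
    numpy.savez(paths.mmain_path(nsnap, nsim),
                mmain=mmain, ultimate_parent=ultimate_parent)
```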
Richard Stiskalek 2023-04-18 11:02:36 +02:00 committed by GitHub
parent e0d3854277
commit fdb0df8d4c
50 changed files with 2152 additions and 1844 deletions


@@ -16,16 +16,18 @@
MPI script to calculate the matter cross power spectrum between CSiBORG
IC realisations. Units are Mpc/h.
"""
from gc import collect
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from itertools import combinations
from os import remove
from os.path import join
from itertools import combinations
from datetime import datetime
import numpy
import joblib
from mpi4py import MPI
import numpy
import Pk_library as PKL
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
@@ -47,9 +49,9 @@ nproc = comm.Get_size()
MAS = "CIC" # mass asignment scheme
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
box = csiborgtools.units.BoxUnits(paths)
box = csiborgtools.read.BoxUnits(paths)
reader = csiborgtools.read.ParticleReader(paths)
ics = paths.ic_ids(tonew=False)
ics = paths.get_ics(tonew=False)
nsims = len(ics)
# File paths
@@ -59,7 +61,7 @@ fout = join(dumpdir, "crosspk",
"out_{}_{}" + "_{}.p".format(args.halfwidth))
jobs = csiborgtools.fits.split_jobs(nsims, nproc)[rank]
jobs = csiborgtools.utils.split_jobs(nsims, nproc)[rank]
for n in jobs:
print("Rank {}@{}: saving {}th delta.".format(rank, datetime.now(), n))
nsim = ics[n]
@@ -99,7 +101,7 @@ for i in range(nsims):
combs.append((i, i))
prev_delta = [-1, None, None, None] # i, delta, aexp, length
jobs = csiborgtools.fits.split_jobs(len(combs), nproc)[rank]
jobs = csiborgtools.utils.split_jobs(len(combs), nproc)[rank]
for n in jobs:
i, j = combs[n]
print("Rank {}@{}: combination {}.".format(rank, datetime.now(), (i, j)))
@@ -153,4 +155,4 @@ if rank == 0:
remove(ftemp.format(ic, "delta") + ".npy")
remove(ftemp.format(ic, "lengths") + ".p")
print("All finished!")
print("All finished!")


@@ -13,17 +13,18 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
from os.path import join
from warnings import warn
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
import numpy
from sklearn.neighbors import NearestNeighbors
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from TaskmasterMPI import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
@@ -58,8 +59,6 @@ ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
fout = join(dumpdir, "auto", "knncdf_{}_{}.p")
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
knncdf = csiborgtools.clustering.kNN_CDF()
@@ -67,6 +66,7 @@ knncdf = csiborgtools.clustering.kNN_CDF()
# Analysis #
###############################################################################
def read_single(selection, cat):
"""Positions for single catalogue auto-correlation."""
mmask = numpy.ones(len(cat), dtype=bool)
@@ -101,11 +101,13 @@ def read_single(selection, cat):
return pos[smask, ...]
def do_auto(run, cat, ic):
"""Calculate the kNN-CDF single catalgoue autocorrelation."""
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@@ -119,13 +121,15 @@ def do_auto(run, cat, ic):
batch_size=int(config["batch_size"]), random_state=config["seed"])
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
fout.format(str(ic).zfill(5), run))
paths.knnauto_path(run, ic))
def do_cross_rand(run, cat, ic):
"""Calculate the kNN-CDF cross catalogue random correlation."""
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@@ -143,14 +147,11 @@ def do_cross_rand(run, cat, ic):
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, fout.format(str(ic).zfill(5), run))
joblib.dump({"rs": rs, "corr": corr}, paths.knnauto_path(run, ic))
def do_runs(ic):
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax,
min_mass=minmass)
cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
for run in args.runs:
if "random" in run:
do_cross_rand(run, cat, ic)
@@ -179,4 +180,4 @@ comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script
quit() # Force quit the script


@@ -13,18 +13,19 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
from warnings import warn
from os.path import join
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from itertools import combinations
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
import numpy
from sklearn.neighbors import NearestNeighbors
from os.path import join
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from TaskmasterMPI import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
@@ -67,6 +68,7 @@ knncdf = csiborgtools.clustering.kNN_CDF()
# Analysis #
###############################################################################
def read_single(selection, cat):
mmask = numpy.ones(len(cat), dtype=bool)
pos = cat.positions(False)
@@ -79,19 +81,20 @@ def read_single(selection, cat):
mmask &= (cat[psel["name"]] < pmax)
return pos[mmask, ...]
def do_cross(run, ics):
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
knn1, knn2 = NearestNeighbors(), NearestNeighbors()
cat1 = csiborgtools.read.HaloCatalogue(ics[0], paths, max_dist=Rmax)
cat1 = csiborgtools.read.ClumpsCatalogue(ics[0], paths, max_dist=Rmax)
pos1 = read_single(_config, cat1)
knn1.fit(pos1)
cat2 = csiborgtools.read.HaloCatalogue(ics[1], paths, max_dist=Rmax)
cat2 = csiborgtools.read.ClumpsCatalogue(ics[1], paths, max_dist=Rmax)
pos2 = read_single(_config, cat2)
knn2.fit(pos2)
@@ -102,9 +105,8 @@ def do_cross(run, ics):
batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knncross_path(run, ics))
joblib.dump({"rs": rs, "corr": corr},
fout.format(str(ics[0]).zfill(5), str(ics[1]).zfill(5), run))
def do_runs(ics):
print(ics)
@@ -133,4 +135,4 @@ comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script
quit() # Force quit the script


@@ -13,16 +13,18 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the auto-2PCF of CSiBORG catalogues."""
from os.path import join
from warnings import warn
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from os.path import join
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
import numpy
import joblib
import yaml
try:
import csiborgtools
except ModuleNotFoundError:
@@ -65,6 +67,7 @@ tpcf = csiborgtools.clustering.Mock2PCF()
# Analysis #
###############################################################################
def read_single(selection, cat):
"""Positions for single catalogue auto-correlation."""
mmask = numpy.ones(len(cat), dtype=bool)
@@ -99,10 +102,11 @@ def read_single(selection, cat):
return pos[smask, ...]
def do_auto(run, cat, ic):
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
warn("No configuration for run {}.".format(run), stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@@ -112,12 +116,11 @@ def do_auto(run, cat, ic):
nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
joblib.dump({"rp": rp, "wp": wp}, fout.format(str(ic).zfill(5), run))
joblib.dump({"rp": rp, "wp": wp}, paths.tpcfauto_path(run, ic))
def do_runs(ic):
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax,
min_mass=minmass)
cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
for run in args.runs:
do_auto(run, cat, ic)
@@ -143,4 +146,4 @@ comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script
quit() # Force quit the script


@@ -16,17 +16,20 @@
MPI script to evaluate field properties at the galaxy positions.
"""
from argparse import ArgumentParser
from os.path import join
from os import remove
from datetime import datetime
from os import remove
from os.path import join
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/"
@@ -61,16 +64,16 @@ dtype = {"names": ["delta", "phi"], "formats": [numpy.float32] * 2}
# CSiBORG simulation paths
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
ics = paths.ic_ids(tonew=False)
ics = paths.get_ics(tonew=False)
nsims = len(ics)
for n in csiborgtools.fits.split_jobs(nsims, nproc)[rank]:
for n in csiborgtools.utils.split_jobs(nsims, nproc)[rank]:
print("Rank {}@{}: working on {}th IC.".format(rank, datetime.now(), n),
flush=True)
nsim = ics[n]
nsnap = max(paths.get_snapshots(nsim))
reader = csiborgtools.read.ParticleReader(paths)
box = csiborgtools.units.BoxUnits(nsnap, nsim, paths)
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
# Read particles and select a subset of them
particles = reader.read_particle(nsnap, nsim, ["x", "y", "z", "M"],
@@ -121,4 +124,4 @@ if rank == 0:
print("Saving results to `{}`.".format(fperm), flush=True)
with open(fperm, "wb") as f:
numpy.save(f, out)
numpy.save(f, out)


@@ -13,17 +13,20 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate overlap between two CSiBORG realisations."""
from os.path import join
from argparse import ArgumentParser
from datetime import datetime
from os.path import join
import numpy
from scipy.ndimage import gaussian_filter
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Argument parser
@@ -44,18 +47,19 @@ overlapper = csiborgtools.match.ParticleOverlap()
# Load catalogues
print("{}: loading catalogues {} and {}."
.format(datetime.now(), args.nsim0, args.nsimx), flush=True)
cat0 = csiborgtools.read.HaloCatalogue(args.nsim0, paths)
catx = csiborgtools.read.HaloCatalogue(args.nsimx, paths)
cat0 = csiborgtools.read.ClumpsCatalogue(args.nsim0, paths)
catx = csiborgtools.read.ClumpsCatalogue(args.nsimx, paths)
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsim0), flush=True)
with open(paths.clump0_path(args.nsim0), "rb") as f:
with open(paths.initmatch_path(args.nsim0, "particles"), "rb") as f:
clumps0 = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumps0)
print("{}: loading simulation {} and converting positions to cell numbers."
.format(datetime.now(), args.nsimx), flush=True)
with open(paths.clump0_path(args.nsimx), 'rb') as f:
with open(paths.initmatch_path(args.nsimx, "particles"), 'rb') as f:
clumpsx = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumpsx)
@@ -87,4 +91,4 @@ with open(fout, "wb") as f:
numpy.savez(fout, ref_indxs=ref_indxs, cross_indxs=cross_indxs,
match_indxs=match_indxs, ngp_overlap=ngp_overlap,
smoothed_overlap=smoothed_overlap, sigma=args.sigma)
print("All finished.", flush=True)
print("All finished.", flush=True)


@@ -16,17 +16,17 @@
A script to fit halos (concentration, ...). The particle array of each CSiBORG
realisation must have been split in advance by `runsplit_halos`.
"""
from os.path import join
from datetime import datetime
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Get MPI things
@@ -35,8 +35,8 @@ rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/"
loaddir = join(dumpdir, "temp")
partreader = csiborgtools.read.ParticleReader(paths)
cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64),
("Rs", numpy.float64), ("vx", numpy.float64),
("vy", numpy.float64), ("vz", numpy.float64),
@@ -47,14 +47,48 @@ cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64),
("r500", numpy.float64), ("m200", numpy.float64),
("m500", numpy.float64), ("lambda200c", numpy.float64)]
def fit_clump(particles, clump, box):
for i, nsim in enumerate(paths.ic_ids(tonew=False)):
out["npart"][n] = clump.Npart
out["rmin"][n] = clump.rmin
out["rmax"][n] = clump.rmax
out["totpartmass"][n] = clump.total_particle_mass
out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m)
out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m)
out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m)
out["Lx"][n], out["Ly"][n], out["Lz"][n] = clump.angular_momentum
for i, nsim in enumerate(paths.get_ics(tonew=False)):
if rank == 0:
print("{}: calculating {}th simulation.".format(datetime.now(), i))
print("{}: calculating {}th simulation `{}`."
.format(datetime.now(), i, nsim), flush=True)
nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.units.BoxUnits(nsnap, nsim, paths)
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
jobs = csiborgtools.fits.split_jobs(utils.Nsplits, nproc)[rank]
# Archive of clumps, keywords are their clump IDs
particle_archive = paths.split_path(nsnap, nsim)
clumpsarr = partreader.read_clumps(nsnap, nsim,
cols=["index", 'x', 'y', 'z'])
clumpid2arrpos = {ind: ii for ii, ind in enumerate(clumpsarr["index"])}
nclumps = len(particle_archive.files)
# Fit 5000 clumps at a time, then dump results
batchsize = 5000
# This rank does these `batchsize` clumps/halos
jobs = csiborgtools.utils.split_jobs(nclumps, nclumps // batchsize)[rank]
for clumpid in jobs:
... = fit_clump(particle_archive[str(clumpid)], clumpsarr[clumpid2arrpos[clumpid]])
jobs = csiborgtools.utils.split_jobs(nclumps, nproc)[rank]
for nsplit in jobs:
parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
nsplit, nsnap, nsim, paths, remove_split=False)
@@ -111,18 +145,18 @@ for i, nsim in enumerate(paths.ic_ids(tonew=False)):
# Wait until all jobs finished before moving to another simulation
comm.Barrier()
# Use the rank 0 to combine outputs for this CSiBORG realisation
if rank == 0:
print("Collecting results!")
partreader = csiborgtools.read.ParticleReader(paths)
out_collected = csiborgtools.read.combine_splits(
utils.Nsplits, nsnap, nsim, partreader, cols_collect,
remove_splits=True, verbose=False)
fname = paths.hcat_path(nsim)
print("Saving results to `{}`.".format(fname))
numpy.save(fname, out_collected)
comm.Barrier()
if rank == 0:
print("All finished! See ya!")
# # Use the rank 0 to combine outputs for this CSiBORG realisation
# if rank == 0:
# print("Collecting results!")
# partreader = csiborgtools.read.ParticleReader(paths)
# out_collected = csiborgtools.read.combine_splits(
# utils.Nsplits, nsnap, nsim, partreader, cols_collect,
# remove_splits=True, verbose=False)
# fname = paths.hcat_path(nsim)
# print("Saving results to `{}`.".format(fname))
# numpy.save(fname, out_collected)
#
# comm.Barrier()
#
# if rank == 0:
# print("All finished! See ya!")


@@ -19,14 +19,16 @@ are grouped in a clump at present redshift.
Optionally also dumps the clumps information; however, watch out, as this will
eat up a lot of memory.
"""
from gc import collect
from os.path import join
from os import remove
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
from gc import collect
from os import remove
from os.path import join
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
@@ -45,12 +47,10 @@ parser.add_argument("--dump_clumps", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
nsims = paths.ic_ids(tonew=True)
nsims = paths.get_ics(tonew=True)
# Output files
ftemp = join(paths.dumpdir, "temp_initmatch", "temp_{}_{}_{}.npy")
fpermcm = join(paths.dumpdir, "initmatch", "clump_{}_cm.npy")
fpermpart = join(paths.dumpdir, "initmatch", "clump_{}_particles.npy")
# Temporary output file
ftemp = join(paths.dumpdir, "temp", "initmatch_{}_{}_{}.npy")
for nsim in nsims:
if rank == 0:
@@ -87,7 +87,7 @@ for nsim in nsims:
unique_clumpids = numpy.unique(clump_ids)
njobs = unique_clumpids.size
jobs = csiborgtools.fits.split_jobs(njobs, nproc)[rank]
jobs = csiborgtools.utils.split_jobs(njobs, nproc)[rank]
for i in jobs:
n = unique_clumpids[i]
x0 = part0[clump_ids == n]
@@ -139,8 +139,8 @@ for nsim in nsims:
out["ID"][i] = n
print("{}: dumping to .. `{}`.".format(
datetime.now(), fpermcm.format(nsim)), flush=True)
with open(fpermcm.format(nsim), 'wb') as f:
datetime.now(), paths.initmatch_path(nsim, "cm")), flush=True)
with open(paths.initmatch_path(nsim, "cm"), 'wb') as f:
numpy.save(f, out)
if args.dump_clumps:
@@ -157,10 +157,12 @@ for nsim in nsims:
out["clump"][i] = fin
out["ID"][i] = n
remove(fpath)
print("{}: dumping to .. `{}`.".format(
datetime.now(), fpermpart.format(nsim)), flush=True)
with open(fpermpart.format(nsim), "wb") as f:
fout = paths.initmatch_path(nsim, "particles")
print("{}: dumping to .. `{}`.".format(datetime.now(), fout),
flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)
del out
collect()
collect()

scripts/pre_mmain.py (new file, +64 lines)

@@ -0,0 +1,64 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to generate the mmain files, i.e. sums up the substructure of children.
"""
from datetime import datetime
import numpy
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
mmain_reader = csiborgtools.read.MmainReader(paths)
def do_mmain(nsim):
nsnap = max(paths.get_snapshots(nsim))
# NOTE: currently works for highest snapshot anyway
mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
numpy.savez(paths.mmain_path(nsnap, nsim),
mmain=mmain, ultimate_parent=ultimate_parent)
###############################################################################
# MPI task delegation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = list(paths.get_ics(tonew=False))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_mmain, comm, verbose=False)
else:
tasks = paths.get_ics(tonew=False)
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_mmain(task)
comm.Barrier()
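
Downstream readers can load the generated file back with numpy; a sketch, assuming `mmain_path` resolves to the `.npz` archive written by `numpy.savez` above (`paths`, `nsnap` and `nsim` as in `do_mmain`):

```python
import numpy

# Hypothetical read-back of one generated mmain file.
with numpy.load(paths.mmain_path(nsnap, nsim)) as f:
    mmain = f["mmain"]                      # summed substructure of children
    ultimate_parent = f["ultimate_parent"]  # ultimate parent of each clump
```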

scripts/pre_splithalos.py (new file, +115 lines)

@@ -0,0 +1,115 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Script to split particles to indivudual files according to their clump."""
from datetime import datetime
from gc import collect
from glob import glob
from os import remove
from os.path import join
import numpy
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
verbose = nproc == 1
partcols = ['x', 'y', 'z', "vx", "vy", "vz", 'M']
def do_split(nsim):
nsnap = max(paths.get_snapshots(nsim))
reader = csiborgtools.read.ParticleReader(paths)
ftemp_base = join(
paths.temp_dumpdir,
"split_{}_{}".format(str(nsim).zfill(5), str(nsnap).zfill(5))
)
ftemp = ftemp_base + "_{}.npz"
# Load the particles and their clump IDs
particles = reader.read_particle(nsnap, nsim, partcols, verbose=verbose)
particle_clumps = reader.read_clumpid(nsnap, nsim, verbose=verbose)
# Drop all particles whose clump index is 0 (not assigned to any clump)
assigned_mask = particle_clumps != 0
particle_clumps = particle_clumps[assigned_mask]
particles = particles[assigned_mask]
del assigned_mask
collect()
# Load the clump indices
clumpinds = reader.read_clumps(nsnap, nsim, cols="index")["index"]
# Some of the clumps have no particles, so we do not loop over them
clumpinds = clumpinds[numpy.isin(clumpinds, particle_clumps)]
# Loop over the clump indices and save the particles to a temporary file
# every 10000 clumps. We will later read this back and combine into a
# single file.
out = {}
for i, clind in enumerate(tqdm(clumpinds) if verbose else clumpinds):
key = str(clind)
out.update({str(clind): particles[particle_clumps == clind]})
# REMOVE bump this back up
if i % 10000 == 0 or i == clumpinds.size - 1:
numpy.savez(ftemp.format(i), **out)
out = {}
# Clear up memory because we will be loading everything back
del particles, particle_clumps, clumpinds
collect()
# Now load back in every temporary file, combine them into a single
# dictionary and save as a single .npz file.
out = {}
for file in glob(ftemp_base + '*'):
inp = numpy.load(file)
for key in inp.files:
out.update({key: inp[key]})
remove(file)
numpy.savez(paths.split_path(nsnap, nsim), **out)
###############################################################################
# MPI task delegation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = list(paths.get_ics(tonew=False))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_split, comm, verbose=False)
else:
tasks = paths.get_ics(tonew=False)
tasks = [tasks[0]] # REMOVE
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_split(task)
comm.Barrier()
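
The split archive written by `do_split` is a single `.npz` keyed by clump-ID strings, which is what the fitting script accesses via `particle_archive.files`. A read-back sketch under the same assumptions (`paths`, `nsnap` and `nsim` as in `do_split`):

```python
import numpy

# Hypothetical: load the per-clump particle archive for one realisation.
particle_archive = numpy.load(paths.split_path(nsnap, nsim))
for clumpid in particle_archive.files:   # keys are str(clump index)
    parts = particle_archive[clumpid]    # structured array with partcols fields
    print(clumpid, parts.size)
```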


@@ -1,58 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to split particles into smaller files according to their clump
membership for faster manipulation. Currently does this for the maximum
snapshot of each simulation. Running this requires a lot of memory.
"""
from mpi4py import MPI
from datetime import datetime
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
import utils
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
sims = paths.ic_ids(False)
partcols = ["x", "y", "z", "vx", "vy", "vz", "M", "level"]
jobs = csiborgtools.fits.split_jobs(len(sims), nproc)[rank]
for icount, sim_index in enumerate(jobs):
print("{}: rank {} working {} / {} jobs."
.format(datetime.now(), rank, icount + 1, len(jobs)), flush=True)
nsim = sims[sim_index]
nsnap = max(paths.get_snapshots(nsim))
partreader = csiborgtools.read.ParticleReader(paths)
# Load the clumps, particles' clump IDs and particles.
clumps = partreader.read_clumps(nsnap, nsim)
particle_clumps = partreader.read_clumpid(nsnap, nsim, verbose=False)
particles = partreader.read_particle(nsnap, nsim, partcols, verbose=False)
# Drop all particles whose clump index is 0 (not assigned to any halo)
particle_clumps, particles = partreader.drop_zero_indx(
particle_clumps, particles)
# Dump it!
csiborgtools.fits.dump_split_particles(particles, particle_clumps, clumps,
utils.Nsplits, nsnap, nsim, paths,
verbose=False)
print("All finished!", flush=True)