Particle match & file system & phase space (#11)

* Create file system * add doc * add n_sim n_snap directly to paths * Move things to a single particle reader for consistency * add docstring * add srdcir, dumpdir and mmain_path * make boxunits work with paths * switch to using paths * add tempdumpdir * rm dependence on old functions * rm comment * rm unused import * go back to all imports * fix import bug * rm dependence on old functions * modernize code! * fix typo * fix typo * update fits to new data structureing * change docs * add julia repo * add setup * add install commands * ignore install files * add array flattening * update dependene * add positions reader * update manifest and projects * add func * update gitignore * pos matching progress * move file * rm comment * add velocities getter * fix bug * fix name bug * fix path bug * fix args func * add redshift calculation to catalogues * add shortcut to set n_sim and n_snap * if cond bug * add the cosine similarity * add verbosit iterator * add docs * update README * update README * update README
2025-07-01 12:01:11 +00:00 · 2022-11-25 10:44:08 +00:00 · 2022-11-25 10:44:08 +00:00 · 161c27d995
commit 161c27d995
parent c748c87e45
22 changed files with 1443 additions and 4178 deletions
--- a/.gitignore
+++ b/.gitignore
@ -11,3 +11,8 @@ csiborgtools/fits/_filenames.py
 csiborgtools/fits/analyse_voids_25.py
 scripts/*.sh
 scripts/*.out
+build/*
+.eggs/*
+csiborgtools.egg-info/*
+scripts/playground_*
+scripts/playground.ipynb
--- a/JuliaCSiBORGTools/Manifest.toml
+++ b/JuliaCSiBORGTools/Manifest.toml
@ -0,0 +1,344 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.8.3"
+manifest_format = "2.0"
+project_hash = "8ac89492dc497a2481ecf3e1e7f2cc6ba12b4702"
+
+[[deps.ArgTools]]
+uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+version = "1.1.1"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+
+[[deps.BenchmarkTools]]
+deps = ["JSON", "Logging", "Printf", "Profile", "Statistics", "UUIDs"]
+git-tree-sha1 = "d9a9701b899b30332bbcb3e1679c41cce81fb0e8"
+uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+version = "1.3.2"
+
+[[deps.CSTParser]]
+deps = ["Tokenize"]
+git-tree-sha1 = "3ddd48d200eb8ddf9cb3e0189fc059fd49b97c1f"
+uuid = "00ebfdb7-1f24-5e51-bd34-a7502290713f"
+version = "3.3.6"
+
+[[deps.CodeTracking]]
+deps = ["InteractiveUtils", "UUIDs"]
+git-tree-sha1 = "cc4bd91eba9cdbbb4df4746124c22c0832a460d6"
+uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
+version = "1.1.1"
+
+[[deps.CompilerSupportLibraries_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+version = "0.5.2+0"
+
+[[deps.Conda]]
+deps = ["Downloads", "JSON", "VersionParsing"]
+git-tree-sha1 = "6e47d11ea2776bc5627421d59cdcc1296c058071"
+uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d"
+version = "1.7.0"
+
+[[deps.Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+
+[[deps.Distributed]]
+deps = ["Random", "Serialization", "Sockets"]
+uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+
+[[deps.Downloads]]
+deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
+uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+version = "1.6.0"
+
+[[deps.FileIO]]
+deps = ["Pkg", "Requires", "UUIDs"]
+git-tree-sha1 = "7be5f99f7d15578798f338f5433b6c432ea8037b"
+uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
+version = "1.16.0"
+
+[[deps.FileWatching]]
+uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+
+[[deps.IJulia]]
+deps = ["Base64", "Conda", "Dates", "InteractiveUtils", "JSON", "Libdl", "Markdown", "MbedTLS", "Pkg", "Printf", "REPL", "Random", "SoftGlobalScope", "Test", "UUIDs", "ZMQ"]
+git-tree-sha1 = "98ab633acb0fe071b671f6c1785c46cd70bb86bd"
+uuid = "7073ff75-c697-5162-941a-fcdaad2a7d2a"
+version = "1.23.3"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+
+[[deps.JLLWrappers]]
+deps = ["Preferences"]
+git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1"
+uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
+version = "1.4.1"
+
+[[deps.JSON]]
+deps = ["Dates", "Mmap", "Parsers", "Unicode"]
+git-tree-sha1 = "3c837543ddb02250ef42f4738347454f95079d4e"
+uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+version = "0.21.3"
+
+[[deps.JuliaInterpreter]]
+deps = ["CodeTracking", "InteractiveUtils", "Random", "UUIDs"]
+git-tree-sha1 = "a79c4cf60cc7ddcdcc70acbb7216a5f9b4f8d188"
+uuid = "aa1ae85d-cabe-5617-a682-6adf51b2e16a"
+version = "0.9.16"
+
+[[deps.LibCURL]]
+deps = ["LibCURL_jll", "MozillaCACerts_jll"]
+uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+version = "0.6.3"
+
+[[deps.LibCURL_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
+uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+version = "7.84.0+0"
+
+[[deps.LibGit2]]
+deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+
+[[deps.LibSSH2_jll]]
+deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+version = "1.10.2+0"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[[deps.LinearAlgebra]]
+deps = ["Libdl", "libblastrampoline_jll"]
+uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+
+[[deps.Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[deps.LoweredCodeUtils]]
+deps = ["JuliaInterpreter"]
+git-tree-sha1 = "dedbebe234e06e1ddad435f5c6f4b85cd8ce55f7"
+uuid = "6f1432cf-f94c-5a45-995e-cdbf5db27b0b"
+version = "2.2.2"
+
+[[deps.MacroTools]]
+deps = ["Markdown", "Random"]
+git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2"
+uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
+version = "0.5.10"
+
+[[deps.Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+
+[[deps.MbedTLS]]
+deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "Random", "Sockets"]
+git-tree-sha1 = "03a9b9718f5682ecb107ac9f7308991db4ce395b"
+uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
+version = "1.1.7"
+
+[[deps.MbedTLS_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+version = "2.28.0+0"
+
+[[deps.Mmap]]
+uuid = "a63ad114-7e13-5084-954f-fe012c677804"
+
+[[deps.MozillaCACerts_jll]]
+uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+version = "2022.2.1"
+
+[[deps.NPZ]]
+deps = ["FileIO", "ZipFile"]
+git-tree-sha1 = "60a8e272fe0c5079363b28b0953831e2dd7b7e6f"
+uuid = "15e1cf62-19b3-5cfa-8e77-841668bca605"
+version = "0.4.3"
+
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.2.0"
+
+[[deps.OpenBLAS_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
+version = "0.3.20+0"
+
+[[deps.OrderedCollections]]
+git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
+uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+version = "1.4.1"
+
+[[deps.Parsers]]
+deps = ["Dates", "SnoopPrecompile"]
+git-tree-sha1 = "b64719e8b4504983c7fca6cc9db3ebc8acc2a4d6"
+uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+version = "2.5.1"
+
+[[deps.Pkg]]
+deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+version = "1.8.0"
+
+[[deps.Preferences]]
+deps = ["TOML"]
+git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d"
+uuid = "21216c6a-2e73-6563-6e65-726566657250"
+version = "1.3.0"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+
+[[deps.Profile]]
+deps = ["Printf"]
+uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
+
+[[deps.PyCall]]
+deps = ["Conda", "Dates", "Libdl", "LinearAlgebra", "MacroTools", "Serialization", "VersionParsing"]
+git-tree-sha1 = "53b8b07b721b77144a0fbbbc2675222ebf40a02d"
+uuid = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
+version = "1.94.1"
+
+[[deps.REPL]]
+deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[[deps.Random]]
+deps = ["SHA", "Serialization"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[deps.Requires]]
+deps = ["UUIDs"]
+git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
+uuid = "ae029012-a4dd-5104-9daa-d747884805df"
+version = "1.3.0"
+
+[[deps.Revise]]
+deps = ["CodeTracking", "Distributed", "FileWatching", "JuliaInterpreter", "LibGit2", "LoweredCodeUtils", "OrderedCollections", "Pkg", "REPL", "Requires", "UUIDs", "Unicode"]
+git-tree-sha1 = "dad726963ecea2d8a81e26286f625aee09a91b7c"
+uuid = "295af30f-e4ad-537b-8983-00126c2a3abe"
+version = "3.4.0"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+
+[[deps.SnoopPrecompile]]
+git-tree-sha1 = "f604441450a3c0569830946e5b33b78c928e1a85"
+uuid = "66db9d55-30c0-4569-8b51-7e840670fc0c"
+version = "1.0.1"
+
+[[deps.Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+
+[[deps.SoftGlobalScope]]
+deps = ["REPL"]
+git-tree-sha1 = "986ec2b6162ccb95de5892ed17832f95badf770c"
+uuid = "b85f4697-e234-5449-a836-ec8e2f98b302"
+version = "1.1.0"
+
+[[deps.SparseArrays]]
+deps = ["LinearAlgebra", "Random"]
+uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+
+[[deps.StaticLint]]
+deps = ["CSTParser", "Serialization", "SymbolServer"]
+git-tree-sha1 = "1152934b19a8a296db95ef6e1d454d4acc2aa79d"
+uuid = "b3cc710f-9c33-5bdb-a03d-a94903873e97"
+version = "8.1.0"
+
+[[deps.Statistics]]
+deps = ["LinearAlgebra", "SparseArrays"]
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+
+[[deps.SymbolServer]]
+deps = ["InteractiveUtils", "LibGit2", "Markdown", "Pkg", "REPL", "SHA", "Serialization", "Sockets", "UUIDs"]
+git-tree-sha1 = "d675e3a860523660421b1ca33543c06db2783a9b"
+uuid = "cf896787-08d5-524d-9de7-132aaa0cb996"
+version = "7.2.1"
+
+[[deps.TOML]]
+deps = ["Dates"]
+uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+version = "1.0.0"
+
+[[deps.Tar]]
+deps = ["ArgTools", "SHA"]
+uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+version = "1.10.1"
+
+[[deps.Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[[deps.Tokenize]]
+git-tree-sha1 = "2b3af135d85d7e70b863540160208fa612e736b9"
+uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624"
+version = "0.5.24"
+
+[[deps.UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+
+[[deps.VersionParsing]]
+git-tree-sha1 = "58d6e80b4ee071f5efd07fda82cb9fbe17200868"
+uuid = "81def892-9a0e-5fdd-b105-ffc91e053289"
+version = "1.3.0"
+
+[[deps.ZMQ]]
+deps = ["FileWatching", "Sockets", "ZeroMQ_jll"]
+git-tree-sha1 = "356d2bdcc0bce90aabee1d1c0f6d6f301eda8f77"
+uuid = "c2297ded-f4af-51ae-bb23-16f91089e4e1"
+version = "1.2.2"
+
+[[deps.ZeroMQ_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "libsodium_jll"]
+git-tree-sha1 = "fe5c65a526f066fb3000da137d5785d9649a8a47"
+uuid = "8f1865be-045e-5c20-9c9f-bfbfb0764568"
+version = "4.3.4+0"
+
+[[deps.ZipFile]]
+deps = ["Libdl", "Printf", "Zlib_jll"]
+git-tree-sha1 = "ef4f23ffde3ee95114b461dc667ea4e6906874b2"
+uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
+version = "0.10.0"
+
+[[deps.Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+version = "1.2.12+3"
+
+[[deps.libblastrampoline_jll]]
+deps = ["Artifacts", "Libdl", "OpenBLAS_jll"]
+uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
+version = "5.1.1+0"
+
+[[deps.libsodium_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "848ab3d00fe39d6fbc2a8641048f8f272af1c51e"
+uuid = "a9144af2-ca23-56d9-984f-0d03f7b5ccf8"
+version = "1.0.20+0"
+
+[[deps.nghttp2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+version = "1.48.0+0"
+
+[[deps.p7zip_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
+version = "17.4.0+0"
--- a/JuliaCSiBORGTools/Project.toml
+++ b/JuliaCSiBORGTools/Project.toml
@ -0,0 +1,12 @@
+name = "JuliaCSiBORGTools"
+uuid = "6ceada58-95f6-416f-bef9-d16b3bb7d5db"
+authors = ["rstiskalek <richard.stiskalek@protonmail.com>"]
+version = "0.1.0"
+
+[deps]
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a"
+NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605"
+PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
+Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
+StaticLint = "b3cc710f-9c33-5bdb-a03d-a94903873e97"
--- a/JuliaCSiBORGTools/src/JuliaCSiBORGTools.jl
+++ b/JuliaCSiBORGTools/src/JuliaCSiBORGTools.jl
@ -0,0 +1,22 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+module JuliaCSiBORGTools
+
+include("./particles_match.jl")
+
+export halo_parts
+
+end # module JuliaCSiBORGTools
--- a/JuliaCSiBORGTools/src/particles_match.jl
+++ b/JuliaCSiBORGTools/src/particles_match.jl
@ -0,0 +1,29 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+
+"""
+    halo_parts(clumpid::Int, partids::Vector{<:Int}, clumpids::Vector{<:Int})
+
+Return particle IDs belonging to a given clump.
+
+# Arguments
+- `clumpid::Integer`: the ID of the clump.
+- `partids::Vector{<:Integer}`: vector of shape `(n_particles,)` with the particle IDs.
+- `clumpids::Vector{<:Integer}`: vector of shape `(n_particles, )` with the particles' clump IDs.
+"""
+function halo_parts(clumpid::Integer, partids::Vector{<:Integer}, clumpids::Vector{<:Integer})
+    return partids[clumpids .== clumpid]
+end
--- a/README.md
+++ b/README.md
@ -2,7 +2,7 @@

 ## :scroll: Short-term TODO
 - [ ] Find the distribution of particles in the first snapshot
- [ ] Implement Max's halo matching
+- [ ] Make sure I am not taking halos outside of the well-resolved region.
 - [ ] Implement a custom model for matchin galaxies to halos.


@ -13,4 +13,4 @@


 ## :bulb: Open questions
- Completeness of clusters. Optical clusters catalogues are a bit of a mess..
+- What scaling of the search region? No reason for it to be a multiple of $R_{200c}$.
--- a/csiborgtools/fits/halo.py
+++ b/csiborgtools/fits/halo.py
@ -22,7 +22,7 @@ from os import remove
 from warnings import warn
 from os.path import join
 from tqdm import trange
-from ..read import nparts_to_start_ind
+from ..read import ParticleReader


 def clump_with_particles(particle_clumps, clumps):
@ -44,14 +44,14 @@ def clump_with_particles(particle_clumps, clumps):
    return numpy.isin(clumps["index"], particle_clumps)


-def distribute_halos(Nsplits, clumps):
+def distribute_halos(n_splits, clumps):
    """
    Evenly distribute clump indices to smaller splits. Clumps should only be
    clumps that contain particles.

    Parameters
    ----------
-    Nsplits : int
+    n_splits : int
        Number of splits.
    clumps : structured array
        The clumps array.
@ -59,22 +59,23 @@ def distribute_halos(Nsplits, clumps):
    Returns
    -------
    splits : 2-dimensional array
-        Array of starting and ending indices of each CPU of shape `(Njobs, 2)`.
+        Array of starting and ending indices of each CPU of shape
+        `(njobs, 2)`.
    """
    # Make sure these are unique IDs
    indxs = clumps["index"]
    if indxs.size > numpy.unique((indxs)).size:
        raise ValueError("`clump_indxs` constains duplicate indices.")
    Ntotal = indxs.size
-    Njobs_per_cpu = numpy.ones(Nsplits, dtype=int) * Ntotal // Nsplits
-    # Split the remainder Ntotal % Njobs among the CPU
-    Njobs_per_cpu[:Ntotal % Nsplits] += 1
-    start = nparts_to_start_ind(Njobs_per_cpu)
-    return numpy.vstack([start, start + Njobs_per_cpu]).T
+    njobs_per_cpu = numpy.ones(n_splits, dtype=int) * Ntotal // n_splits
+    # Split the remainder Ntotal % njobs among the CPU
+    njobs_per_cpu[:Ntotal % n_splits] += 1
+    start = ParticleReader.nparts_to_start_ind(njobs_per_cpu)
+    return numpy.vstack([start, start + njobs_per_cpu]).T


-def dump_split_particles(particles, particle_clumps, clumps, Nsplits,
-                         dumpfolder, Nsim, Nsnap, verbose=True):
+def dump_split_particles(particles, particle_clumps, clumps, n_splits,
+                         paths, verbose=True):
    """
    Save the data needed for each split so that a process does not have to load
    everything.
@ -87,14 +88,10 @@ def dump_split_particles(particles, particle_clumps, clumps, Nsplits,
        Array of particles' clump IDs.
    clumps : structured array
        The clumps array.
-    Nsplits : int
+    n_splits : int
        Number of times to split the clumps.
-    dumpfolder : str
-        Path to the folder where to dump the splits.
-    Nsim : int
-        CSiBORG simulation index.
-    Nsnap : int
-        Snapshot index.
+    paths : py:class`csiborgtools.read.CSiBORGPaths`
+        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
    verbose : bool, optional
        Verbosity flag. By default `True`.

@ -112,10 +109,10 @@ def dump_split_particles(particles, particle_clumps, clumps, Nsplits,
             .format(with_particles.sum() / with_particles.size * 100))

    # The starting clump index of each split
-    splits = distribute_halos(Nsplits, clumps)
-    fname = join(dumpfolder, "out_{}_snap_{}_{}.npz")
+    splits = distribute_halos(n_splits, clumps)
+    fname = join(paths.temp_dumpdir, "out_{}_snap_{}_{}.npz")

-    iters = trange(Nsplits) if verbose else range(Nsplits)
+    iters = trange(n_splits) if verbose else range(n_splits)
    tot = 0
    for n in iters:
        # Lower and upper array index of the clumps array
@ -133,7 +130,7 @@ def dump_split_particles(particles, particle_clumps, clumps, Nsplits,
                "with no particles.".format(n, indxs.size, npart_unique))
        # Dump it!
        tot += mask.sum()
-        fout = fname.format(Nsim, Nsnap, n)
+        fout = fname.format(paths.n_sim, paths.n_snap, n)
        numpy.savez(fout, particles[mask], particle_clumps[mask], clumps[i:j])

    # There are particles whose clump ID is > 1 and have no counterpart in the
@ -144,15 +141,15 @@ def dump_split_particles(particles, particle_clumps, clumps, Nsplits,
            "size `{}`.".format(tot, particle_clumps.size))


-def split_jobs(Njobs, Ncpu):
+def split_jobs(njobs, ncpu):
    """
-    Split `Njobs` amongst `Ncpu`.
+    Split `njobs` amongst `ncpu`.

    Parameters
    ----------
-    Njobs : int
+    njobs : int
        Number of jobs.
-    Ncpu : int
+    ncpu : int
        Number of CPUs.

    Returns
@ -160,29 +157,25 @@ def split_jobs(Njobs, Ncpu):
    jobs : list of lists of integers
        Outer list of each CPU and inner lists for CPU's jobs.
    """
-    njobs_per_cpu, njobs_remainder = divmod(Njobs, Ncpu)
-    jobs = numpy.arange(njobs_per_cpu * Ncpu).reshape((njobs_per_cpu, Ncpu)).T
+    njobs_per_cpu, njobs_remainder = divmod(njobs, ncpu)
+    jobs = numpy.arange(njobs_per_cpu * ncpu).reshape((njobs_per_cpu, ncpu)).T
    jobs = jobs.tolist()
    for i in range(njobs_remainder):
-        jobs[i].append(njobs_per_cpu * Ncpu + i)
+        jobs[i].append(njobs_per_cpu * ncpu + i)

    return jobs


-def load_split_particles(Nsplit, dumpfolder, Nsim, Nsnap, remove_split=False):
+def load_split_particles(n_split, paths, remove_split=False):
    """
    Load particles of a split saved by `dump_split_particles`.

    Parameters
    --------
-    Nsplit : int
+    n_split : int
        Split index.
-    dumpfolder : str
-        Path to the folder where the splits were dumped.
-    Nsim : int
-        CSiBORG simulation index.
-    Nsnap : int
-        Snapshot index.
+    paths : py:class`csiborgtools.read.CSiBORGPaths`
+        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
    remove_split : bool, optional
        Whether to remove the split file. By default `False`.

@ -196,7 +189,8 @@ def load_split_particles(Nsplit, dumpfolder, Nsim, Nsnap, remove_split=False):
        Clumps belonging to this split.
    """
    fname = join(
-        dumpfolder, "out_{}_snap_{}_{}.npz".format(Nsim, Nsnap, Nsplit))
+        paths.temp_dumpdir, "out_{}_snap_{}_{}.npz".format(
+            paths.n_sim, paths.n_snap, n_split))
    file = numpy.load(fname)
    particles, clump_indxs, clumps = (file[f] for f in file.files)
    if remove_split:
--- a/csiborgtools/match/match.py
+++ b/csiborgtools/match/match.py
@ -122,7 +122,38 @@ class RealisationsMatcher:
        """
        return [i for i in range(self.cats.N) if i != n_sim]

-    def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=2):
+    def cosine_similarity(self, x, y):
+        r"""
+        Calculate the cosine similarity between two Cartesian vectors. Defined
+        as :math:`\Sum_{i} x_i y_{i} / (|x|  |y|)`.
+
+        Parameters
+        ----------
+        x : 1-dimensional array
+            The first vector.
+        y : 1- or 2-dimensional array
+            The second vector. Can be 2-dimensional of shape `(n_samples, 3)`,
+            in which case the calculation is broadcasted.
+
+        Returns
+        -------
+        out : float or 1-dimensional array
+            The cosine similarity. If y is 1-dimensinal returns only a float.
+        """
+        # Quick check of dimensions
+        if x.ndim != 1:
+            raise ValueError("`x` must be a 1-dimensional array.")
+        y = y.reshape(-1, 3) if y.ndim == 1 else y
+
+        out = numpy.sum(x * y, axis=1)
+        out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
+
+        if out.size == 1:
+            return out[0]
+        return out
+
+    def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=2,
+                                  verbose=True):
        r"""
        Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
        the `nsim`th simulation. Also enforces that the neighbours'
@ -153,8 +184,13 @@ class RealisationsMatcher:
        pos = self.cats[n_sim].positions

        matches = [None] * (self.cats.N - 1)
+        # Verbose iterator
+        if verbose:
+            iters = enumerate(tqdm(self.search_sim_indices(n_sim)))
+        else:
+            iters = enumerate(self.search_sim_indices(n_sim))
        # Search for neighbours in the other simulations
-        for count, i in enumerate(self.search_sim_indices(n_sim)):
+        for count, i in iters:
            dist, indxs = self.cats[i].radius_neigbours(pos, r200 * nmult)
            # Get rid of neighbors whose mass is too off
            for j, indx in enumerate(indxs):
--- a/csiborgtools/read/init.py
+++ b/csiborgtools/read/init.py
@ -13,11 +13,7 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

-from .readsim import (get_csiborg_ids, get_sim_path, get_snapshots,  # noqa
-                      get_snapshot_path, get_maximum_snapshot, read_info, nparts_to_start_ind,  # noqa
-                      open_particle, open_unbinding, read_particle,  # noqa
-                      drop_zero_indx,  # noqa
-                      read_clumpid, read_clumps, read_mmain)  # noqa
+from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, get_positions)  # noqa
 from .make_cat import (HaloCatalogue, CombinedHaloCatalogue)  # noqa
 from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, TwoMPPGroups)  # noqa
 from .outsim import (dump_split, combine_splits)  # noqa
--- a/csiborgtools/read/make_cat.py
+++ b/csiborgtools/read/make_cat.py
@ -19,9 +19,9 @@ Functions to read in the particle and clump files.
 import numpy
 from os.path import join
 from tqdm import trange
+from copy import deepcopy
 from sklearn.neighbors import NearestNeighbors
-from .readsim import (get_sim_path, read_mmain, get_csiborg_ids,
-                      get_maximum_snapshot)
+from .readsim import read_mmain
 from ..utils import (flip_cols, add_columns)
 from ..units import (BoxUnits, cartesian_to_radec)

@ -32,35 +32,23 @@ class HaloCatalogue:

    Parameters
    ----------
-    n_sim: int
-        Initial condition index.
-    n_snap: int
-        Snapshot index.
+    paths : py:class:`csiborgtools.read.CSiBORGPaths`
+        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
    minimum_m500 : float, optional
        The minimum :math:`M_{rm 500c} / M_\odot` mass. By default no
        threshold.
-    dumpdir : str, optional
-        Path to where files from `run_fit_halos` are stored. By default
-        `/mnt/extraspace/rstiskalek/csiborg/`.
-    mmain_path : str, optional
-        Path to where mmain files are stored. By default
-        `/mnt/zfsusers/hdesmond/Mmain`.
    """
    _box = None
-    _n_sim = None
-    _n_snap = None
+    _paths = None
    _data = None
    _knn = None
    _positions = None

-    def __init__(self, n_sim, n_snap, minimum_m500=None,
-                 dumpdir="/mnt/extraspace/rstiskalek/csiborg/",
-                 mmain_path="/mnt/zfsusers/hdesmond/Mmain"):
-        self._box = BoxUnits(n_snap, get_sim_path(n_sim))
+    def __init__(self, paths, minimum_m500=None):
+        self._box = BoxUnits(paths)
        minimum_m500 = 0 if minimum_m500 is None else minimum_m500
-        self._set_data(n_sim, n_snap, dumpdir, mmain_path, minimum_m500)
-        self._nsim = n_sim
-        self._nsnap = n_snap
+        self._paths = paths
+        self._set_data(minimum_m500)
        # Initialise the KNN
        knn = NearestNeighbors()
        knn.fit(self.positions)
@ -74,7 +62,6 @@ class HaloCatalogue:
        Returns
        -------
        cat : structured array
-            Catalogue.
        """
        if self._data is None:
            raise ValueError("`data` is not set!")
@ -88,7 +75,6 @@ class HaloCatalogue:
        Returns
        -------
        box : :py:class:`csiborgtools.units.BoxUnits`
-            The box object.
        """
        return self._box

@ -100,10 +86,20 @@ class HaloCatalogue:
        Returns
        -------
        cosmo : `astropy` cosmology object
-            Box cosmology.
        """
        return self.box.cosmo

+    @property
+    def paths(self):
+        """
+        The paths-handling object.
+
+        Returns
+        -------
+        paths : :py:class:`csiborgtools.read.CSiBORGPaths`
+        """
+        return self._paths
+
    @property
    def n_snap(self):
        """
@ -112,9 +108,8 @@ class HaloCatalogue:
        Returns
        -------
        n_snap : int
-            Snapshot ID.
        """
-        return self._n_snap
+        return self.paths.n_snap

    @property
    def n_sim(self):
@ -124,27 +119,37 @@ class HaloCatalogue:
        Returns
        -------
        n_sim : int
-            The IC ID.
        """
-        return self._n_sim
+        return self.paths.n_sim

-    def _set_data(self, n_sim, n_snap, dumpdir, mmain_path, minimum_m500):
+    def _set_data(self, minimum_m500):
        """
        Loads the data, merges with mmain, does various coordinate transforms.
        """
        # Load the processed data
        fname = "ramses_out_{}_{}.npy".format(
-            str(n_sim).zfill(5), str(n_snap).zfill(5))
-        data = numpy.load(join(dumpdir, fname))
+            str(self.n_sim).zfill(5), str(self.n_snap).zfill(5))
+        data = numpy.load(join(self.paths.dumpdir, fname))

        # Load the mmain file and add it to the data
-        mmain = read_mmain(n_sim, mmain_path)
+        mmain = read_mmain(self.n_sim, self.paths.mmain_path)
        data = self.merge_mmain_to_clumps(data, mmain)
        flip_cols(data, "peak_x", "peak_z")

        # Cut on number of particles and finite m200
        data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]

+        # Calculate redshift
+        pos = [data["peak_{}".format(p)] - 0.5 for p in ("x", "y", "z")]
+        vel = [data["v{}".format(p)] for p in ("x", "y", "z")]
+        zpec = self.box.box2pecredshift(*vel, *pos)
+        zobs = self.box.box2obsredshift(*vel, *pos)
+        zcosmo = self.box.box2cosmoredshift(
+            sum(pos[i]**2 for i in range(3))**0.5)
+
+        data = add_columns(data, [zpec, zobs, zcosmo],
+                           ["zpec", "zobs", "zcosmo"])
+
        # Unit conversion
        convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
                        "r200", "r500", "Rs", "rho0",
@ -203,6 +208,18 @@ class HaloCatalogue:
        """
        return self._positions

+    @property
+    def velocities(self):
+        """
+        Cartesian velocities of halos.
+
+        Returns
+        -------
+        vel : 2-dimensional array
+            Array of shape `(n_halos, 3)`.
+        """
+        return numpy.vstack([self["v{}".format(p)] for p in ("x", "y", "z")]).T
+
    def radius_neigbours(self, X, radius):
        """
        Return sorted nearest neigbours within `radius` or `X`.
@ -245,15 +262,12 @@ class CombinedHaloCatalogue:

    Parameters
    ----------
+    paths : py:class`csiborgtools.read.CSiBORGPaths`
+        CSiBORG paths-handling object. Doest not have to have set set `n_sim`
+        and `n_snap`.
    minimum_m500 : float, optional
        The minimum :math:`M_{rm 500c} / M_\odot` mass. By default no
        threshold.
-    dumpdir : str, optional
-        Path to where files from `run_fit_halos` are stored. By default
-        `/mnt/extraspace/rstiskalek/csiborg/`.
-    mmain_path : str, optional
-        Path to where mmain files are stored. By default
-        `/mnt/zfsusers/hdesmond/Mmain`.
    verbose : bool, optional
        Verbosity flag for reading the catalogues.
    """
@ -261,19 +275,18 @@ class CombinedHaloCatalogue:
    _n_snaps = None
    _cats = None

-    def __init__(self, minimum_m500=None,
-                 dumpdir="/mnt/extraspace/rstiskalek/csiborg/",
-                 mmain_path="/mnt/zfsusers/hdesmond/Mmain", verbose=True):
+    def __init__(self, paths, minimum_m500=None, verbose=True):
        # Read simulations and their maximum snapshots
        # NOTE remove this later and take all cats
-        self._n_sims = get_csiborg_ids("/mnt/extraspace/hdesmond")[:10]
-        n_snaps = [get_maximum_snapshot(get_sim_path(i)) for i in self._n_sims]
+        self._n_sims = paths.ic_ids[:10]
+        n_snaps = [paths.get_maximum_snapshot(i) for i in self._n_sims]
        self._n_snaps = numpy.asanyarray(n_snaps)

        cats = [None] * self.N
        for i in trange(self.N) if verbose else range(self.N):
-            cats[i] = HaloCatalogue(self._n_sims[i], self._n_snaps[i],
-                                    minimum_m500, dumpdir, mmain_path)
+            paths = deepcopy(paths)
+            paths.set_info(self.n_sims[i], self.n_snaps[i])
+            cats[i] = HaloCatalogue(paths, minimum_m500)
        self._cats = cats

    @property
--- a/csiborgtools/read/outsim.py
+++ b/csiborgtools/read/outsim.py
@ -21,13 +21,12 @@ import numpy
 from os.path import join
 from os import remove
 from tqdm import trange
-from .readsim import (get_sim_path, read_clumps)

 I64 = numpy.int64
 F64 = numpy.float64


-def dump_split(arr, Nsplit, Nsim, Nsnap, outdir):
+def dump_split(arr, n_split, paths):
    """
    Dump an array from a split.

@ -35,11 +34,13 @@ def dump_split(arr, Nsplit, Nsim, Nsnap, outdir):
    ----------
    arr : n-dimensional or structured array
        Array to be saved.
-    Nsplit : int
+    n_split: int
        The split index.
-    Nsim : int
+    paths : py:class`csiborgtools.read.CSiBORGPaths`
+        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
+    n_sim : int
        The CSiBORG realisation index.
-    Nsnap : int
+    n_snap : int
        The index of a redshift snapshot.
    outdir : string
        Directory where to save the temporary files.
@ -48,13 +49,14 @@ def dump_split(arr, Nsplit, Nsim, Nsnap, outdir):
    -------
    None
    """
-    Nsim = str(Nsim).zfill(5)
-    Nsnap = str(Nsnap).zfill(5)
-    fname = join(outdir, "ramses_out_{}_{}_{}.npy".format(Nsim, Nsnap, Nsplit))
+    n_sim = str(paths.n_sim).zfill(5)
+    n_snap = str(paths.n_snap).zfill(5)
+    fname = join(paths.temp_dumpdir, "ramses_out_{}_{}_{}.npy"
+                 .format(n_sim, n_snap, n_split))
    numpy.save(fname, arr)


-def combine_splits(Nsplits, Nsim, Nsnap, outdir, cols_add, remove_splits=False,
+def combine_splits(n_splits, part_reader, cols_add, remove_splits=False,
                   verbose=True):
    """
    Combine results of many splits saved from `dump_split`. Identifies to which
@ -64,14 +66,10 @@ def combine_splits(Nsplits, Nsim, Nsnap, outdir, cols_add, remove_splits=False,

    Paramaters
    ----------
-    Nsplits : int
+    n_splits : int
        The total number of clump splits.
-    Nsim : int
-        The CSiBORG realisation index.
-    Nsnap : int
-        The index of a redshift snapshot.
-    outdir : str
-        Directory where to save the new array.
+    part_reader : py:class`csiborgtools.read.ParticleReadear`
+        CSiBORG particle reader.
    cols_add : list of `(str, dtype)`
        Colums to add. Must be formatted as, for example,
        `[("npart", numpy.float64), ("totpartmass", numpy.float64)]`.
@ -86,8 +84,10 @@ def combine_splits(Nsplits, Nsim, Nsnap, outdir, cols_add, remove_splits=False,
        Clump array with appended results from the splits.
    """
    # Load clumps to see how many there are and will add to this array
-    simpath = get_sim_path(Nsim)
-    clumps = read_clumps(Nsnap, simpath, cols=None)
+    n_sim = part_reader.paths.n_sim
+    n_snap = part_reader.paths.n_snap
+    clumps = part_reader.read_clumps(cols=None)
+
    # Get the old + new dtypes and create an empty array
    descr = clumps.dtype.descr + cols_add
    out = numpy.full(clumps.size, numpy.nan, dtype=descr)
@ -96,12 +96,13 @@ def combine_splits(Nsplits, Nsim, Nsnap, outdir, cols_add, remove_splits=False,
        out[par] = clumps[par]

    # Filename of splits data
-    froot = "ramses_out_{}_{}".format(str(Nsim).zfill(5), str(Nsnap).zfill(5))
-    fname = join(outdir, froot + "_{}.npy")
+    froot = "ramses_out_{}_{}".format(
+        str(n_sim).zfill(5), str(n_snap).zfill(5))
+    fname = join(part_reader.paths.temp_dumpdir, froot + "_{}.npy")

    # Iterate over splits and add to the output array
    cols_add_names = [col[0] for col in cols_add]
-    iters = trange(Nsplits) if verbose else range(Nsplits)
+    iters = trange(n_splits) if verbose else range(n_splits)
    for n in iters:
        fnamesplit = fname.format(n)
        arr = numpy.load(fnamesplit)
--- a/csiborgtools/read/readsim.py
+++ b/csiborgtools/read/readsim.py
--- a/csiborgtools/units/box_units.py
+++ b/csiborgtools/units/box_units.py
@ -20,7 +20,7 @@ import numpy
 from scipy.interpolate import interp1d
 from astropy.cosmology import LambdaCDM
 from astropy import (constants, units)
-from ..read import read_info
+from ..read import ParticleReader


 # Map of unit conversions
@ -38,18 +38,17 @@ class BoxUnits:

    Paramaters
    ----------
-    Nsnap : int
-        Snapshot index.
-    simpath : str
-        Path to the simulation where its snapshot index folders are stored.
+    paths : py:class`csiborgtools.read.CSiBORGPaths`
+        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
    """
    _cosmo = None

-    def __init__(self, Nsnap, simpath):
+    def __init__(self, paths):
        """
        Read in the snapshot info file and set the units from it.
        """
-        info = read_info(Nsnap, simpath)
+        partreader = ParticleReader(paths)
+        info = partreader.read_info()
        pars = ["boxlen", "time", "aexp", "H0",
                "omega_m", "omega_l", "omega_k", "omega_b",
                "unit_l", "unit_d", "unit_t"]
@ -220,6 +219,8 @@ class BoxUnits:
        r"""
        Convert the box comoving distance to cosmological redshift.

+        NOTE: this likely is already the observed redshift.
+
        Parameters
        ----------
        dist : float
@ -236,8 +237,24 @@ class BoxUnits:

    def box2pecredshift(self, vx, vy, vz, px, py, pz, p0x=0, p0y=0, p0z=0):
        """
-        TODO: docs
+        Convert the box phase-space information to a peculiar redshift.

+        NOTE: there is some confusion about this.
+
+        Parameters
+        ----------
+        vx, vy, vz : 1-dimensional arrays
+            The Cartesian velocity components.
+        px, py, pz : 1-dimensional arrays
+            The Cartesian position vectors components.
+        p0x, p0y, p0z : floats
+            The centre of the box. By default 0, in which it is assumed that
+            the coordinates are already centred.
+
+        Returns
+        -------
+        pec_redshift : 1-dimensional array
+            The peculiar redshift.
        """
        # Peculiar velocity along the radial distance
        r = numpy.vstack([px - p0x, py - p0y, pz - p0z]).T
@ -251,8 +268,24 @@ class BoxUnits:

    def box2obsredshift(self, vx, vy, vz, px, py, pz, p0x=0, p0y=0, p0z=0):
        """
-        TODO: docs
+        Convert the box phase-space information to an 'observed' redshift.

+        NOTE: there is some confusion about this.
+
+        Parameters
+        ----------
+        vx, vy, vz : 1-dimensional arrays
+            The Cartesian velocity components.
+        px, py, pz : 1-dimensional arrays
+            The Cartesian position vectors components.
+        p0x, p0y, p0z : floats
+            The centre of the box. By default 0, in which it is assumed that
+            the coordinates are already centred.
+
+        Returns
+        -------
+        obs_redshift : 1-dimensional array
+            The observed redshift.
        """
        r = numpy.vstack([px - p0x, py - p0y, pz - p0z]).T
        zcosmo = self.box2cosmoredshift(numpy.sum(r**2, axis=1)**0.5)
--- a/csiborgtools/utils/init.py
+++ b/csiborgtools/utils/init.py
@ -15,4 +15,4 @@

 from .recarray_manip import (cols_to_structured, add_columns, rm_columns,  # noqa
                             list_to_ndarray, array_to_structured,  # noqa
-                             flip_cols)  # noqa
+                             flip_cols, extract_from_structured)  # noqa
--- a/csiborgtools/utils/recarray_manip.py
+++ b/csiborgtools/utils/recarray_manip.py
@ -209,3 +209,35 @@ def flip_cols(arr, col1, col2):
    dum = numpy.copy(arr[col1])
    arr[col1] = arr[col2]
    arr[col2] = dum
+
+
+def extract_from_structured(arr, cols):
+    """
+    Extract columns `cols` from a structured array. The  array dtype is set
+    to be that of the first column in `cols`.
+
+    Parameters
+    ----------
+    arr : structured array
+        Array from which to extract columns.
+    cols : list of str or str
+        Column to extract.
+
+    Returns
+    -------
+    out : 2- or 1-dimensional array
+        Array with shape `(n_particles, len(cols))`. If `len(cols)` is 1
+        flattens the array.
+    """
+    cols = [cols] if isinstance(cols, str) else cols
+    for col in cols:
+        if col not in arr.dtype.names:
+            raise ValueError("Invalid column `{}`!".format(col))
+    # Preallocate an array and populate it
+    out = numpy.zeros((arr.size, len(cols)), dtype=arr[cols[0]].dtype)
+    for i, col in enumerate(cols):
+        out[:, i] = arr[col]
+    # Optionally flatten
+    if len(cols) == 1:
+        return out.reshape(-1,)
+    return out
--- a/pymake.sh
+++ b/pymake.sh
@ -0,0 +1,2 @@
+cmd="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python -m pip install ."
+$cmd
--- a/pyremove.sh
+++ b/pyremove.sh
@ -0,0 +1,2 @@
+cmd="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python -m pip uninstall csiborgtools"
+$cmd
--- a/scripts/playground.ipynb
+++ b/scripts/playground.ipynb
--- a/scripts/run_find_initpos.jl
+++ b/scripts/run_find_initpos.jl
@ -0,0 +1,49 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+using Pkg: activate, build
+activate("../JuliaCSiBORGTools/")
+
+using JuliaCSiBORGTools
+using NPZ: npzread
+using PyCall: pyimport
+csiborgtools = pyimport("csiborgtools")
+
+verbose = true
+paths = csiborgtools.read.CSiBORGPaths()
+nsims = paths.ic_ids[:1]
+
+for nsim in nsims
+    nsnap_min = convert(Int64, paths.get_minimum_snapshot(nsim))
+    nsnap_max = convert(Int64, paths.get_maximum_snapshot(nsim))
+
+    # Get the maximum snapshot properties
+    verbose ? println("Loading snapshot $nsnap_max from simulation $nsim") : nothing
+    pids, ppos, pmass, clumpids = csiborgtools.read.get_positions(nsim, nsnap_max, get_clumpid=true, verbose=false)
+
+    println("Sizes are: ")
+    println(size(pids))
+    println(size(ppos))
+    println(size(pmass))
+
+
+
+#    # Get the minimum snapshot properties
+#    verbose ? println("Loading snapshot $nsnap_min from simulation $nsim") : nothing
+#    pids, ppos, pmass = csiborgtools.read.get_positions(nsim, nsnap_max, get_clumpid=false, verbose=false)
+
+    JuliaCSiBORGTools.halo_parts(0, partids, clumpids)
+
+end
--- a/scripts/run_fit_halos.py
+++ b/scripts/run_fit_halos.py
@ -46,20 +46,21 @@ cols_collect = [("npart", I64), ("totpartmass", F64), ("Rs", F64),
                ("rho0", F64), ("conc", F64), ("rmin", F64),
                ("rmax", F64), ("r200", F64), ("r500", F64),
                ("m200", F64), ("m500", F64)]
+paths = csiborgtools.read.CSiBORGPaths()

-Nsims = csiborgtools.read.get_csiborg_ids("/mnt/extraspace/hdesmond")
-for i, Nsim in enumerate(Nsims):
+for i, n_sim in enumerate(paths.ic_ids):
    if rank == 0:
        print("{}: calculating {}th simulation.".format(datetime.now(), i))
+    # Correctly set the paths!
+    n_snap = paths.get_maximum_snapshot(n_sim)
+    paths.set_info(n_sim, n_snap)

-    simpath = csiborgtools.read.get_sim_path(Nsim)
-    Nsnap = csiborgtools.read.get_maximum_snapshot(simpath)
-    box = csiborgtools.units.BoxUnits(Nsnap, simpath)
+    box = csiborgtools.units.BoxUnits(paths)

    jobs = csiborgtools.fits.split_jobs(utils.Nsplits, nproc)[rank]
-    for Nsplit in jobs:
+    for n_split in jobs:
        parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
-            Nsplit, loaddir, Nsim, Nsnap, remove_split=False)
+            n_split, paths, remove_split=False)

        N = clumps.size
        cols = [("index", I64), ("npart", I64), ("totpartmass", F64),
@ -79,9 +80,9 @@ for i, Nsim in enumerate(Nsims):
            out["rmin"][n] = clump.rmin
            out["rmax"][n] = clump.rmax
            out["totpartmass"][n] = clump.total_particle_mass
-            out["vx"] = numpy.average(clump.vel[:, 0], weights=clump.m)
-            out["vy"] = numpy.average(clump.vel[:, 1], weights=clump.m)
-            out["vz"] = numpy.average(clump.vel[:, 2], weights=clump.m)
+            out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m)
+            out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m)
+            out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m)

            # Spherical overdensity radii and masses
            rs, ms = clump.spherical_overdensity_mass([200, 500])
@ -100,7 +101,7 @@ for i, Nsim in enumerate(Nsims):
                    out["rho0"][n] = nfwpost.rho0_from_Rs(Rs)
                    out["conc"][n] = out["r200"][n] / Rs

-        csiborgtools.read.dump_split(out, Nsplit, Nsim, Nsnap, dumpdir)
+        csiborgtools.read.dump_split(out, n_split, paths)

    # Wait until all jobs finished before moving to another simulation
    comm.Barrier()
@ -108,11 +109,13 @@ for i, Nsim in enumerate(Nsims):
    # Use the rank 0 to combine outputs for this CSiBORG realisation
    if rank == 0:
        print("Collecting results!")
+        partreader = csiborgtools.read.ParticleReader(paths)
        out_collected = csiborgtools.read.combine_splits(
-            utils.Nsplits, Nsim, Nsnap, utils.dumpdir, cols_collect,
-            remove_splits=True, verbose=False)
-        fname = join(utils.dumpdir, "ramses_out_{}_{}.npy"
-                     .format(str(Nsim).zfill(5), str(Nsnap).zfill(5)))
+            utils.Nsplits, partreader, cols_collect, remove_splits=True,
+            verbose=False)
+        fname = join(paths.dumpdir, "ramses_out_{}_{}.npy"
+                     .format(str(paths.n_sim).zfill(5),
+                             str(paths.n_snap).zfill(5)))
        print("Saving results to `{}`.".format(fname))
        numpy.save(fname, out_collected)

--- a/scripts/run_split_halos.py
+++ b/scripts/run_split_halos.py
@ -18,7 +18,6 @@ membership for faster manipulation. Currently does this for the maximum
 snapshot of each simulation. Running this will require a lot of memory.
 """

-from os.path import join
 from mpi4py import MPI
 from datetime import datetime
 try:
@ -34,29 +33,28 @@ comm = MPI.COMM_WORLD
 rank = comm.Get_rank()
 nproc = comm.Get_size()

-Nsims = csiborgtools.read.get_csiborg_ids("/mnt/extraspace/hdesmond")
+paths = csiborgtools.read.CSiBORGPaths()
+n_sims = paths.ic_ids[:1]
 partcols = ["x", "y", "z", "vx", "vy", "vz", "M", "level"]
-dumpdir = join(utils.dumpdir, "temp")

-jobs = csiborgtools.fits.split_jobs(len(Nsims), nproc)[rank]
+jobs = csiborgtools.fits.split_jobs(len(n_sims), nproc)[rank]
 for icount, sim_index in enumerate(jobs):
    print("{}: rank {} working {} / {} jobs.".format(datetime.now(), rank,
                                                     icount + 1, len(jobs)))
-    Nsim = Nsims[sim_index]
-    simpath = csiborgtools.read.get_sim_path(Nsim)
-    Nsnap = csiborgtools.read.get_maximum_snapshot(simpath)
+    n_sim = n_sims[sim_index]
+    n_snap = paths.get_maximum_snapshot(n_sim)
+    # Set paths and inifitalise a particle reader
+    paths.set_info(n_sim, n_snap)
+    partreader = csiborgtools.read.ParticleReader(paths)
    # Load the clumps, particles' clump IDs and particles.
-    clumps = csiborgtools.read.read_clumps(Nsnap, simpath)
-    particle_clumps = csiborgtools.read.read_clumpid(Nsnap, simpath,
-                                                     verbose=False)
-    particles = csiborgtools.read.read_particle(partcols, Nsnap, simpath,
-                                                verbose=False)
+    clumps = partreader.read_clumps()
+    particle_clumps = partreader.read_clumpid(verbose=False)
+    particles = partreader.read_particle(partcols, verbose=False)
    # Drop all particles whose clump index is 0 (not assigned to any halo)
-    particle_clumps, particles = csiborgtools.read.drop_zero_indx(
+    particle_clumps, particles = partreader.drop_zero_indx(
        particle_clumps, particles)
    # Dump it!
    csiborgtools.fits.dump_split_particles(particles, particle_clumps, clumps,
-                                           utils.Nsplits, dumpdir, Nsim, Nsnap,
-                                           verbose=False)
+                                           utils.Nsplits, paths, verbose=False)

 print("All finished!")
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,31 @@
+from setuptools import find_packages, setup
+
+
+BUILD_REQ = ["numpy>=1.17.3", "scipy<1.9.0"]
+INSTALL_REQ = BUILD_REQ
+INSTALL_REQ += ["scikit-learn>=1.1.0",
+                "jax[cpu]>=0.3.23",
+                "tqdm>=4.64.1",
+                "astropy>=5.1",
+                ],
+
+setup(
+    name="csiborgtools",
+    version="0.1",
+    description="CSiBORG analysis tools",
+    url="https://github.com/Richard-Sti/csiborgtools",
+    author="Richard Stiskalek",
+    author_email="richard.stiskalek@protonmail.com",
+    license="GPL-3.0",
+    packages=find_packages(),
+    python_requires=">=3.8",
+    build_requires=BUILD_REQ,
+    setup_requires=BUILD_REQ,
+    install_requires=INSTALL_REQ,
+    classifiers=[
+        "Development Status :: 1 - Planning",
+        "Intended Audience :: Science/Research",
+        "Operating System :: POSIX :: Linux",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9"]
+)