Particle match & file system & phase space (#11)

* Create file system * add doc * add n_sim n_snap directly to paths * Move things to a single particle reader for consistency * add docstring * add srdcir, dumpdir and mmain_path * make boxunits work with paths * switch to using paths * add tempdumpdir * rm dependence on old functions * rm comment * rm unused import * go back to all imports * fix import bug * rm dependence on old functions * modernize code! * fix typo * fix typo * update fits to new data structureing * change docs * add julia repo * add setup * add install commands * ignore install files * add array flattening * update dependene * add positions reader * update manifest and projects * add func * update gitignore * pos matching progress * move file * rm comment * add velocities getter * fix bug * fix name bug * fix path bug * fix args func * add redshift calculation to catalogues * add shortcut to set n_sim and n_snap * if cond bug * add the cosine similarity * add verbosit iterator * add docs * update README * update README * update README
2025-06-30 19:41:12 +00:00 · 2022-11-25 10:44:08 +00:00 · 2022-11-25 10:44:08 +00:00 · 161c27d995
commit 161c27d995
parent c748c87e45
22 changed files with 1443 additions and 4178 deletions
--- a/.gitignore
+++ b/.gitignore
@ -11,3 +11,8 @@ csiborgtools/fits/_filenames.py
 csiborgtools/fits/analyse_voids_25.py
 scripts/*.sh
 scripts/*.out
 build/*
 .eggs/*
 csiborgtools.egg-info/*
 scripts/playground_*
 scripts/playground.ipynb
--- a/JuliaCSiBORGTools/Manifest.toml
+++ b/JuliaCSiBORGTools/Manifest.toml
@ -0,0 +1,344 @@
 # This file is machine-generated - editing it directly is not advised
 julia_version = "1.8.3"
 manifest_format = "2.0"
 project_hash = "8ac89492dc497a2481ecf3e1e7f2cc6ba12b4702"
 [[deps.ArgTools]]
 uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
 version = "1.1.1"
 [[deps.Artifacts]]
 uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 [[deps.Base64]]
 uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 [[deps.BenchmarkTools]]
 deps = ["JSON", "Logging", "Printf", "Profile", "Statistics", "UUIDs"]
 git-tree-sha1 = "d9a9701b899b30332bbcb3e1679c41cce81fb0e8"
 uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 version = "1.3.2"
 [[deps.CSTParser]]
 deps = ["Tokenize"]
 git-tree-sha1 = "3ddd48d200eb8ddf9cb3e0189fc059fd49b97c1f"
 uuid = "00ebfdb7-1f24-5e51-bd34-a7502290713f"
 version = "3.3.6"
 [[deps.CodeTracking]]
 deps = ["InteractiveUtils", "UUIDs"]
 git-tree-sha1 = "cc4bd91eba9cdbbb4df4746124c22c0832a460d6"
 uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
 version = "1.1.1"
 [[deps.CompilerSupportLibraries_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
 version = "0.5.2+0"
 [[deps.Conda]]
 deps = ["Downloads", "JSON", "VersionParsing"]
 git-tree-sha1 = "6e47d11ea2776bc5627421d59cdcc1296c058071"
 uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d"
 version = "1.7.0"
 [[deps.Dates]]
 deps = ["Printf"]
 uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
 [[deps.Distributed]]
 deps = ["Random", "Serialization", "Sockets"]
 uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 [[deps.Downloads]]
 deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
 uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
 version = "1.6.0"
 [[deps.FileIO]]
 deps = ["Pkg", "Requires", "UUIDs"]
 git-tree-sha1 = "7be5f99f7d15578798f338f5433b6c432ea8037b"
 uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
 version = "1.16.0"
 [[deps.FileWatching]]
 uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
 [[deps.IJulia]]
 deps = ["Base64", "Conda", "Dates", "InteractiveUtils", "JSON", "Libdl", "Markdown", "MbedTLS", "Pkg", "Printf", "REPL", "Random", "SoftGlobalScope", "Test", "UUIDs", "ZMQ"]
 git-tree-sha1 = "98ab633acb0fe071b671f6c1785c46cd70bb86bd"
 uuid = "7073ff75-c697-5162-941a-fcdaad2a7d2a"
 version = "1.23.3"
 [[deps.InteractiveUtils]]
 deps = ["Markdown"]
 uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 [[deps.JLLWrappers]]
 deps = ["Preferences"]
 git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1"
 uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
 version = "1.4.1"
 [[deps.JSON]]
 deps = ["Dates", "Mmap", "Parsers", "Unicode"]
 git-tree-sha1 = "3c837543ddb02250ef42f4738347454f95079d4e"
 uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 version = "0.21.3"
 [[deps.JuliaInterpreter]]
 deps = ["CodeTracking", "InteractiveUtils", "Random", "UUIDs"]
 git-tree-sha1 = "a79c4cf60cc7ddcdcc70acbb7216a5f9b4f8d188"
 uuid = "aa1ae85d-cabe-5617-a682-6adf51b2e16a"
 version = "0.9.16"
 [[deps.LibCURL]]
 deps = ["LibCURL_jll", "MozillaCACerts_jll"]
 uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
 version = "0.6.3"
 [[deps.LibCURL_jll]]
 deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
 uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
 version = "7.84.0+0"
 [[deps.LibGit2]]
 deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
 uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
 [[deps.LibSSH2_jll]]
 deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
 uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
 version = "1.10.2+0"
 [[deps.Libdl]]
 uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 [[deps.LinearAlgebra]]
 deps = ["Libdl", "libblastrampoline_jll"]
 uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 [[deps.Logging]]
 uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
 [[deps.LoweredCodeUtils]]
 deps = ["JuliaInterpreter"]
 git-tree-sha1 = "dedbebe234e06e1ddad435f5c6f4b85cd8ce55f7"
 uuid = "6f1432cf-f94c-5a45-995e-cdbf5db27b0b"
 version = "2.2.2"
 [[deps.MacroTools]]
 deps = ["Markdown", "Random"]
 git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2"
 uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 version = "0.5.10"
 [[deps.Markdown]]
 deps = ["Base64"]
 uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
 [[deps.MbedTLS]]
 deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "Random", "Sockets"]
 git-tree-sha1 = "03a9b9718f5682ecb107ac9f7308991db4ce395b"
 uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
 version = "1.1.7"
 [[deps.MbedTLS_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
 version = "2.28.0+0"
 [[deps.Mmap]]
 uuid = "a63ad114-7e13-5084-954f-fe012c677804"
 [[deps.MozillaCACerts_jll]]
 uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
 version = "2022.2.1"
 [[deps.NPZ]]
 deps = ["FileIO", "ZipFile"]
 git-tree-sha1 = "60a8e272fe0c5079363b28b0953831e2dd7b7e6f"
 uuid = "15e1cf62-19b3-5cfa-8e77-841668bca605"
 version = "0.4.3"
 [[deps.NetworkOptions]]
 uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
 version = "1.2.0"
 [[deps.OpenBLAS_jll]]
 deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
 uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
 version = "0.3.20+0"
 [[deps.OrderedCollections]]
 git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
 uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 version = "1.4.1"
 [[deps.Parsers]]
 deps = ["Dates", "SnoopPrecompile"]
 git-tree-sha1 = "b64719e8b4504983c7fca6cc9db3ebc8acc2a4d6"
 uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
 version = "2.5.1"
 [[deps.Pkg]]
 deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
 uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 version = "1.8.0"
 [[deps.Preferences]]
 deps = ["TOML"]
 git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d"
 uuid = "21216c6a-2e73-6563-6e65-726566657250"
 version = "1.3.0"
 [[deps.Printf]]
 deps = ["Unicode"]
 uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 [[deps.Profile]]
 deps = ["Printf"]
 uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 [[deps.PyCall]]
 deps = ["Conda", "Dates", "Libdl", "LinearAlgebra", "MacroTools", "Serialization", "VersionParsing"]
 git-tree-sha1 = "53b8b07b721b77144a0fbbbc2675222ebf40a02d"
 uuid = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
 version = "1.94.1"
 [[deps.REPL]]
 deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
 uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 [[deps.Random]]
 deps = ["SHA", "Serialization"]
 uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 [[deps.Requires]]
 deps = ["UUIDs"]
 git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
 uuid = "ae029012-a4dd-5104-9daa-d747884805df"
 version = "1.3.0"
 [[deps.Revise]]
 deps = ["CodeTracking", "Distributed", "FileWatching", "JuliaInterpreter", "LibGit2", "LoweredCodeUtils", "OrderedCollections", "Pkg", "REPL", "Requires", "UUIDs", "Unicode"]
 git-tree-sha1 = "dad726963ecea2d8a81e26286f625aee09a91b7c"
 uuid = "295af30f-e4ad-537b-8983-00126c2a3abe"
 version = "3.4.0"
 [[deps.SHA]]
 uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
 version = "0.7.0"
 [[deps.Serialization]]
 uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 [[deps.SnoopPrecompile]]
 git-tree-sha1 = "f604441450a3c0569830946e5b33b78c928e1a85"
 uuid = "66db9d55-30c0-4569-8b51-7e840670fc0c"
 version = "1.0.1"
 [[deps.Sockets]]
 uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
 [[deps.SoftGlobalScope]]
 deps = ["REPL"]
 git-tree-sha1 = "986ec2b6162ccb95de5892ed17832f95badf770c"
 uuid = "b85f4697-e234-5449-a836-ec8e2f98b302"
 version = "1.1.0"
 [[deps.SparseArrays]]
 deps = ["LinearAlgebra", "Random"]
 uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 [[deps.StaticLint]]
 deps = ["CSTParser", "Serialization", "SymbolServer"]
 git-tree-sha1 = "1152934b19a8a296db95ef6e1d454d4acc2aa79d"
 uuid = "b3cc710f-9c33-5bdb-a03d-a94903873e97"
 version = "8.1.0"
 [[deps.Statistics]]
 deps = ["LinearAlgebra", "SparseArrays"]
 uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 [[deps.SymbolServer]]
 deps = ["InteractiveUtils", "LibGit2", "Markdown", "Pkg", "REPL", "SHA", "Serialization", "Sockets", "UUIDs"]
 git-tree-sha1 = "d675e3a860523660421b1ca33543c06db2783a9b"
 uuid = "cf896787-08d5-524d-9de7-132aaa0cb996"
 version = "7.2.1"
 [[deps.TOML]]
 deps = ["Dates"]
 uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 version = "1.0.0"
 [[deps.Tar]]
 deps = ["ArgTools", "SHA"]
 uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
 version = "1.10.1"
 [[deps.Test]]
 deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
 uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 [[deps.Tokenize]]
 git-tree-sha1 = "2b3af135d85d7e70b863540160208fa612e736b9"
 uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624"
 version = "0.5.24"
 [[deps.UUIDs]]
 deps = ["Random", "SHA"]
 uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 [[deps.Unicode]]
 uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 [[deps.VersionParsing]]
 git-tree-sha1 = "58d6e80b4ee071f5efd07fda82cb9fbe17200868"
 uuid = "81def892-9a0e-5fdd-b105-ffc91e053289"
 version = "1.3.0"
 [[deps.ZMQ]]
 deps = ["FileWatching", "Sockets", "ZeroMQ_jll"]
 git-tree-sha1 = "356d2bdcc0bce90aabee1d1c0f6d6f301eda8f77"
 uuid = "c2297ded-f4af-51ae-bb23-16f91089e4e1"
 version = "1.2.2"
 [[deps.ZeroMQ_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "libsodium_jll"]
 git-tree-sha1 = "fe5c65a526f066fb3000da137d5785d9649a8a47"
 uuid = "8f1865be-045e-5c20-9c9f-bfbfb0764568"
 version = "4.3.4+0"
 [[deps.ZipFile]]
 deps = ["Libdl", "Printf", "Zlib_jll"]
 git-tree-sha1 = "ef4f23ffde3ee95114b461dc667ea4e6906874b2"
 uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
 version = "0.10.0"
 [[deps.Zlib_jll]]
 deps = ["Libdl"]
 uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
 version = "1.2.12+3"
 [[deps.libblastrampoline_jll]]
 deps = ["Artifacts", "Libdl", "OpenBLAS_jll"]
 uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
 version = "5.1.1+0"
 [[deps.libsodium_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
 git-tree-sha1 = "848ab3d00fe39d6fbc2a8641048f8f272af1c51e"
 uuid = "a9144af2-ca23-56d9-984f-0d03f7b5ccf8"
 version = "1.0.20+0"
 [[deps.nghttp2_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
 version = "1.48.0+0"
 [[deps.p7zip_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
 version = "17.4.0+0"
--- a/JuliaCSiBORGTools/Project.toml
+++ b/JuliaCSiBORGTools/Project.toml
@ -0,0 +1,12 @@
 name = "JuliaCSiBORGTools"
 uuid = "6ceada58-95f6-416f-bef9-d16b3bb7d5db"
 authors = ["rstiskalek <richard.stiskalek@protonmail.com>"]
 version = "0.1.0"
 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a"
 NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605"
 PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
 Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
 StaticLint = "b3cc710f-9c33-5bdb-a03d-a94903873e97"
--- a/JuliaCSiBORGTools/src/JuliaCSiBORGTools.jl
+++ b/JuliaCSiBORGTools/src/JuliaCSiBORGTools.jl
@ -0,0 +1,22 @@
 # Copyright (C) 2022 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 module JuliaCSiBORGTools
 include("./particles_match.jl")
 export halo_parts
 end # module JuliaCSiBORGTools
--- a/JuliaCSiBORGTools/src/particles_match.jl
+++ b/JuliaCSiBORGTools/src/particles_match.jl
@ -0,0 +1,29 @@
 # Copyright (C) 2022 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
    halo_parts(clumpid::Int, partids::Vector{<:Int}, clumpids::Vector{<:Int})
 Return particle IDs belonging to a given clump.
 # Arguments
 - `clumpid::Integer`: the ID of the clump.
 - `partids::Vector{<:Integer}`: vector of shape `(n_particles,)` with the particle IDs.
 - `clumpids::Vector{<:Integer}`: vector of shape `(n_particles, )` with the particles' clump IDs.
 """
 function halo_parts(clumpid::Integer, partids::Vector{<:Integer}, clumpids::Vector{<:Integer})
    return partids[clumpids .== clumpid]
 end
--- a/README.md
+++ b/README.md
@ -2,7 +2,7 @@
 ## :scroll: Short-term TODO
 - [ ] Find the distribution of particles in the first snapshot
- [ ] Implement Max's halo matching
+- [ ] Make sure I am not taking halos outside of the well-resolved region.
 - [ ] Implement a custom model for matchin galaxies to halos.
@ -13,4 +13,4 @@
 ## :bulb: Open questions
- Completeness of clusters. Optical clusters catalogues are a bit of a mess..
+- What scaling of the search region? No reason for it to be a multiple of $R_{200c}$.
--- a/csiborgtools/fits/halo.py
+++ b/csiborgtools/fits/halo.py
@ -22,7 +22,7 @@ from os import remove
 from warnings import warn
 from os.path import join
 from tqdm import trange
-from ..read import nparts_to_start_ind
+from ..read import ParticleReader
 def clump_with_particles(particle_clumps, clumps):
@ -44,14 +44,14 @@ def clump_with_particles(particle_clumps, clumps):
    return numpy.isin(clumps["index"], particle_clumps)
-def distribute_halos(Nsplits, clumps):
+def distribute_halos(n_splits, clumps):
    """
    Evenly distribute clump indices to smaller splits. Clumps should only be
    clumps that contain particles.
    Parameters
    ----------
-    Nsplits : int
+    n_splits : int
        Number of splits.
    clumps : structured array
        The clumps array.
@ -59,22 +59,23 @@ def distribute_halos(Nsplits, clumps):
    Returns
    -------
    splits : 2-dimensional array
-        Array of starting and ending indices of each CPU of shape `(Njobs, 2)`.
+        Array of starting and ending indices of each CPU of shape
        `(njobs, 2)`.
    """
    # Make sure these are unique IDs
    indxs = clumps["index"]
    if indxs.size > numpy.unique((indxs)).size:
        raise ValueError("`clump_indxs` constains duplicate indices.")
    Ntotal = indxs.size
-    Njobs_per_cpu = numpy.ones(Nsplits, dtype=int) * Ntotal // Nsplits
+    njobs_per_cpu = numpy.ones(n_splits, dtype=int) * Ntotal // n_splits
-    # Split the remainder Ntotal % Njobs among the CPU
+    # Split the remainder Ntotal % njobs among the CPU
-    Njobs_per_cpu[:Ntotal % Nsplits] += 1
+    njobs_per_cpu[:Ntotal % n_splits] += 1
-    start = nparts_to_start_ind(Njobs_per_cpu)
+    start = ParticleReader.nparts_to_start_ind(njobs_per_cpu)
-    return numpy.vstack([start, start + Njobs_per_cpu]).T
+    return numpy.vstack([start, start + njobs_per_cpu]).T
-def dump_split_particles(particles, particle_clumps, clumps, Nsplits,
+def dump_split_particles(particles, particle_clumps, clumps, n_splits,
-                         dumpfolder, Nsim, Nsnap, verbose=True):
+                         paths, verbose=True):
    """
    Save the data needed for each split so that a process does not have to load
    everything.
@ -87,14 +88,10 @@ def dump_split_particles(particles, particle_clumps, clumps, Nsplits,
        Array of particles' clump IDs.
    clumps : structured array
        The clumps array.
-    Nsplits : int
+    n_splits : int
        Number of times to split the clumps.
-    dumpfolder : str
+    paths : py:class`csiborgtools.read.CSiBORGPaths`
-        Path to the folder where to dump the splits.
+        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
    Nsim : int
        CSiBORG simulation index.
    Nsnap : int
        Snapshot index.
    verbose : bool, optional
        Verbosity flag. By default `True`.
@ -112,10 +109,10 @@ def dump_split_particles(particles, particle_clumps, clumps, Nsplits,
             .format(with_particles.sum() / with_particles.size * 100))
    # The starting clump index of each split
-    splits = distribute_halos(Nsplits, clumps)
+    splits = distribute_halos(n_splits, clumps)
-    fname = join(dumpfolder, "out_{}_snap_{}_{}.npz")
+    fname = join(paths.temp_dumpdir, "out_{}_snap_{}_{}.npz")
-    iters = trange(Nsplits) if verbose else range(Nsplits)
+    iters = trange(n_splits) if verbose else range(n_splits)
    tot = 0
    for n in iters:
        # Lower and upper array index of the clumps array
@ -133,7 +130,7 @@ def dump_split_particles(particles, particle_clumps, clumps, Nsplits,
                "with no particles.".format(n, indxs.size, npart_unique))
        # Dump it!
        tot += mask.sum()
-        fout = fname.format(Nsim, Nsnap, n)
+        fout = fname.format(paths.n_sim, paths.n_snap, n)
        numpy.savez(fout, particles[mask], particle_clumps[mask], clumps[i:j])
    # There are particles whose clump ID is > 1 and have no counterpart in the
@ -144,15 +141,15 @@ def dump_split_particles(particles, particle_clumps, clumps, Nsplits,
            "size `{}`.".format(tot, particle_clumps.size))
-def split_jobs(Njobs, Ncpu):
+def split_jobs(njobs, ncpu):
    """
-    Split `Njobs` amongst `Ncpu`.
+    Split `njobs` amongst `ncpu`.
    Parameters
    ----------
-    Njobs : int
+    njobs : int
        Number of jobs.
-    Ncpu : int
+    ncpu : int
        Number of CPUs.
    Returns
@ -160,29 +157,25 @@ def split_jobs(Njobs, Ncpu):
    jobs : list of lists of integers
        Outer list of each CPU and inner lists for CPU's jobs.
    """
-    njobs_per_cpu, njobs_remainder = divmod(Njobs, Ncpu)
+    njobs_per_cpu, njobs_remainder = divmod(njobs, ncpu)
-    jobs = numpy.arange(njobs_per_cpu * Ncpu).reshape((njobs_per_cpu, Ncpu)).T
+    jobs = numpy.arange(njobs_per_cpu * ncpu).reshape((njobs_per_cpu, ncpu)).T
    jobs = jobs.tolist()
    for i in range(njobs_remainder):
-        jobs[i].append(njobs_per_cpu * Ncpu + i)
+        jobs[i].append(njobs_per_cpu * ncpu + i)
    return jobs
-def load_split_particles(Nsplit, dumpfolder, Nsim, Nsnap, remove_split=False):
+def load_split_particles(n_split, paths, remove_split=False):
    """
    Load particles of a split saved by `dump_split_particles`.
    Parameters
    --------
-    Nsplit : int
+    n_split : int
        Split index.
-    dumpfolder : str
+    paths : py:class`csiborgtools.read.CSiBORGPaths`
-        Path to the folder where the splits were dumped.
+        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
    Nsim : int
        CSiBORG simulation index.
    Nsnap : int
        Snapshot index.
    remove_split : bool, optional
        Whether to remove the split file. By default `False`.
@ -196,7 +189,8 @@ def load_split_particles(Nsplit, dumpfolder, Nsim, Nsnap, remove_split=False):
        Clumps belonging to this split.
    """
    fname = join(
-        dumpfolder, "out_{}_snap_{}_{}.npz".format(Nsim, Nsnap, Nsplit))
+        paths.temp_dumpdir, "out_{}_snap_{}_{}.npz".format(
            paths.n_sim, paths.n_snap, n_split))
    file = numpy.load(fname)
    particles, clump_indxs, clumps = (file[f] for f in file.files)
    if remove_split:
--- a/csiborgtools/match/match.py
+++ b/csiborgtools/match/match.py
@ -122,7 +122,38 @@ class RealisationsMatcher:
        """
        return [i for i in range(self.cats.N) if i != n_sim]
-    def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=2):
+    def cosine_similarity(self, x, y):
        r"""
        Calculate the cosine similarity between two Cartesian vectors. Defined
        as :math:`\Sum_{i} x_i y_{i} / (|x|  |y|)`.
        Parameters
        ----------
        x : 1-dimensional array
            The first vector.
        y : 1- or 2-dimensional array
            The second vector. Can be 2-dimensional of shape `(n_samples, 3)`,
            in which case the calculation is broadcasted.
        Returns
        -------
        out : float or 1-dimensional array
            The cosine similarity. If y is 1-dimensinal returns only a float.
        """
        # Quick check of dimensions
        if x.ndim != 1:
            raise ValueError("`x` must be a 1-dimensional array.")
        y = y.reshape(-1, 3) if y.ndim == 1 else y
        out = numpy.sum(x * y, axis=1)
        out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
        if out.size == 1:
            return out[0]
        return out
    def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=2,
                                  verbose=True):
        r"""
        Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
        the `nsim`th simulation. Also enforces that the neighbours'
@ -153,8 +184,13 @@ class RealisationsMatcher:
        pos = self.cats[n_sim].positions
        matches = [None] * (self.cats.N - 1)
        # Verbose iterator
        if verbose:
            iters = enumerate(tqdm(self.search_sim_indices(n_sim)))
        else:
            iters = enumerate(self.search_sim_indices(n_sim))
        # Search for neighbours in the other simulations
-        for count, i in enumerate(self.search_sim_indices(n_sim)):
+        for count, i in iters:
            dist, indxs = self.cats[i].radius_neigbours(pos, r200 * nmult)
            # Get rid of neighbors whose mass is too off
            for j, indx in enumerate(indxs):
--- a/csiborgtools/read/init.py
+++ b/csiborgtools/read/init.py
@ -13,11 +13,7 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-from .readsim import (get_csiborg_ids, get_sim_path, get_snapshots,  # noqa
+from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, get_positions)  # noqa
                      get_snapshot_path, get_maximum_snapshot, read_info, nparts_to_start_ind,  # noqa
                      open_particle, open_unbinding, read_particle,  # noqa
                      drop_zero_indx,  # noqa
                      read_clumpid, read_clumps, read_mmain)  # noqa
 from .make_cat import (HaloCatalogue, CombinedHaloCatalogue)  # noqa
 from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, TwoMPPGroups)  # noqa
 from .outsim import (dump_split, combine_splits)  # noqa
--- a/csiborgtools/read/make_cat.py
+++ b/csiborgtools/read/make_cat.py
@ -19,9 +19,9 @@ Functions to read in the particle and clump files.
 import numpy
 from os.path import join
 from tqdm import trange
 from copy import deepcopy
 from sklearn.neighbors import NearestNeighbors
-from .readsim import (get_sim_path, read_mmain, get_csiborg_ids,
+from .readsim import read_mmain
                      get_maximum_snapshot)
 from ..utils import (flip_cols, add_columns)
 from ..units import (BoxUnits, cartesian_to_radec)
@ -32,35 +32,23 @@ class HaloCatalogue:
    Parameters
    ----------
-    n_sim: int
+    paths : py:class:`csiborgtools.read.CSiBORGPaths`
-        Initial condition index.
+        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
    n_snap: int
        Snapshot index.
    minimum_m500 : float, optional
        The minimum :math:`M_{rm 500c} / M_\odot` mass. By default no
        threshold.
    dumpdir : str, optional
        Path to where files from `run_fit_halos` are stored. By default
        `/mnt/extraspace/rstiskalek/csiborg/`.
    mmain_path : str, optional
        Path to where mmain files are stored. By default
        `/mnt/zfsusers/hdesmond/Mmain`.
    """
    _box = None
-    _n_sim = None
+    _paths = None
    _n_snap = None
    _data = None
    _knn = None
    _positions = None
-    def __init__(self, n_sim, n_snap, minimum_m500=None,
+    def __init__(self, paths, minimum_m500=None):
-                 dumpdir="/mnt/extraspace/rstiskalek/csiborg/",
+        self._box = BoxUnits(paths)
                 mmain_path="/mnt/zfsusers/hdesmond/Mmain"):
        self._box = BoxUnits(n_snap, get_sim_path(n_sim))
        minimum_m500 = 0 if minimum_m500 is None else minimum_m500
-        self._set_data(n_sim, n_snap, dumpdir, mmain_path, minimum_m500)
+        self._paths = paths
-        self._nsim = n_sim
+        self._set_data(minimum_m500)
        self._nsnap = n_snap
        # Initialise the KNN
        knn = NearestNeighbors()
        knn.fit(self.positions)
@ -74,7 +62,6 @@ class HaloCatalogue:
        Returns
        -------
        cat : structured array
            Catalogue.
        """
        if self._data is None:
            raise ValueError("`data` is not set!")
@ -88,7 +75,6 @@ class HaloCatalogue:
        Returns
        -------
        box : :py:class:`csiborgtools.units.BoxUnits`
            The box object.
        """
        return self._box
@ -100,10 +86,20 @@ class HaloCatalogue:
        Returns
        -------
        cosmo : `astropy` cosmology object
            Box cosmology.
        """
        return self.box.cosmo
    @property
    def paths(self):
        """
        The paths-handling object.
        Returns
        -------
        paths : :py:class:`csiborgtools.read.CSiBORGPaths`
        """
        return self._paths
    @property
    def n_snap(self):
        """
@ -112,9 +108,8 @@ class HaloCatalogue:
        Returns
        -------
        n_snap : int
            Snapshot ID.
        """
-        return self._n_snap
+        return self.paths.n_snap
    @property
    def n_sim(self):
@ -124,27 +119,37 @@ class HaloCatalogue:
        Returns
        -------
        n_sim : int
            The IC ID.
        """
-        return self._n_sim
+        return self.paths.n_sim
-    def _set_data(self, n_sim, n_snap, dumpdir, mmain_path, minimum_m500):
+    def _set_data(self, minimum_m500):
        """
        Loads the data, merges with mmain, does various coordinate transforms.
        """
        # Load the processed data
        fname = "ramses_out_{}_{}.npy".format(
-            str(n_sim).zfill(5), str(n_snap).zfill(5))
+            str(self.n_sim).zfill(5), str(self.n_snap).zfill(5))
-        data = numpy.load(join(dumpdir, fname))
+        data = numpy.load(join(self.paths.dumpdir, fname))
        # Load the mmain file and add it to the data
-        mmain = read_mmain(n_sim, mmain_path)
+        mmain = read_mmain(self.n_sim, self.paths.mmain_path)
        data = self.merge_mmain_to_clumps(data, mmain)
        flip_cols(data, "peak_x", "peak_z")
        # Cut on number of particles and finite m200
        data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
        # Calculate redshift
        pos = [data["peak_{}".format(p)] - 0.5 for p in ("x", "y", "z")]
        vel = [data["v{}".format(p)] for p in ("x", "y", "z")]
        zpec = self.box.box2pecredshift(*vel, *pos)
        zobs = self.box.box2obsredshift(*vel, *pos)
        zcosmo = self.box.box2cosmoredshift(
            sum(pos[i]**2 for i in range(3))**0.5)
        data = add_columns(data, [zpec, zobs, zcosmo],
                           ["zpec", "zobs", "zcosmo"])
        # Unit conversion
        convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
                        "r200", "r500", "Rs", "rho0",
@ -203,6 +208,18 @@ class HaloCatalogue:
        """
        return self._positions
    @property
    def velocities(self):
        """
        Cartesian velocities of halos.
        Returns
        -------
        vel : 2-dimensional array
            Array of shape `(n_halos, 3)`.
        """
        return numpy.vstack([self["v{}".format(p)] for p in ("x", "y", "z")]).T
    def radius_neigbours(self, X, radius):
        """
        Return sorted nearest neigbours within `radius` or `X`.
@ -245,15 +262,12 @@ class CombinedHaloCatalogue:
    Parameters
    ----------
    paths : py:class`csiborgtools.read.CSiBORGPaths`
        CSiBORG paths-handling object. Doest not have to have set set `n_sim`
        and `n_snap`.
    minimum_m500 : float, optional
        The minimum :math:`M_{rm 500c} / M_\odot` mass. By default no
        threshold.
    dumpdir : str, optional
        Path to where files from `run_fit_halos` are stored. By default
        `/mnt/extraspace/rstiskalek/csiborg/`.
    mmain_path : str, optional
        Path to where mmain files are stored. By default
        `/mnt/zfsusers/hdesmond/Mmain`.
    verbose : bool, optional
        Verbosity flag for reading the catalogues.
    """
@ -261,19 +275,18 @@ class CombinedHaloCatalogue:
    _n_snaps = None
    _cats = None
-    def __init__(self, minimum_m500=None,
+    def __init__(self, paths, minimum_m500=None, verbose=True):
                 dumpdir="/mnt/extraspace/rstiskalek/csiborg/",
                 mmain_path="/mnt/zfsusers/hdesmond/Mmain", verbose=True):
        # Read simulations and their maximum snapshots
        # NOTE remove this later and take all cats
-        self._n_sims = get_csiborg_ids("/mnt/extraspace/hdesmond")[:10]
+        self._n_sims = paths.ic_ids[:10]
-        n_snaps = [get_maximum_snapshot(get_sim_path(i)) for i in self._n_sims]
+        n_snaps = [paths.get_maximum_snapshot(i) for i in self._n_sims]
        self._n_snaps = numpy.asanyarray(n_snaps)
        cats = [None] * self.N
        for i in trange(self.N) if verbose else range(self.N):
-            cats[i] = HaloCatalogue(self._n_sims[i], self._n_snaps[i],
+            paths = deepcopy(paths)
-                                    minimum_m500, dumpdir, mmain_path)
+            paths.set_info(self.n_sims[i], self.n_snaps[i])
            cats[i] = HaloCatalogue(paths, minimum_m500)
        self._cats = cats
    @property
--- a/csiborgtools/read/outsim.py
+++ b/csiborgtools/read/outsim.py
@ -21,13 +21,12 @@ import numpy
 from os.path import join
 from os import remove
 from tqdm import trange
 from .readsim import (get_sim_path, read_clumps)
 I64 = numpy.int64
 F64 = numpy.float64
-def dump_split(arr, Nsplit, Nsim, Nsnap, outdir):
+def dump_split(arr, n_split, paths):
    """
    Dump an array from a split.
@ -35,11 +34,13 @@ def dump_split(arr, Nsplit, Nsim, Nsnap, outdir):
    ----------
    arr : n-dimensional or structured array
        Array to be saved.
-    Nsplit : int
+    n_split: int
        The split index.
-    Nsim : int
+    paths : py:class`csiborgtools.read.CSiBORGPaths`
        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
    n_sim : int
        The CSiBORG realisation index.
-    Nsnap : int
+    n_snap : int
        The index of a redshift snapshot.
    outdir : string
        Directory where to save the temporary files.
@ -48,13 +49,14 @@ def dump_split(arr, Nsplit, Nsim, Nsnap, outdir):
    -------
    None
    """
-    Nsim = str(Nsim).zfill(5)
+    n_sim = str(paths.n_sim).zfill(5)
-    Nsnap = str(Nsnap).zfill(5)
+    n_snap = str(paths.n_snap).zfill(5)
-    fname = join(outdir, "ramses_out_{}_{}_{}.npy".format(Nsim, Nsnap, Nsplit))
+    fname = join(paths.temp_dumpdir, "ramses_out_{}_{}_{}.npy"
                 .format(n_sim, n_snap, n_split))
    numpy.save(fname, arr)
-def combine_splits(Nsplits, Nsim, Nsnap, outdir, cols_add, remove_splits=False,
+def combine_splits(n_splits, part_reader, cols_add, remove_splits=False,
                   verbose=True):
    """
    Combine results of many splits saved from `dump_split`. Identifies to which
@ -64,14 +66,10 @@ def combine_splits(Nsplits, Nsim, Nsnap, outdir, cols_add, remove_splits=False,
    Paramaters
    ----------
-    Nsplits : int
+    n_splits : int
        The total number of clump splits.
-    Nsim : int
+    part_reader : py:class`csiborgtools.read.ParticleReadear`
-        The CSiBORG realisation index.
+        CSiBORG particle reader.
    Nsnap : int
        The index of a redshift snapshot.
    outdir : str
        Directory where to save the new array.
    cols_add : list of `(str, dtype)`
        Colums to add. Must be formatted as, for example,
        `[("npart", numpy.float64), ("totpartmass", numpy.float64)]`.
@ -86,8 +84,10 @@ def combine_splits(Nsplits, Nsim, Nsnap, outdir, cols_add, remove_splits=False,
        Clump array with appended results from the splits.
    """
    # Load clumps to see how many there are and will add to this array
-    simpath = get_sim_path(Nsim)
+    n_sim = part_reader.paths.n_sim
-    clumps = read_clumps(Nsnap, simpath, cols=None)
+    n_snap = part_reader.paths.n_snap
    clumps = part_reader.read_clumps(cols=None)
    # Get the old + new dtypes and create an empty array
    descr = clumps.dtype.descr + cols_add
    out = numpy.full(clumps.size, numpy.nan, dtype=descr)
@ -96,12 +96,13 @@ def combine_splits(Nsplits, Nsim, Nsnap, outdir, cols_add, remove_splits=False,
        out[par] = clumps[par]
    # Filename of splits data
-    froot = "ramses_out_{}_{}".format(str(Nsim).zfill(5), str(Nsnap).zfill(5))
+    froot = "ramses_out_{}_{}".format(
-    fname = join(outdir, froot + "_{}.npy")
+        str(n_sim).zfill(5), str(n_snap).zfill(5))
    fname = join(part_reader.paths.temp_dumpdir, froot + "_{}.npy")
    # Iterate over splits and add to the output array
    cols_add_names = [col[0] for col in cols_add]
-    iters = trange(Nsplits) if verbose else range(Nsplits)
+    iters = trange(n_splits) if verbose else range(n_splits)
    for n in iters:
        fnamesplit = fname.format(n)
        arr = numpy.load(fnamesplit)
--- a/csiborgtools/read/readsim.py
+++ b/csiborgtools/read/readsim.py
@ -19,10 +19,10 @@ Functions to read in the particle and clump files.
 import numpy
 from scipy.io import FortranFile
 from os import listdir
-from os.path import (join, isfile)
+from os.path import (join, isfile, isdir)
 from glob import glob
 from tqdm import tqdm
-from ..utils import cols_to_structured
+from ..utils import (cols_to_structured, extract_from_structured)
 F16 = numpy.float16
@ -32,23 +32,219 @@ I32 = numpy.int32
 I64 = numpy.int64
-def get_csiborg_ids(srcdir):
+###############################################################################
 #                            Paths manager                                    #
 ###############################################################################
 class CSiBORGPaths:
    """
-    Get CSiBORG simulation IDs from the list of folders in `srcdir`.
+    Paths manager for CSiBORG IC realisations.
    Assumes that the folders look like `ramses_out_X` and extract the `X`
    integer. Removes `5511` from the list of IDs.
    Parameters
    ----------
-    srcdir : string
+    n_sim : int, optional
-        The folder where CSiBORG simulations are stored.
+        CSiBORG IC realisation index. By default not set.
    n_snap : int, optional
        Snapshot index. By default not set.
    srcdir : str, optional
        The file path to the folder where realisations of the ICs are stored.
        By default `/mnt/extraspace/hdesmond/`.
    dumpdir : str, optional
        Path to where files from `run_fit_halos` are stored. By default
        `/mnt/extraspace/rstiskalek/csiborg/`.
    mmain_path : str, optional
        Path to where mmain files are stored. By default
        `/mnt/zfsusers/hdesmond/Mmain`.
    """
    _srcdir = None
    _n_sim = None
    _n_snap = None
    _dumpdir = None
    _mmain_path = None
    def __init__(self, n_sim=None, n_snap=None,
                 srcdir="/mnt/extraspace/hdesmond/",
                 dumpdir="/mnt/extraspace/rstiskalek/csiborg/",
                 mmain_path="/mnt/zfsusers/hdesmond/Mmain"):
        self.srcdir = srcdir
        self.dumpdir = dumpdir
        self.mmain_path = mmain_path
        if n_sim is not None and n_snap is not None:
            self.set_info(n_sim, n_snap)
    @property
    def srcdir(self):
        """
        Folder where CSiBORG simulations are stored.
        Returns
        -------
        srcdir : int
        """
        return self._srcdir
    @srcdir.setter
    def srcdir(self, srcdir):
        """
        Set `srcdir`, check that the directory exists.
        """
        if not isdir(srcdir):
            raise IOError("Invalid directory `{}`!".format(srcdir))
        self._srcdir = srcdir
    @property
    def dumpdir(self):
        """
        Folder where files from `run_fit_halos` are stored.
        Returns
        -------
        dumpdir : str
        """
        return self._dumpdir
    @property
    def temp_dumpdir(self):
        """
        Temporary dumping directory.
        Returns
        -------
        temp_dumpdir : str
        """
        fpath = join(self.dumpdir, "temp")
        if not isdir(fpath):
            raise IOError("Invalid directory `{}`!".format(fpath))
        return fpath
    @dumpdir.setter
    def dumpdir(self, dumpdir):
        """
        Set `dumpdir`, check that the directory exists.
        """
        if not isdir(dumpdir):
            raise IOError("Invalid directory `{}`!".format(dumpdir))
        self._dumpdir = dumpdir
    @property
    def mmain_path(self):
        """
        Path where mmain files are stored.
        Returns
        -------
        mmain_path : str
        """
        return self._mmain_path
    @mmain_path.setter
    def mmain_path(self, mmain_path):
        """
        Set `mmain_path`, check that the directory exists.
        """
        if not isdir(mmain_path):
            raise IOError("Invalid directory `{}`!".format(mmain_path))
        self._mmain_path = mmain_path
    @property
    def n_sim(self):
        """
        The IC realisation index set by the user.
        Returns
        -------
        n_sim : int
        """
        if self._n_sim is None:
            raise ValueError(
                "`self.n_sim` is not set! Either provide a value directly  "
                "or set it using `self.set_info(...)`")
        return self._n_sim
    @n_sim.setter
    def n_sim(self, n_sim):
        """Set `n_sim`, ensure it is a valid simulation index."""
        if n_sim not in self.ic_ids:
            raise ValueError(
                "`{}` is not a valid IC realisation index.".format(n_sim))
        self._n_sim = n_sim
    @property
    def n_snap(self):
        """
        The snapshot index of a IC realisation set by the user.
        Returns
        -------
        n_snap: int
        """
        if self._n_snap is None:
            raise ValueError(
                "`self.n_sim` is not set! Either provide a value directly  "
                "or set it using `self.set_info(...)`")
        return self._n_snap
    @n_snap.setter
    def n_snap(self, n_snap):
        """Set `n_snap`."""
        self._n_snap = n_snap
    def set_info(self, n_sim, n_snap):
        """
        Convenience function for setting `n_sim` and `n_snap`.
        Parameters
        ----------
        n_sim : int
            CSiBORG IC realisation index.
        n_snap : int
            Snapshot index.
        """
        self.n_sim = n_sim
        if n_snap not in self.get_snapshots(n_sim):
            raise ValueError(
                "Invalid snapshot number `{}` for IC realisation `{}`."
                .format(n_snap, n_sim))
        self.n_snap = n_snap
    def reset_info(self):
        """
        Reset `self.n_sim` and `self.n_snap`.
        """
        self._n_sim = None
        self._n_snap = None
    def get_n_sim(self, n_sim):
        """
        Get `n_sim`. If `self.n_sim` return it, otherwise returns `n_sim`.
        """
        if n_sim is None:
            return self.n_sim
        return n_sim
    def get_n_snap(self, n_snap):
        """
        Get `n_snap`. If `self.n_snap` return it, otherwise returns `n_snap`.
        """
        if n_snap is None:
            return self.n_snap
        return n_snap
    @property
    def ic_ids(self):
        """
        CSiBORG initial condition (IC) simulation IDs from the list of folders
        in `self.srcdir`. Assumes that the folders look like `ramses_out_X`
        and extracts the `X` integer. Removes `5511` from the list of IDs.
        Returns
        -------
        ids : 1-dimensional array
            Array of CSiBORG simulation IDs.
        """
-    files = glob(join(srcdir, "ramses_out*"))
+        files = glob(join(self.srcdir, "ramses_out*"))
        # Select only file names
        files = [f.split("/")[-1] for f in files]
        # Remove files with inverted ICs
@ -62,95 +258,153 @@ def get_csiborg_ids(srcdir):
            pass
        return numpy.sort(ids)
-
+    def ic_path(self, n_sim=None):
 def get_sim_path(n, fname="ramses_out_{}", srcdir="/mnt/extraspace/hdesmond"):
        """
-    Get a path to a CSiBORG simulation.
+        Path to `n_sim`th CSiBORG IC realisation.
        Parameters
        ----------
-    n : int
+        n_sim : int, optional
-        The index of the initial conditions (IC) realisation.
+            The index of the initial conditions (IC) realisation. By default
-    fname : str, optional
+            `None` and the set value is attempted to be used.
        The file name. By default `ramses_out_{}`, where `n` is the IC index.
    srcdir : str, optional
        The file path to the folder where realisations of the ICs are stored.
        Returns
        -------
        path : str
        Path to the `n`th CSiBORG simulation.
        """
-    return join(srcdir, fname.format(n))
+        n_sim = self.get_n_sim(n_sim)
        fname = "ramses_out_{}"
        return join(self.srcdir, fname.format(n_sim))
-
+    def get_snapshots(self, n_sim=None):
 def get_snapshots(simpath):
        """
-    Get the list of snapshots for the given IC realisation.
+        List of snapshots for the `n_sim`th IC realisation.
        Parameters
        ----------
-    simpath : str
+        n_sim : int
-        Path to the CSiBORG IC realisation.
+            The index of the initial conditions (IC) realisation. By default
            `None` and the set value is attempted to be used.
        Returns
        -------
        snapshots : 1-dimensional array
            Array of snapshot IDs.
        """
        n_sim = self.get_n_sim(n_sim)
        simpath = self.ic_path(n_sim)
        # Get all files in simpath that start with output_
        snaps = glob(join(simpath, "output_*"))
        # Take just the last _00XXXX from each file  and strip zeros
        snaps = [int(snap.split('_')[-1].lstrip('0')) for snap in snaps]
        return numpy.sort(snaps)
-
+    def get_maximum_snapshot(self, n_sim=None):
 def get_maximum_snapshot(simpath):
        """
-    Return the maximum snapshot of an IC realisation stored at `simpath`.
+        Return the maximum snapshot of an IC realisation.
        Parameters
        ----------
-    simpath : str
+        n_sim : int
-        Path to the CSiBORG IC realisation.
+            The index of the initial conditions (IC) realisation. By default
            `None` and the set value is attempted to be used.
        Returns
        -------
        maxsnap : float
-        The maximum snapshot.
+            Maximum snapshot.
        """
-    return max(get_snapshots(simpath))
+        n_sim = self.get_n_sim(n_sim)
        return max(self.get_snapshots(n_sim))
-
+    def get_minimum_snapshot(self, n_sim=None):
 def get_snapshot_path(Nsnap, simpath):
        """
-    Get a path to a CSiBORG IC realisation snapshot.
+        Return the maximum snapshot of an IC realisation.
        Parameters
        ----------
-    Nsnap : int
+        n_sim : int
-        Snapshot index.
+            The index of the initial conditions (IC) realisation. By default
-    simpath : str
+            `None` and the set value is attempted to be used.
-        Path to the CSiBORG IC realisation.
+
        Returns
        -------
        minsnap : float
            Minimum snapshot.
        """
        n_sim = self.get_n_sim(n_sim)
        return min(self.get_snapshots(n_sim))
    def snapshot_path(self, n_snap=None, n_sim=None):
        """
        Path to a CSiBORG IC realisation snapshot.
        Parameters
        ----------
        n_snap : int
            Snapshot index. By default `None` and the set value is attempted
            to be used.
        n_sim : str
            Corresponding CSiBORG IC realisation index. By default `None` and
            the set value is attempted to be used.
        Returns
        -------
        snappath : str
            Path to the CSiBORG IC realisation snapshot.
        """
-    return join(simpath, "output_{}".format(str(Nsnap).zfill(5)))
+        n_snap = self.get_n_snap(n_snap)
        n_sim = self.get_n_sim(n_sim)
        simpath = self.ic_path(n_sim)
        return join(simpath, "output_{}".format(str(n_snap).zfill(5)))
-def read_info(Nsnap, simpath):
+###############################################################################
 #                          Fortran readers                                    #
 ###############################################################################
 class ParticleReader:
    """
-    Read CSiBORG simulation snapshot info.
+    Tools to read in particle files alon with their corresponding clumps.
    Parameters
    ----------
-    Nsnap : int
+    paths : py:class`csiborgtools.read.CSiBORGPaths`
-        Snapshot index.
+        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
-    simpath : str
+    """
-        Path to the CSiBORG IC realisation.
+    _paths = None
    def __init__(self, paths):
        self.paths = paths
    @property
    def paths(self):
        """
        The paths-handling object.
        Returns
        -------
        paths : :py:class:`csiborgtools.read.CSiBORGPaths`
        """
        return self._paths
    @paths.setter
    def paths(self, paths):
        """
        Set `paths`. Makes sure it is the right object and `n_sim` and `n_snap`
        are both set.
        """
        if not isinstance(paths, CSiBORGPaths):
            raise TypeError("`paths` must be of type `CSiBORGPaths`.")
        if paths.n_sim is None or paths.n_snap is None:
            raise ValueError(
                "`paths` must have set both `n_sim` and `n_snap`!")
        self._paths = paths
    def read_info(self):
        """
        Read CSiBORG simulation snapshot info.
        Returns
        -------
@ -159,30 +413,26 @@ def read_info(Nsnap, simpath):
            strings.
        """
        # Open the info file
-    snappath = get_snapshot_path(Nsnap, simpath)
+        n_snap = self.paths.n_snap
-    filename = join(snappath, "info_{}.txt".format(str(Nsnap).zfill(5)))
+        snappath = self.paths.snapshot_path()
        filename = join(snappath, "info_{}.txt".format(str(n_snap).zfill(5)))
        with open(filename, "r") as f:
            info = f.read().split()
        # Throw anything below ordering line out
        info = numpy.asarray(info[:info.index("ordering")])
        # Get indexes of lines with `=`. Indxs before/after be keys/vals
-    eqindxs = numpy.asarray([i for i in range(info.size) if info[i] == '='])
+        eqs = numpy.asarray([i for i in range(info.size) if info[i] == '='])
-    keys = info[eqindxs - 1]
+        keys = info[eqs - 1]
-    vals = info[eqindxs + 1]
+        vals = info[eqs + 1]
        return {key: val for key, val in zip(keys, vals)}
-
+    def open_particle(self, verbose=True):
 def open_particle(Nsnap, simpath, verbose=True):
        """
        Open particle files to a given CSiBORG simulation.
        Parameters
        ----------
    Nsnap : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
        verbose : bool, optional
            Verbosity flag.
@ -193,21 +443,24 @@ def open_particle(Nsnap, simpath, verbose=True):
        partfiles : list of `scipy.io.FortranFile`
            Opened part files.
        """
        n_snap = self.paths.n_snap
        # Zeros filled snapshot number and the snapshot path
-    nout = str(Nsnap).zfill(5)
+        nout = str(n_snap).zfill(5)
-    snappath = get_snapshot_path(Nsnap, simpath)
+        snappath = self.paths.snapshot_path()
-    ncpu = int(read_info(Nsnap, simpath)["ncpu"])
+        ncpu = int(self.read_info()["ncpu"])
        if verbose:
-        print("Reading in output `{}` with ncpu = `{}`.".format(nout, ncpu))
+            print("Reading in output `{}` with ncpu = `{}`."
                  .format(nout, ncpu))
        # Check whether the unbinding file exists.
        snapdirlist = listdir(snappath)
        unbinding_file = "unbinding_{}.out00001".format(nout)
        if unbinding_file not in snapdirlist:
            raise FileNotFoundError(
-            "Couldn't find `{}` in `{}`. Use mergertreeplot.py -h or --help "
+                "Couldn't find `{}` in `{}`. Use mergertreeplot.py -h or "
-            "to print help message.".format(unbinding_file, snappath))
+                "--help to print help message."
                .format(unbinding_file, snappath))
        # First read the headers. Reallocate arrays and fill them.
        nparts = numpy.zeros(ncpu, dtype=int)
@ -221,7 +474,8 @@ def open_particle(Nsnap, simpath, verbose=True):
            ncpuloc = f.read_ints()
            if ncpuloc != ncpu:
                infopath = join(snappath, "info_{}.txt".format(nout))
-            raise ValueError("`ncpu = {}` of `{}` disagrees with `ncpu = {}` "
+                raise ValueError(
                    "`ncpu = {}` of `{}` disagrees with `ncpu = {}` "
                    "of `{}`.".format(ncpu, infopath, ncpuloc, fpath))
            ndim = f.read_ints()
            nparts[cpu] = f.read_ints()
@ -236,10 +490,11 @@ def open_particle(Nsnap, simpath, verbose=True):
        return nparts, partfiles
-
+    @staticmethod
    def read_sp(dtype, partfile):
        """
-    Utility function to read a single particle file, depending on the dtype.
+        Utility function to read a single particle file, depending on its
        dtype.
        Parameters
        ----------
@ -264,7 +519,7 @@ def read_sp(dtype, partfile):
        else:
            raise TypeError("Unexpected dtype `{}`.".format(dtype))
-
+    @staticmethod
    def nparts_to_start_ind(nparts):
        """
        Convert `nparts` array to starting indices in a pre-allocated array for
@ -282,8 +537,7 @@ def nparts_to_start_ind(nparts):
        """
        return numpy.hstack([[0], numpy.cumsum(nparts[:-1])])
-
+    def read_particle(self, pars_extract, verbose=True):
 def read_particle(pars_extract, Nsnap, simpath, verbose=True):
        """
        Read particle files of a simulation at a given snapshot and return
        values of `pars_extract`.
@ -305,7 +559,7 @@ def read_particle(pars_extract, Nsnap, simpath, verbose=True):
            The data read from the particle file.
        """
        # Open the particle files
-    nparts, partfiles = open_particle(Nsnap, simpath)
+        nparts, partfiles = self.open_particle(verbose=verbose)
        if verbose:
            print("Opened {} particle files.".format(nparts.size))
        ncpu = nparts.size
@ -320,7 +574,8 @@ def read_particle(pars_extract, Nsnap, simpath, verbose=True):
            pars_extract = [pars_extract]
        for p in pars_extract:
            if p not in fnames:
-            raise ValueError("Undefined parameter `{}`. Must be one of `{}`."
+                raise ValueError(
                    "Undefined parameter `{}`. Must be one of `{}`."
                    .format(p, fnames))
        npart_tot = numpy.sum(nparts)
@ -331,21 +586,20 @@ def read_particle(pars_extract, Nsnap, simpath, verbose=True):
                 "formats": [forder[fnames.index(p)][1] for p in pars_extract]}
        # Allocate the output structured array
        out = numpy.full(npart_tot, numpy.nan, dtype)
-    start_ind = nparts_to_start_ind((nparts))
+        start_ind = self.nparts_to_start_ind(nparts)
        iters = tqdm(range(ncpu)) if verbose else range(ncpu)
        for cpu in iters:
            i = start_ind[cpu]
            j = nparts[cpu]
            for (fname, fdtype) in zip(fnames, fdtypes):
                if fname in pars_extract:
-                out[fname][i:i + j] = read_sp(fdtype, partfiles[cpu])
+                    out[fname][i:i + j] = self.read_sp(fdtype, partfiles[cpu])
                else:
-                dum[i:i + j] = read_sp(fdtype, partfiles[cpu])
+                    dum[i:i + j] = self.read_sp(fdtype, partfiles[cpu])
        return out
-
+    def open_unbinding(self, cpu):
 def open_unbinding(cpu, Nsnap, simpath):
        """
        Open particle files to a given CSiBORG simulation. Note that to be
        consistent CPU is incremented by 1.
@ -354,34 +608,24 @@ def open_unbinding(cpu, Nsnap, simpath):
        ----------
        cpu : int
            The CPU index.
    Nsnap : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
        Returns
        -------
        unbinding : `scipy.io.FortranFile`
            The opened unbinding FortranFile.
        """
-    nout = str(Nsnap).zfill(5)
+        nout = str(self.paths.n_snap).zfill(5)
        cpu = str(cpu + 1).zfill(5)
-    fpath = join(simpath, "output_{}".format(nout),
+        fpath = join(self.paths.ic_path(), "output_{}".format(nout),
                     "unbinding_{}.out{}".format(nout, cpu))
        return FortranFile(fpath)
-
+    def read_clumpid(self, verbose=True):
 def read_clumpid(Nsnap, simpath, verbose=True):
        """
-    Read clump IDs of halos from unbinding files.
+        Read clump IDs of particles from unbinding files.
        Parameters
        ----------
    Nsnap : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
        verbose : bool, optional
            Verbosity flag while for reading the CPU outputs.
@ -390,8 +634,8 @@ def read_clumpid(Nsnap, simpath, verbose=True):
        clumpid : 1-dimensional array
            The array of clump IDs.
        """
-    nparts, __ = open_particle(Nsnap, simpath, verbose)
+        nparts, __ = self.open_particle(verbose)
-    start_ind = nparts_to_start_ind(nparts)
+        start_ind = self.nparts_to_start_ind(nparts)
        ncpu = nparts.size
        clumpid = numpy.full(numpy.sum(nparts), numpy.nan, dtype=I32)
@ -399,12 +643,12 @@ def read_clumpid(Nsnap, simpath, verbose=True):
        for cpu in iters:
            i = start_ind[cpu]
            j = nparts[cpu]
-        ff = open_unbinding(cpu, Nsnap, simpath)
+            ff = self.open_unbinding(cpu)
            clumpid[i:i + j] = ff.read_ints()
        return clumpid
-
+    @staticmethod
    def drop_zero_indx(clump_ids, particles):
        """
        Drop from `clump_ids` and `particles` entries whose clump index is 0.
@ -426,31 +670,28 @@ def drop_zero_indx(clump_ids, particles):
        mask = clump_ids != 0
        return clump_ids[mask], particles[mask]
-
+    def read_clumps(self, cols=None):
 def read_clumps(Nsnap, simpath, cols=None):
        """
        Read in a clump file `clump_Nsnap.dat`.
        Parameters
        ----------
    Nsnap : int
        The index of a redshift snapshot.
    simpath : str
        The complete path to the CSiBORG simulation.
        cols : list of str, optional.
-        Columns to extract. By default `None` and all columns are extracted.
+            Columns to extract. By default `None` and all columns are
            extracted.
        Returns
        -------
        out : structured array
            Structured array of the clumps.
        """
-    Nsnap = str(Nsnap).zfill(5)
+        n_snap = str(self.paths.n_snap).zfill(5)
-    fname = join(simpath, "output_{}".format(Nsnap),
+        fname = join(self.paths.ic_path(), "output_{}".format(n_snap),
-                 "clump_{}.dat".format(Nsnap))
+                     "clump_{}.dat".format(n_snap))
        # Check the file exists.
        if not isfile(fname):
-        raise FileExistsError("Clump file `{}` does not exist.".format(fname))
+            raise FileExistsError(
                "Clump file `{}` does not exist.".format(fname))
        # Read in the clump array. This is how the columns must be written!
        data = numpy.genfromtxt(fname)
@ -511,3 +752,58 @@ def read_mmain(n, srcdir, fname="Mmain_{}.npy"):
        out[name] = arr[:, i]
    return out
 def get_positions(n_sim, n_snap, get_clumpid, verbose=True,
                  srcdir="/mnt/extraspace/hdesmond/"):
    """
    Shortcut to get particle IDs, positions, masses and optionally clump
    indices.
    Parameters
    ----------
    n_sim : int
        CSiBORG IC realisation index.
    n_snap : int
        Snapshot index.
    get_clumpid : bool
        Whether to also return the clump indices.
    verbose : bool, optional
        Verbosity flag. By default `True`.
    srcdir : str, optional
        The file path to the folder where realisations of the ICs are stored.
        By default `/mnt/extraspace/hdesmond/`.
    Returns
    -------
    particle_ids : 1-dimensional array
        Particle IDs of shape `(n_particles, )`.
    particle_pos : 2-dimensional array
        Particle box coordinates of shape `(n_particles, 3)`.
    particle_mass : 1-dimensional array
        Particle mass of shape `(n_particles, )`.
    clump_ids : 1-dimensional array, optional
        Particles' clump IDs of shape `(n_particles, )`. Returned only if
        `get_clumpid` is `True`.
    """
    # Setup the paths
    paths = CSiBORGPaths(srcdir)
    paths.set_info(n_sim, n_snap)
    # Extract particles
    reader = ParticleReader(paths)
    pars_extract = ["ID", "x", "y", "z", "M"]
    # Read particles and unpack
    particles = reader.read_particle(pars_extract, verbose)
    pids = extract_from_structured(particles, "ID")
    ppos = extract_from_structured(particles, ["x", "y", "z"])
    pmass = extract_from_structured(particles, "M")
    # Force early memory release
    del particles
    out = (pids, ppos, pmass)
    if get_clumpid:
        out += (reader.read_clumpid(verbose),)
    return out
--- a/csiborgtools/units/box_units.py
+++ b/csiborgtools/units/box_units.py
@ -20,7 +20,7 @@ import numpy
 from scipy.interpolate import interp1d
 from astropy.cosmology import LambdaCDM
 from astropy import (constants, units)
-from ..read import read_info
+from ..read import ParticleReader
 # Map of unit conversions
@ -38,18 +38,17 @@ class BoxUnits:
    Paramaters
    ----------
-    Nsnap : int
+    paths : py:class`csiborgtools.read.CSiBORGPaths`
-        Snapshot index.
+        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
    simpath : str
        Path to the simulation where its snapshot index folders are stored.
    """
    _cosmo = None
-    def __init__(self, Nsnap, simpath):
+    def __init__(self, paths):
        """
        Read in the snapshot info file and set the units from it.
        """
-        info = read_info(Nsnap, simpath)
+        partreader = ParticleReader(paths)
        info = partreader.read_info()
        pars = ["boxlen", "time", "aexp", "H0",
                "omega_m", "omega_l", "omega_k", "omega_b",
                "unit_l", "unit_d", "unit_t"]
@ -220,6 +219,8 @@ class BoxUnits:
        r"""
        Convert the box comoving distance to cosmological redshift.
        NOTE: this likely is already the observed redshift.
        Parameters
        ----------
        dist : float
@ -236,8 +237,24 @@ class BoxUnits:
    def box2pecredshift(self, vx, vy, vz, px, py, pz, p0x=0, p0y=0, p0z=0):
        """
-        TODO: docs
+        Convert the box phase-space information to a peculiar redshift.
        NOTE: there is some confusion about this.
        Parameters
        ----------
        vx, vy, vz : 1-dimensional arrays
            The Cartesian velocity components.
        px, py, pz : 1-dimensional arrays
            The Cartesian position vectors components.
        p0x, p0y, p0z : floats
            The centre of the box. By default 0, in which it is assumed that
            the coordinates are already centred.
        Returns
        -------
        pec_redshift : 1-dimensional array
            The peculiar redshift.
        """
        # Peculiar velocity along the radial distance
        r = numpy.vstack([px - p0x, py - p0y, pz - p0z]).T
@ -251,8 +268,24 @@ class BoxUnits:
    def box2obsredshift(self, vx, vy, vz, px, py, pz, p0x=0, p0y=0, p0z=0):
        """
-        TODO: docs
+        Convert the box phase-space information to an 'observed' redshift.
        NOTE: there is some confusion about this.
        Parameters
        ----------
        vx, vy, vz : 1-dimensional arrays
            The Cartesian velocity components.
        px, py, pz : 1-dimensional arrays
            The Cartesian position vectors components.
        p0x, p0y, p0z : floats
            The centre of the box. By default 0, in which it is assumed that
            the coordinates are already centred.
        Returns
        -------
        obs_redshift : 1-dimensional array
            The observed redshift.
        """
        r = numpy.vstack([px - p0x, py - p0y, pz - p0z]).T
        zcosmo = self.box2cosmoredshift(numpy.sum(r**2, axis=1)**0.5)
--- a/csiborgtools/utils/init.py
+++ b/csiborgtools/utils/init.py
@ -15,4 +15,4 @@
 from .recarray_manip import (cols_to_structured, add_columns, rm_columns,  # noqa
                             list_to_ndarray, array_to_structured,  # noqa
-                             flip_cols)  # noqa
+                             flip_cols, extract_from_structured)  # noqa
--- a/csiborgtools/utils/recarray_manip.py
+++ b/csiborgtools/utils/recarray_manip.py
@ -209,3 +209,35 @@ def flip_cols(arr, col1, col2):
    dum = numpy.copy(arr[col1])
    arr[col1] = arr[col2]
    arr[col2] = dum
 def extract_from_structured(arr, cols):
    """
    Extract columns `cols` from a structured array. The  array dtype is set
    to be that of the first column in `cols`.
    Parameters
    ----------
    arr : structured array
        Array from which to extract columns.
    cols : list of str or str
        Column to extract.
    Returns
    -------
    out : 2- or 1-dimensional array
        Array with shape `(n_particles, len(cols))`. If `len(cols)` is 1
        flattens the array.
    """
    cols = [cols] if isinstance(cols, str) else cols
    for col in cols:
        if col not in arr.dtype.names:
            raise ValueError("Invalid column `{}`!".format(col))
    # Preallocate an array and populate it
    out = numpy.zeros((arr.size, len(cols)), dtype=arr[cols[0]].dtype)
    for i, col in enumerate(cols):
        out[:, i] = arr[col]
    # Optionally flatten
    if len(cols) == 1:
        return out.reshape(-1,)
    return out
--- a/pymake.sh
+++ b/pymake.sh
@ -0,0 +1,2 @@
 cmd="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python -m pip install ."
 $cmd
--- a/pyremove.sh
+++ b/pyremove.sh
@ -0,0 +1,2 @@
 cmd="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python -m pip uninstall csiborgtools"
 $cmd
--- a/scripts/playground.ipynb
+++ b/scripts/playground.ipynb
--- a/scripts/run_find_initpos.jl
+++ b/scripts/run_find_initpos.jl
@ -0,0 +1,49 @@
 # Copyright (C) 2022 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 using Pkg: activate, build
 activate("../JuliaCSiBORGTools/")
 using JuliaCSiBORGTools
 using NPZ: npzread
 using PyCall: pyimport
 csiborgtools = pyimport("csiborgtools")
 verbose = true
 paths = csiborgtools.read.CSiBORGPaths()
 nsims = paths.ic_ids[:1]
 for nsim in nsims
    nsnap_min = convert(Int64, paths.get_minimum_snapshot(nsim))
    nsnap_max = convert(Int64, paths.get_maximum_snapshot(nsim))
    # Get the maximum snapshot properties
    verbose ? println("Loading snapshot $nsnap_max from simulation $nsim") : nothing
    pids, ppos, pmass, clumpids = csiborgtools.read.get_positions(nsim, nsnap_max, get_clumpid=true, verbose=false)
    println("Sizes are: ")
    println(size(pids))
    println(size(ppos))
    println(size(pmass))
 #    # Get the minimum snapshot properties
 #    verbose ? println("Loading snapshot $nsnap_min from simulation $nsim") : nothing
 #    pids, ppos, pmass = csiborgtools.read.get_positions(nsim, nsnap_max, get_clumpid=false, verbose=false)
    JuliaCSiBORGTools.halo_parts(0, partids, clumpids)
 end
--- a/scripts/run_fit_halos.py
+++ b/scripts/run_fit_halos.py
@ -46,20 +46,21 @@ cols_collect = [("npart", I64), ("totpartmass", F64), ("Rs", F64),
                ("rho0", F64), ("conc", F64), ("rmin", F64),
                ("rmax", F64), ("r200", F64), ("r500", F64),
                ("m200", F64), ("m500", F64)]
 paths = csiborgtools.read.CSiBORGPaths()
-Nsims = csiborgtools.read.get_csiborg_ids("/mnt/extraspace/hdesmond")
+for i, n_sim in enumerate(paths.ic_ids):
 for i, Nsim in enumerate(Nsims):
    if rank == 0:
        print("{}: calculating {}th simulation.".format(datetime.now(), i))
    # Correctly set the paths!
    n_snap = paths.get_maximum_snapshot(n_sim)
    paths.set_info(n_sim, n_snap)
-    simpath = csiborgtools.read.get_sim_path(Nsim)
+    box = csiborgtools.units.BoxUnits(paths)
    Nsnap = csiborgtools.read.get_maximum_snapshot(simpath)
    box = csiborgtools.units.BoxUnits(Nsnap, simpath)
    jobs = csiborgtools.fits.split_jobs(utils.Nsplits, nproc)[rank]
-    for Nsplit in jobs:
+    for n_split in jobs:
        parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
-            Nsplit, loaddir, Nsim, Nsnap, remove_split=False)
+            n_split, paths, remove_split=False)
        N = clumps.size
        cols = [("index", I64), ("npart", I64), ("totpartmass", F64),
@ -79,9 +80,9 @@ for i, Nsim in enumerate(Nsims):
            out["rmin"][n] = clump.rmin
            out["rmax"][n] = clump.rmax
            out["totpartmass"][n] = clump.total_particle_mass
-            out["vx"] = numpy.average(clump.vel[:, 0], weights=clump.m)
+            out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m)
-            out["vy"] = numpy.average(clump.vel[:, 1], weights=clump.m)
+            out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m)
-            out["vz"] = numpy.average(clump.vel[:, 2], weights=clump.m)
+            out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m)
            # Spherical overdensity radii and masses
            rs, ms = clump.spherical_overdensity_mass([200, 500])
@ -100,7 +101,7 @@ for i, Nsim in enumerate(Nsims):
                    out["rho0"][n] = nfwpost.rho0_from_Rs(Rs)
                    out["conc"][n] = out["r200"][n] / Rs
-        csiborgtools.read.dump_split(out, Nsplit, Nsim, Nsnap, dumpdir)
+        csiborgtools.read.dump_split(out, n_split, paths)
    # Wait until all jobs finished before moving to another simulation
    comm.Barrier()
@ -108,11 +109,13 @@ for i, Nsim in enumerate(Nsims):
    # Use the rank 0 to combine outputs for this CSiBORG realisation
    if rank == 0:
        print("Collecting results!")
        partreader = csiborgtools.read.ParticleReader(paths)
        out_collected = csiborgtools.read.combine_splits(
-            utils.Nsplits, Nsim, Nsnap, utils.dumpdir, cols_collect,
+            utils.Nsplits, partreader, cols_collect, remove_splits=True,
-            remove_splits=True, verbose=False)
+            verbose=False)
-        fname = join(utils.dumpdir, "ramses_out_{}_{}.npy"
+        fname = join(paths.dumpdir, "ramses_out_{}_{}.npy"
-                     .format(str(Nsim).zfill(5), str(Nsnap).zfill(5)))
+                     .format(str(paths.n_sim).zfill(5),
                             str(paths.n_snap).zfill(5)))
        print("Saving results to `{}`.".format(fname))
        numpy.save(fname, out_collected)
--- a/scripts/run_split_halos.py
+++ b/scripts/run_split_halos.py
@ -18,7 +18,6 @@ membership for faster manipulation. Currently does this for the maximum
 snapshot of each simulation. Running this will require a lot of memory.
 """
 from os.path import join
 from mpi4py import MPI
 from datetime import datetime
 try:
@ -34,29 +33,28 @@ comm = MPI.COMM_WORLD
 rank = comm.Get_rank()
 nproc = comm.Get_size()
-Nsims = csiborgtools.read.get_csiborg_ids("/mnt/extraspace/hdesmond")
+paths = csiborgtools.read.CSiBORGPaths()
 n_sims = paths.ic_ids[:1]
 partcols = ["x", "y", "z", "vx", "vy", "vz", "M", "level"]
 dumpdir = join(utils.dumpdir, "temp")
-jobs = csiborgtools.fits.split_jobs(len(Nsims), nproc)[rank]
+jobs = csiborgtools.fits.split_jobs(len(n_sims), nproc)[rank]
 for icount, sim_index in enumerate(jobs):
    print("{}: rank {} working {} / {} jobs.".format(datetime.now(), rank,
                                                     icount + 1, len(jobs)))
-    Nsim = Nsims[sim_index]
+    n_sim = n_sims[sim_index]
-    simpath = csiborgtools.read.get_sim_path(Nsim)
+    n_snap = paths.get_maximum_snapshot(n_sim)
-    Nsnap = csiborgtools.read.get_maximum_snapshot(simpath)
+    # Set paths and inifitalise a particle reader
    paths.set_info(n_sim, n_snap)
    partreader = csiborgtools.read.ParticleReader(paths)
    # Load the clumps, particles' clump IDs and particles.
-    clumps = csiborgtools.read.read_clumps(Nsnap, simpath)
+    clumps = partreader.read_clumps()
-    particle_clumps = csiborgtools.read.read_clumpid(Nsnap, simpath,
+    particle_clumps = partreader.read_clumpid(verbose=False)
-                                                     verbose=False)
+    particles = partreader.read_particle(partcols, verbose=False)
    particles = csiborgtools.read.read_particle(partcols, Nsnap, simpath,
                                                verbose=False)
    # Drop all particles whose clump index is 0 (not assigned to any halo)
-    particle_clumps, particles = csiborgtools.read.drop_zero_indx(
+    particle_clumps, particles = partreader.drop_zero_indx(
        particle_clumps, particles)
    # Dump it!
    csiborgtools.fits.dump_split_particles(particles, particle_clumps, clumps,
-                                           utils.Nsplits, dumpdir, Nsim, Nsnap,
+                                           utils.Nsplits, paths, verbose=False)
                                           verbose=False)
 print("All finished!")
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,31 @@
 from setuptools import find_packages, setup
 BUILD_REQ = ["numpy>=1.17.3", "scipy<1.9.0"]
 INSTALL_REQ = BUILD_REQ
 INSTALL_REQ += ["scikit-learn>=1.1.0",
                "jax[cpu]>=0.3.23",
                "tqdm>=4.64.1",
                "astropy>=5.1",
                ],
 setup(
    name="csiborgtools",
    version="0.1",
    description="CSiBORG analysis tools",
    url="https://github.com/Richard-Sti/csiborgtools",
    author="Richard Stiskalek",
    author_email="richard.stiskalek@protonmail.com",
    license="GPL-3.0",
    packages=find_packages(),
    python_requires=">=3.8",
    build_requires=BUILD_REQ,
    setup_requires=BUILD_REQ,
    install_requires=INSTALL_REQ,
    classifiers=[
        "Development Status :: 1 - Planning",
        "Intended Audience :: Science/Research",
        "Operating System :: POSIX :: Linux",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9"]
 )
		`@ -0,0 +1,2 @@`
							`cmd="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python -m pip install ."`
							`$cmd`