csiborgtools/notebooks/playground_matching.ipynb
Richard Stiskalek 04119a5314
Update initial matching & overlaps (#47)
* pep8

* fix convention

* Update script

* enforce optimisation boundaries to be finite

* Update TODO

* Remove sky matching

* FIx a small bug

* fix bug

* Remove import

* Add halo fitted quantities

* Update nbs

* update README

* Add load_initial comments

* Rename nbs

* Delete nb

* Update imports

* Rename function

* Update matcher

* Add overlap paths

* Update the matching script

* Update verbosity

* Add verbosity flags

* Simplify make_bckg_delta

* bug fix

* fix bug
2023-04-21 01:35:06 +02:00

101 KiB

In [1]:
import numpy as np
import numpy
%matplotlib notebook
import matplotlib
import matplotlib.pyplot as plt
try:
    import csiborgtools
except ModuleNotFoundError:
    print("not found")
    import sys
    sys.path.append("../")
    import csiborgtools
# import utils
import joblib

from scipy.stats import spearmanr
from datetime import datetime

from tqdm import tqdm, trange
from numba import jit
from scipy.ndimage import gaussian_filter

from os.path import join
%load_ext autoreload
%autoreload 2

%load_ext line_profiler
not found
In [2]:
cat0 = csiborgtools.read.ClumpsCatalogue(7468)
catx = csiborgtools.read.ClumpsCatalogue(7588)
In [3]:
reader = csiborgtools.read.PairOverlap(cat0, catx, max_dist=150 / 0.705)
In [31]:
ks = np.argsort(reader.cat0("totpartmass"))[::-1]
k = ks[1]


plt.figure()
plt.scatter(reader.dist(False, "r200")[k], reader.mass_ratio()[k], c=reader.overlap(False)[k])
plt.colorbar(label="Overlap")

plt.title(r"$\log M_{{\rm tot}} / M_\odot = {:.4f}$".format(np.log10(reader.cat0("totpartmass")[k])))
plt.xlabel(r"$\Delta r_i / R_{200c}$")
plt.ylabel(r"$|\log \dfrac{M_i}{M_{\rm tot}}|$")
plt.tight_layout()
plt.show()
No description has been provided for this image
In [32]:
print("Starting: {}.".format(datetime.now()))
clumps0 = np.load("/mnt/extraspace/rstiskalek/csiborg/initmatch/clump_7468_particles.npy", allow_pickle=True)
print("Loaded `clump0`: {}.".format(datetime.now()))
clumpsx = np.load("/mnt/extraspace/rstiskalek/csiborg/initmatch/clump_7588_particles.npy", allow_pickle=True)
print("Loaded `clumpx`: {}.".format(datetime.now()))

overlapper = csiborgtools.match.ParticleOverlap()

hid2clumps0 = {hid: n for n, hid in enumerate(clumps0["ID"])}
hid2clumpsx = {hid: n for n, hid in enumerate(clumpsx["ID"])}
Starting: 2023-03-24 14:27:05.644524.
Loaded `clump0`: 2023-03-24 14:41:48.868024.
Loaded `clumpx`: 2023-03-24 14:49:25.871648.
In [33]:
# Convert positions to cell IDs
overlapper.clumps_pos2cell(clumps0)
overlapper.clumps_pos2cell(clumpsx)

mins0, maxs0 = csiborgtools.match.get_halolims(clumps0, overlapper.inv_clength, overlapper.nshift)
minsx, maxsx = csiborgtools.match.get_halolims(clumpsx, overlapper.inv_clength, overlapper.nshift)
In [34]:
delta_bckg = overlapper.make_bckg_delta(clumps0)
delta_bckg = overlapper.make_bckg_delta(clumpsx, delta=delta_bckg)
In [369]:
smooth_kwargs = {"sigma": 1, "truncate": 4, "mode": "constant", "cval": 0.0}

delta_bckg_smooth = gaussian_filter(delta_bckg, **smooth_kwargs)
In [363]:
# k = 24734 # skull!

ks = np.argsort(reader.cat0("totpartmass"))[::-1]
# k = ks[1]
k = 331
n = 0

print("Ratio is ", summed_ratio[k])

print("Original overlap is ", overlap_raw[k][n])
print("Smoothed overlap is ", overlap_smoothed[k][n])

index_cl0 = hid2clumps0[reader.cat0("index", k)]
cl0 = clumps0[index_cl0][0]
mins_cl0, maxs_cl0 = mins0[index_cl0], maxs0[index_cl0]

index_clx = hid2clumpsx[reader.catx("index", reader["match_indxs"][k][n])]
clx = clumpsx[index_clx][0]
mins_clx, maxs_clx = minsx[index_clx], maxsx[index_clx]



delta1, delta2, cellmins, nonzero = overlapper.make_deltas(
    cl0, clx, mins_cl0, maxs_cl0, mins_clx, maxs_clx, smooth_kwargs=smooth_kwargs)

csiborgtools.match.calculate_overlap(delta1, delta2, cellmins, delta_bckg_smooth)
Ratio is  0.9820815
Original overlap is  0.6785714
Smoothed overlap is  0.6664124
Out[363]:
0.32628544480462635
In [364]:
xs = []
for n in range(reader["match_indxs"][k].size):

    index_clx = hid2clumpsx[reader.catx("index", reader["match_indxs"][k][n])]
    clx = clumpsx[index_clx][0]
    mins_clx, maxs_clx = minsx[index_clx], maxsx[index_clx]
    
    print("NGP/smoothed overlap ", overlap_raw[k][n], overlap_smoothed[k][n])
    delta1, delta2, cellmins, nonzero1 = overlapper.make_deltas(
        cl0, clx, mins_cl0, maxs_cl0, mins_clx, maxs_clx, smooth_kwargs=smooth_kwargs)
    
    x = csiborgtools.match.calculate_overlap(delta1, delta2, cellmins, delta_bckg_smooth)
    print(x)
    xs.append(x)
    
print("Sum is ", sum(xs))
print("Originally NGP/smoothed was ", summed_raw[k], summed_smoothed[k])
NGP/smoothed overlap  0.6785714 0.6664124
0.32628544480462635
Sum is  0.32628544480462635
Originally NGP/smoothed was  0.6785714 0.6664124
In [ ]:

In [ ]:

In [ ]:

In [ ]:
dlogm = [None] * len(indxs)
mass = [None] * len(indxs)
for k in trange(len(indxs)):
    dlogm[k] = np.abs(np.log10(cat[0]["totpartmass"][k]) - np.log10(cat[1]["totpartmass"][indxs[k]]))
    mass[k] = np.ones(indxs[k].size) * cat[0]["totpartmass"][k]
dlogm = np.asanyarray(dlogm)
mass = np.asanyarray(mass)
In [ ]:
plt.figure()
plt.scatter(np.concatenate(dlogm), np.concatenate(overlap), s=1, rasterized=True)
t = np.linspace(0, 2)
plt.plot(t, 10**(-t), c="red", label=r"$10^{-|\log M_1 / M_2|}$")
plt.xlabel(r"$|\log M_1 / M_2|$")
plt.ylabel(r"$\mathcal{O}$")
plt.legend()
plt.tight_layout()
# plt.savefig("../plots/mass_comparison.png", dpi=450)
plt.show()
In [ ]:

In [ ]:
for k in trange(len(indxs)):
    if np.any((dlogm[k] > 1.75) & (overlap[k] > 0.15)):
        print(k)
In [ ]:
k = 97788
print(dlogm[k])
print(overlap[k])
n = np.argmax(overlap[k])

index_cl0 = [cl[1] for cl in clumps0].index(cat[0][k]["index"])
cl0 = clumps0[index_cl0][0]
mins_cl0, maxs_cl0 = mins0[index_cl0], maxs0[index_cl0]

index_clx = [cl[1] for cl in clumpsx].index(cat[1]["index"][indxs[k]][n])
clx = clumpsx[index_clx][0]
mins_clx, maxs_clx = minsx[index_clx], maxsx[index_clx]
In [ ]:
delta1, delta2, cellmins = overlapper.make_deltas(cl0, clx, mins_cl0, maxs_cl0, mins_clx, maxs_clx)
In [ ]:
overlapper.overlap(delta1, delta2, cellmins, delta)
In [ ]:
delta1.sum() / delta2.sum()
In [ ]:

In [ ]:
plt.figure()
plt.imshow(np.sum(delta1, axis=2))
plt.show()

plt.figure()
plt.imshow(np.sum(delta2, axis=2))
plt.show()
In [ ]:

In [ ]:

In [ ]:
ncounter = len(indxs[k])
true_overlap = np.full(ncounter, np.nan)
spherical_overlap = np.full(ncounter, np.nan)

for n in trange(len(indxs[k])):
    clx = clumpsx[[cl[1] for cl in clumpsx].index(cat[1]["index"][indxs[k]][n])][0]
    
    R1 = (3 * cl0.size / (4 * np.pi))**(1./3) * 1 / 2048
    R2 = (3 * clx.size / (4 * np.pi))**(1./3) * 1 / 2048
    d = np.linalg.norm([np.mean(cl0[p]) - np.mean(clx[p]) for p in ('x', 'y', 'z')])
    
    spherical_overlap[n] = csiborgtools.match.spherical_overlap(R1, R2, d)
    true_overlap[n] = overlapper(cl0, clx, delta)
    
#     print(true_overlap, spherical_overlap)
In [ ]:
plt.figure()
plt.scatter(true_overlap, spherical_overlap)

t = np.linspace(0, 1, 100)
plt.plot(t, t, c="k", ls="--")

plt.xlabel("True overlap")
plt.ylabel("Spherical overlap")
# plt.xscale("log")
# plt.yscale("log")
plt.show()
In [ ]:

In [ ]:
R1 = (3 * cl0.size / (4 * np.pi))**(1./3) * 1 / 2048
R2 = (3 * clx.size / (4 * np.pi))**(1./3) * 1 / 2048
d = np.linalg.norm([np.mean(cl0[p]) - np.mean(clx[p]) for p in ('x', 'y', 'z')])
In [ ]:

In [ ]:

In [ ]:

In [ ]:
box = cat[0].box
maverage = box.box2solarmass(clumps0[2][0]["M"][0])
cell = box.box2mpc(1/2048)
In [ ]:
n_sim = 0
import numpy

R = (3 * cat.cats[n_sim]["npart"] / (4 * numpy.pi))**(1./3) * 1 / 2048
R = cat.cats[n_sim].box.box2mpc(R)
In [ ]:
# dlogm = [None] * len(indxs)
# for k in trange(len(indxs)):
#     dlogm[k] = np.abs(np.log10(cat[0]["totpartmass"][k]) - np.log10(cat[1]["totpartmass"][indxs[k]]))
# dlogm = np.asanyarray(dlogm)

normdist = [None] * len(indxs)
masses = [None] * len(indxs)
for k in trange(len(indxs)):
    normdist[k] = dist0[k] / ((3 * cat[0]["totpartmass"][k] / (4 * np.pi * maverage))**(1/3) * cell)
    masses[k] = np.log10(np.ones(indxs[k].size) * cat[0]["totpartmass"][k])
    
normdist = np.asanyarray(normdist)
masses = np.asanyarray(masses)
In [ ]:
plt.figure()

# plt.scatter(np.concatenate(normdist), np.concatenate(overlap), c=np.concatenate(masses), s=4)

plt.scatter(np.concatenate(normdist), np.concatenate(masses), c=np.concatenate(overlap), s=4)


plt.colorbar()
# plt.xlabel(r"$z = 0$ normalised separation by $\hat{R}$")
# plt.xlabel(r"Absolute difference in total mass [dex]")
# plt.xscale("log")
# plt.ylabel(r"$\mathcal{O}$")
plt.xscale("log")
plt.tight_layout()
# plt.savefig("../plots/another_view.png", dpi=450)
plt.show()
In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:
cl0 = clumps0[[cl[1] for cl in clumps0].index(cat[0][k]["index"])][0]






clx = clumpsx[[cl[1] for cl in clumpsx].index(cat[1]["index"][indxs[k]][n])][0]
In [ ]: