csiborgtools/notebooks/knn.ipynb
Richard Stiskalek 63ab3548b4
kNN-CDF implementation (#34)
* Rewrite doc

* add kNN

* edit loading of samples with no init

* Add verbosity flag

* add KNN submission script

* do not make peaked cdf by default

* Add submit script

* stop ignore sh

* Add mass thresholding

* Edit gitignore

* edits

* Space points in logspace

* Calculate for all ICs

* Update TODO

* Add dtype support

* Update readme

* Update nb
2023-03-31 18:13:41 +01:00

15 KiB

In [4]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
import joblib
from tqdm import tqdm
try:
    import csiborgtools
except ModuleNotFoundError:
    print("not found")
    import sys
    sys.path.append("../")
    import csiborgtools


%matplotlib notebook
%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
In [5]:
cat = csiborgtools.read.HaloCatalogue(7444, min_mass=1e13, max_dist=155 / 0.705)
In [11]:
knn = NearestNeighbors()
knn.fit(cat.positions)

knncdf = csiborgtools.match.kNN_CDF()

rs, cdfs_high = knncdf(knn, nneighbours=3, Rmax=155 / 0.705, rmin=0.05, rmax=40,
                  nsamples=int(1e6), neval=int(1e4), random_state=42)
  0%|          | 0/1 [00:00<?, ?it/s]
float32
float32
100%|██████████| 1/1 [00:03<00:00,  3.37s/it]
float32
float32

In [ ]:

In [ ]:

In [ ]:

In [ ]:
m1 = (rs > 1) & (rs < 35)

fig, axs = plt.subplots(ncols=3, figsize=(6.4 * 1.5, 4.8), sharey=True)
fig.subplots_adjust(wspace=0)
for k in range(3):
    for n in range(len(ics)):
        m = m1 & (cdfs[n, k, :] > 1e-3)
        axs[k].plot(rs[m], cdfs[n, k, m], c="black", lw=0.05)

    axs[k].set_xscale("log")
    axs[k].set_yscale("log")
    axs[k].set_title(r"$k = {}$".format(k))
    axs[k].set_xlabel(r"$r~\left[\mathrm{Mpc}\right]$")

axs[0].set_ylabel(r"Peaked CDF")

plt.tight_layout(w_pad=0)
fig.savefig("../plots/peaked_cdf.png", dpi=450)
fig.show()
In [ ]:
m = (rs > 0.5) & (rs < 35)

fig, axs = plt.subplots(ncols=3, figsize=(6.4 * 1.5, 4.8), sharey=True)
fig.subplots_adjust(wspace=0)
for k in range(3):
    mu = np.nanmean(cdfs[:, k, :], axis=0)

    for n in range(len(ics)):
        axs[k].plot(rs[m], (cdfs[n, k, :] / mu)[m], c="black", lw=0.1)

    axs[k].set_ylim(0.5, 1.5)
    axs[k].axhline(1, ls="--", c="red", zorder=0)
    axs[k].axvline(2.65 / 0.705, ls="--", c="red", zorder=0)
    axs[k].set_xscale("log")
    axs[k].set_xlabel(r"$r~\left[\mathrm{Mpc}\right]$")
    axs[k].set_title(r"$k = {}$".format(k))
    
axs[0].set_ylabel(r"Relative peaked CDF")
plt.tight_layout(w_pad=0)
fig.savefig("../plots/peaked_cdf_ratios.png", dpi=450)
fig.show()
In [ ]:
plt.figure()
k = 2
mu = np.nanmean(cdfs[:, k, :], axis=0)
# plt.plot(rs, mu, c="black")
for i in range(len(ics)):
    plt.plot(rs, cdfs[i, k, :] / mu)


plt.ylim(0.75, 1.25)
plt.axhline(1, ls="--", c="black")
plt.xscale("log")
# plt.yscale("log")
plt.show()
In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:
x.shape
In [ ]:

In [ ]:

In [ ]:
dist0, __ = knn0.kneighbors(X, 3)
distx, __ = knnx.kneighbors(X, 3)
In [ ]:
x0, y0 = knncdf.peaked_cdf_from_samples(dist0[:, 0], 0.5, 20, neval=10000)
xx, yx = knncdf.peaked_cdf_from_samples(distx[:, 0], 0.5, 20, neval=10000)
In [ ]:
distx[:, 0].min()
In [ ]:
plt.figure()
plt.plot(x0, y0)
plt.plot(xx, yx)

plt.yscale("log")
plt.xscale("log")
plt.show()
In [ ]:

In [ ]:
plt.figure()

for i in range(3):
    plt.plot(*knncdf.cdf_from_samples(dist0[:, i], 1, 25))
    plt.plot(*knncdf.cdf_from_samples(distx[:, i], 1, 25))

# plt.xlim(0.5, 25)

plt.yscale("log")
plt.xscale("log")
plt.xlabel(r"$r~\left[\mathrm{Mpc}\right]$")



plt.show()
In [ ]:

In [ ]:
x = dist[:, 0]
q = np.linspace(0, 100, int(x.size / 5))

p = np.percentile(x, q)
In [ ]:
y = np.sort(x)

yy = np.arange(y.size) / y.size
In [ ]:
plt.figure()
plt.plot(p, q / 100)

plt.plot(y, yy)

# plt.yscale("log")
plt.show()
In [ ]:

In [ ]:
plt.figure()
plt.hist(dist[:, 0], bins="auto", histtype="step")
plt.hist(dist[:, 1], bins="auto", histtype="step")
plt.hist(dist[:, 2], bins="auto", histtype="step")

plt.show()
In [ ]:

In [ ]:

In [ ]:
plt.figure()
plt.hist(cat0["dec"], bins="auto")

plt.show()
In [ ]:
gen = np.random.default_rng(22)
In [ ]:
gen.normal()
In [ ]:

In [ ]:
theta = np.linspace( t, np.pi, 100)

plt.figure()
plt.plot(theta, np.sin(theta))
plt.show()
In [ ]:

In [ ]:

In [ ]:
X = np.array([-3.9514747, -0.6966991,  2.97158]).reshape(1, -1)

X
In [ ]:
dist, indxs = knn0.kneighbors(X, n_neighbors=1)

dist, indxs
In [ ]:
cat0.positions[indxs]
In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]: