csiborgtools/notebooks/knn.ipynb

824 lines
18 KiB
Text
Raw Normal View History

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "5a38ed25",
"metadata": {
"ExecuteTime": {
"end_time": "2023-04-01T06:10:36.618752Z",
"start_time": "2023-04-01T06:10:10.251616Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"not found\n"
]
}
],
"source": [
"import numpy as np\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.neighbors import NearestNeighbors\n",
"import joblib\n",
"from tqdm import tqdm\n",
"try:\n",
" import csiborgtools\n",
"except ModuleNotFoundError:\n",
" print(\"not found\")\n",
" import sys\n",
" sys.path.append(\"../\")\n",
" import csiborgtools\n",
"\n",
"\n",
"%matplotlib notebook\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "4218b673",
"metadata": {
"ExecuteTime": {
"end_time": "2023-04-01T06:10:44.171672Z",
"start_time": "2023-04-01T06:10:42.109733Z"
}
},
"outputs": [],
"source": [
"cat = csiborgtools.read.HaloCatalogue(7444, min_mass=1e13, max_dist=155 / 0.705)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ff7a1b6",
"metadata": {
"ExecuteTime": {
"start_time": "2023-04-01T06:12:15.998Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
" 0%| | 0/1 [00:00<?, ?it/s]"
]
}
],
"source": [
"knn = NearestNeighbors()\n",
"knn.fit(cat.positions)\n",
"\n",
"knncdf = csiborgtools.match.kNN_CDF()\n",
"\n",
"# Evaluate the kNN-CDF on the fitted `knn`; returns the radii `rs` and `cdf`.\n",
"rs, cdf = knncdf(\n",
"    knn,\n",
"    nneighbours=2,\n",
"    Rmax=155 / 0.705,\n",
"    rmin=0.01,\n",
"    rmax=100,\n",
"    nsamples=int(1e8),\n",
"    neval=int(1e4),\n",
"    random_state=42,\n",
"    batch_size=int(1e7),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "2b85c1c6",
"metadata": {
"ExecuteTime": {
"end_time": "2023-04-01T06:11:13.560595Z",
"start_time": "2023-04-01T06:11:12.888821Z"
}
},
"outputs": [],
"source": [
"X = knncdf.rvs_in_sphere(nsamples=int(1e7), R=1.)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "3a80cb5b",
"metadata": {
"ExecuteTime": {
"end_time": "2023-04-01T06:11:15.024753Z",
"start_time": "2023-04-01T06:11:14.973967Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variable Type Data/Info\n",
"------------------------------------------------\n",
"NearestNeighbors ABCMeta <class 'sklearn.neighbors<...>rvised.NearestNeighbors'>\n",
"X ndarray 10000000x3: 30000000 elems, type `float32`, 120000000 bytes (114.44091796875 Mb)\n",
"cat HaloCatalogue <csiborgtools.read.make_c<...>object at 0x7fbbc6073fa0>\n",
"cdf ndarray 2x9999: 19998 elems, type `float32`, 79992 bytes\n",
"csiborgtools module <module 'csiborgtools' fr<...>siborgtools/__init__.py'>\n",
"joblib module <module 'joblib' from '/m<...>ages/joblib/__init__.py'>\n",
"knn NearestNeighbors NearestNeighbors()\n",
"knncdf kNN_CDF <csiborgtools.match.knn.k<...>object at 0x7fbbc68bb5b0>\n",
"matplotlib module <module 'matplotlib' from<...>/matplotlib/__init__.py'>\n",
"np module <module 'numpy' from '/mn<...>kages/numpy/__init__.py'>\n",
"plt module <module 'matplotlib.pyplo<...>es/matplotlib/pyplot.py'>\n",
"rs ndarray 9999: 9999 elems, type `float64`, 79992 bytes\n",
"sys module <module 'sys' (built-in)>\n",
"tqdm type <class 'tqdm.std.tqdm'>\n"
]
}
],
"source": [
"%whos"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8b9a8cf0",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1825f00",
"metadata": {
"ExecuteTime": {
"end_time": "2023-04-01T06:01:29.388586Z",
"start_time": "2023-04-01T06:01:29.321025Z"
},
"scrolled": false
},
"outputs": [],
"source": [
"# Peaked CDF of the first row of `cdf`, drawn against `rs` on log-log axes.\n",
"peaked = knncdf.peaked_cdf(cdf[0, :])\n",
"\n",
"plt.figure()\n",
"plt.plot(rs, peaked)\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.xscale(\"log\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "289549a0",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-31T22:55:20.690887Z",
"start_time": "2023-03-31T22:55:20.656550Z"
}
},
"outputs": [],
"source": [
"mask"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a8c5202",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-31T22:54:52.330633Z",
"start_time": "2023-03-31T22:54:52.299548Z"
}
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "46f54897",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-31T22:54:25.138813Z",
"start_time": "2023-03-31T22:54:25.105044Z"
}
},
"outputs": [],
"source": [
"dist"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58806ab9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "c59b3a19",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e345945c",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-31T09:35:49.059172Z",
"start_time": "2023-03-31T09:35:42.817291Z"
}
},
"outputs": [],
"source": [
"# NOTE(review): `cdfs` and `ics` are not defined in any visible cell of this\n",
"# notebook -- TODO confirm where they are computed before relying on this cell.\n",
"m1 = (rs > 1) & (rs < 35)\n",
"\n",
"fig, axs = plt.subplots(ncols=3, figsize=(6.4 * 1.5, 4.8), sharey=True)\n",
"fig.subplots_adjust(wspace=0)\n",
"for k in range(3):\n",
"    # One thin curve per entry of `ics`, kept only where 1 < rs < 35 and the\n",
"    # value exceeds 1e-3.\n",
"    for n in range(len(ics)):\n",
"        m = m1 & (cdfs[n, k, :] > 1e-3)\n",
"        axs[k].plot(rs[m], cdfs[n, k, m], c=\"black\", lw=0.05)\n",
"\n",
"    # Per-panel axis setup (depends on `k` only, so it sits outside the inner loop).\n",
"    axs[k].set_xscale(\"log\")\n",
"    axs[k].set_yscale(\"log\")\n",
"    axs[k].set_title(r\"$k = {}$\".format(k))\n",
"    axs[k].set_xlabel(r\"$r~\\left[\\mathrm{Mpc}\\right]$\")\n",
"\n",
"axs[0].set_ylabel(r\"Peaked CDF\")\n",
"\n",
"plt.tight_layout(w_pad=0)\n",
"fig.savefig(\"../plots/peaked_cdf.png\", dpi=450)\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f8786c0",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-31T09:50:10.103650Z",
"start_time": "2023-03-31T09:50:02.221741Z"
}
},
"outputs": [],
"source": [
"# NOTE(review): `cdfs` and `ics` are not defined in any visible cell of this\n",
"# notebook -- TODO confirm where they are computed before relying on this cell.\n",
"m = (rs > 0.5) & (rs < 35)\n",
"\n",
"fig, axs = plt.subplots(ncols=3, figsize=(6.4 * 1.5, 4.8), sharey=True)\n",
"fig.subplots_adjust(wspace=0)\n",
"for k in range(3):\n",
"    # NaN-ignoring mean over the first axis of `cdfs`; every curve below is\n",
"    # plotted as a ratio to it.\n",
"    mu = np.nanmean(cdfs[:, k, :], axis=0)\n",
"\n",
"    for n in range(len(ics)):\n",
"        axs[k].plot(rs[m], (cdfs[n, k, :] / mu)[m], c=\"black\", lw=0.1)\n",
"\n",
"    # Per-panel decoration: unity line, vertical marker at 2.65 / 0.705, log x-axis.\n",
"    axs[k].set_ylim(0.5, 1.5)\n",
"    axs[k].axhline(1, ls=\"--\", c=\"red\", zorder=0)\n",
"    axs[k].axvline(2.65 / 0.705, ls=\"--\", c=\"red\", zorder=0)\n",
"    axs[k].set_xscale(\"log\")\n",
"    axs[k].set_xlabel(r\"$r~\\left[\\mathrm{Mpc}\\right]$\")\n",
"    axs[k].set_title(r\"$k = {}$\".format(k))\n",
"\n",
"axs[0].set_ylabel(r\"Relative peaked CDF\")\n",
"plt.tight_layout(w_pad=0)\n",
"fig.savefig(\"../plots/peaked_cdf_ratios.png\", dpi=450)\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f64cec1",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T15:46:31.532259Z",
"start_time": "2023-03-30T15:46:30.977449Z"
}
},
"outputs": [],
"source": [
"# Ratio of each curve to the NaN-ignoring mean over the first axis, for a single k.\n",
"k = 2\n",
"mu = np.nanmean(cdfs[:, k, :], axis=0)\n",
"\n",
"plt.figure()\n",
"for i in range(len(ics)):\n",
"    plt.plot(rs, cdfs[i, k, :] / mu)\n",
"\n",
"plt.ylim(0.75, 1.25)\n",
"plt.axhline(1, ls=\"--\", c=\"black\")\n",
"plt.xscale(\"log\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6784766",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b416efb3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e650fe2c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1311187d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "03e49a11",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T14:58:29.937514Z",
"start_time": "2023-03-30T14:58:29.530552Z"
}
},
"outputs": [],
"source": [
"x.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24578cba",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0024bbf",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6dc55410",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T14:41:24.290602Z",
"start_time": "2023-03-30T14:41:16.204679Z"
}
},
"outputs": [],
"source": [
"dist0, __ = knn0.kneighbors(X, 3)\n",
"distx, __ = knnx.kneighbors(X, 3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11508c3c",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T14:41:24.560538Z",
"start_time": "2023-03-30T14:41:24.292674Z"
}
},
"outputs": [],
"source": [
"x0, y0 = knncdf.peaked_cdf_from_samples(dist0[:, 0], 0.5, 20, neval=10000)\n",
"xx, yx = knncdf.peaked_cdf_from_samples(distx[:, 0], 0.5, 20, neval=10000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "404501ad",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T14:41:24.598933Z",
"start_time": "2023-03-30T14:41:24.562062Z"
}
},
"outputs": [],
"source": [
"distx[:, 0].min()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "43e08969",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T14:46:10.262865Z",
"start_time": "2023-03-30T14:46:09.486658Z"
}
},
"outputs": [],
"source": [
"plt.figure()\n",
"# Overlay the (x0, y0) and (xx, yx) curves, in that order, on log-log axes.\n",
"for xs, ys in ((x0, y0), (xx, yx)):\n",
"    plt.plot(xs, ys)\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.xscale(\"log\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39547a75",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e160b38",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T13:02:02.033125Z",
"start_time": "2023-03-30T13:02:00.674878Z"
}
},
"outputs": [],
"source": [
"plt.figure()\n",
"\n",
"# For each of the first three columns, draw the CDF of `dist0` and then of\n",
"# `distx`, both evaluated with the arguments (1, 25).\n",
"for i in range(3):\n",
"    for samples in (dist0, distx):\n",
"        plt.plot(*knncdf.cdf_from_samples(samples[:, i], 1, 25))\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.xscale(\"log\")\n",
"plt.xlabel(r\"$r~\\left[\\mathrm{Mpc}\\right]$\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4bfb65d8",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "4703d81c",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T12:13:35.958444Z",
"start_time": "2023-03-30T12:13:35.924241Z"
}
},
"outputs": [],
"source": [
"x = dist[:, 0]\n",
"q = np.linspace(0, 100, int(x.size / 5))\n",
"\n",
"p = np.percentile(x, q)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b054c6df",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T12:16:50.052225Z",
"start_time": "2023-03-30T12:16:50.020395Z"
}
},
"outputs": [],
"source": [
"y = np.sort(x)\n",
"\n",
"yy = np.arange(y.size) / y.size"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5445c964",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T12:16:53.599925Z",
"start_time": "2023-03-30T12:16:53.521266Z"
}
},
"outputs": [],
"source": [
"plt.figure()\n",
"plt.plot(p, q / 100)\n",
"\n",
"plt.plot(y, yy)\n",
"\n",
"# plt.yscale(\"log\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "87fe5874",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb0ad6b9",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T12:03:34.387625Z",
"start_time": "2023-03-30T12:03:34.290961Z"
}
},
"outputs": [],
"source": [
"plt.figure()\n",
"# Step histograms of the first three columns of `dist`, overlaid on one figure.\n",
"for col in range(3):\n",
"    plt.hist(dist[:, col], bins=\"auto\", histtype=\"step\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2aba833",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f70f238",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "03bcb191",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T11:38:04.906150Z",
"start_time": "2023-03-30T11:38:04.758107Z"
}
},
"outputs": [],
"source": [
"plt.figure()\n",
"plt.hist(cat0[\"dec\"], bins=\"auto\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5ad4722",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T11:53:23.004853Z",
"start_time": "2023-03-30T11:53:22.971967Z"
}
},
"outputs": [],
"source": [
"gen = np.random.default_rng(22)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "785b530a",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T11:53:23.330397Z",
"start_time": "2023-03-30T11:53:23.296612Z"
}
},
"outputs": [],
"source": [
"gen.normal()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3d3b5e6",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "464b606d",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T11:36:13.649124Z",
"start_time": "2023-03-30T11:36:12.995693Z"
}
},
"outputs": [],
"source": [
"# The lower bound used to be the name `t`, which is not defined anywhere in this\n",
"# notebook (NameError on a fresh kernel). Replaced by 0 on the assumption that\n",
"# the full [0, pi] range was intended -- TODO confirm.\n",
"theta = np.linspace(0, np.pi, 100)\n",
"\n",
"plt.figure()\n",
"plt.plot(theta, np.sin(theta))\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c29049f5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd2a3295",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "af9abf04",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T11:10:11.104389Z",
"start_time": "2023-03-30T11:10:11.070499Z"
}
},
"outputs": [],
"source": [
"# Single query point stored as a (1, 3) array.\n",
"X = np.array([[-3.9514747, -0.6966991, 2.97158]])\n",
"\n",
"X"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e181b3c3",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T11:32:17.840355Z",
"start_time": "2023-03-30T11:32:17.351883Z"
}
},
"outputs": [],
"source": [
"dist, indxs = knn0.kneighbors(X, n_neighbors=1)\n",
"\n",
"dist, indxs"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d38fd960",
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-30T11:10:18.182326Z",
"start_time": "2023-03-30T11:10:18.145629Z"
}
},
"outputs": [],
"source": [
"cat0.positions[indxs]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a16ddc2f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "bbbe8fb6",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "759a0149",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "312c96c9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b097637b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ced23cb",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "be26cbcc",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv_galomatch",
"language": "python",
"name": "venv_galomatch"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
},
"vscode": {
"interpreter": {
"hash": "f29d02a8350410abc2a9fb79641689d10bf7ab64afc03ec87ca3cf6ed2daa499"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}