diff --git a/notebooks/knn.ipynb b/notebooks/knn.ipynb index 8ebdb6d..c99dbb6 100644 --- a/notebooks/knn.ipynb +++ b/notebooks/knn.ipynb @@ -2,12 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "5a38ed25", "metadata": { "ExecuteTime": { - "end_time": "2023-03-31T17:09:12.165480Z", - "start_time": "2023-03-31T17:09:12.116708Z" + "end_time": "2023-04-01T06:10:36.618752Z", + "start_time": "2023-04-01T06:10:10.251616Z" }, "scrolled": true }, @@ -16,8 +16,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" + "not found\n" ] } ], @@ -44,12 +43,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "4218b673", "metadata": { "ExecuteTime": { - "end_time": "2023-03-31T17:09:13.943312Z", - "start_time": "2023-03-31T17:09:12.167027Z" + "end_time": "2023-04-01T06:10:44.171672Z", + "start_time": "2023-04-01T06:10:42.109733Z" } }, "outputs": [], @@ -59,12 +58,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "5ff7a1b6", "metadata": { "ExecuteTime": { - "end_time": "2023-03-31T17:10:18.303240Z", - "start_time": "2023-03-31T17:10:14.674751Z" + "start_time": "2023-04-01T06:12:15.998Z" } }, "outputs": [ @@ -75,36 +73,6 @@ "\r", " 0%| | 0/1 [00:00<?, ?it/s]" ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "float32\n", - "float32\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:03<00:00, 3.37s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "float32\n", - "float32\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] } ], "source": [ @@ -113,18 +81,135 @@ "\n", "knncdf = csiborgtools.match.kNN_CDF()\n", "\n", - "rs, cdfs_high = knncdf(knn, nneighbours=3, Rmax=155 / 0.705, rmin=0.05, rmax=40,\n", - " nsamples=int(1e6), neval=int(1e4), random_state=42)" + "rs, cdf = knncdf(knn, nneighbours=2, Rmax=155 / 0.705, rmin=0.01, rmax=100,\n", + " nsamples=int(1e8), neval=int(1e4), random_state=42, batch_size=int(1e7))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2b85c1c6", + "metadata": { + "ExecuteTime": { + "end_time": "2023-04-01T06:11:13.560595Z", + "start_time": "2023-04-01T06:11:12.888821Z" + } + }, + "outputs": [], + "source": [ + "X = knncdf.rvs_in_sphere(nsamples=int(1e7), R=1.)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "3a80cb5b", + "metadata": { + "ExecuteTime": { + "end_time": "2023-04-01T06:11:15.024753Z", + "start_time": "2023-04-01T06:11:14.973967Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Variable Type Data/Info\n", + "------------------------------------------------\n", + "NearestNeighbors ABCMeta <class 'sklearn.neighbors<...>rvised.NearestNeighbors'>\n", + "X ndarray 10000000x3: 30000000 elems, type `float32`, 120000000 bytes (114.44091796875 Mb)\n", + "cat HaloCatalogue <csiborgtools.read.make_c<...>object at 0x7fbbc6073fa0>\n", + "cdf ndarray 2x9999: 19998 elems, type `float32`, 79992 bytes\n", + "csiborgtools module <module 'csiborgtools' fr<...>siborgtools/__init__.py'>\n", + "joblib module <module 'joblib' from '/m<...>ages/joblib/__init__.py'>\n", + "knn NearestNeighbors NearestNeighbors()\n", + "knncdf kNN_CDF <csiborgtools.match.knn.k<...>object at 0x7fbbc68bb5b0>\n", + "matplotlib module <module 'matplotlib' from<...>/matplotlib/__init__.py'>\n", + "np module <module 'numpy' from '/mn<...>kages/numpy/__init__.py'>\n", + "plt module <module 'matplotlib.pyplo<...>es/matplotlib/pyplot.py'>\n", + "rs ndarray 9999: 9999 elems, type `float64`, 79992 bytes\n", + "sys module <module 'sys' (built-in)>\n", + "tqdm type <class 'tqdm.std.tqdm'>\n" + ] + } + ], + "source": [ + "%whos" ] }, { "cell_type": "code", "execution_count": null, - "id": "08321431", + "id": "8b9a8cf0", "metadata": {}, "outputs": [], "source": [] }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1825f00", + "metadata": { + "ExecuteTime": { + "end_time": "2023-04-01T06:01:29.388586Z", + "start_time": "2023-04-01T06:01:29.321025Z" + }, + "scrolled": false + }, + "outputs": [], + "source": [ + "plt.figure()\n", + "plt.plot(rs, knncdf.peaked_cdf(cdf[0, :]))\n", + "\n", + "plt.yscale(\"log\" )\n", + "plt.xscale(\"log\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "289549a0", + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-31T22:55:20.690887Z", + "start_time": "2023-03-31T22:55:20.656550Z" + } + }, + "outputs": [], + "source": [ + "mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a8c5202", + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-31T22:54:52.330633Z", + "start_time": "2023-03-31T22:54:52.299548Z" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46f54897", + "metadata": { + "ExecuteTime": { + "end_time": "2023-03-31T22:54:25.138813Z", + "start_time": "2023-03-31T22:54:25.105044Z" + } + }, + "outputs": [], + "source": [ + "dist" + ] + }, { "cell_type": "code", "execution_count": null,