kNN memory batching (#35)

* Add batch sizing for less memory

* Add batch size to submission

* Update nb

* Add brute KNN

* unused variable

* Update nb
This commit is contained in:
Richard Stiskalek 2023-04-01 07:57:21 +01:00 committed by GitHub
parent 63ab3548b4
commit 513872ceb6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 188 additions and 64 deletions

View file

@ -42,6 +42,7 @@ parser.add_argument("--rmax", type=float)
# The integer options below are handed to `knncdf` in `do_task` under the same
# keyword names (`nneighbours`, `nsamples`, `neval`, `batch_size`).
parser.add_argument("--nneighbours", type=int)
parser.add_argument("--nsamples", type=int)
parser.add_argument("--neval", type=int)
# No default is given, so `args.batch_size` is None when the flag is omitted.
parser.add_argument("--batch_size", type=int)
# Passed to `knncdf` as `random_state`.
parser.add_argument("--seed", type=int, default=42)
# Parsed once at module level; `do_task` reads `args` directly.
args = parser.parse_args()
@ -77,8 +78,8 @@ def do_task(ic):
rs, cdf = knncdf(knn, nneighbours=args.nneighbours, Rmax=Rmax,
rmin=args.rmin, rmax=args.rmax, nsamples=args.nsamples,
neval=args.neval, random_state=args.seed,
verbose=False)
neval=args.neval, batch_size=args.batch_size,
random_state=args.seed, verbose=False)
out.update({"cdf_{}".format(i): cdf})
out.update({"rs": rs, "mass_threshold": mass_threshold})

View file

@ -1,4 +1,4 @@
nthreads=140
nthreads=30
memory=7
queue="berg"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
@ -7,9 +7,14 @@ file="run_knn.py"
rmin=0.01
rmax=100
nneighbours=16
nsamples=10000000
nsamples=1000000000
batch_size=10000000
neval=10000
# 10,000,000
# 10000000 # 1e7
# 1000000000 # 1e9
# Command line for the kNN script: forward every tunable defined above.
# --batch_size is included so that the batch_size setting actually reaches the
# script instead of being silently ignored.
pythoncm="$env $file --rmin $rmin --rmax $rmax --nneighbours $nneighbours --nsamples $nsamples --neval $neval --batch_size $batch_size"
# echo $pythoncm