Added OpenCL CIC code
This commit is contained in:
parent
1e733f2318
commit
f6ad248f75
8 changed files with 297 additions and 78 deletions
|
@ -463,6 +463,8 @@ cdef class CosmologyPower:
|
|||
return self._compute(k)
|
||||
|
||||
@cython.boundscheck(False)
|
||||
@cython.cdivision(True)
|
||||
@cython.wraparound(False)
|
||||
def leanCic(float[:,:] particles, float L, int Resolution):
|
||||
cdef CICParticles p
|
||||
cdef CICFilter *cic
|
||||
|
@ -470,6 +472,8 @@ def leanCic(float[:,:] particles, float L, int Resolution):
|
|||
cdef CICType *field
|
||||
cdef np.uint32_t dummyRes
|
||||
cdef np.ndarray[np.float64_t, ndim=3] out_field
|
||||
cdef np.ndarray[np.float64_t, ndim=1] out_field0
|
||||
cdef np.float64_t[:] out_field_buf
|
||||
cdef np.uint64_t j
|
||||
|
||||
cic = new CICFilter(Resolution, L)
|
||||
|
@ -490,8 +494,10 @@ def leanCic(float[:,:] particles, float L, int Resolution):
|
|||
cic.getDensityField(field, dummyRes)
|
||||
|
||||
out_field = np.empty((dummyRes, dummyRes, dummyRes), dtype=np.float64)
|
||||
for j in xrange(out_field.size):
|
||||
out_field[j] = field[j]
|
||||
out_field0 = out_field.reshape(out_field.size)
|
||||
out_field_buf = out_field
|
||||
for j in xrange(out_field_buf.size):
|
||||
out_field_buf[j] = field[j]
|
||||
|
||||
del cic
|
||||
return out_field
|
||||
|
|
|
@ -21,6 +21,7 @@ cdef extern from "project_tool.hpp" namespace "":
|
|||
|
||||
@cython.boundscheck(False)
|
||||
@cython.cdivision(True)
|
||||
@cython.wraparound(False)
|
||||
cdef int interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
|
||||
DTYPE_t z,
|
||||
DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil:
|
||||
|
@ -45,6 +46,10 @@ cdef int interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
|
|||
ry -= iy
|
||||
rz -= iz
|
||||
|
||||
ix = ix % Ngrid
|
||||
iy = iy % Ngrid
|
||||
iz = iz % Ngrid
|
||||
|
||||
jx = (ix+1)%Ngrid
|
||||
jy = (iy+1)%Ngrid
|
||||
jz = (iz+1)%Ngrid
|
||||
|
@ -53,13 +58,6 @@ cdef int interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
|
|||
iy = iy%Ngrid
|
||||
iz = iz%Ngrid
|
||||
|
||||
if (ix < 0) or (jx >= Ngrid):
|
||||
return -1
|
||||
if (iy < 0) or (jy >= Ngrid):
|
||||
return -2
|
||||
if (iz < 0) or (jz >= Ngrid):
|
||||
return -3
|
||||
|
||||
f[0][0][0] = (1-rx)*(1-ry)*(1-rz)
|
||||
f[1][0][0] = ( rx)*(1-ry)*(1-rz)
|
||||
f[0][1][0] = (1-rx)*( ry)*(1-rz)
|
||||
|
@ -80,10 +78,9 @@ cdef int interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
|
|||
d[ix ,jy ,jz ] * f[0][1][1] + \
|
||||
d[jx ,jy ,jz ] * f[1][1][1]
|
||||
|
||||
return 0
|
||||
|
||||
@cython.boundscheck(False)
|
||||
@cython.cdivision(True)
|
||||
@cython.wraparound(False)
|
||||
cdef int ngp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
|
||||
DTYPE_t z,
|
||||
DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil:
|
||||
|
@ -110,14 +107,13 @@ cdef int ngp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
|
|||
|
||||
retval[0] = d[ix ,iy ,iz ]
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
@cython.boundscheck(False)
|
||||
@cython.cdivision(True)
|
||||
@cython.wraparound(False)
|
||||
cdef int ngp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
|
||||
DTYPE_t z,
|
||||
DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil:
|
||||
DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval, DTYPE_t inval) nogil:
|
||||
|
||||
cdef int Ngrid = d.shape[0]
|
||||
cdef DTYPE_t inv_delta = Ngrid/Lbox
|
||||
|
@ -134,23 +130,18 @@ cdef int ngp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
|
|||
iy = int(round(ry))
|
||||
iz = int(round(rz))
|
||||
|
||||
if (ix < 0 or ix >= Ngrid):
|
||||
return -1
|
||||
if (iy < 0 or iy >= Ngrid):
|
||||
return -2
|
||||
if (iz < 0 or iz >= Ngrid):
|
||||
return -3
|
||||
if ((ix < 0) or (ix+1) >= Ngrid or (iy < 0) or (iy+1) >= Ngrid or (iz < 0) or (iz+1) >= Ngrid):
|
||||
retval[0] = inval
|
||||
|
||||
retval[0] = d[ix ,iy ,iz ]
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
@cython.boundscheck(False)
|
||||
@cython.cdivision(True)
|
||||
@cython.wraparound(False)
|
||||
cdef int interp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
|
||||
DTYPE_t z,
|
||||
DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil:
|
||||
DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval, DTYPE_t inval) nogil:
|
||||
|
||||
cdef int Ngrid = d.shape[0]
|
||||
cdef DTYPE_t inv_delta = Ngrid/Lbox
|
||||
|
@ -170,14 +161,8 @@ cdef int interp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
|
|||
ry -= iy
|
||||
rz -= iz
|
||||
|
||||
if ((ix < 0) or (ix+1) >= Ngrid):
|
||||
return -1
|
||||
|
||||
if ((iy < 0) or (iy+1) >= Ngrid):
|
||||
return -2
|
||||
|
||||
if ((iz < 0) or (iz+1) >= Ngrid):
|
||||
return -3
|
||||
if ((ix < 0) or (ix+1) >= Ngrid or (iy < 0) or (iy+1) >= Ngrid or (iz < 0) or (iz+1) >= Ngrid):
|
||||
retval[0] = inval
|
||||
# assert ((ix >= 0) and ((ix+1) < Ngrid))
|
||||
# assert ((iy >= 0) and ((iy+1) < Ngrid))
|
||||
# assert ((iz >= 0) and ((iz+1) < Ngrid))
|
||||
|
@ -202,13 +187,11 @@ cdef int interp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
|
|||
d[ix ,iy+1,iz+1] * f[0][1][1] + \
|
||||
d[ix+1,iy+1,iz+1] * f[1][1][1]
|
||||
|
||||
return 0
|
||||
|
||||
@cython.boundscheck(False)
|
||||
def interp3d(x not None, y not None,
|
||||
z not None,
|
||||
npx.ndarray[DTYPE_t, ndim=3] d not None, DTYPE_t Lbox,
|
||||
bool periodic=False, bool centered=True, bool ngp=False):
|
||||
bool periodic=False, bool centered=True, bool ngp=False, DTYPE_t inval = 0):
|
||||
""" interp3d(x,y,z,d,Lbox,periodic=False,centered=True,ngp=False) -> interpolated values
|
||||
|
||||
Compute the tri-linear interpolation of the given field (d) at the given position (x,y,z). It assumes that they are box-centered coordinates. So (x,y,z) == (0,0,0) is equivalent to the pixel at (Nx/2,Ny/2,Nz/2) with Nx,Ny,Nz = d.shape. If periodic is set, it assumes the box is periodic
|
||||
|
@ -253,41 +236,29 @@ def interp3d(x not None, y not None,
|
|||
if not myngp:
|
||||
if myperiodic:
|
||||
for i in prange(Nelt):
|
||||
if interp3d_INTERNAL_periodic(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i]) < 0:
|
||||
with gil:
|
||||
raise ierror
|
||||
interp3d_INTERNAL_periodic(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i])
|
||||
else:
|
||||
for i in prange(Nelt):
|
||||
if interp3d_INTERNAL(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i]) < 0:
|
||||
with gil:
|
||||
raise ierror
|
||||
interp3d_INTERNAL(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i], inval)
|
||||
else:
|
||||
if myperiodic:
|
||||
for i in prange(Nelt):
|
||||
if ngp3d_INTERNAL_periodic(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i]) < 0:
|
||||
with gil:
|
||||
raise ierror
|
||||
ngp3d_INTERNAL_periodic(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i])
|
||||
else:
|
||||
for i in prange(Nelt):
|
||||
if ngp3d_INTERNAL(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i]) < 0:
|
||||
with gil:
|
||||
raise ierror
|
||||
ngp3d_INTERNAL(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i], inval)
|
||||
return out
|
||||
else:
|
||||
if not myngp:
|
||||
if periodic:
|
||||
if interp3d_INTERNAL_periodic(shifter+x, shifter+y, shifter+z, d, Lbox, &retval) < 0:
|
||||
raise ierror
|
||||
interp3d_INTERNAL_periodic(shifter+x, shifter+y, shifter+z, d, Lbox, &retval)
|
||||
else:
|
||||
if interp3d_INTERNAL(shifter+x, shifter+y, shifter+z, d, Lbox, &retval) < 0:
|
||||
raise ierror
|
||||
interp3d_INTERNAL(shifter+x, shifter+y, shifter+z, d, Lbox, &retval, inval)
|
||||
else:
|
||||
if periodic:
|
||||
if ngp3d_INTERNAL_periodic(shifter+x, shifter+y, shifter+z, d, Lbox, &retval) < 0:
|
||||
raise ierror
|
||||
ngp3d_INTERNAL_periodic(shifter+x, shifter+y, shifter+z, d, Lbox, &retval)
|
||||
else:
|
||||
if ngp3d_INTERNAL(shifter+x, shifter+y, shifter+z, d, Lbox, &retval) < 0:
|
||||
raise ierror
|
||||
ngp3d_INTERNAL(shifter+x, shifter+y, shifter+z, d, Lbox, &retval, inval)
|
||||
return retval
|
||||
|
||||
@cython.boundscheck(False)
|
||||
|
@ -410,8 +381,7 @@ def interp2d(x not None, y not None,
|
|||
@cython.boundscheck(False)
|
||||
@cython.cdivision(True)
|
||||
cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g,
|
||||
npx.ndarray[DTYPE_t, ndim=2] x, int Ngrid, float Lbox):
|
||||
cdef double half_Box = 0.5*Lbox
|
||||
npx.ndarray[DTYPE_t, ndim=2] x, int Ngrid, double Lbox, double shifter):
|
||||
cdef double delta_Box = Ngrid/Lbox
|
||||
cdef int i
|
||||
cdef double a[3], c[3]
|
||||
|
@ -422,7 +392,7 @@ cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g,
|
|||
|
||||
do_not_put = 0
|
||||
for j in range(3):
|
||||
a[j] = (x[i,j]+half_Box)*delta_Box
|
||||
a[j] = (x[i,j]+shifter)*delta_Box
|
||||
b[j] = int(floor(a[j]))
|
||||
a[j] -= b[j]
|
||||
c[j] = 1-a[j]
|
||||
|
@ -446,8 +416,7 @@ cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g,
|
|||
@cython.boundscheck(False)
|
||||
@cython.cdivision(True)
|
||||
cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g,
|
||||
npx.ndarray[DTYPE_t, ndim=2] x, int Ngrid, double Lbox):
|
||||
cdef double half_Box = 0.5*Lbox
|
||||
npx.ndarray[DTYPE_t, ndim=2] x, int Ngrid, double Lbox, double shifter):
|
||||
cdef double delta_Box = Ngrid/Lbox
|
||||
cdef int i
|
||||
cdef double a[3], c[3]
|
||||
|
@ -463,7 +432,7 @@ cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g,
|
|||
|
||||
do_not_put = 0
|
||||
for j in range(3):
|
||||
a[j] = (ax[i,j]+half_Box)*delta_Box
|
||||
a[j] = (ax[i,j]+shifter)*delta_Box
|
||||
b[j] = int(floor(a[j]))
|
||||
b1[j] = (b[j]+1) % Ngrid
|
||||
|
||||
|
@ -488,8 +457,7 @@ cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g,
|
|||
cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g,
|
||||
npx.ndarray[DTYPE_t, ndim=2] x,
|
||||
npx.ndarray[DTYPE_t, ndim=1] mass,
|
||||
int Ngrid, double Lbox):
|
||||
cdef double half_Box = 0.5*Lbox
|
||||
int Ngrid, double Lbox, double shifter):
|
||||
cdef double delta_Box = Ngrid/Lbox
|
||||
cdef int i
|
||||
cdef double a[3], c[3]
|
||||
|
@ -500,7 +468,7 @@ cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g,
|
|||
|
||||
do_not_put = False
|
||||
for j in range(3):
|
||||
a[j] = (x[i,j]+half_Box)*delta_Box
|
||||
a[j] = (x[i,j]+shifter)*delta_Box
|
||||
b[j] = int(a[j])
|
||||
a[j] -= b[j]
|
||||
c[j] = 1-a[j]
|
||||
|
@ -525,7 +493,7 @@ cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g,
|
|||
cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g,
|
||||
npx.ndarray[DTYPE_t, ndim=2] x,
|
||||
npx.ndarray[DTYPE_t, ndim=1] mass,
|
||||
int Ngrid, double Lbox):
|
||||
int Ngrid, double Lbox, double shifter):
|
||||
cdef double half_Box = 0.5*Lbox, m0
|
||||
cdef double delta_Box = Ngrid/Lbox
|
||||
cdef int i
|
||||
|
@ -535,7 +503,7 @@ cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g
|
|||
for i in range(x.shape[0]):
|
||||
|
||||
for j in range(3):
|
||||
a[j] = (x[i,j]+half_Box)*delta_Box
|
||||
a[j] = (x[i,j]+shifter)*delta_Box
|
||||
b[j] = int(floor(a[j]))
|
||||
b1[j] = b[j]+1
|
||||
while b1[j] < 0:
|
||||
|
@ -559,14 +527,20 @@ cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g
|
|||
|
||||
|
||||
def project_cic(npx.ndarray[DTYPE_t, ndim=2] x not None, npx.ndarray[DTYPE_t, ndim=1] mass, int Ngrid,
|
||||
double Lbox, bool periodic = False):
|
||||
double Lbox, bool periodic = False, centered=True):
|
||||
"""
|
||||
project_cic(x array (N,3), mass (may be None), Ngrid, Lbox, periodict)
|
||||
project_cic(x array (N,3), mass (may be None), Ngrid, Lbox, periodict, centered=True)
|
||||
|
||||
This function does a Cloud-In-Cell projection of a 3d unstructured dataset. First argument is a Nx3 array of coordinates.
|
||||
Second argument is an optinal mass. Ngrid is the size output grid and Lbox is the physical size of the grid.
|
||||
"""
|
||||
cdef npx.ndarray[DTYPE_t, ndim=3] g
|
||||
cdef double shifter
|
||||
|
||||
if centered:
|
||||
shifter = 0.5*Lbox
|
||||
else:
|
||||
shifter = 0
|
||||
|
||||
if x.shape[1] != 3:
|
||||
raise ValueError("Invalid shape for x array")
|
||||
|
@ -578,14 +552,14 @@ def project_cic(npx.ndarray[DTYPE_t, ndim=2] x not None, npx.ndarray[DTYPE_t, nd
|
|||
|
||||
if not periodic:
|
||||
if mass == None:
|
||||
INTERNAL_project_cic_no_mass(g, x, Ngrid, Lbox)
|
||||
INTERNAL_project_cic_no_mass(g, x, Ngrid, Lbox, shifter)
|
||||
else:
|
||||
INTERNAL_project_cic_with_mass(g, x, mass, Ngrid, Lbox)
|
||||
INTERNAL_project_cic_with_mass(g, x, mass, Ngrid, Lbox, shifter)
|
||||
else:
|
||||
if mass == None:
|
||||
INTERNAL_project_cic_no_mass_periodic(g, x, Ngrid, Lbox)
|
||||
INTERNAL_project_cic_no_mass_periodic(g, x, Ngrid, Lbox, shifter)
|
||||
else:
|
||||
INTERNAL_project_cic_with_mass_periodic(g, x, mass, Ngrid, Lbox)
|
||||
INTERNAL_project_cic_with_mass_periodic(g, x, mass, Ngrid, Lbox, shifter)
|
||||
|
||||
return g
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@ from _project import *
|
|||
from .grafic import writeGrafic, writeWhitePhase, readGrafic, readWhitePhase
|
||||
from .borg import read_borg_vol
|
||||
from .cic import cicParticles
|
||||
from .cl_cic import cl_CIC_Density
|
||||
from .simu import loadRamsesAll, simpleWriteGadget, SimulationBare
|
||||
from .timing import time_block, timeit, timeit_quiet
|
||||
|
||||
|
|
232
python/cosmotool/cl_cic.py
Normal file
232
python/cosmotool/cl_cic.py
Normal file
|
@ -0,0 +1,232 @@
|
|||
from .timing import time_block as time_block_orig
|
||||
import numpy as np
|
||||
import pyopencl as cl
|
||||
import pyopencl.array as cl_array
|
||||
from contextlib import contextmanager
|
||||
|
||||
TIMER_ACTIVE=False
|
||||
|
||||
@contextmanager
|
||||
def time_block_dummy(*args):
|
||||
yield
|
||||
|
||||
if TIMER_ACTIVE:
|
||||
time_block=time_block_orig
|
||||
else:
|
||||
time_block=time_block_dummy
|
||||
|
||||
CIC_PREKERNEL='''
|
||||
#define NDIM {ndim}
|
||||
#define CENTERED {centered}
|
||||
typedef {cicType} BASIC_TYPE;
|
||||
|
||||
'''
|
||||
|
||||
CIC_KERNEL='''///CL///
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
|
||||
|
||||
__kernel void init_pcell(__global int *p_cell, const int value)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
p_cell[i] = value;
|
||||
}
|
||||
|
||||
__kernel void build_indices(__global const BASIC_TYPE *pos,
|
||||
__global int *part_mesh, __global int *part_list, const int N, const BASIC_TYPE delta, const BASIC_TYPE shift_pos)
|
||||
{
|
||||
int i_part = get_global_id(0);
|
||||
long shifter = 1;
|
||||
long idx = 0;
|
||||
int d;
|
||||
|
||||
for (d = 0; d < NDIM; d++) {
|
||||
BASIC_TYPE x;
|
||||
|
||||
if (CENTERED)
|
||||
x = pos[i_part*NDIM + d] - shift_pos;
|
||||
else
|
||||
x = pos[i_part*NDIM + d];
|
||||
|
||||
int m = (int)floor(x*delta) %% N;
|
||||
|
||||
idx += shifter * m;
|
||||
shifter *= N;
|
||||
}
|
||||
|
||||
// Head of the list
|
||||
int initial_elt = atom_xchg(&part_mesh[idx], i_part);
|
||||
if (initial_elt == -1) {
|
||||
return;
|
||||
}
|
||||
// Point the next pointer of old_end to i_part
|
||||
part_list[i_part] = initial_elt;
|
||||
}
|
||||
|
||||
__kernel void reverse_list(__global int *part_mesh, __global int *part_list)
|
||||
{
|
||||
int mid = get_global_id(0);
|
||||
|
||||
int current_part = part_mesh[mid];
|
||||
if (current_part >= 0) {
|
||||
int next_part = part_list[current_part];
|
||||
part_list[current_part] = -1;
|
||||
while (next_part != -1) {
|
||||
int p = part_list[next_part];
|
||||
part_list[next_part] = current_part;
|
||||
current_part = next_part;
|
||||
next_part = p;
|
||||
}
|
||||
part_mesh[mid] = current_part;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void dance(__global const BASIC_TYPE *pos,
|
||||
__global BASIC_TYPE *density,
|
||||
__global int *part_mesh, __global int *part_list, const int N, const BASIC_TYPE delta, const BASIC_TYPE shift_pos)
|
||||
{
|
||||
int m[NDIM];
|
||||
int shifter = 1;
|
||||
int i;
|
||||
int first, i_part;
|
||||
int idx = 0;
|
||||
|
||||
for (i = 0; i < NDIM; i++) {
|
||||
m[i] = get_global_id(i);
|
||||
idx += shifter * m[i];
|
||||
shifter *= N;
|
||||
}
|
||||
|
||||
first = 1;
|
||||
|
||||
//BEGIN LOOPER
|
||||
%(looperFor)s
|
||||
//END LOOPER
|
||||
|
||||
int idx_dance = 0;
|
||||
BASIC_TYPE w = 0;
|
||||
//LOOPER INDEX
|
||||
int r[NDIM] = { %(looperVariables)s };
|
||||
//END LOOPER
|
||||
|
||||
i_part = part_mesh[idx];
|
||||
while (i_part != -1) {
|
||||
BASIC_TYPE w0 = 1;
|
||||
|
||||
for (int d = 0; d < NDIM; d++) {
|
||||
BASIC_TYPE x;
|
||||
BASIC_TYPE q;
|
||||
BASIC_TYPE dx;
|
||||
|
||||
if (CENTERED)
|
||||
x = pos[i_part*NDIM + d]*delta - shift_pos;
|
||||
else
|
||||
x = pos[i_part*NDIM + d]*delta;
|
||||
q = floor(x);
|
||||
dx = x - q;
|
||||
|
||||
w0 *= (r[d] == 1) ? dx : ((BASIC_TYPE)1-dx);
|
||||
}
|
||||
|
||||
i_part = part_list[i_part];
|
||||
w += w0;
|
||||
}
|
||||
|
||||
shifter = 1;
|
||||
for (i = 0; i < NDIM; i++) {
|
||||
idx_dance += shifter * ((m[i]+r[i])%%N);
|
||||
shifter *= N;
|
||||
}
|
||||
|
||||
density[idx_dance] += w;
|
||||
|
||||
// One dance done. Wait for everybody for the next iteration
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
%(looperForEnd)s
|
||||
}
|
||||
'''
|
||||
|
||||
class CIC_CL(object):
|
||||
|
||||
def __init__(self, context, ndim=2, ktype=np.float32, centered=False):
|
||||
global CIC_PREKERNEL, CIC_KERNEL
|
||||
|
||||
translator = {}
|
||||
if ktype == np.float32:
|
||||
translator['cicType'] = 'float'
|
||||
pragmas = ''
|
||||
elif ktype == np.float64:
|
||||
translator['cicType'] = 'double'
|
||||
pragmas = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n'
|
||||
else:
|
||||
raise ValueError("Invalid ktype")
|
||||
|
||||
# 2 dimensions
|
||||
translator['ndim'] = ndim
|
||||
translator['centered'] = '1' if centered else '0'
|
||||
looperVariables = ','.join(['id%d' % d for d in xrange(ndim)])
|
||||
looperFor = '\n'.join(['for (int id{dim}=0; id{dim} < 2; id{dim}++) {{'.format(dim=d) for d in xrange(ndim)])
|
||||
looperForEnd = '}' * ndim
|
||||
|
||||
kern = pragmas + CIC_PREKERNEL.format(**translator) + (CIC_KERNEL % {'looperVariables': looperVariables, 'looperFor': looperFor, 'looperForEnd':looperForEnd})
|
||||
self.kern_code = kern
|
||||
self.ctx = context
|
||||
self.queue = cl.CommandQueue(context)#, properties=cl.OUT_OF_ORDER_EXEC_MODE_ENABLE)
|
||||
self.ktype = ktype
|
||||
self.ndim = ndim
|
||||
self.prog = cl.Program(self.ctx, kern).build()
|
||||
self.centered = centered
|
||||
|
||||
def run(self, particles, Ng, L):
|
||||
assert particles.strides[1] == self.ktype().itemsize # This is C-ordering
|
||||
assert particles.shape[1] == self.ndim
|
||||
|
||||
print("Start again")
|
||||
|
||||
ndim = self.ndim
|
||||
part_pos = cl_array.to_device(self.queue, particles)
|
||||
part_mesh = cl_array.empty(self.queue, (Ng,)*ndim, np.int32, order='C')
|
||||
density = cl_array.zeros(self.queue, (Ng,)*ndim, self.ktype, order='C')
|
||||
part_list = cl_array.empty(self.queue, (particles.shape[0],), np.int32, order='C')
|
||||
shift_pos = 0.5*L if self.centered else 0
|
||||
|
||||
if True:
|
||||
delta = Ng/L
|
||||
|
||||
with time_block("Init pcell array"):
|
||||
e = self.prog.init_pcell(self.queue, (Ng**ndim,), None, part_mesh.data, np.int32(-1))
|
||||
e.wait()
|
||||
|
||||
with time_block("Init idx array"):
|
||||
e=self.prog.init_pcell(self.queue, (particles.shape[0],), None, part_list.data, np.int32(-1))
|
||||
e.wait()
|
||||
|
||||
with time_block("Build indices"):
|
||||
self.prog.build_indices(self.queue, (particles.shape[0],), None,
|
||||
part_pos.data, part_mesh.data, part_list.data, np.int32(Ng), self.ktype(delta), self.ktype(shift_pos))
|
||||
|
||||
if True:
|
||||
with time_block("Reverse list"):
|
||||
lastevt = self.prog.reverse_list(self.queue, (Ng**ndim,), None, part_mesh.data, part_list.data)
|
||||
# We require pmax pass, particles are ordered according to part_idx
|
||||
|
||||
with time_block("dance"):
|
||||
self.prog.dance(self.queue, (Ng,)*ndim, None, part_pos.data, density.data, part_mesh.data, part_list.data, np.int32(Ng), self.ktype(delta), self.ktype(shift_pos))
|
||||
|
||||
self.queue.finish()
|
||||
del part_pos
|
||||
del part_mesh
|
||||
del part_list
|
||||
with time_block("download"):
|
||||
return density.get()
|
||||
|
||||
|
||||
def cl_CIC_Density(particles, Ngrid, Lbox, context=None, periodic=True, centered=False):
|
||||
"""
|
||||
cl_CIC_Density(particles (Nx3), Ngrid, Lbox, context=None, periodic=True, centered=False)
|
||||
"""
|
||||
if context is None:
|
||||
context = cl.create_some_context()
|
||||
|
||||
cic = CIC_CL(context, ndim=3, centered=centered)
|
||||
|
||||
return cic.run(particles, Ng, L)
|
|
@ -74,6 +74,7 @@ def run_generation(input_borg, a_borg, a_ic, cosmo, supersample=1, do_lpt2=True,
|
|||
# Compute LPT scaling coefficient
|
||||
D1 = cgrowth.D(a_ic)
|
||||
D1_0 = D1/cgrowth.D(a_borg)
|
||||
print "D1_0=%lg" % D1_0
|
||||
velmul = cgrowth.compute_velmul(a_ic) if not psi_instead else 1
|
||||
|
||||
D2 = -3./7 * D1_0**2
|
||||
|
@ -169,13 +170,12 @@ def whitify(density, L, cosmo, supergenerate=1, zero_fill=False, func='HU_WIGGLE
|
|||
def write_icfiles(*generated_ic, **kwargs):
|
||||
"""Write the initial conditions from the tuple returned by run_generation"""
|
||||
|
||||
supergenerate=1
|
||||
supergenerate=kwargs.get('supergenerate', 1)
|
||||
zero_fill=kwargs.get('zero_fill', False)
|
||||
posx,vel,density,N,L,a_ic,cosmo = generated_ic
|
||||
|
||||
ct.simpleWriteGadget("Data/borg.gad", posx, velocities=vel, boxsize=L, Hubble=cosmo['h'], Omega_M=cosmo['omega_M_0'], time=a_ic)
|
||||
for i,c in enumerate(["x","y","z"]):
|
||||
for i,c in enumerate(["z","y","x"]):
|
||||
ct.writeGrafic("Data/ic_velc%s" % c, vel[i].reshape((N,N,N)), L, a_ic, **cosmo)
|
||||
|
||||
ct.writeGrafic("Data/ic_deltab", density, L, a_ic, **cosmo)
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
import pyfftw
|
||||
import numpy as np
|
||||
import cosmotool as ct
|
||||
import borgicgen as bic
|
||||
import pickle
|
||||
|
||||
with file("wisdom") as f:
|
||||
pyfftw.import_wisdom(pickle.load(f))
|
||||
|
||||
cosmo={'omega_M_0':0.3175, 'h':0.6711}
|
||||
cosmo['omega_lambda_0']=1-cosmo['omega_M_0']
|
||||
|
@ -9,11 +14,11 @@ cosmo['omega_B_0']=0.049
|
|||
cosmo['SIGMA8']=0.8344
|
||||
cosmo['ns']=0.9624
|
||||
|
||||
supergen=2
|
||||
supergen=8
|
||||
zstart=50
|
||||
astart=1/69.#1/(1.+zstart)
|
||||
astart=1/(1.+zstart)
|
||||
halfPixelShift=False
|
||||
zero_fill=True
|
||||
zero_fill=False
|
||||
|
||||
if __name__=="__main__":
|
||||
bic.write_icfiles(*bic.run_generation("initial_density_1380.dat", 0.001, astart, cosmo, supersample=1, shiftPixel=halfPixelShift, do_lpt2=False), supergenerate=supergen, zero_fill=zero_fill)
|
||||
bic.write_icfiles(*bic.run_generation("initial_density_1872.dat", 0.001, astart, cosmo, supersample=1, shiftPixel=halfPixelShift, do_lpt2=False), supergenerate=supergen, zero_fill=zero_fill)
|
||||
|
|
|
@ -8,6 +8,7 @@ s = ct.loadRamsesAll(sys.argv[1], int(sys.argv[2]), doublePrecision=True, loadVe
|
|||
|
||||
q = [p for p in s.getPositions()]
|
||||
q += [p for p in s.getVelocities()]
|
||||
q += [np.ones(q[0].size,dtype=q[0].dtype)]
|
||||
q = np.array(q)
|
||||
|
||||
with h5.File("particles.h5", mode="w") as f:
|
||||
|
|
|
@ -203,7 +203,7 @@ int main(int argc, char **argv)
|
|||
bins, interpolated, getMass, rLimit2);
|
||||
hdf5_write_array(out_f, "density", interpolated);
|
||||
//out_f.flush();
|
||||
for (int i = 0; i < 3; i++) {
|
||||
for (int i = 0; i < 0; i++) {
|
||||
computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz,
|
||||
bins, interpolated, boost::bind(getVelocity, _1, i), rLimit2);
|
||||
hdf5_write_array(out_f, str(format("p%d") % i), interpolated);
|
||||
|
|
Loading…
Reference in a new issue