Added OpenCL CIC code

This commit is contained in:
Guilhem Lavaux 2014-09-23 11:54:44 +02:00
parent 1e733f2318
commit f6ad248f75
8 changed files with 297 additions and 78 deletions

View File

@ -463,6 +463,8 @@ cdef class CosmologyPower:
return self._compute(k) return self._compute(k)
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
def leanCic(float[:,:] particles, float L, int Resolution): def leanCic(float[:,:] particles, float L, int Resolution):
cdef CICParticles p cdef CICParticles p
cdef CICFilter *cic cdef CICFilter *cic
@ -470,6 +472,8 @@ def leanCic(float[:,:] particles, float L, int Resolution):
cdef CICType *field cdef CICType *field
cdef np.uint32_t dummyRes cdef np.uint32_t dummyRes
cdef np.ndarray[np.float64_t, ndim=3] out_field cdef np.ndarray[np.float64_t, ndim=3] out_field
cdef np.ndarray[np.float64_t, ndim=1] out_field0
cdef np.float64_t[:] out_field_buf
cdef np.uint64_t j cdef np.uint64_t j
cic = new CICFilter(Resolution, L) cic = new CICFilter(Resolution, L)
@ -490,8 +494,10 @@ def leanCic(float[:,:] particles, float L, int Resolution):
cic.getDensityField(field, dummyRes) cic.getDensityField(field, dummyRes)
out_field = np.empty((dummyRes, dummyRes, dummyRes), dtype=np.float64) out_field = np.empty((dummyRes, dummyRes, dummyRes), dtype=np.float64)
for j in xrange(out_field.size): out_field0 = out_field.reshape(out_field.size)
out_field[j] = field[j] out_field_buf = out_field
for j in xrange(out_field_buf.size):
out_field_buf[j] = field[j]
del cic del cic
return out_field return out_field

View File

@ -21,6 +21,7 @@ cdef extern from "project_tool.hpp" namespace "":
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.cdivision(True) @cython.cdivision(True)
@cython.wraparound(False)
cdef int interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y, cdef int interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
DTYPE_t z, DTYPE_t z,
DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil: DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil:
@ -45,6 +46,10 @@ cdef int interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
ry -= iy ry -= iy
rz -= iz rz -= iz
ix = ix % Ngrid
iy = iy % Ngrid
iz = iz % Ngrid
jx = (ix+1)%Ngrid jx = (ix+1)%Ngrid
jy = (iy+1)%Ngrid jy = (iy+1)%Ngrid
jz = (iz+1)%Ngrid jz = (iz+1)%Ngrid
@ -53,13 +58,6 @@ cdef int interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
iy = iy%Ngrid iy = iy%Ngrid
iz = iz%Ngrid iz = iz%Ngrid
if (ix < 0) or (jx >= Ngrid):
return -1
if (iy < 0) or (jy >= Ngrid):
return -2
if (iz < 0) or (jz >= Ngrid):
return -3
f[0][0][0] = (1-rx)*(1-ry)*(1-rz) f[0][0][0] = (1-rx)*(1-ry)*(1-rz)
f[1][0][0] = ( rx)*(1-ry)*(1-rz) f[1][0][0] = ( rx)*(1-ry)*(1-rz)
f[0][1][0] = (1-rx)*( ry)*(1-rz) f[0][1][0] = (1-rx)*( ry)*(1-rz)
@ -80,10 +78,9 @@ cdef int interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
d[ix ,jy ,jz ] * f[0][1][1] + \ d[ix ,jy ,jz ] * f[0][1][1] + \
d[jx ,jy ,jz ] * f[1][1][1] d[jx ,jy ,jz ] * f[1][1][1]
return 0
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.cdivision(True) @cython.cdivision(True)
@cython.wraparound(False)
cdef int ngp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y, cdef int ngp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
DTYPE_t z, DTYPE_t z,
DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil: DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil:
@ -110,14 +107,13 @@ cdef int ngp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
retval[0] = d[ix ,iy ,iz ] retval[0] = d[ix ,iy ,iz ]
return 0
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.cdivision(True) @cython.cdivision(True)
@cython.wraparound(False)
cdef int ngp3d_INTERNAL(DTYPE_t x, DTYPE_t y, cdef int ngp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
DTYPE_t z, DTYPE_t z,
DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil: DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval, DTYPE_t inval) nogil:
cdef int Ngrid = d.shape[0] cdef int Ngrid = d.shape[0]
cdef DTYPE_t inv_delta = Ngrid/Lbox cdef DTYPE_t inv_delta = Ngrid/Lbox
@ -134,23 +130,18 @@ cdef int ngp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
iy = int(round(ry)) iy = int(round(ry))
iz = int(round(rz)) iz = int(round(rz))
if (ix < 0 or ix >= Ngrid): if ((ix < 0) or (ix+1) >= Ngrid or (iy < 0) or (iy+1) >= Ngrid or (iz < 0) or (iz+1) >= Ngrid):
return -1 retval[0] = inval
if (iy < 0 or iy >= Ngrid):
return -2
if (iz < 0 or iz >= Ngrid):
return -3
retval[0] = d[ix ,iy ,iz ] retval[0] = d[ix ,iy ,iz ]
return 0
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.cdivision(True) @cython.cdivision(True)
@cython.wraparound(False)
cdef int interp3d_INTERNAL(DTYPE_t x, DTYPE_t y, cdef int interp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
DTYPE_t z, DTYPE_t z,
DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil: DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval, DTYPE_t inval) nogil:
cdef int Ngrid = d.shape[0] cdef int Ngrid = d.shape[0]
cdef DTYPE_t inv_delta = Ngrid/Lbox cdef DTYPE_t inv_delta = Ngrid/Lbox
@ -170,14 +161,8 @@ cdef int interp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
ry -= iy ry -= iy
rz -= iz rz -= iz
if ((ix < 0) or (ix+1) >= Ngrid): if ((ix < 0) or (ix+1) >= Ngrid or (iy < 0) or (iy+1) >= Ngrid or (iz < 0) or (iz+1) >= Ngrid):
return -1 retval[0] = inval
if ((iy < 0) or (iy+1) >= Ngrid):
return -2
if ((iz < 0) or (iz+1) >= Ngrid):
return -3
# assert ((ix >= 0) and ((ix+1) < Ngrid)) # assert ((ix >= 0) and ((ix+1) < Ngrid))
# assert ((iy >= 0) and ((iy+1) < Ngrid)) # assert ((iy >= 0) and ((iy+1) < Ngrid))
# assert ((iz >= 0) and ((iz+1) < Ngrid)) # assert ((iz >= 0) and ((iz+1) < Ngrid))
@ -202,13 +187,11 @@ cdef int interp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
d[ix ,iy+1,iz+1] * f[0][1][1] + \ d[ix ,iy+1,iz+1] * f[0][1][1] + \
d[ix+1,iy+1,iz+1] * f[1][1][1] d[ix+1,iy+1,iz+1] * f[1][1][1]
return 0
@cython.boundscheck(False) @cython.boundscheck(False)
def interp3d(x not None, y not None, def interp3d(x not None, y not None,
z not None, z not None,
npx.ndarray[DTYPE_t, ndim=3] d not None, DTYPE_t Lbox, npx.ndarray[DTYPE_t, ndim=3] d not None, DTYPE_t Lbox,
bool periodic=False, bool centered=True, bool ngp=False): bool periodic=False, bool centered=True, bool ngp=False, DTYPE_t inval = 0):
""" interp3d(x,y,z,d,Lbox,periodic=False,centered=True,ngp=False) -> interpolated values """ interp3d(x,y,z,d,Lbox,periodic=False,centered=True,ngp=False) -> interpolated values
Compute the tri-linear interpolation of the given field (d) at the given position (x,y,z). It assumes that they are box-centered coordinates. So (x,y,z) == (0,0,0) is equivalent to the pixel at (Nx/2,Ny/2,Nz/2) with Nx,Ny,Nz = d.shape. If periodic is set, it assumes the box is periodic Compute the tri-linear interpolation of the given field (d) at the given position (x,y,z). It assumes that they are box-centered coordinates. So (x,y,z) == (0,0,0) is equivalent to the pixel at (Nx/2,Ny/2,Nz/2) with Nx,Ny,Nz = d.shape. If periodic is set, it assumes the box is periodic
@ -253,41 +236,29 @@ def interp3d(x not None, y not None,
if not myngp: if not myngp:
if myperiodic: if myperiodic:
for i in prange(Nelt): for i in prange(Nelt):
if interp3d_INTERNAL_periodic(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i]) < 0: interp3d_INTERNAL_periodic(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i])
with gil:
raise ierror
else: else:
for i in prange(Nelt): for i in prange(Nelt):
if interp3d_INTERNAL(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i]) < 0: interp3d_INTERNAL(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i], inval)
with gil:
raise ierror
else: else:
if myperiodic: if myperiodic:
for i in prange(Nelt): for i in prange(Nelt):
if ngp3d_INTERNAL_periodic(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i]) < 0: ngp3d_INTERNAL_periodic(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i])
with gil:
raise ierror
else: else:
for i in prange(Nelt): for i in prange(Nelt):
if ngp3d_INTERNAL(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i]) < 0: ngp3d_INTERNAL(shifter+ax[i], shifter+ay[i], shifter+az[i], in_slice, Lbox, &out_slice[i], inval)
with gil:
raise ierror
return out return out
else: else:
if not myngp: if not myngp:
if periodic: if periodic:
if interp3d_INTERNAL_periodic(shifter+x, shifter+y, shifter+z, d, Lbox, &retval) < 0: interp3d_INTERNAL_periodic(shifter+x, shifter+y, shifter+z, d, Lbox, &retval)
raise ierror
else: else:
if interp3d_INTERNAL(shifter+x, shifter+y, shifter+z, d, Lbox, &retval) < 0: interp3d_INTERNAL(shifter+x, shifter+y, shifter+z, d, Lbox, &retval, inval)
raise ierror
else: else:
if periodic: if periodic:
if ngp3d_INTERNAL_periodic(shifter+x, shifter+y, shifter+z, d, Lbox, &retval) < 0: ngp3d_INTERNAL_periodic(shifter+x, shifter+y, shifter+z, d, Lbox, &retval)
raise ierror
else: else:
if ngp3d_INTERNAL(shifter+x, shifter+y, shifter+z, d, Lbox, &retval) < 0: ngp3d_INTERNAL(shifter+x, shifter+y, shifter+z, d, Lbox, &retval, inval)
raise ierror
return retval return retval
@cython.boundscheck(False) @cython.boundscheck(False)
@ -410,8 +381,7 @@ def interp2d(x not None, y not None,
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.cdivision(True) @cython.cdivision(True)
cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g, cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g,
npx.ndarray[DTYPE_t, ndim=2] x, int Ngrid, float Lbox): npx.ndarray[DTYPE_t, ndim=2] x, int Ngrid, double Lbox, double shifter):
cdef double half_Box = 0.5*Lbox
cdef double delta_Box = Ngrid/Lbox cdef double delta_Box = Ngrid/Lbox
cdef int i cdef int i
cdef double a[3], c[3] cdef double a[3], c[3]
@ -422,7 +392,7 @@ cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g,
do_not_put = 0 do_not_put = 0
for j in range(3): for j in range(3):
a[j] = (x[i,j]+half_Box)*delta_Box a[j] = (x[i,j]+shifter)*delta_Box
b[j] = int(floor(a[j])) b[j] = int(floor(a[j]))
a[j] -= b[j] a[j] -= b[j]
c[j] = 1-a[j] c[j] = 1-a[j]
@ -446,8 +416,7 @@ cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g,
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.cdivision(True) @cython.cdivision(True)
cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g, cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g,
npx.ndarray[DTYPE_t, ndim=2] x, int Ngrid, double Lbox): npx.ndarray[DTYPE_t, ndim=2] x, int Ngrid, double Lbox, double shifter):
cdef double half_Box = 0.5*Lbox
cdef double delta_Box = Ngrid/Lbox cdef double delta_Box = Ngrid/Lbox
cdef int i cdef int i
cdef double a[3], c[3] cdef double a[3], c[3]
@ -463,7 +432,7 @@ cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g,
do_not_put = 0 do_not_put = 0
for j in range(3): for j in range(3):
a[j] = (ax[i,j]+half_Box)*delta_Box a[j] = (ax[i,j]+shifter)*delta_Box
b[j] = int(floor(a[j])) b[j] = int(floor(a[j]))
b1[j] = (b[j]+1) % Ngrid b1[j] = (b[j]+1) % Ngrid
@ -488,8 +457,7 @@ cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g,
cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g, cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g,
npx.ndarray[DTYPE_t, ndim=2] x, npx.ndarray[DTYPE_t, ndim=2] x,
npx.ndarray[DTYPE_t, ndim=1] mass, npx.ndarray[DTYPE_t, ndim=1] mass,
int Ngrid, double Lbox): int Ngrid, double Lbox, double shifter):
cdef double half_Box = 0.5*Lbox
cdef double delta_Box = Ngrid/Lbox cdef double delta_Box = Ngrid/Lbox
cdef int i cdef int i
cdef double a[3], c[3] cdef double a[3], c[3]
@ -500,7 +468,7 @@ cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g,
do_not_put = False do_not_put = False
for j in range(3): for j in range(3):
a[j] = (x[i,j]+half_Box)*delta_Box a[j] = (x[i,j]+shifter)*delta_Box
b[j] = int(a[j]) b[j] = int(a[j])
a[j] -= b[j] a[j] -= b[j]
c[j] = 1-a[j] c[j] = 1-a[j]
@ -525,7 +493,7 @@ cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g,
cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g, cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g,
npx.ndarray[DTYPE_t, ndim=2] x, npx.ndarray[DTYPE_t, ndim=2] x,
npx.ndarray[DTYPE_t, ndim=1] mass, npx.ndarray[DTYPE_t, ndim=1] mass,
int Ngrid, double Lbox): int Ngrid, double Lbox, double shifter):
cdef double half_Box = 0.5*Lbox, m0 cdef double half_Box = 0.5*Lbox, m0
cdef double delta_Box = Ngrid/Lbox cdef double delta_Box = Ngrid/Lbox
cdef int i cdef int i
@ -535,7 +503,7 @@ cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g
for i in range(x.shape[0]): for i in range(x.shape[0]):
for j in range(3): for j in range(3):
a[j] = (x[i,j]+half_Box)*delta_Box a[j] = (x[i,j]+shifter)*delta_Box
b[j] = int(floor(a[j])) b[j] = int(floor(a[j]))
b1[j] = b[j]+1 b1[j] = b[j]+1
while b1[j] < 0: while b1[j] < 0:
@ -559,14 +527,20 @@ cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g
def project_cic(npx.ndarray[DTYPE_t, ndim=2] x not None, npx.ndarray[DTYPE_t, ndim=1] mass, int Ngrid, def project_cic(npx.ndarray[DTYPE_t, ndim=2] x not None, npx.ndarray[DTYPE_t, ndim=1] mass, int Ngrid,
double Lbox, bool periodic = False): double Lbox, bool periodic = False, centered=True):
""" """
project_cic(x array (N,3), mass (may be None), Ngrid, Lbox, periodict) project_cic(x array (N,3), mass (may be None), Ngrid, Lbox, periodict, centered=True)
This function does a Cloud-In-Cell projection of a 3d unstructured dataset. First argument is a Nx3 array of coordinates. This function does a Cloud-In-Cell projection of a 3d unstructured dataset. First argument is a Nx3 array of coordinates.
Second argument is an optinal mass. Ngrid is the size output grid and Lbox is the physical size of the grid. Second argument is an optinal mass. Ngrid is the size output grid and Lbox is the physical size of the grid.
""" """
cdef npx.ndarray[DTYPE_t, ndim=3] g cdef npx.ndarray[DTYPE_t, ndim=3] g
cdef double shifter
if centered:
shifter = 0.5*Lbox
else:
shifter = 0
if x.shape[1] != 3: if x.shape[1] != 3:
raise ValueError("Invalid shape for x array") raise ValueError("Invalid shape for x array")
@ -578,14 +552,14 @@ def project_cic(npx.ndarray[DTYPE_t, ndim=2] x not None, npx.ndarray[DTYPE_t, nd
if not periodic: if not periodic:
if mass == None: if mass == None:
INTERNAL_project_cic_no_mass(g, x, Ngrid, Lbox) INTERNAL_project_cic_no_mass(g, x, Ngrid, Lbox, shifter)
else: else:
INTERNAL_project_cic_with_mass(g, x, mass, Ngrid, Lbox) INTERNAL_project_cic_with_mass(g, x, mass, Ngrid, Lbox, shifter)
else: else:
if mass == None: if mass == None:
INTERNAL_project_cic_no_mass_periodic(g, x, Ngrid, Lbox) INTERNAL_project_cic_no_mass_periodic(g, x, Ngrid, Lbox, shifter)
else: else:
INTERNAL_project_cic_with_mass_periodic(g, x, mass, Ngrid, Lbox) INTERNAL_project_cic_with_mass_periodic(g, x, mass, Ngrid, Lbox, shifter)
return g return g

View File

@ -3,6 +3,7 @@ from _project import *
from .grafic import writeGrafic, writeWhitePhase, readGrafic, readWhitePhase from .grafic import writeGrafic, writeWhitePhase, readGrafic, readWhitePhase
from .borg import read_borg_vol from .borg import read_borg_vol
from .cic import cicParticles from .cic import cicParticles
from .cl_cic import cl_CIC_Density
from .simu import loadRamsesAll, simpleWriteGadget, SimulationBare from .simu import loadRamsesAll, simpleWriteGadget, SimulationBare
from .timing import time_block, timeit, timeit_quiet from .timing import time_block, timeit, timeit_quiet

232
python/cosmotool/cl_cic.py Normal file
View File

@ -0,0 +1,232 @@
from .timing import time_block as time_block_orig
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array
from contextlib import contextmanager
TIMER_ACTIVE=False
@contextmanager
def time_block_dummy(*args):
yield
if TIMER_ACTIVE:
time_block=time_block_orig
else:
time_block=time_block_dummy
CIC_PREKERNEL='''
#define NDIM {ndim}
#define CENTERED {centered}
typedef {cicType} BASIC_TYPE;
'''
CIC_KERNEL='''///CL///
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
__kernel void init_pcell(__global int *p_cell, const int value)
{
int i = get_global_id(0);
p_cell[i] = value;
}
__kernel void build_indices(__global const BASIC_TYPE *pos,
__global int *part_mesh, __global int *part_list, const int N, const BASIC_TYPE delta, const BASIC_TYPE shift_pos)
{
int i_part = get_global_id(0);
long shifter = 1;
long idx = 0;
int d;
for (d = 0; d < NDIM; d++) {
BASIC_TYPE x;
if (CENTERED)
x = pos[i_part*NDIM + d] - shift_pos;
else
x = pos[i_part*NDIM + d];
int m = (int)floor(x*delta) %% N;
idx += shifter * m;
shifter *= N;
}
// Head of the list
int initial_elt = atom_xchg(&part_mesh[idx], i_part);
if (initial_elt == -1) {
return;
}
// Point the next pointer of old_end to i_part
part_list[i_part] = initial_elt;
}
__kernel void reverse_list(__global int *part_mesh, __global int *part_list)
{
int mid = get_global_id(0);
int current_part = part_mesh[mid];
if (current_part >= 0) {
int next_part = part_list[current_part];
part_list[current_part] = -1;
while (next_part != -1) {
int p = part_list[next_part];
part_list[next_part] = current_part;
current_part = next_part;
next_part = p;
}
part_mesh[mid] = current_part;
}
}
__kernel void dance(__global const BASIC_TYPE *pos,
__global BASIC_TYPE *density,
__global int *part_mesh, __global int *part_list, const int N, const BASIC_TYPE delta, const BASIC_TYPE shift_pos)
{
int m[NDIM];
int shifter = 1;
int i;
int first, i_part;
int idx = 0;
for (i = 0; i < NDIM; i++) {
m[i] = get_global_id(i);
idx += shifter * m[i];
shifter *= N;
}
first = 1;
//BEGIN LOOPER
%(looperFor)s
//END LOOPER
int idx_dance = 0;
BASIC_TYPE w = 0;
//LOOPER INDEX
int r[NDIM] = { %(looperVariables)s };
//END LOOPER
i_part = part_mesh[idx];
while (i_part != -1) {
BASIC_TYPE w0 = 1;
for (int d = 0; d < NDIM; d++) {
BASIC_TYPE x;
BASIC_TYPE q;
BASIC_TYPE dx;
if (CENTERED)
x = pos[i_part*NDIM + d]*delta - shift_pos;
else
x = pos[i_part*NDIM + d]*delta;
q = floor(x);
dx = x - q;
w0 *= (r[d] == 1) ? dx : ((BASIC_TYPE)1-dx);
}
i_part = part_list[i_part];
w += w0;
}
shifter = 1;
for (i = 0; i < NDIM; i++) {
idx_dance += shifter * ((m[i]+r[i])%%N);
shifter *= N;
}
density[idx_dance] += w;
// One dance done. Wait for everybody for the next iteration
barrier(CLK_GLOBAL_MEM_FENCE);
%(looperForEnd)s
}
'''
class CIC_CL(object):
def __init__(self, context, ndim=2, ktype=np.float32, centered=False):
global CIC_PREKERNEL, CIC_KERNEL
translator = {}
if ktype == np.float32:
translator['cicType'] = 'float'
pragmas = ''
elif ktype == np.float64:
translator['cicType'] = 'double'
pragmas = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n'
else:
raise ValueError("Invalid ktype")
# 2 dimensions
translator['ndim'] = ndim
translator['centered'] = '1' if centered else '0'
looperVariables = ','.join(['id%d' % d for d in xrange(ndim)])
looperFor = '\n'.join(['for (int id{dim}=0; id{dim} < 2; id{dim}++) {{'.format(dim=d) for d in xrange(ndim)])
looperForEnd = '}' * ndim
kern = pragmas + CIC_PREKERNEL.format(**translator) + (CIC_KERNEL % {'looperVariables': looperVariables, 'looperFor': looperFor, 'looperForEnd':looperForEnd})
self.kern_code = kern
self.ctx = context
self.queue = cl.CommandQueue(context)#, properties=cl.OUT_OF_ORDER_EXEC_MODE_ENABLE)
self.ktype = ktype
self.ndim = ndim
self.prog = cl.Program(self.ctx, kern).build()
self.centered = centered
def run(self, particles, Ng, L):
assert particles.strides[1] == self.ktype().itemsize # This is C-ordering
assert particles.shape[1] == self.ndim
print("Start again")
ndim = self.ndim
part_pos = cl_array.to_device(self.queue, particles)
part_mesh = cl_array.empty(self.queue, (Ng,)*ndim, np.int32, order='C')
density = cl_array.zeros(self.queue, (Ng,)*ndim, self.ktype, order='C')
part_list = cl_array.empty(self.queue, (particles.shape[0],), np.int32, order='C')
shift_pos = 0.5*L if self.centered else 0
if True:
delta = Ng/L
with time_block("Init pcell array"):
e = self.prog.init_pcell(self.queue, (Ng**ndim,), None, part_mesh.data, np.int32(-1))
e.wait()
with time_block("Init idx array"):
e=self.prog.init_pcell(self.queue, (particles.shape[0],), None, part_list.data, np.int32(-1))
e.wait()
with time_block("Build indices"):
self.prog.build_indices(self.queue, (particles.shape[0],), None,
part_pos.data, part_mesh.data, part_list.data, np.int32(Ng), self.ktype(delta), self.ktype(shift_pos))
if True:
with time_block("Reverse list"):
lastevt = self.prog.reverse_list(self.queue, (Ng**ndim,), None, part_mesh.data, part_list.data)
# We require pmax pass, particles are ordered according to part_idx
with time_block("dance"):
self.prog.dance(self.queue, (Ng,)*ndim, None, part_pos.data, density.data, part_mesh.data, part_list.data, np.int32(Ng), self.ktype(delta), self.ktype(shift_pos))
self.queue.finish()
del part_pos
del part_mesh
del part_list
with time_block("download"):
return density.get()
def cl_CIC_Density(particles, Ngrid, Lbox, context=None, periodic=True, centered=False):
"""
cl_CIC_Density(particles (Nx3), Ngrid, Lbox, context=None, periodic=True, centered=False)
"""
if context is None:
context = cl.create_some_context()
cic = CIC_CL(context, ndim=3, centered=centered)
return cic.run(particles, Ng, L)

View File

@ -74,6 +74,7 @@ def run_generation(input_borg, a_borg, a_ic, cosmo, supersample=1, do_lpt2=True,
# Compute LPT scaling coefficient # Compute LPT scaling coefficient
D1 = cgrowth.D(a_ic) D1 = cgrowth.D(a_ic)
D1_0 = D1/cgrowth.D(a_borg) D1_0 = D1/cgrowth.D(a_borg)
print "D1_0=%lg" % D1_0
velmul = cgrowth.compute_velmul(a_ic) if not psi_instead else 1 velmul = cgrowth.compute_velmul(a_ic) if not psi_instead else 1
D2 = -3./7 * D1_0**2 D2 = -3./7 * D1_0**2
@ -169,13 +170,12 @@ def whitify(density, L, cosmo, supergenerate=1, zero_fill=False, func='HU_WIGGLE
def write_icfiles(*generated_ic, **kwargs): def write_icfiles(*generated_ic, **kwargs):
"""Write the initial conditions from the tuple returned by run_generation""" """Write the initial conditions from the tuple returned by run_generation"""
supergenerate=1
supergenerate=kwargs.get('supergenerate', 1) supergenerate=kwargs.get('supergenerate', 1)
zero_fill=kwargs.get('zero_fill', False) zero_fill=kwargs.get('zero_fill', False)
posx,vel,density,N,L,a_ic,cosmo = generated_ic posx,vel,density,N,L,a_ic,cosmo = generated_ic
ct.simpleWriteGadget("Data/borg.gad", posx, velocities=vel, boxsize=L, Hubble=cosmo['h'], Omega_M=cosmo['omega_M_0'], time=a_ic) ct.simpleWriteGadget("Data/borg.gad", posx, velocities=vel, boxsize=L, Hubble=cosmo['h'], Omega_M=cosmo['omega_M_0'], time=a_ic)
for i,c in enumerate(["x","y","z"]): for i,c in enumerate(["z","y","x"]):
ct.writeGrafic("Data/ic_velc%s" % c, vel[i].reshape((N,N,N)), L, a_ic, **cosmo) ct.writeGrafic("Data/ic_velc%s" % c, vel[i].reshape((N,N,N)), L, a_ic, **cosmo)
ct.writeGrafic("Data/ic_deltab", density, L, a_ic, **cosmo) ct.writeGrafic("Data/ic_deltab", density, L, a_ic, **cosmo)

View File

@ -1,6 +1,11 @@
import pyfftw
import numpy as np import numpy as np
import cosmotool as ct import cosmotool as ct
import borgicgen as bic import borgicgen as bic
import pickle
with file("wisdom") as f:
pyfftw.import_wisdom(pickle.load(f))
cosmo={'omega_M_0':0.3175, 'h':0.6711} cosmo={'omega_M_0':0.3175, 'h':0.6711}
cosmo['omega_lambda_0']=1-cosmo['omega_M_0'] cosmo['omega_lambda_0']=1-cosmo['omega_M_0']
@ -9,11 +14,11 @@ cosmo['omega_B_0']=0.049
cosmo['SIGMA8']=0.8344 cosmo['SIGMA8']=0.8344
cosmo['ns']=0.9624 cosmo['ns']=0.9624
supergen=2 supergen=8
zstart=50 zstart=50
astart=1/69.#1/(1.+zstart) astart=1/(1.+zstart)
halfPixelShift=False halfPixelShift=False
zero_fill=True zero_fill=False
if __name__=="__main__": if __name__=="__main__":
bic.write_icfiles(*bic.run_generation("initial_density_1380.dat", 0.001, astart, cosmo, supersample=1, shiftPixel=halfPixelShift, do_lpt2=False), supergenerate=supergen, zero_fill=zero_fill) bic.write_icfiles(*bic.run_generation("initial_density_1872.dat", 0.001, astart, cosmo, supersample=1, shiftPixel=halfPixelShift, do_lpt2=False), supergenerate=supergen, zero_fill=zero_fill)

View File

@ -8,6 +8,7 @@ s = ct.loadRamsesAll(sys.argv[1], int(sys.argv[2]), doublePrecision=True, loadVe
q = [p for p in s.getPositions()] q = [p for p in s.getPositions()]
q += [p for p in s.getVelocities()] q += [p for p in s.getVelocities()]
q += [np.ones(q[0].size,dtype=q[0].dtype)]
q = np.array(q) q = np.array(q)
with h5.File("particles.h5", mode="w") as f: with h5.File("particles.h5", mode="w") as f:

View File

@ -203,7 +203,7 @@ int main(int argc, char **argv)
bins, interpolated, getMass, rLimit2); bins, interpolated, getMass, rLimit2);
hdf5_write_array(out_f, "density", interpolated); hdf5_write_array(out_f, "density", interpolated);
//out_f.flush(); //out_f.flush();
for (int i = 0; i < 3; i++) { for (int i = 0; i < 0; i++) {
computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz,
bins, interpolated, boost::bind(getVelocity, _1, i), rLimit2); bins, interpolated, boost::bind(getVelocity, _1, i), rLimit2);
hdf5_write_array(out_f, str(format("p%d") % i), interpolated); hdf5_write_array(out_f, str(format("p%d") % i), interpolated);