Re-optimized projection

This commit is contained in:
Guilhem Lavaux 2014-06-14 17:15:56 +02:00
parent 60a477a4c0
commit 81f4b642e4
2 changed files with 56 additions and 93 deletions

View File

@ -1,12 +1,12 @@
from cpython cimport bool from cpython cimport bool
from cython cimport view from cython cimport view
from cython.parallel import prange, parallel from cython.parallel import prange, parallel
from openmp cimport omp_get_max_threads, omp_get_thread_num
from libc.math cimport sin, cos, abs, floor, sqrt from libc.math cimport sin, cos, abs, floor, sqrt
import numpy as np import numpy as np
cimport numpy as npx cimport numpy as npx
cimport cython cimport cython
from copy cimport * from copy cimport *
from openmp cimport omp_get_max_threads, omp_get_thread_num
ctypedef npx.float64_t DTYPE_t ctypedef npx.float64_t DTYPE_t
DTYPE=np.float64 DTYPE=np.float64
@ -298,8 +298,7 @@ cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g,
cdef double half_Box = 0.5*Lbox cdef double half_Box = 0.5*Lbox
cdef double delta_Box = Ngrid/Lbox cdef double delta_Box = Ngrid/Lbox
cdef int i cdef int i
cdef double a[3] cdef double a[3], c[3]
cdef double c[3]
cdef int b[3] cdef int b[3]
cdef int do_not_put cdef int do_not_put
@ -335,10 +334,8 @@ cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g,
cdef double half_Box = 0.5*Lbox cdef double half_Box = 0.5*Lbox
cdef double delta_Box = Ngrid/Lbox cdef double delta_Box = Ngrid/Lbox
cdef int i cdef int i
cdef double a[3] cdef double a[3], c[3]
cdef double c[3] cdef int b[3], b1[3]
cdef int b[3]
cdef int b1[3]
cdef int do_not_put cdef int do_not_put
for i in range(x.shape[0]): for i in range(x.shape[0]):
@ -376,8 +373,7 @@ cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g,
cdef double half_Box = 0.5*Lbox cdef double half_Box = 0.5*Lbox
cdef double delta_Box = Ngrid/Lbox cdef double delta_Box = Ngrid/Lbox
cdef int i cdef int i
cdef double a[3] cdef double a[3], c[3]
cdef double c[3]
cdef DTYPE_t m0 cdef DTYPE_t m0
cdef int b[3] cdef int b[3]
@ -414,10 +410,8 @@ cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g
cdef double half_Box = 0.5*Lbox, m0 cdef double half_Box = 0.5*Lbox, m0
cdef double delta_Box = Ngrid/Lbox cdef double delta_Box = Ngrid/Lbox
cdef int i cdef int i
cdef double a[3] cdef double a[3], c[3]
cdef double c[3] cdef int b[3], b1[3]
cdef int b[3]
cdef int b1[3]
for i in range(x.shape[0]): for i in range(x.shape[0]):
@ -538,11 +532,10 @@ cdef DTYPE_t cube_integral(DTYPE_t u[3], DTYPE_t u0[3], int r[1]) nogil:
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.cdivision(True) @cython.cdivision(True)
cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t vertex_value[8], int err[1]) nogil: cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t vertex_value[8]) nogil:
cdef DTYPE_t alpha_max cdef DTYPE_t alpha_max
cdef DTYPE_t I, tmp_a cdef DTYPE_t I, tmp_a
cdef DTYPE_t v[3] cdef DTYPE_t v[3], term[4]
cdef DTYPE_t term[4]
cdef int i, j, q cdef int i, j, q
alpha_max = 10.0 # A big number alpha_max = 10.0 # A big number
@ -560,13 +553,7 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t
if tmp_a < alpha_max: if tmp_a < alpha_max:
alpha_max = tmp_a alpha_max = tmp_a
j = i j = i
# if (u0[i] < 0 or u0[i] > 1):
# err[0] = 1
# with gil:
# print("Bouh! u0[%d] = %lg" % (i, u0[i]))
# return 0
I = compute_projection(vertex_value, u, u0, alpha_max) I = compute_projection(vertex_value, u, u0, alpha_max)
for i in xrange(3): for i in xrange(3):
@ -580,7 +567,7 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t
@cython.boundscheck(False) @cython.boundscheck(False)
cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density, cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density,
DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0: DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil:
cdef DTYPE_t d cdef DTYPE_t d
d = density[iu0[0], iu0[1], iu0[2]] d = density[iu0[0], iu0[1], iu0[2]]
@ -589,20 +576,15 @@ cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density,
@cython.boundscheck(False) @cython.boundscheck(False)
cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density, cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density,
DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0: DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil:
cdef DTYPE_t vertex_value[8] cdef DTYPE_t vertex_value[8]
cdef DTYPE_t d cdef DTYPE_t d
cdef int a[3][2] cdef int a[3][2], i
cdef int i
for i in xrange(3): for i in xrange(3):
a[i][0] = iu0[i] a[i][0] = iu0[i]
a[i][1] = iu0[i]+1 a[i][1] = iu0[i]+1
with gil:
assert a[i][0] >= 0
assert a[i][1] < density.shape[i]
vertex_value[0 + 2*0 + 4*0] = density[a[0][0], a[1][0], a[2][0]] vertex_value[0 + 2*0 + 4*0] = density[a[0][0], a[1][0], a[2][0]]
vertex_value[1 + 2*0 + 4*0] = density[a[0][1], a[1][0], a[2][0]] vertex_value[1 + 2*0 + 4*0] = density[a[0][1], a[1][0], a[2][0]]
vertex_value[0 + 2*1 + 4*0] = density[a[0][0], a[1][1], a[2][0]] vertex_value[0 + 2*1 + 4*0] = density[a[0][0], a[1][1], a[2][0]]
@ -613,8 +595,7 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density,
vertex_value[0 + 2*1 + 4*1] = density[a[0][0], a[1][1], a[2][1]] vertex_value[0 + 2*1 + 4*1] = density[a[0][0], a[1][1], a[2][1]]
vertex_value[1 + 2*1 + 4*1] = density[a[0][1], a[1][1], a[2][1]] vertex_value[1 + 2*1 + 4*1] = density[a[0][1], a[1][1], a[2][1]]
# return cube_integral(u, u0, jumper)*d return cube_integral_trilin(u, u0, jumper, vertex_value)
return cube_integral_trilin(u, u0, jumper, vertex_value, err)
@ -622,15 +603,11 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density,
cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
DTYPE_t a_u[3], DTYPE_t a_u[3],
DTYPE_t min_distance, DTYPE_t min_distance,
DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil except? 0: DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id) nogil except? 0:
cdef DTYPE_t u[3] cdef DTYPE_t u[3], ifu0[3], u0[3], utot[3]
cdef DTYPE_t ifu0[3]
cdef DTYPE_t u0[3]
cdef DTYPE_t utot[3]
cdef int u_delta[3] cdef int u_delta[3]
cdef int iu0[3] cdef int iu0[3]
cdef int err[1]
cdef int i cdef int i
cdef int N = density.shape[0] cdef int N = density.shape[0]
cdef int half_N = density.shape[0]/2 cdef int half_N = density.shape[0]/2
@ -639,10 +616,8 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
cdef int jumper[1] cdef int jumper[1]
cdef DTYPE_t (*integrator)(DTYPE_t[:,:,:], cdef DTYPE_t (*integrator)(DTYPE_t[:,:,:],
DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0 DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil
out_err[0] = 0
if integrator_id == 0: if integrator_id == 0:
integrator = integrator0 integrator = integrator0
else: else:
@ -655,19 +630,17 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
u0[i] = a_u[i]*min_distance u0[i] = a_u[i]*min_distance
ifu0[i] = half_N+u0[i]+shifter[i] ifu0[i] = half_N+u0[i]+shifter[i]
if (ifu0[i] <= 0 or ifu0[i] >= N): if (ifu0[i] <= 0 or ifu0[i] >= N):
with gil:
print "BLABLA"
return 0 return 0
iu0[i] = int(floor(ifu0[i])) iu0[i] = int(floor(ifu0[i]))
u0[i] = ifu0[i]-iu0[i] u0[i] = ifu0[i]-iu0[i]
u_delta[i] = 1 if iu0[i] > 0 else -1 u_delta[i] = 1 if iu0[i] > 0 else -1
# if (not ((iu0[i]>= 0) and (iu0[i] < N))): if (not ((iu0[i]>= 0) and (iu0[i] < N))):
# out_err[0] = 1 with gil:
# return 0 raise RuntimeError("iu0[%d] = %d !!" % (i,iu0[i]))
# if (not (u0[i]>=0 and u0[i]<=1)): if (not (u0[i]>=0 and u0[i]<=1)):
# out_err[0] = 1 with gil:
# return 0 raise RuntimeError("u0[%d] = %g !" % (i,u0[i]))
completed = 0 completed = 0
if ((iu0[0] >= N) or (iu0[0] <= 0) or if ((iu0[0] >= N) or (iu0[0] <= 0) or
@ -678,14 +651,8 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
I0 = 0 I0 = 0
jumper[0] = 0 jumper[0] = 0
while completed == 0: while completed == 0:
err[0] = 0
I0 += integrator(density, u, u0, u_delta, iu0, jumper, err) I0 += integrator(density, u, u0, u_delta, iu0, jumper)
# if err[0] == 1:
# with gil:
# print("Bah! error!")
# out_err[0] = 1
# return 0
if u[jumper[0]] < 0: if u[jumper[0]] < 0:
iu0[jumper[0]] -= 1 iu0[jumper[0]] -= 1
@ -718,25 +685,19 @@ def line_of_sight_projection(DTYPE_t[:,:,:] density not None,
DTYPE_t min_distance, DTYPE_t min_distance,
DTYPE_t max_distance, DTYPE_t[:] shifter not None, int integrator_id=0): DTYPE_t max_distance, DTYPE_t[:] shifter not None, int integrator_id=0):
cdef DTYPE_t u[3] cdef DTYPE_t u[3]
cdef int out_err[1]
cdef DTYPE_t v
u[0] = a_u[0] u[0] = a_u[0]
u[1] = a_u[1] u[1] = a_u[1]
u[2] = a_u[2] u[2] = a_u[2]
out_err[0] = 0 C_line_of_sight_projection(density,
v = C_line_of_sight_projection(density,
u, u,
min_distance, min_distance,
max_distance, shifter, integrator_id, out_err) max_distance, shifter, integrator_id)
# if out_err[0] == 1:
# raise RuntimeError("Error occured during integration")
return v
cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density, cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density,
double min_distance, double max_distance, double min_distance, double max_distance,
DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil: DTYPE_t[:] shifter, int integrator_id) nogil:
cdef DTYPE_t u0[3] cdef DTYPE_t u0[3]
stheta = sin(theta) stheta = sin(theta)
@ -744,7 +705,7 @@ cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density
u0[1] = sin(phi)*stheta u0[1] = sin(phi)*stheta
u0[2] = cos(theta) u0[2] = cos(theta)
return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id, out_err) return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id)
@cython.boundscheck(False) @cython.boundscheck(False)
@ -761,9 +722,9 @@ def spherical_projection(int Nside,
cdef DTYPE_t[:] outm cdef DTYPE_t[:] outm
cdef int[:] job_done cdef int[:] job_done
cdef npx.ndarray[DTYPE_t, ndim=1] outm_array cdef npx.ndarray[DTYPE_t, ndim=1] outm_array
cdef long N cdef long N, N0
cdef double stheta cdef double stheta
cdef int out_err[1] cdef int tid
if shifter is None: if shifter is None:
shifter = view.array(shape=(3,), format=FORMAT_DTYPE, itemsize=sizeof(DTYPE_t)) shifter = view.array(shape=(3,), format=FORMAT_DTYPE, itemsize=sizeof(DTYPE_t))
@ -777,21 +738,22 @@ def spherical_projection(int Nside,
p = pb.ProgressBar(maxval=outm.size,widgets=[pb.Bar(), pb.ETA()]).start() p = pb.ProgressBar(maxval=outm.size,widgets=[pb.Bar(), pb.ETA()]).start()
N = omp_get_max_threads() N = omp_get_max_threads()
N0 = outm.size
if booster < 0:
booster = 1000
job_done = view.array(shape=(N,), format="i", itemsize=sizeof(int)) job_done = view.array(shape=(N,), format="i", itemsize=sizeof(int))
job_done[:] = 0 job_done[:] = 0
theta,phi = hp.pix2ang(Nside, np.arange(outm.size)) theta,phi = hp.pix2ang(Nside, np.arange(N0))
if booster <= 0:
booster = density.size / 100 / N
with nogil, parallel(): with nogil, parallel():
for i in prange(N): tid = omp_get_thread_num()
if omp_get_thread_num() == 0 and progress != 0 and (i%booster) == 0: for i in prange(N0):
if progress != 0 and (i%booster) == 0:
with gil: with gil:
p.update(_mysum(job_done)) p.update(_mysum(job_done))
# out_err[0] = 0 outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id)
outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id, out_err) job_done[tid] += 1
job_done[omp_get_thread_num()] = i
if progress: if progress:
p.finish() p.finish()

View File

@ -5,14 +5,14 @@ template<typename T, typename ProdType>
static T project_tool(T *vertex_value, T *u, T *u0) static T project_tool(T *vertex_value, T *u, T *u0)
{ {
T ret0 = 0; T ret0 = 0;
for (int i = 0; i < 8; i++) for (unsigned int i = 0; i < 8; i++)
{ {
int c[3] = { i & 1, (i>>1)&1, (i>>2)&1 }; unsigned int c[3] = { i & 1, (i>>1)&1, (i>>2)&1 };
int epsilon[3]; int epsilon[3];
T ret = 0; T ret = 0;
for (int q = 0; q < 3; q++) for (int q = 0; q < 3; q++)
epsilon[q] = (2*c[q]-1); epsilon[q] = 2*c[q]-1;
for (int q = 0; q < ProdType::numProducts; q++) for (int q = 0; q < ProdType::numProducts; q++)
ret += ProdType::product(u, u0, epsilon, q); ret += ProdType::product(u, u0, epsilon, q);
@ -27,7 +27,8 @@ static T project_tool(T *vertex_value, T *u, T *u0)
template<typename T> template<typename T>
static inline T get_u0(const T& u0, int epsilon) static inline T get_u0(const T& u0, int epsilon)
{ {
return (epsilon > 0) ? u0 : (1-u0); return (1-epsilon)/2 + epsilon*u0;
// return (epsilon > 0) ? u0 : (1-u0);
} }
template<typename T> template<typename T>
@ -39,7 +40,7 @@ struct ProductTerm0
{ {
T a = 1; T a = 1;
for (int r = 0; r < 3; r++) for (unsigned int r = 0; r < 3; r++)
a *= get_u0(u0[r], epsilon[r]); a *= get_u0(u0[r], epsilon[r]);
return a; return a;
} }
@ -54,14 +55,14 @@ struct ProductTerm1
static T product(T *u, T *u0, int *epsilon, int q) static T product(T *u, T *u0, int *epsilon, int q)
{ {
T a = 1; T a = 1;
double G[3]; T G[3];
for (int r = 0; r < 3; r++) for (unsigned int r = 0; r < 3; r++)
{ {
G[r] = get_u0(u0[r], epsilon[r]); G[r] = get_u0(u0[r], epsilon[r]);
} }
double F[3] = { G[1]*G[2], G[0]*G[2], G[0]*G[1] }; T F[3] = { G[1]*G[2], G[0]*G[2], G[0]*G[1] };
return F[q] * u[q] * epsilon[q]; return F[q] * u[q] * epsilon[q];
} }
@ -75,14 +76,14 @@ struct ProductTerm2
static inline T product(T *u, T *u0, int *epsilon, int q) static inline T product(T *u, T *u0, int *epsilon, int q)
{ {
T a = 1; T a = 1;
double G[3]; T G[3];
for (int r = 0; r < 3; r++) for (unsigned int r = 0; r < 3; r++)
{ {
G[r] = get_u0(u0[r], epsilon[r]); G[r] = get_u0(u0[r], epsilon[r]);
} }
double F[3] = { epsilon[1]*epsilon[2]*u[1]*u[2], epsilon[0]*epsilon[2]*u[0]*u[2], epsilon[0]*epsilon[1]*u[0]*u[1] }; T F[3] = { epsilon[1]*epsilon[2]*u[1]*u[2], epsilon[0]*epsilon[2]*u[0]*u[2], epsilon[0]*epsilon[1]*u[0]*u[1] };
return F[q] * G[q]; return F[q] * G[q];
} }