diff --git a/python/_project.pyx b/python/_project.pyx index 34a7de8..efe66bd 100644 --- a/python/_project.pyx +++ b/python/_project.pyx @@ -1,12 +1,12 @@ from cpython cimport bool from cython cimport view from cython.parallel import prange, parallel -from openmp cimport omp_get_max_threads, omp_get_thread_num from libc.math cimport sin, cos, abs, floor, sqrt import numpy as np cimport numpy as npx cimport cython from copy cimport * +from openmp cimport omp_get_max_threads, omp_get_thread_num ctypedef npx.float64_t DTYPE_t DTYPE=np.float64 @@ -298,8 +298,7 @@ cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g, cdef double half_Box = 0.5*Lbox cdef double delta_Box = Ngrid/Lbox cdef int i - cdef double a[3] - cdef double c[3] + cdef double a[3], c[3] cdef int b[3] cdef int do_not_put @@ -335,10 +334,8 @@ cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g, cdef double half_Box = 0.5*Lbox cdef double delta_Box = Ngrid/Lbox cdef int i - cdef double a[3] - cdef double c[3] - cdef int b[3] - cdef int b1[3] + cdef double a[3], c[3] + cdef int b[3], b1[3] cdef int do_not_put for i in range(x.shape[0]): @@ -376,8 +373,7 @@ cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g, cdef double half_Box = 0.5*Lbox cdef double delta_Box = Ngrid/Lbox cdef int i - cdef double a[3] - cdef double c[3] + cdef double a[3], c[3] cdef DTYPE_t m0 cdef int b[3] @@ -414,10 +410,8 @@ cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g cdef double half_Box = 0.5*Lbox, m0 cdef double delta_Box = Ngrid/Lbox cdef int i - cdef double a[3] - cdef double c[3] - cdef int b[3] - cdef int b1[3] + cdef double a[3], c[3] + cdef int b[3], b1[3] for i in range(x.shape[0]): @@ -538,11 +532,10 @@ cdef DTYPE_t cube_integral(DTYPE_t u[3], DTYPE_t u0[3], int r[1]) nogil: @cython.boundscheck(False) @cython.cdivision(True) -cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t vertex_value[8], int err[1]) nogil: +cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t vertex_value[8]) nogil: cdef DTYPE_t alpha_max cdef DTYPE_t I, tmp_a - cdef DTYPE_t v[3] - cdef DTYPE_t term[4] + cdef DTYPE_t v[3], term[4] cdef int i, j, q alpha_max = 10.0 # A big number @@ -560,13 +553,7 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t if tmp_a < alpha_max: alpha_max = tmp_a j = i - -# if (u0[i] < 0 or u0[i] > 1): -# err[0] = 1 -# with gil: -# print("Bouh! u0[%d] = %lg" % (i, u0[i])) -# return 0 - + I = compute_projection(vertex_value, u, u0, alpha_max) for i in xrange(3): @@ -580,7 +567,7 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t @cython.boundscheck(False) cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density, - DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0: + DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil: cdef DTYPE_t d d = density[iu0[0], iu0[1], iu0[2]] @@ -589,20 +576,15 @@ cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density, @cython.boundscheck(False) cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density, - DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0: + DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil: cdef DTYPE_t vertex_value[8] cdef DTYPE_t d - cdef int a[3][2] - cdef int i + cdef int a[3][2], i for i in xrange(3): a[i][0] = iu0[i] a[i][1] = iu0[i]+1 - - with gil: - assert a[i][0] >= 0 - assert a[i][1] < density.shape[i] - + vertex_value[0 + 2*0 + 4*0] = density[a[0][0], a[1][0], a[2][0]] vertex_value[1 + 2*0 + 4*0] = density[a[0][1], a[1][0], a[2][0]] vertex_value[0 + 2*1 + 4*0] = density[a[0][0], a[1][1], a[2][0]] @@ -613,8 +595,7 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density, vertex_value[0 + 2*1 + 4*1] = density[a[0][0], a[1][1], a[2][1]] vertex_value[1 + 2*1 + 4*1] = density[a[0][1], a[1][1], a[2][1]] -# return cube_integral(u, u0, jumper)*d - return cube_integral_trilin(u, u0, jumper, vertex_value, err) + return cube_integral_trilin(u, u0, jumper, vertex_value) @@ -622,15 +603,11 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density, cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, DTYPE_t a_u[3], DTYPE_t min_distance, - DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil except? 0: + DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id) nogil except? 0: - cdef DTYPE_t u[3] - cdef DTYPE_t ifu0[3] - cdef DTYPE_t u0[3] - cdef DTYPE_t utot[3] + cdef DTYPE_t u[3], ifu0[3], u0[3], utot[3] cdef int u_delta[3] cdef int iu0[3] - cdef int err[1] cdef int i cdef int N = density.shape[0] cdef int half_N = density.shape[0]/2 @@ -639,10 +616,8 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, cdef int jumper[1] cdef DTYPE_t (*integrator)(DTYPE_t[:,:,:], - DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0 + DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil - out_err[0] = 0 - if integrator_id == 0: integrator = integrator0 else: @@ -655,19 +630,17 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, u0[i] = a_u[i]*min_distance ifu0[i] = half_N+u0[i]+shifter[i] if (ifu0[i] <= 0 or ifu0[i] >= N): - with gil: - print "BLABLA" return 0 iu0[i] = int(floor(ifu0[i])) u0[i] = ifu0[i]-iu0[i] u_delta[i] = 1 if iu0[i] > 0 else -1 -# if (not ((iu0[i]>= 0) and (iu0[i] < N))): -# out_err[0] = 1 -# return 0 - -# if (not (u0[i]>=0 and u0[i]<=1)): -# out_err[0] = 1 -# return 0 + if (not ((iu0[i]>= 0) and (iu0[i] < N))): + with gil: + raise RuntimeError("iu0[%d] = %d !!" % (i,iu0[i])) + + if (not (u0[i]>=0 and u0[i]<=1)): + with gil: + raise RuntimeError("u0[%d] = %g !" % (i,u0[i])) completed = 0 if ((iu0[0] >= N) or (iu0[0] <= 0) or @@ -678,14 +651,8 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, I0 = 0 jumper[0] = 0 while completed == 0: - err[0] = 0 - I0 += integrator(density, u, u0, u_delta, iu0, jumper, err) - -# if err[0] == 1: -# with gil: -# print("Bah! error!") -# out_err[0] = 1 -# return 0 + + I0 += integrator(density, u, u0, u_delta, iu0, jumper) if u[jumper[0]] < 0: iu0[jumper[0]] -= 1 @@ -718,25 +685,19 @@ def line_of_sight_projection(DTYPE_t[:,:,:] density not None, DTYPE_t min_distance, DTYPE_t max_distance, DTYPE_t[:] shifter not None, int integrator_id=0): cdef DTYPE_t u[3] - cdef int out_err[1] - cdef DTYPE_t v u[0] = a_u[0] u[1] = a_u[1] u[2] = a_u[2] - out_err[0] = 0 - v = C_line_of_sight_projection(density, + C_line_of_sight_projection(density, u, min_distance, - max_distance, shifter, integrator_id, out_err) -# if out_err[0] == 1: -# raise RuntimeError("Error occured during integration") - return v + max_distance, shifter, integrator_id) cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density, double min_distance, double max_distance, - DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil: + DTYPE_t[:] shifter, int integrator_id) nogil: cdef DTYPE_t u0[3] stheta = sin(theta) @@ -744,7 +705,7 @@ cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density u0[1] = sin(phi)*stheta u0[2] = cos(theta) - return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id, out_err) + return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id) @cython.boundscheck(False) @@ -761,9 +722,9 @@ def spherical_projection(int Nside, cdef DTYPE_t[:] outm cdef int[:] job_done cdef npx.ndarray[DTYPE_t, ndim=1] outm_array - cdef long N + cdef long N, N0 cdef double stheta - cdef int out_err[1] + cdef int tid if shifter is None: shifter = view.array(shape=(3,), format=FORMAT_DTYPE, itemsize=sizeof(DTYPE_t)) @@ -777,21 +738,22 @@ def spherical_projection(int Nside, p = pb.ProgressBar(maxval=outm.size,widgets=[pb.Bar(), pb.ETA()]).start() N = omp_get_max_threads() + N0 = outm.size + + if booster < 0: + booster = 1000 + job_done = view.array(shape=(N,), format="i", itemsize=sizeof(int)) job_done[:] = 0 - theta,phi = hp.pix2ang(Nside, np.arange(outm.size)) - - if booster <= 0: - booster = density.size / 100 / N - + theta,phi = hp.pix2ang(Nside, np.arange(N0)) with nogil, parallel(): - for i in prange(N): - if omp_get_thread_num() == 0 and progress != 0 and (i%booster) == 0: + tid = omp_get_thread_num() + for i in prange(N0): + if progress != 0 and (i%booster) == 0: with gil: p.update(_mysum(job_done)) -# out_err[0] = 0 - outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id, out_err) - job_done[omp_get_thread_num()] = i + outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id) + job_done[tid] += 1 if progress: p.finish() diff --git a/python/project_tool.hpp b/python/project_tool.hpp index 6f8c1d7..c185c44 100644 --- a/python/project_tool.hpp +++ b/python/project_tool.hpp @@ -5,14 +5,14 @@ template static T project_tool(T *vertex_value, T *u, T *u0) { T ret0 = 0; - for (int i = 0; i < 8; i++) + for (unsigned int i = 0; i < 8; i++) { - int c[3] = { i & 1, (i>>1)&1, (i>>2)&1 }; + unsigned int c[3] = { i & 1, (i>>1)&1, (i>>2)&1 }; int epsilon[3]; T ret = 0; for (int q = 0; q < 3; q++) - epsilon[q] = (2*c[q]-1); + epsilon[q] = 2*c[q]-1; for (int q = 0; q < ProdType::numProducts; q++) ret += ProdType::product(u, u0, epsilon, q); @@ -27,7 +27,8 @@ static T project_tool(T *vertex_value, T *u, T *u0) template static inline T get_u0(const T& u0, int epsilon) { - return (epsilon > 0) ? u0 : (1-u0); + return (1-epsilon)/2 + epsilon*u0; +// return (epsilon > 0) ? u0 : (1-u0); } template @@ -39,7 +40,7 @@ struct ProductTerm0 { T a = 1; - for (int r = 0; r < 3; r++) + for (unsigned int r = 0; r < 3; r++) a *= get_u0(u0[r], epsilon[r]); return a; } @@ -54,14 +55,14 @@ struct ProductTerm1 static T product(T *u, T *u0, int *epsilon, int q) { T a = 1; - double G[3]; + T G[3]; - for (int r = 0; r < 3; r++) + for (unsigned int r = 0; r < 3; r++) { G[r] = get_u0(u0[r], epsilon[r]); } - double F[3] = { G[1]*G[2], G[0]*G[2], G[0]*G[1] }; + T F[3] = { G[1]*G[2], G[0]*G[2], G[0]*G[1] }; return F[q] * u[q] * epsilon[q]; } @@ -75,14 +76,14 @@ struct ProductTerm2 static inline T product(T *u, T *u0, int *epsilon, int q) { T a = 1; - double G[3]; + T G[3]; - for (int r = 0; r < 3; r++) + for (unsigned int r = 0; r < 3; r++) { G[r] = get_u0(u0[r], epsilon[r]); } - double F[3] = { epsilon[1]*epsilon[2]*u[1]*u[2], epsilon[0]*epsilon[2]*u[0]*u[2], epsilon[0]*epsilon[1]*u[0]*u[1] }; + T F[3] = { epsilon[1]*epsilon[2]*u[1]*u[2], epsilon[0]*epsilon[2]*u[0]*u[2], epsilon[0]*epsilon[1]*u[0]*u[1] }; return F[q] * G[q]; }