Re-optimized projection

2014-06-14 17:15:56 +02:00 · 2014-06-14 17:15:56 +02:00 · 81f4b642e4
commit 81f4b642e4
parent 60a477a4c0
2 changed files with 56 additions and 93 deletions
--- a/python/_project.pyx
+++ b/python/_project.pyx
@ -1,12 +1,12 @@
 from cpython cimport bool
 from cython cimport view
 from cython.parallel import prange, parallel
 from openmp cimport omp_get_max_threads, omp_get_thread_num
 from libc.math cimport sin, cos, abs, floor, sqrt
 import numpy as np
 cimport numpy as npx
 cimport cython
 from copy cimport *
 from openmp cimport omp_get_max_threads, omp_get_thread_num
 ctypedef npx.float64_t DTYPE_t
 DTYPE=np.float64
@ -298,8 +298,7 @@ cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g,
    cdef double half_Box = 0.5*Lbox
    cdef double delta_Box = Ngrid/Lbox
    cdef int i
-    cdef double a[3]
+    cdef double a[3], c[3]
    cdef double c[3]
    cdef int b[3]
    cdef int do_not_put
@ -335,10 +334,8 @@ cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g,
    cdef double half_Box = 0.5*Lbox
    cdef double delta_Box = Ngrid/Lbox
    cdef int i
-    cdef double a[3]
+    cdef double a[3], c[3]
-    cdef double c[3]
+    cdef int b[3], b1[3]
    cdef int b[3]
    cdef int b1[3]
    cdef int do_not_put
    for i in range(x.shape[0]):
@ -376,8 +373,7 @@ cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g,
    cdef double half_Box = 0.5*Lbox
    cdef double delta_Box = Ngrid/Lbox
    cdef int i
-    cdef double a[3]
+    cdef double a[3], c[3]
    cdef double c[3]
    cdef DTYPE_t m0
    cdef int b[3]
@ -414,10 +410,8 @@ cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g
    cdef double half_Box = 0.5*Lbox, m0
    cdef double delta_Box = Ngrid/Lbox
    cdef int i
-    cdef double a[3]
+    cdef double a[3], c[3]
-    cdef double c[3]
+    cdef int b[3], b1[3]
    cdef int b[3]
    cdef int b1[3]
    for i in range(x.shape[0]):
@ -538,11 +532,10 @@ cdef DTYPE_t cube_integral(DTYPE_t u[3], DTYPE_t u0[3], int r[1]) nogil:
@cython.boundscheck(False)
@cython.cdivision(True)
-cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t vertex_value[8], int err[1]) nogil:
+cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t vertex_value[8]) nogil:
    cdef DTYPE_t alpha_max
    cdef DTYPE_t I, tmp_a
-    cdef DTYPE_t v[3]
+    cdef DTYPE_t v[3], term[4]
    cdef DTYPE_t term[4]
    cdef int i, j, q
    alpha_max = 10.0 # A big number
@ -561,12 +554,6 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t
            alpha_max = tmp_a
            j = i
 #    if (u0[i] < 0 or u0[i] > 1):
 #      err[0] = 1
 #      with gil:
 #        print("Bouh! u0[%d] = %lg" % (i, u0[i]))
 #      return 0
    I = compute_projection(vertex_value, u, u0, alpha_max)
    for i in xrange(3):
@ -580,7 +567,7 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t
@cython.boundscheck(False)
 cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density,
-                         DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0:
+                         DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil:
    cdef DTYPE_t d
    d = density[iu0[0], iu0[1], iu0[2]]
@ -589,20 +576,15 @@ cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density,
@cython.boundscheck(False)
 cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density,
-                         DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0:
+                         DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil:
    cdef DTYPE_t vertex_value[8]
    cdef DTYPE_t d
-    cdef int a[3][2]
+    cdef int a[3][2], i
    cdef int i
    for i in xrange(3):
      a[i][0] = iu0[i]
      a[i][1] = iu0[i]+1
      with gil:    
        assert a[i][0] >= 0
        assert a[i][1] < density.shape[i]
    vertex_value[0 + 2*0 + 4*0] = density[a[0][0], a[1][0], a[2][0]]
    vertex_value[1 + 2*0 + 4*0] = density[a[0][1], a[1][0], a[2][0]]
    vertex_value[0 + 2*1 + 4*0] = density[a[0][0], a[1][1], a[2][0]]
@ -613,8 +595,7 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density,
    vertex_value[0 + 2*1 + 4*1] = density[a[0][0], a[1][1], a[2][1]]
    vertex_value[1 + 2*1 + 4*1] = density[a[0][1], a[1][1], a[2][1]]
-#    return cube_integral(u, u0, jumper)*d
+    return cube_integral_trilin(u, u0, jumper, vertex_value)
    return cube_integral_trilin(u, u0, jumper, vertex_value, err)
@ -622,15 +603,11 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density,
 cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
                             DTYPE_t a_u[3],
                             DTYPE_t min_distance,
-                             DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil except? 0:
+                             DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id) nogil except? 0:
-    cdef DTYPE_t u[3]
+    cdef DTYPE_t u[3], ifu0[3], u0[3], utot[3]
    cdef DTYPE_t ifu0[3]
    cdef DTYPE_t u0[3]
    cdef DTYPE_t utot[3]
    cdef int u_delta[3]
    cdef int iu0[3]
    cdef int err[1]
    cdef int i
    cdef int N = density.shape[0]
    cdef int half_N = density.shape[0]/2
@ -639,9 +616,7 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
    cdef int jumper[1]
    cdef DTYPE_t (*integrator)(DTYPE_t[:,:,:],
-                               DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0
+                               DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil
    out_err[0] = 0
    if integrator_id == 0:
      integrator = integrator0
@ -655,19 +630,17 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
        u0[i] = a_u[i]*min_distance
        ifu0[i] = half_N+u0[i]+shifter[i]
        if (ifu0[i] <= 0 or ifu0[i] >= N):
          with gil: 
            print "BLABLA"
          return 0
        iu0[i] = int(floor(ifu0[i]))
        u0[i] = ifu0[i]-iu0[i]
        u_delta[i] = 1 if iu0[i] > 0 else -1
-#        if (not ((iu0[i]>= 0) and (iu0[i] < N))):
+        if (not ((iu0[i]>= 0) and (iu0[i] < N))):
-#          out_err[0] = 1
+          with gil:
-#          return 0
+            raise RuntimeError("iu0[%d] = %d !!" % (i,iu0[i]))
-#        if (not (u0[i]>=0 and u0[i]<=1)):
+        if (not (u0[i]>=0 and u0[i]<=1)):
-#          out_err[0] = 1
+          with gil:
-#          return 0
+            raise RuntimeError("u0[%d] = %g !" % (i,u0[i]))
    completed = 0
    if ((iu0[0] >= N) or (iu0[0] <= 0) or
@ -678,14 +651,8 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
    I0 = 0
    jumper[0] = 0
    while completed == 0:
        err[0] = 0
        I0 += integrator(density, u, u0, u_delta, iu0, jumper, err)
-#        if err[0] == 1:
+        I0 += integrator(density, u, u0, u_delta, iu0, jumper)
 #          with gil:
 #            print("Bah! error!")
 #          out_err[0] = 1
 #          return 0
        if u[jumper[0]] < 0:
            iu0[jumper[0]] -= 1
@ -718,25 +685,19 @@ def line_of_sight_projection(DTYPE_t[:,:,:] density not None,
                             DTYPE_t min_distance,
                             DTYPE_t max_distance, DTYPE_t[:] shifter not None, int integrator_id=0):
  cdef DTYPE_t u[3]
  cdef int out_err[1]
  cdef DTYPE_t v
  u[0] = a_u[0]
  u[1] = a_u[1]
  u[2] = a_u[2]
-  out_err[0] = 0
+  C_line_of_sight_projection(density,
  v = C_line_of_sight_projection(density,
                             u,
                             min_distance,
-                             max_distance, shifter, integrator_id, out_err)
+                             max_distance, shifter, integrator_id)
 #  if out_err[0] == 1:
 #    raise RuntimeError("Error occured during integration")
  return v
 cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density, 
                double min_distance, double max_distance, 
-                DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil:
+                DTYPE_t[:] shifter, int integrator_id) nogil:
  cdef DTYPE_t u0[3]
  stheta = sin(theta)
@ -744,7 +705,7 @@ cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density
  u0[1] = sin(phi)*stheta
  u0[2] = cos(theta)
-  return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id, out_err)
+  return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id)
@cython.boundscheck(False)
@ -761,9 +722,9 @@ def spherical_projection(int Nside,
    cdef DTYPE_t[:] outm
    cdef int[:] job_done
    cdef npx.ndarray[DTYPE_t, ndim=1] outm_array
-    cdef long N
+    cdef long N, N0
    cdef double stheta
-    cdef int out_err[1]
+    cdef int tid
    if shifter is None:
        shifter = view.array(shape=(3,), format=FORMAT_DTYPE, itemsize=sizeof(DTYPE_t))
@ -777,21 +738,22 @@ def spherical_projection(int Nside,
      p = pb.ProgressBar(maxval=outm.size,widgets=[pb.Bar(), pb.ETA()]).start()
    N = omp_get_max_threads()
    N0 = outm.size
    if booster < 0:
      booster = 1000
    job_done = view.array(shape=(N,), format="i", itemsize=sizeof(int))
    job_done[:] = 0
-    theta,phi = hp.pix2ang(Nside, np.arange(outm.size))
+    theta,phi = hp.pix2ang(Nside, np.arange(N0))
    if booster <= 0:
      booster = density.size / 100 / N
    with nogil, parallel():
-      for i in prange(N):
+      tid = omp_get_thread_num()
-          if omp_get_thread_num() == 0 and progress != 0 and (i%booster) == 0:
+      for i in prange(N0):
          if progress != 0 and (i%booster) == 0:
            with gil:
              p.update(_mysum(job_done))
-#          out_err[0] = 0
+          outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id)
-          outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id, out_err)
+          job_done[tid] += 1
          job_done[omp_get_thread_num()] = i
    if progress:
      p.finish()
--- a/python/project_tool.hpp
+++ b/python/project_tool.hpp
@ -5,14 +5,14 @@ template<typename T, typename ProdType>
 static T project_tool(T *vertex_value, T *u, T *u0)
 {
  T ret0 = 0;
-  for (int i = 0; i < 8; i++)
+  for (unsigned int i = 0; i < 8; i++)
    {
-      int c[3] = { i & 1, (i>>1)&1, (i>>2)&1 };
+      unsigned int c[3] = { i & 1, (i>>1)&1, (i>>2)&1 };
      int epsilon[3];
      T ret = 0;
      for (int q = 0; q < 3; q++)
-        epsilon[q] = (2*c[q]-1);
+        epsilon[q] = 2*c[q]-1;
      for (int q = 0; q < ProdType::numProducts; q++)
        ret += ProdType::product(u, u0, epsilon, q);
@ -27,7 +27,8 @@ static T project_tool(T *vertex_value, T *u, T *u0)
 template<typename T>
 static inline T get_u0(const T& u0, int epsilon)
 {
-  return (epsilon > 0) ? u0 : (1-u0);
+  return (1-epsilon)/2 + epsilon*u0;
 //  return (epsilon > 0) ? u0 : (1-u0);
 }
 template<typename T>
@ -39,7 +40,7 @@ struct ProductTerm0
  {
    T a = 1;
-    for (int r = 0; r < 3; r++)
+    for (unsigned int r = 0; r < 3; r++)
      a *= get_u0(u0[r], epsilon[r]);
    return a;
  }
@ -54,14 +55,14 @@ struct ProductTerm1
  static T product(T *u, T *u0, int *epsilon, int q)
  {
    T a = 1;
-    double G[3];
+    T G[3];
-    for (int r = 0; r < 3; r++)
+    for (unsigned int r = 0; r < 3; r++)
      {
        G[r] = get_u0(u0[r], epsilon[r]);
      }
-    double F[3] = { G[1]*G[2], G[0]*G[2], G[0]*G[1] };
+    T F[3] = { G[1]*G[2], G[0]*G[2], G[0]*G[1] };
    return F[q] * u[q] * epsilon[q];
  }
@ -75,14 +76,14 @@ struct ProductTerm2
  static inline T product(T *u, T *u0, int *epsilon, int q)
  {
    T a = 1;
-    double G[3];
+    T G[3];
-    for (int r = 0; r < 3; r++)
+    for (unsigned int r = 0; r < 3; r++)
      {
        G[r] = get_u0(u0[r], epsilon[r]);
      }
-    double F[3] = { epsilon[1]*epsilon[2]*u[1]*u[2], epsilon[0]*epsilon[2]*u[0]*u[2], epsilon[0]*epsilon[1]*u[0]*u[1] };    
+    T F[3] = { epsilon[1]*epsilon[2]*u[1]*u[2], epsilon[0]*epsilon[2]*u[0]*u[2], epsilon[0]*epsilon[1]*u[0]*u[1] };    
    return F[q] * G[q];
  }