From 00f4f0a00a981f13ec4857946ab30f79050bfc8a Mon Sep 17 00:00:00 2001 From: Guilhem Lavaux Date: Fri, 13 Jun 2014 17:11:40 +0200 Subject: [PATCH 1/4] More work on performance but broke the algorithm --- python/_project.pyx | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/python/_project.pyx b/python/_project.pyx index f2c31d9..2d85d32 100644 --- a/python/_project.pyx +++ b/python/_project.pyx @@ -535,18 +535,6 @@ cdef DTYPE_t cube_integral(DTYPE_t u[3], DTYPE_t u0[3], int r[1]) nogil: return alpha_max -@cython.boundscheck(False) -@cython.cdivision(True) -cdef DTYPE_t mysum(DTYPE_t *v, int q) nogil: - cdef int i - cdef DTYPE_t s - - s = 0 - - for i in xrange(q): - s += v[i] - return s - @cython.boundscheck(False) @cython.cdivision(True) cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t vertex_value[8], int err[1]) nogil: @@ -572,8 +560,10 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t alpha_max = tmp_a j = i - if (u0[i] >= 0 || u0[i] <= 1) + if (u0[i] < 0 or u0[i] > 1): err[0] = 1 + with gil: + print("Bouh! u0[%d] = %lg" % (i, u0[i])) return 0 I = compute_projection(vertex_value, u, u0, alpha_max) @@ -589,7 +579,7 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t @cython.boundscheck(False) cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density, - DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil: + DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0: cdef DTYPE_t d d = density[iu0[0], iu0[1], iu0[2]] @@ -598,7 +588,7 @@ cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density, @cython.boundscheck(False) cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density, - DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil: + DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0: cdef DTYPE_t vertex_value[8] cdef DTYPE_t d cdef int a[3][2] @@ -635,7 +625,7 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density, cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, DTYPE_t a_u[3], DTYPE_t min_distance, - DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil: + DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil except? 0: cdef DTYPE_t u[3] cdef DTYPE_t ifu0[3] @@ -652,7 +642,7 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, cdef int jumper[1] cdef DTYPE_t (*integrator)(DTYPE_t[:,:,:], - DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil + DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0 out_err[0] = 0 @@ -675,7 +665,7 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, if (not ((iu0[i]>= 0) and (iu0[i] < N))): out_err[0] = 1 return 0 - + if (not (u0[i]>=0 and u0[i]<=1)): out_err[0] = 1 return 0 @@ -693,6 +683,8 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, I0 += integrator(density, u, u0, u_delta, iu0, jumper, err) if err[0] == 1: + with gil: + print("Bah! error!") out_err[0] = 1 return 0 @@ -734,6 +726,7 @@ def line_of_sight_projection(DTYPE_t[:,:,:] density not None, u[1] = a_u[1] u[2] = a_u[2] + out_err[0] = 0 v = C_line_of_sight_projection(density, u, min_distance, @@ -754,9 +747,15 @@ cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id, out_err) +ctypedef fused sum_type: + cython.int + cython.float + npx.uint64_t + npx.uint32_t + @cython.boundscheck(False) -cdef npx.uint64_t _mysum(int[:] jobs) nogil: - cdef npx.uint64_t s +cdef sum_type _mysum(sum_type[:] jobs) nogil: + cdef sum_type s cdef npx.uint64_t N cdef int i @@ -809,6 +808,7 @@ def spherical_projection(int Nside, if omp_get_thread_num() == 0 and progress != 0 and (i%booster) == 0: with gil: p.update(_mysum(job_done)) + out_err[0] = 0 outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id, out_err) job_done[omp_get_thread_num()] = i From 60a477a4c072e3a8401aa30dd74b99a808b09751 Mon Sep 17 00:00:00 2001 From: Guilhem Lavaux Date: Fri, 13 Jun 2014 17:38:49 +0200 Subject: [PATCH 2/4] Try to debug error --- python/_project.pyx | 65 ++++++++++++++++----------------------------- python/copy.pxd | 22 +++++++++++++++ 2 files changed, 45 insertions(+), 42 deletions(-) create mode 100644 python/copy.pxd diff --git a/python/_project.pyx b/python/_project.pyx index 2d85d32..34a7de8 100644 --- a/python/_project.pyx +++ b/python/_project.pyx @@ -6,6 +6,7 @@ from libc.math cimport sin, cos, abs, floor, sqrt import numpy as np cimport numpy as npx cimport cython +from copy cimport * ctypedef npx.float64_t DTYPE_t DTYPE=np.float64 @@ -560,11 +561,11 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t alpha_max = tmp_a j = i - if (u0[i] < 0 or u0[i] > 1): - err[0] = 1 - with gil: - print("Bouh! u0[%d] = %lg" % (i, u0[i])) - return 0 +# if (u0[i] < 0 or u0[i] > 1): +# err[0] = 1 +# with gil: +# print("Bouh! u0[%d] = %lg" % (i, u0[i])) +# return 0 I = compute_projection(vertex_value, u, u0, alpha_max) @@ -595,10 +596,6 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density, cdef int i for i in xrange(3): -# if u[i] < 0: -# a[i][0] = iu0[i]-1 -# a[i][1] = iu0[i] -# else: a[i][0] = iu0[i] a[i][1] = iu0[i]+1 @@ -658,17 +655,19 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, u0[i] = a_u[i]*min_distance ifu0[i] = half_N+u0[i]+shifter[i] if (ifu0[i] <= 0 or ifu0[i] >= N): + with gil: + print "BLABLA" return 0 iu0[i] = int(floor(ifu0[i])) u0[i] = ifu0[i]-iu0[i] u_delta[i] = 1 if iu0[i] > 0 else -1 - if (not ((iu0[i]>= 0) and (iu0[i] < N))): - out_err[0] = 1 - return 0 +# if (not ((iu0[i]>= 0) and (iu0[i] < N))): +# out_err[0] = 1 +# return 0 - if (not (u0[i]>=0 and u0[i]<=1)): - out_err[0] = 1 - return 0 +# if (not (u0[i]>=0 and u0[i]<=1)): +# out_err[0] = 1 +# return 0 completed = 0 if ((iu0[0] >= N) or (iu0[0] <= 0) or @@ -682,11 +681,11 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, err[0] = 0 I0 += integrator(density, u, u0, u_delta, iu0, jumper, err) - if err[0] == 1: - with gil: - print("Bah! error!") - out_err[0] = 1 - return 0 +# if err[0] == 1: +# with gil: +# print("Bah! error!") +# out_err[0] = 1 +# return 0 if u[jumper[0]] < 0: iu0[jumper[0]] -= 1 @@ -703,7 +702,7 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, else: dist2 = 0 for i in range(3): - delta = iu0[i]+u0[i]-half_N + delta = iu0[i]+u0[i]-half_N-shifter[i] dist2 += delta*delta if (dist2 > max_distance2): @@ -731,8 +730,8 @@ def line_of_sight_projection(DTYPE_t[:,:,:] density not None, u, min_distance, max_distance, shifter, integrator_id, out_err) - if out_err[0] == 1: - raise RuntimeError("Error occured during integration") +# if out_err[0] == 1: +# raise RuntimeError("Error occured during integration") return v cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density, @@ -747,24 +746,6 @@ cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id, out_err) -ctypedef fused sum_type: - cython.int - cython.float - npx.uint64_t - npx.uint32_t - -@cython.boundscheck(False) -cdef sum_type _mysum(sum_type[:] jobs) nogil: - cdef sum_type s - cdef npx.uint64_t N - cdef int i - - s = 0 - N = jobs.shape[0] - for i in xrange(N): - s += jobs[i] - return s - @cython.boundscheck(False) def spherical_projection(int Nside, @@ -808,7 +789,7 @@ def spherical_projection(int Nside, if omp_get_thread_num() == 0 and progress != 0 and (i%booster) == 0: with gil: p.update(_mysum(job_done)) - out_err[0] = 0 +# out_err[0] = 0 outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id, out_err) job_done[omp_get_thread_num()] = i diff --git a/python/copy.pxd b/python/copy.pxd new file mode 100644 index 0000000..182bbba --- /dev/null +++ b/python/copy.pxd @@ -0,0 +1,22 @@ +cimport cython +cimport numpy as npx + +ctypedef fused sum_type: + cython.int + cython.float + npx.uint64_t + npx.uint32_t + +@cython.boundscheck(False) +cdef inline sum_type _mysum(sum_type[:] jobs) nogil: + cdef sum_type s + cdef npx.uint64_t N + cdef int i + + s = 0 + N = jobs.shape[0] + for i in xrange(N): + s += jobs[i] + return s + + From 81f4b642e4f7f3e20fed848f3f03d47d136d86e8 Mon Sep 17 00:00:00 2001 From: Guilhem Lavaux Date: Sat, 14 Jun 2014 17:15:56 +0200 Subject: [PATCH 3/4] Re-optimized projection --- python/_project.pyx | 126 ++++++++++++++-------------------------- python/project_tool.hpp | 23 ++++---- 2 files changed, 56 insertions(+), 93 deletions(-) diff --git a/python/_project.pyx b/python/_project.pyx index 34a7de8..efe66bd 100644 --- a/python/_project.pyx +++ b/python/_project.pyx @@ -1,12 +1,12 @@ from cpython cimport bool from cython cimport view from cython.parallel import prange, parallel -from openmp cimport omp_get_max_threads, omp_get_thread_num from libc.math cimport sin, cos, abs, floor, sqrt import numpy as np cimport numpy as npx cimport cython from copy cimport * +from openmp cimport omp_get_max_threads, omp_get_thread_num ctypedef npx.float64_t DTYPE_t DTYPE=np.float64 @@ -298,8 +298,7 @@ cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g, cdef double half_Box = 0.5*Lbox cdef double delta_Box = Ngrid/Lbox cdef int i - cdef double a[3] - cdef double c[3] + cdef double a[3], c[3] cdef int b[3] cdef int do_not_put @@ -335,10 +334,8 @@ cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g, cdef double half_Box = 0.5*Lbox cdef double delta_Box = Ngrid/Lbox cdef int i - cdef double a[3] - cdef double c[3] - cdef int b[3] - cdef int b1[3] + cdef double a[3], c[3] + cdef int b[3], b1[3] cdef int do_not_put for i in range(x.shape[0]): @@ -376,8 +373,7 @@ cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g, cdef double half_Box = 0.5*Lbox cdef double delta_Box = Ngrid/Lbox cdef int i - cdef double a[3] - cdef double c[3] + cdef double a[3], c[3] cdef DTYPE_t m0 cdef int b[3] @@ -414,10 +410,8 @@ cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g cdef double half_Box = 0.5*Lbox, m0 cdef double delta_Box = Ngrid/Lbox cdef int i - cdef double a[3] - cdef double c[3] - cdef int b[3] - cdef int b1[3] + cdef double a[3], c[3] + cdef int b[3], b1[3] for i in range(x.shape[0]): @@ -538,11 +532,10 @@ cdef DTYPE_t cube_integral(DTYPE_t u[3], DTYPE_t u0[3], int r[1]) nogil: @cython.boundscheck(False) @cython.cdivision(True) -cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t vertex_value[8], int err[1]) nogil: +cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t vertex_value[8]) nogil: cdef DTYPE_t alpha_max cdef DTYPE_t I, tmp_a - cdef DTYPE_t v[3] - cdef DTYPE_t term[4] + cdef DTYPE_t v[3], term[4] cdef int i, j, q alpha_max = 10.0 # A big number @@ -560,13 +553,7 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t if tmp_a < alpha_max: alpha_max = tmp_a j = i - -# if (u0[i] < 0 or u0[i] > 1): -# err[0] = 1 -# with gil: -# print("Bouh! u0[%d] = %lg" % (i, u0[i])) -# return 0 - + I = compute_projection(vertex_value, u, u0, alpha_max) for i in xrange(3): @@ -580,7 +567,7 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t @cython.boundscheck(False) cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density, - DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0: + DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil: cdef DTYPE_t d d = density[iu0[0], iu0[1], iu0[2]] @@ -589,20 +576,15 @@ cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density, @cython.boundscheck(False) cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density, - DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0: + DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil: cdef DTYPE_t vertex_value[8] cdef DTYPE_t d - cdef int a[3][2] - cdef int i + cdef int a[3][2], i for i in xrange(3): a[i][0] = iu0[i] a[i][1] = iu0[i]+1 - - with gil: - assert a[i][0] >= 0 - assert a[i][1] < density.shape[i] - + vertex_value[0 + 2*0 + 4*0] = density[a[0][0], a[1][0], a[2][0]] vertex_value[1 + 2*0 + 4*0] = density[a[0][1], a[1][0], a[2][0]] vertex_value[0 + 2*1 + 4*0] = density[a[0][0], a[1][1], a[2][0]] @@ -613,8 +595,7 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density, vertex_value[0 + 2*1 + 4*1] = density[a[0][0], a[1][1], a[2][1]] vertex_value[1 + 2*1 + 4*1] = density[a[0][1], a[1][1], a[2][1]] -# return cube_integral(u, u0, jumper)*d - return cube_integral_trilin(u, u0, jumper, vertex_value, err) + return cube_integral_trilin(u, u0, jumper, vertex_value) @@ -622,15 +603,11 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density, cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, DTYPE_t a_u[3], DTYPE_t min_distance, - DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil except? 0: + DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id) nogil except? 0: - cdef DTYPE_t u[3] - cdef DTYPE_t ifu0[3] - cdef DTYPE_t u0[3] - cdef DTYPE_t utot[3] + cdef DTYPE_t u[3], ifu0[3], u0[3], utot[3] cdef int u_delta[3] cdef int iu0[3] - cdef int err[1] cdef int i cdef int N = density.shape[0] cdef int half_N = density.shape[0]/2 @@ -639,10 +616,8 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, cdef int jumper[1] cdef DTYPE_t (*integrator)(DTYPE_t[:,:,:], - DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil except? 0 + DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil - out_err[0] = 0 - if integrator_id == 0: integrator = integrator0 else: @@ -655,19 +630,17 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, u0[i] = a_u[i]*min_distance ifu0[i] = half_N+u0[i]+shifter[i] if (ifu0[i] <= 0 or ifu0[i] >= N): - with gil: - print "BLABLA" return 0 iu0[i] = int(floor(ifu0[i])) u0[i] = ifu0[i]-iu0[i] u_delta[i] = 1 if iu0[i] > 0 else -1 -# if (not ((iu0[i]>= 0) and (iu0[i] < N))): -# out_err[0] = 1 -# return 0 - -# if (not (u0[i]>=0 and u0[i]<=1)): -# out_err[0] = 1 -# return 0 + if (not ((iu0[i]>= 0) and (iu0[i] < N))): + with gil: + raise RuntimeError("iu0[%d] = %d !!" % (i,iu0[i])) + + if (not (u0[i]>=0 and u0[i]<=1)): + with gil: + raise RuntimeError("u0[%d] = %g !" % (i,u0[i])) completed = 0 if ((iu0[0] >= N) or (iu0[0] <= 0) or @@ -678,14 +651,8 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density, I0 = 0 jumper[0] = 0 while completed == 0: - err[0] = 0 - I0 += integrator(density, u, u0, u_delta, iu0, jumper, err) - -# if err[0] == 1: -# with gil: -# print("Bah! error!") -# out_err[0] = 1 -# return 0 + + I0 += integrator(density, u, u0, u_delta, iu0, jumper) if u[jumper[0]] < 0: iu0[jumper[0]] -= 1 @@ -718,25 +685,19 @@ def line_of_sight_projection(DTYPE_t[:,:,:] density not None, DTYPE_t min_distance, DTYPE_t max_distance, DTYPE_t[:] shifter not None, int integrator_id=0): cdef DTYPE_t u[3] - cdef int out_err[1] - cdef DTYPE_t v u[0] = a_u[0] u[1] = a_u[1] u[2] = a_u[2] - out_err[0] = 0 - v = C_line_of_sight_projection(density, + C_line_of_sight_projection(density, u, min_distance, - max_distance, shifter, integrator_id, out_err) -# if out_err[0] == 1: -# raise RuntimeError("Error occured during integration") - return v + max_distance, shifter, integrator_id) cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density, double min_distance, double max_distance, - DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil: + DTYPE_t[:] shifter, int integrator_id) nogil: cdef DTYPE_t u0[3] stheta = sin(theta) @@ -744,7 +705,7 @@ cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density u0[1] = sin(phi)*stheta u0[2] = cos(theta) - return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id, out_err) + return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id) @cython.boundscheck(False) @@ -761,9 +722,9 @@ def spherical_projection(int Nside, cdef DTYPE_t[:] outm cdef int[:] job_done cdef npx.ndarray[DTYPE_t, ndim=1] outm_array - cdef long N + cdef long N, N0 cdef double stheta - cdef int out_err[1] + cdef int tid if shifter is None: shifter = view.array(shape=(3,), format=FORMAT_DTYPE, itemsize=sizeof(DTYPE_t)) @@ -777,21 +738,22 @@ def spherical_projection(int Nside, p = pb.ProgressBar(maxval=outm.size,widgets=[pb.Bar(), pb.ETA()]).start() N = omp_get_max_threads() + N0 = outm.size + + if booster < 0: + booster = 1000 + job_done = view.array(shape=(N,), format="i", itemsize=sizeof(int)) job_done[:] = 0 - theta,phi = hp.pix2ang(Nside, np.arange(outm.size)) - - if booster <= 0: - booster = density.size / 100 / N - + theta,phi = hp.pix2ang(Nside, np.arange(N0)) with nogil, parallel(): - for i in prange(N): - if omp_get_thread_num() == 0 and progress != 0 and (i%booster) == 0: + tid = omp_get_thread_num() + for i in prange(N0): + if progress != 0 and (i%booster) == 0: with gil: p.update(_mysum(job_done)) -# out_err[0] = 0 - outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id, out_err) - job_done[omp_get_thread_num()] = i + outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id) + job_done[tid] += 1 if progress: p.finish() diff --git a/python/project_tool.hpp b/python/project_tool.hpp index 6f8c1d7..c185c44 100644 --- a/python/project_tool.hpp +++ b/python/project_tool.hpp @@ -5,14 +5,14 @@ template static T project_tool(T *vertex_value, T *u, T *u0) { T ret0 = 0; - for (int i = 0; i < 8; i++) + for (unsigned int i = 0; i < 8; i++) { - int c[3] = { i & 1, (i>>1)&1, (i>>2)&1 }; + unsigned int c[3] = { i & 1, (i>>1)&1, (i>>2)&1 }; int epsilon[3]; T ret = 0; for (int q = 0; q < 3; q++) - epsilon[q] = (2*c[q]-1); + epsilon[q] = 2*c[q]-1; for (int q = 0; q < ProdType::numProducts; q++) ret += ProdType::product(u, u0, epsilon, q); @@ -27,7 +27,8 @@ static T project_tool(T *vertex_value, T *u, T *u0) template static inline T get_u0(const T& u0, int epsilon) { - return (epsilon > 0) ? u0 : (1-u0); + return (1-epsilon)/2 + epsilon*u0; +// return (epsilon > 0) ? u0 : (1-u0); } template @@ -39,7 +40,7 @@ struct ProductTerm0 { T a = 1; - for (int r = 0; r < 3; r++) + for (unsigned int r = 0; r < 3; r++) a *= get_u0(u0[r], epsilon[r]); return a; } @@ -54,14 +55,14 @@ struct ProductTerm1 static T product(T *u, T *u0, int *epsilon, int q) { T a = 1; - double G[3]; + T G[3]; - for (int r = 0; r < 3; r++) + for (unsigned int r = 0; r < 3; r++) { G[r] = get_u0(u0[r], epsilon[r]); } - double F[3] = { G[1]*G[2], G[0]*G[2], G[0]*G[1] }; + T F[3] = { G[1]*G[2], G[0]*G[2], G[0]*G[1] }; return F[q] * u[q] * epsilon[q]; } @@ -75,14 +76,14 @@ struct ProductTerm2 static inline T product(T *u, T *u0, int *epsilon, int q) { T a = 1; - double G[3]; + T G[3]; - for (int r = 0; r < 3; r++) + for (unsigned int r = 0; r < 3; r++) { G[r] = get_u0(u0[r], epsilon[r]); } - double F[3] = { epsilon[1]*epsilon[2]*u[1]*u[2], epsilon[0]*epsilon[2]*u[0]*u[2], epsilon[0]*epsilon[1]*u[0]*u[1] }; + T F[3] = { epsilon[1]*epsilon[2]*u[1]*u[2], epsilon[0]*epsilon[2]*u[0]*u[2], epsilon[0]*epsilon[1]*u[0]*u[1] }; return F[q] * G[q]; } From d23f79cbaf7f3e027f7cabbf11a896b881ec7e82 Mon Sep 17 00:00:00 2001 From: Guilhem Lavaux Date: Sat, 14 Jun 2014 17:18:28 +0200 Subject: [PATCH 4/4] Templatized SPHSmooth. Reduced memory consumption of simple3DFilter --- sample/simple3DFilter.cpp | 118 ++++++++++++++++++++------------------ src/sphSmooth.hpp | 18 ++++-- src/sphSmooth.tcc | 12 ++-- 3 files changed, 84 insertions(+), 64 deletions(-) diff --git a/sample/simple3DFilter.cpp b/sample/simple3DFilter.cpp index b3a74d2..345834c 100644 --- a/sample/simple3DFilter.cpp +++ b/sample/simple3DFilter.cpp @@ -7,6 +7,8 @@ #include #include "hdf5_array.hpp" #include +#include +#include using namespace std; using namespace CosmoTool; @@ -17,8 +19,13 @@ struct VCoord{ float v[3]; }; -template -ComputePrecision getVelocity(const VCoord& v) +using boost::format; +using boost::str; +typedef boost::multi_array array_type; +typedef boost::multi_array array3_type; +typedef boost::multi_array array4_type; + +ComputePrecision getVelocity(const VCoord& v, int i) { return v.v[i]; } @@ -32,11 +39,51 @@ typedef SPHSmooth MySmooth; typedef MySmooth::SPHTree MyTree; typedef MyTree::Cell MyCell; +template +void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres, double cx, double cy, double cz, + array3_type& bins, array3_type& arr, FuncT func, double rLimit2) +{ +#pragma omp parallel + { + MySmooth smooth1(tree1, N_SPH); + +#pragma omp for schedule(dynamic) + for (int rz = 0; rz < Nres; rz++) + { + double pz = (rz)*boxsize/Nres-cz; + + cout << format("[%d] %d / %d") % omp_get_thread_num() % rz % Nres << endl; + for (int ry = 0; ry < Nres; ry++) + { + double py = (ry)*boxsize/Nres-cy; + for (int rx = 0; rx < Nres; rx++) + { + double px = (rx)*boxsize/Nres-cx; + + MyTree::coords c = { px, py, pz }; + + double r2 = c[0]*c[0]+c[1]*c[1]+c[2]*c[2]; + if (r2 > rLimit2) + { + arr[rx][ry][rz] = 0; + continue; + } + + uint32_t numInCell = bins[rx][ry][rz]; + if (numInCell > N_SPH) + smooth1.fetchNeighbours(c, numInCell); + else + smooth1.fetchNeighbours(c); + + arr[rx][ry][rz] = smooth1.computeSmoothedValue(c, func); + } + } + } + } +} + int main(int argc, char **argv) { - typedef boost::multi_array array_type; - typedef boost::multi_array array3_type; - typedef boost::multi_array array4_type; char *fname1, *fname2; double rLimit, boxsize, rLimit2, cx, cy, cz; @@ -105,8 +152,6 @@ int main(int argc, char **argv) cout << "Creating smoothing filter..." << endl; - array3_type out_den_1(boost::extents[Nres][Nres][Nres]); - array4_type out_v3d_1(boost::extents[Nres][Nres][Nres][3]); // array3_type out_rad_1(boost::extents[Nres][Nres][Nres]); cout << "Weighing..." << endl; @@ -149,55 +194,18 @@ int main(int argc, char **argv) } cout << "Interpolating..." << endl; -#pragma omp parallel - { - MySmooth smooth1(&tree1, N_SPH); - -#pragma omp for schedule(dynamic) - for (int rz = 0; rz < Nres; rz++) - { - double pz = (rz)*boxsize/Nres-cz; - cout << rz << " / " << Nres << endl; - for (int ry = 0; ry < Nres; ry++) - { - double py = (ry)*boxsize/Nres-cy; - for (int rx = 0; rx < Nres; rx++) - { - double px = (rx)*boxsize/Nres-cx; - - MyTree::coords c = { px, py, pz }; - - double r2 = c[0]*c[0]+c[1]*c[1]+c[2]*c[2]; - if (r2 > rLimit2) - { - out_v3d_1[rx][ry][rz][0] = 0; - out_v3d_1[rx][ry][rz][1] = 0; - out_v3d_1[rx][ry][rz][2] = 0; - out_den_1[rx][ry][rz] = 0; - //out_rad_1[rx][ry][rz] = -1; - continue; - } - - uint32_t numInCell = bins[rx][ry][rz]; - if (numInCell > N_SPH) - smooth1.fetchNeighbours(c, numInCell); - else - smooth1.fetchNeighbours(c); - - //out_rad_1[rx][ry][rz] = smooth1.getSmoothingLen(); - out_v3d_1[rx][ry][rz][0] = smooth1.computeSmoothedValue(c, getVelocity<0>); - out_v3d_1[rx][ry][rz][1] = smooth1.computeSmoothedValue(c, getVelocity<1>); - out_v3d_1[rx][ry][rz][2] = smooth1.computeSmoothedValue(c, getVelocity<2>); - out_den_1[rx][ry][rz] = smooth1.computeSmoothedValue(c, getUnity); - } - } - } + array3_type interpolated(boost::extents[Nres][Nres][Nres]); + + computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, + bins, interpolated, getUnity, rLimit2); + hdf5_write_array(out_f, "density", interpolated); + //out_f.flush(); + for (int i = 0; i < 3; i++) { + computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz, + bins, interpolated, boost::bind(getVelocity, _1, i), rLimit2); + hdf5_write_array(out_f, str(format("p%d") % i), interpolated); } - - //hdf5_write_array(out_f, "radii", out_rad_1); - hdf5_write_array(out_f, "velocity", out_v3d_1); - hdf5_write_array(out_f, "density", out_den_1); - + return 0; }; diff --git a/src/sphSmooth.hpp b/src/sphSmooth.hpp index d116dd4..bcd98a5 100644 --- a/src/sphSmooth.hpp +++ b/src/sphSmooth.hpp @@ -82,10 +82,14 @@ namespace CosmoTool return internal.currentCenter; } + template ComputePrecision computeSmoothedValue(const typename SPHTree::coords& c, - computeParticleValue fun, SPHState *state = 0); + FuncT fun, SPHState *state = 0); + + template ComputePrecision computeInterpolatedValue(const typename SPHTree::coords& c, - computeParticleValue fun, SPHState *state = 0); + FuncT fun, SPHState *state = 0); + ComputePrecision getMaxDistance(const typename SPHTree::coords& c, SPHNode *node) const; @@ -101,7 +105,8 @@ namespace CosmoTool } // END - void runForEachNeighbour(runParticleValue fun, SPHState *state = 0); + template + void runForEachNeighbour(FuncT fun, SPHState *state = 0); void addGridSite(const typename SPHTree::coords& c); bool hasNeighbours() const; @@ -127,12 +132,15 @@ namespace CosmoTool uint32_t maxNgb; SPHTree *tree; + template ComputePrecision computeWValue(const typename SPHTree::coords & c, SPHCell& cell, CoordType d, - computeParticleValue fun, SPHState *state); + FuncT fun, SPHState *state); + + template void runUnrollNode(SPHNode *node, - runParticleValue fun); + FuncT fun); }; template diff --git a/src/sphSmooth.tcc b/src/sphSmooth.tcc index 1ae5486..99094cd 100644 --- a/src/sphSmooth.tcc +++ b/src/sphSmooth.tcc @@ -21,10 +21,11 @@ SPHSmooth::~SPHSmooth() } template +template ComputePrecision SPHSmooth::computeWValue(const typename SPHTree::coords& c, SPHCell& cell, CoordType d, - computeParticleValue fun, SPHState *state) + FuncT fun, SPHState *state) { CoordType weight; @@ -117,9 +118,10 @@ SPHSmooth::fetchNeighboursOnVolume(const typename SPHTree::coords } template +template ComputePrecision SPHSmooth::computeSmoothedValue(const typename SPHTree::coords& c, - computeParticleValue fun, SPHState *state) + FuncT fun, SPHState *state) { if (state == 0) state = &internal; @@ -144,8 +146,9 @@ ComputePrecision interpolateOne(const ValType& t) // WARNING ! Cell's weight must be 1 !!! template +template ComputePrecision SPHSmooth::computeInterpolatedValue(const typename SPHTree::coords& c, - computeParticleValue fun, SPHState *state) + FuncT fun, SPHState *state) { if (state == 0) state = &internal; @@ -164,7 +167,8 @@ ComputePrecision SPHSmooth::computeInterpolatedValue(const typena } template -void SPHSmooth::runForEachNeighbour(runParticleValue fun, SPHState *state) +template +void SPHSmooth::runForEachNeighbour(FuncT fun, SPHState *state) { if (state == 0) state = &internal;