Merge branch 'master' of bitbucket.org:glavaux/cosmotool

2014-06-14 17:26:26 +02:00 · 2014-06-14 17:26:26 +02:00 · e698201f38
commit e698201f38
parent 34eac2721a d23f79cbaf
5 changed files with 149 additions and 164 deletions
--- a/python/_project.pyx
+++ b/python/_project.pyx
@ -1,11 +1,12 @@
 from cpython cimport bool
 from cython cimport view
 from cython.parallel import prange, parallel
 from openmp cimport omp_get_max_threads, omp_get_thread_num
 from libc.math cimport sin, cos, abs, floor, sqrt
 import numpy as np
 cimport numpy as npx
 cimport cython
 from copy cimport *
 from openmp cimport omp_get_max_threads, omp_get_thread_num
 ctypedef npx.float64_t DTYPE_t
 DTYPE=np.float64
@ -297,8 +298,7 @@ cdef void INTERNAL_project_cic_no_mass(npx.ndarray[DTYPE_t, ndim=3] g,
    cdef double half_Box = 0.5*Lbox
    cdef double delta_Box = Ngrid/Lbox
    cdef int i
-    cdef double a[3]
+    cdef double a[3], c[3]
    cdef double c[3]
    cdef int b[3]
    cdef int do_not_put
@ -334,10 +334,8 @@ cdef void INTERNAL_project_cic_no_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g,
    cdef double half_Box = 0.5*Lbox
    cdef double delta_Box = Ngrid/Lbox
    cdef int i
-    cdef double a[3]
+    cdef double a[3], c[3]
-    cdef double c[3]
+    cdef int b[3], b1[3]
    cdef int b[3]
    cdef int b1[3]
    cdef int do_not_put
    for i in range(x.shape[0]):
@ -375,8 +373,7 @@ cdef void INTERNAL_project_cic_with_mass(npx.ndarray[DTYPE_t, ndim=3] g,
    cdef double half_Box = 0.5*Lbox
    cdef double delta_Box = Ngrid/Lbox
    cdef int i
-    cdef double a[3]
+    cdef double a[3], c[3]
    cdef double c[3]
    cdef DTYPE_t m0
    cdef int b[3]
@ -413,10 +410,8 @@ cdef void INTERNAL_project_cic_with_mass_periodic(npx.ndarray[DTYPE_t, ndim=3] g
    cdef double half_Box = 0.5*Lbox, m0
    cdef double delta_Box = Ngrid/Lbox
    cdef int i
-    cdef double a[3]
+    cdef double a[3], c[3]
-    cdef double c[3]
+    cdef int b[3], b1[3]
    cdef int b[3]
    cdef int b1[3]
    for i in range(x.shape[0]):
@ -537,23 +532,10 @@ cdef DTYPE_t cube_integral(DTYPE_t u[3], DTYPE_t u0[3], int r[1]) nogil:
@cython.boundscheck(False)
@cython.cdivision(True)
-cdef DTYPE_t mysum(DTYPE_t *v, int q) nogil:
+cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t vertex_value[8]) nogil:
  cdef int i
  cdef DTYPE_t s
  s = 0
  for i in xrange(q):
    s += v[i]
  return s
@cython.boundscheck(False)
@cython.cdivision(True)
 cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t vertex_value[8], int err[1]) nogil:
    cdef DTYPE_t alpha_max
    cdef DTYPE_t I, tmp_a
-    cdef DTYPE_t v[3]
+    cdef DTYPE_t v[3], term[4]
    cdef DTYPE_t term[4]
    cdef int i, j, q
    alpha_max = 10.0 # A big number
@ -572,10 +554,6 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t
            alpha_max = tmp_a
            j = i
    if (u0[i] >= 0 || u0[i] <= 1)
      err[0] = 1
      return 0
    I = compute_projection(vertex_value, u, u0, alpha_max)
    for i in xrange(3):
@ -589,7 +567,7 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t
@cython.boundscheck(False)
 cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density,
-                         DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil:
+                         DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil:
    cdef DTYPE_t d
    d = density[iu0[0], iu0[1], iu0[2]]
@ -598,24 +576,15 @@ cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density,
@cython.boundscheck(False)
 cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density,
-                         DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil:
+                         DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil:
    cdef DTYPE_t vertex_value[8]
    cdef DTYPE_t d
-    cdef int a[3][2]
+    cdef int a[3][2], i
    cdef int i
    for i in xrange(3):
 #      if u[i] < 0:
 #        a[i][0] = iu0[i]-1
 #        a[i][1] = iu0[i]
 #      else:
      a[i][0] = iu0[i]
      a[i][1] = iu0[i]+1
      with gil:    
        assert a[i][0] >= 0
        assert a[i][1] < density.shape[i]
    vertex_value[0 + 2*0 + 4*0] = density[a[0][0], a[1][0], a[2][0]]
    vertex_value[1 + 2*0 + 4*0] = density[a[0][1], a[1][0], a[2][0]]
    vertex_value[0 + 2*1 + 4*0] = density[a[0][0], a[1][1], a[2][0]]
@ -626,8 +595,7 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density,
    vertex_value[0 + 2*1 + 4*1] = density[a[0][0], a[1][1], a[2][1]]
    vertex_value[1 + 2*1 + 4*1] = density[a[0][1], a[1][1], a[2][1]]
-#    return cube_integral(u, u0, jumper)*d
+    return cube_integral_trilin(u, u0, jumper, vertex_value)
    return cube_integral_trilin(u, u0, jumper, vertex_value, err)
@ -635,15 +603,11 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density,
 cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
                             DTYPE_t a_u[3],
                             DTYPE_t min_distance,
-                             DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil:
+                             DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id) nogil except? 0:
-    cdef DTYPE_t u[3]
+    cdef DTYPE_t u[3], ifu0[3], u0[3], utot[3]
    cdef DTYPE_t ifu0[3]
    cdef DTYPE_t u0[3]
    cdef DTYPE_t utot[3]
    cdef int u_delta[3]
    cdef int iu0[3]
    cdef int err[1]
    cdef int i
    cdef int N = density.shape[0]
    cdef int half_N = density.shape[0]/2
@ -652,9 +616,7 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
    cdef int jumper[1]
    cdef DTYPE_t (*integrator)(DTYPE_t[:,:,:],
-                               DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], int err[1]) nogil
+                               DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1]) nogil
    out_err[0] = 0
    if integrator_id == 0:
      integrator = integrator0
@ -673,12 +635,12 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
        u0[i] = ifu0[i]-iu0[i]
        u_delta[i] = 1 if iu0[i] > 0 else -1
        if (not ((iu0[i]>= 0) and (iu0[i] < N))):
-          out_err[0] = 1
+          with gil:
-          return 0
+            raise RuntimeError("iu0[%d] = %d !!" % (i,iu0[i]))
        if (not (u0[i]>=0 and u0[i]<=1)):
-          out_err[0] = 1
+          with gil:
-          return 0
+            raise RuntimeError("u0[%d] = %g !" % (i,u0[i]))
    completed = 0
    if ((iu0[0] >= N) or (iu0[0] <= 0) or
@ -689,12 +651,8 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
    I0 = 0
    jumper[0] = 0
    while completed == 0:
        err[0] = 0
        I0 += integrator(density, u, u0, u_delta, iu0, jumper, err)
-        if err[0] == 1:
+        I0 += integrator(density, u, u0, u_delta, iu0, jumper)
          out_err[0] = 1
          return 0
        if u[jumper[0]] < 0:
            iu0[jumper[0]] -= 1
@ -711,7 +669,7 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
        else:
          dist2 = 0
          for i in range(3):
-             delta =  iu0[i]+u0[i]-half_N
+             delta =  iu0[i]+u0[i]-half_N-shifter[i]
             dist2 += delta*delta
          if (dist2 > max_distance2):
@ -727,24 +685,19 @@ def line_of_sight_projection(DTYPE_t[:,:,:] density not None,
                             DTYPE_t min_distance,
                             DTYPE_t max_distance, DTYPE_t[:] shifter not None, int integrator_id=0):
  cdef DTYPE_t u[3]
  cdef int out_err[1]
  cdef DTYPE_t v
  u[0] = a_u[0]
  u[1] = a_u[1]
  u[2] = a_u[2]
-  v = C_line_of_sight_projection(density,
+  C_line_of_sight_projection(density,
                             u,
                             min_distance,
-                             max_distance, shifter, integrator_id, out_err)
+                             max_distance, shifter, integrator_id)
  if out_err[0] == 1:
    raise RuntimeError("Error occured during integration")
  return v
 cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density, 
                double min_distance, double max_distance, 
-                DTYPE_t[:] shifter, int integrator_id, int out_err[1]) nogil:
+                DTYPE_t[:] shifter, int integrator_id) nogil:
  cdef DTYPE_t u0[3]
  stheta = sin(theta)
@ -752,19 +705,7 @@ cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density
  u0[1] = sin(phi)*stheta
  u0[2] = cos(theta)
-  return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id, out_err)
+  return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id)
@cython.boundscheck(False)
 cdef npx.uint64_t _mysum(int[:] jobs) nogil:
  cdef npx.uint64_t s
  cdef npx.uint64_t N
  cdef int i
  s = 0
  N = jobs.shape[0]
  for i in xrange(N):
    s += jobs[i]
  return s
@cython.boundscheck(False)
@ -781,9 +722,9 @@ def spherical_projection(int Nside,
    cdef DTYPE_t[:] outm
    cdef int[:] job_done
    cdef npx.ndarray[DTYPE_t, ndim=1] outm_array
-    cdef long N
+    cdef long N, N0
    cdef double stheta
-    cdef int out_err[1]
+    cdef int tid
    if shifter is None:
        shifter = view.array(shape=(3,), format=FORMAT_DTYPE, itemsize=sizeof(DTYPE_t))
@ -797,20 +738,22 @@ def spherical_projection(int Nside,
      p = pb.ProgressBar(maxval=outm.size,widgets=[pb.Bar(), pb.ETA()]).start()
    N = omp_get_max_threads()
    N0 = outm.size
    if booster < 0:
      booster = 1000
    job_done = view.array(shape=(N,), format="i", itemsize=sizeof(int))
    job_done[:] = 0
-    theta,phi = hp.pix2ang(Nside, np.arange(outm.size))
+    theta,phi = hp.pix2ang(Nside, np.arange(N0))
    if booster <= 0:
      booster = density.size / 100 / N
    with nogil, parallel():
-      for i in prange(N):
+      tid = omp_get_thread_num()
-          if omp_get_thread_num() == 0 and progress != 0 and (i%booster) == 0:
+      for i in prange(N0):
          if progress != 0 and (i%booster) == 0:
            with gil:
              p.update(_mysum(job_done))
-          outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id, out_err)
+          outm[i] = _spherical_projloop(theta[i], phi[i], density_view, min_distance, max_distance, shifter, integrator_id)
-          job_done[omp_get_thread_num()] = i
+          job_done[tid] += 1
    if progress:
      p.finish()
--- a/python/copy.pxd
+++ b/python/copy.pxd
@ -0,0 +1,22 @@
 cimport cython
 cimport numpy as npx
 ctypedef fused sum_type:
    cython.int
    cython.float
    npx.uint64_t
    npx.uint32_t
@cython.boundscheck(False)
 cdef inline sum_type _mysum(sum_type[:] jobs) nogil:
  cdef sum_type s
  cdef npx.uint64_t N
  cdef int i
  s = 0
  N = jobs.shape[0]
  for i in xrange(N):
    s += jobs[i]
  return s
--- a/sample/simple3DFilter.cpp
+++ b/sample/simple3DFilter.cpp
@ -7,6 +7,8 @@
 #include <H5Cpp.h>
 #include "hdf5_array.hpp"
 #include <iostream>
 #include <boost/format.hpp>
 #include <boost/bind.hpp>
 using namespace std;
 using namespace CosmoTool;
@ -17,8 +19,13 @@ struct VCoord{
  float v[3];
 };
-template<int i>
+using boost::format;
-ComputePrecision getVelocity(const VCoord& v)
+using boost::str;
 typedef boost::multi_array<float, 2> array_type;
 typedef boost::multi_array<float, 3> array3_type;
 typedef boost::multi_array<float, 4> array4_type;
 ComputePrecision getVelocity(const VCoord& v, int i)
 {
  return v.v[i];
 }
@ -32,11 +39,51 @@ typedef SPHSmooth<VCoord> MySmooth;
 typedef MySmooth::SPHTree MyTree;
 typedef MyTree::Cell MyCell;
 template<typename FuncT>
 void computeInterpolatedField(MyTree *tree1, double boxsize, int Nres, double cx, double cy, double cz,
                              array3_type& bins, array3_type& arr, FuncT func, double rLimit2)
 {
 #pragma omp parallel
  {
    MySmooth smooth1(tree1, N_SPH);
 #pragma omp for schedule(dynamic) 
    for (int rz = 0; rz < Nres; rz++)
      {
        double pz = (rz)*boxsize/Nres-cz;
        cout << format("[%d] %d / %d") % omp_get_thread_num() % rz % Nres << endl;
        for (int ry = 0; ry < Nres; ry++)
          {
            double py = (ry)*boxsize/Nres-cy;
            for (int rx = 0; rx < Nres; rx++)
              {
                double px = (rx)*boxsize/Nres-cx;
                MyTree::coords c = { px, py, pz };
                double r2 = c[0]*c[0]+c[1]*c[1]+c[2]*c[2];
                if (r2 > rLimit2)
                  {
                    arr[rx][ry][rz] = 0;
                    continue;
                  }
                uint32_t numInCell = bins[rx][ry][rz];
                if (numInCell > N_SPH)
                  smooth1.fetchNeighbours(c, numInCell);
                else
                  smooth1.fetchNeighbours(c);
                arr[rx][ry][rz] = smooth1.computeSmoothedValue(c, func);
              }
          }
      }
  }
 }
 int main(int argc, char **argv)
 {
  typedef boost::multi_array<float, 2> array_type;
  typedef boost::multi_array<float, 3> array3_type;
  typedef boost::multi_array<float, 4> array4_type;
  char *fname1, *fname2;
  double rLimit, boxsize, rLimit2, cx, cy, cz;
@ -105,8 +152,6 @@ int main(int argc, char **argv)
  cout << "Creating smoothing filter..." << endl;
  array3_type out_den_1(boost::extents[Nres][Nres][Nres]);
  array4_type out_v3d_1(boost::extents[Nres][Nres][Nres][3]);
 //  array3_type out_rad_1(boost::extents[Nres][Nres][Nres]);
  cout << "Weighing..." << endl;
@ -149,55 +194,18 @@ int main(int argc, char **argv)
  }
  cout << "Interpolating..." << endl;
 #pragma omp parallel
  {
    MySmooth smooth1(&tree1, N_SPH);
-#pragma omp for schedule(dynamic) 
+  array3_type interpolated(boost::extents[Nres][Nres][Nres]);
    for (int rz = 0; rz < Nres; rz++)
      {
        double pz = (rz)*boxsize/Nres-cz;
-        cout << rz << " / " << Nres << endl;
+  computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz,
-        for (int ry = 0; ry < Nres; ry++)
+                           bins, interpolated, getUnity, rLimit2);
-          {
+  hdf5_write_array(out_f, "density", interpolated);
-            double py = (ry)*boxsize/Nres-cy;
+  //out_f.flush();
-            for (int rx = 0; rx < Nres; rx++)
+  for (int i = 0; i < 3; i++) {
-              {
+      computeInterpolatedField(&tree1, boxsize, Nres, cx, cy, cz,
-                double px = (rx)*boxsize/Nres-cx;
+                               bins, interpolated, boost::bind(getVelocity, _1, i), rLimit2);
-                
+      hdf5_write_array(out_f, str(format("p%d") % i), interpolated);
                MyTree::coords c = { px, py, pz };
                double r2 = c[0]*c[0]+c[1]*c[1]+c[2]*c[2];
                if (r2 > rLimit2)
                  {
                    out_v3d_1[rx][ry][rz][0] = 0;
                    out_v3d_1[rx][ry][rz][1] = 0;
                    out_v3d_1[rx][ry][rz][2] = 0;
                    out_den_1[rx][ry][rz] = 0;
                    //out_rad_1[rx][ry][rz] = -1;
                    continue;
                  }
                uint32_t numInCell = bins[rx][ry][rz];
                if (numInCell > N_SPH)
                  smooth1.fetchNeighbours(c, numInCell);
                else
                  smooth1.fetchNeighbours(c);
                //out_rad_1[rx][ry][rz] = smooth1.getSmoothingLen();
                out_v3d_1[rx][ry][rz][0] = smooth1.computeSmoothedValue(c, getVelocity<0>);
                out_v3d_1[rx][ry][rz][1] = smooth1.computeSmoothedValue(c, getVelocity<1>);
                out_v3d_1[rx][ry][rz][2] = smooth1.computeSmoothedValue(c, getVelocity<2>);
                out_den_1[rx][ry][rz] = smooth1.computeSmoothedValue(c, getUnity);
              }
          }
      }
  }
  //hdf5_write_array(out_f, "radii", out_rad_1);
  hdf5_write_array(out_f, "velocity", out_v3d_1);
  hdf5_write_array(out_f, "density", out_den_1);
  return 0;
 };
--- a/src/sphSmooth.hpp
+++ b/src/sphSmooth.hpp
@ -82,10 +82,14 @@ namespace CosmoTool
      return internal.currentCenter;
    }
    template<typename FuncT>
    ComputePrecision computeSmoothedValue(const typename SPHTree::coords& c,
-					  computeParticleValue fun, SPHState *state = 0);    
+					  FuncT fun, SPHState *state = 0);    
 	template<typename FuncT>
    ComputePrecision computeInterpolatedValue(const typename SPHTree::coords& c,
-					      computeParticleValue fun, SPHState *state = 0);    
+					      FuncT fun, SPHState *state = 0);
    ComputePrecision getMaxDistance(const typename SPHTree::coords& c, 
 				    SPHNode *node) const;
@ -101,7 +105,8 @@ namespace CosmoTool
    }
    // END
-    void runForEachNeighbour(runParticleValue fun, SPHState *state = 0);
+    template<typename FuncT>
    void runForEachNeighbour(FuncT fun, SPHState *state = 0);
    void addGridSite(const typename SPHTree::coords& c);
    bool hasNeighbours() const;
@ -127,12 +132,15 @@ namespace CosmoTool
    uint32_t maxNgb;
    SPHTree *tree;
    template<typename FuncT>
    ComputePrecision computeWValue(const typename SPHTree::coords & c,
 				   SPHCell& cell,
 				   CoordType d,
-				   computeParticleValue fun, SPHState *state);
+				   FuncT fun, SPHState *state);
    template<typename FuncT>
    void runUnrollNode(SPHNode *node,
-		       runParticleValue fun);
+		       FuncT fun);
  };
  template<class ValType1, class ValType2, int Ndims>
--- a/src/sphSmooth.tcc
+++ b/src/sphSmooth.tcc
@ -21,10 +21,11 @@ SPHSmooth<ValType,Ndims>::~SPHSmooth()
 }
 template<typename ValType, int Ndims>
 template<typename FuncT>
 ComputePrecision SPHSmooth<ValType,Ndims>::computeWValue(const typename SPHTree::coords& c,
 							 SPHCell& cell,
 							 CoordType d,
-							 computeParticleValue fun, SPHState *state)
+							 FuncT fun, SPHState *state)
 {
  CoordType weight;
@ -117,9 +118,10 @@ SPHSmooth<ValType,Ndims>::fetchNeighboursOnVolume(const typename SPHTree::coords
 }				
 template<typename ValType, int Ndims>
 template<typename FuncT>
 ComputePrecision 
 SPHSmooth<ValType,Ndims>::computeSmoothedValue(const typename SPHTree::coords& c,
-                                               computeParticleValue fun, SPHState *state)
+                                               FuncT fun, SPHState *state)
 {
  if (state == 0)
    state = &internal;
@ -144,8 +146,9 @@ ComputePrecision interpolateOne(const ValType& t)
 // WARNING ! Cell's weight must be 1 !!!
 template<typename ValType, int Ndims>
 template<typename FuncT>
 ComputePrecision SPHSmooth<ValType,Ndims>::computeInterpolatedValue(const typename SPHTree::coords& c,
-								    computeParticleValue fun, SPHState *state)
+								    FuncT fun, SPHState *state)
 {
  if (state == 0)
    state = &internal;
@ -164,7 +167,8 @@ ComputePrecision SPHSmooth<ValType,Ndims>::computeInterpolatedValue(const typena
 }
 template<typename ValType, int Ndims>
-void SPHSmooth<ValType,Ndims>::runForEachNeighbour(runParticleValue fun, SPHState *state)
+template<typename FuncT>
 void SPHSmooth<ValType,Ndims>::runForEachNeighbour(FuncT fun, SPHState *state)
 {
  if (state == 0)
    state = &internal;