Update to allow accumulators in CIC projection

2021-08-05 08:50:31 +03:00 · 2021-08-05 08:50:31 +03:00 · 6cafdd50b2
commit 6cafdd50b2
parent a16ae60382
3 changed files with 167 additions and 148 deletions
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@ -8,32 +8,32 @@ IF(CYTHON)
  add_custom_command(
    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/_cosmotool.cpp
    COMMAND ${CYTHON} --cplus -o ${CMAKE_CURRENT_BINARY_DIR}/_cosmotool.cpp ${CMAKE_CURRENT_SOURCE_DIR}/_cosmotool.pyx
-    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_cosmotool.pyx) 
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_cosmotool.pyx)

  add_custom_command(
    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/_cosmo_power.cpp
    COMMAND ${CYTHON} --cplus -o ${CMAKE_CURRENT_BINARY_DIR}/_cosmo_power.cpp ${CMAKE_CURRENT_SOURCE_DIR}/_cosmo_power.pyx
-    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_cosmo_power.pyx) 
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_cosmo_power.pyx)

  add_custom_command(
    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/_fast_interp.cpp
    COMMAND ${CYTHON} --cplus -o ${CMAKE_CURRENT_BINARY_DIR}/_fast_interp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/_fast_interp.pyx
-    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_fast_interp.pyx) 
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_fast_interp.pyx)

  add_custom_command(
    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/_cosmo_cic.cpp
    COMMAND ${CYTHON} --cplus -o ${CMAKE_CURRENT_BINARY_DIR}/_cosmo_cic.cpp ${CMAKE_CURRENT_SOURCE_DIR}/_cosmo_cic.pyx
-    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_cosmo_cic.pyx) 
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_cosmo_cic.pyx)

  add_custom_command(
    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/_project.cpp
    COMMAND ${CYTHON} --cplus -o ${CMAKE_CURRENT_BINARY_DIR}/_project.cpp ${CMAKE_CURRENT_SOURCE_DIR}/_project.pyx
-    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_project.pyx ${CMAKE_CURRENT_SOURCE_DIR}/project_tool.hpp ) 
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_project.pyx ${CMAKE_CURRENT_SOURCE_DIR}/project_tool.hpp )

  add_custom_command(
    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/_cosmomath.cpp
    COMMAND ${CYTHON} --cplus -o ${CMAKE_CURRENT_BINARY_DIR}/_cosmomath.cpp ${CMAKE_CURRENT_SOURCE_DIR}/_cosmomath.pyx
-    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_cosmomath.pyx ) 
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/_cosmomath.pyx )
 ENDIF(CYTHON)


@ -43,11 +43,11 @@ add_library(_cosmo_cic MODULE ${CMAKE_CURRENT_BINARY_DIR}/_cosmo_cic.cpp)
 add_library(_fast_interp MODULE ${CMAKE_CURRENT_BINARY_DIR}/_fast_interp.cpp)
 add_library(_project MODULE ${CMAKE_CURRENT_BINARY_DIR}/_project.cpp)
 add_library(_cosmomath MODULE ${CMAKE_CURRENT_BINARY_DIR}/_cosmomath.cpp)
-target_include_directories(_cosmotool PRIVATE ${PYTHON_INCLUDES}) 
-target_include_directories(_cosmo_power PRIVATE ${PYTHON_INCLUDES}) 
-target_include_directories(_cosmo_cic PRIVATE ${PYTHON_INCLUDES}) 
-target_include_directories(_fast_interp PRIVATE ${PYTHON_INCLUDES}) 
-target_include_directories(_project PRIVATE ${PYTHON_INCLUDES}) 
+target_include_directories(_cosmotool PRIVATE ${PYTHON_INCLUDES})
+target_include_directories(_cosmo_power PRIVATE ${PYTHON_INCLUDES})
+target_include_directories(_cosmo_cic PRIVATE ${PYTHON_INCLUDES})
+target_include_directories(_fast_interp PRIVATE ${PYTHON_INCLUDES})
+target_include_directories(_project PRIVATE ${PYTHON_INCLUDES})
 target_include_directories(_cosmomath PRIVATE ${PYTHON_INCLUDES})

 SET(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Bsymbolic-functions")
@ -114,10 +114,19 @@ endif (WIN32 AND NOT CYGWIN)

 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cosmotool/config.py.in ${CMAKE_CURRENT_BINARY_DIR}/cosmotool/config.py @ONLY)

-INSTALL(TARGETS 
+INSTALL(TARGETS
  ${ct_TARGETS}
+  COMPONENT python
  LIBRARY DESTINATION ${PYTHON_SITE_PACKAGES}/cosmotool
 )

-INSTALL(DIRECTORY cosmotool ${CMAKE_CURRENT_BINARY_DIR}/cosmotool DESTINATION ${PYTHON_SITE_PACKAGES} 
+INSTALL(DIRECTORY cosmotool ${CMAKE_CURRENT_BINARY_DIR}/cosmotool
+        COMPONENT python DESTINATION ${PYTHON_SITE_PACKAGES}
        FILES_MATCHING PATTERN "*.py")
+
+add_custom_target(
+    python-install
+    DEPENDS ${ct_TARGETS}
+    COMMAND "${CMAKE_COMMAND}" -DCMAKE_INSTALL_COMPONENT=python -P
+            "${CMAKE_BINARY_DIR}/cmake_install.cmake")
+
--- a/python/_cosmotool.pyx
+++ b/python/_cosmotool.pyx
@ -31,7 +31,7 @@ cdef extern from "loadSimu.hpp" namespace "CosmoTool":

    bool noAuto

-  cdef const int NEED_GADGET_ID 
+  cdef const int NEED_GADGET_ID
  cdef const int NEED_POSITION
  cdef const int NEED_VELOCITY
  cdef const int NEED_TYPE
@ -57,36 +57,36 @@ class PySimulationBase(object):
  """
  This is the base class to representation Simulation in CosmoTool/python.
  """
-  
+
  def getPositions(self):
    """
    getPositions(self)
-    
+
    Returns:
-        A list of three arrays holding the positions of the particles. 
+        A list of three arrays holding the positions of the particles.
        The i-th element is the i-th coordinate of each particle.
        It may be None if the positions were not requested.
    """
    raise NotImplementedError("getPositions is not implemented")
-    
+
  def getVelocities(self):
    """
    getVelocities(self)
-    
+
    Returns:
-        A list of three arrays holding the velocities of the particles. 
+        A list of three arrays holding the velocities of the particles.
        The i-th element is the i-th coordinate of each particle.
        It may be None if the velocities were not requested.
    """
    raise NotImplementedError("getVelocities is not implemented")
-    
+
  def getIdentifiers(self):
    """
    getIdentifiers(self)
-    
+
    Returns:
        Returns an integer array that hold the unique identifiers of
-        each particle. 
+        each particle.
        It may be None if the identifiers were not requested.
    """
    raise NotImplementedError("getIdentifiers is not implemented")
@ -94,10 +94,10 @@ class PySimulationBase(object):
  def getTypes(self):
    """
    getTypes(self)
-    
+
    Returns:
        Returns an integer array that hold the type of
-        each particle. 
+        each particle.
        It may be None if the types were not requested.
    """
    raise NotImplementedError("getTypes is not implemented")
@ -105,27 +105,27 @@ class PySimulationBase(object):
  def getOmega_M(self):
    """
    getOmega_M(self)
-    
+
    Returns:
        the mean matter density in the simulation, with respect
        to the critical density.
-    """ 
+    """
    raise NotImplementedError("getOmega_M is not implemented")
-  
+
  def getOmega_Lambda(self):
    """
    getOmega_Lambda(self)
-    
+
    Returns:
        the mean dark energy density in the simulation, with respect
        to the critical density.
-    """ 
+    """
    raise NotImplementedError("getOmega_Lambda is not implemented")

  def getTime(self):
    """
    getTime(self)
-    
+
    Returns:
        the time the snapshot was taken in the simulation. It can
        have various units depending on the file format.
@ -135,7 +135,7 @@ class PySimulationBase(object):
  def getHubble(self):
    """
    getHubble(self)
-    
+
    Returns:
        the hubble constant in unit of 100 km/s/Mpc
    """
@ -144,7 +144,7 @@ class PySimulationBase(object):
  def getBoxsize(self):
    """
    getBoxsize(self)
-    
+
    Returns:
        the size of the simulation box. The length unit is not fixed,
        though it is customary to have it in Mpc/h if the loader has
@ -155,7 +155,7 @@ class PySimulationBase(object):
  def getMasses(self):
    """
    getMasses(self)
-    
+
    Returns:
        an array with the masses of each particles, in unspecified unit that
        depend on the loader.
@ -165,7 +165,7 @@ class PySimulationBase(object):
 cdef class Simulation:
    """
    Simulation()
-    
+
    Class that directly manages internal loaded data obtained from a loader
    """

@ -180,7 +180,7 @@ cdef class Simulation:
    property BoxSize:
        def __get__(Simulation self):
            return self.data.BoxSize
-        
+
    property time:
        def __get__(Simulation self):
            return self.data.time
@ -192,15 +192,15 @@ cdef class Simulation:
    property Omega_M:
        def __get__(Simulation self):
            return self.data.Omega_M
-        
+
    property Omega_Lambda:
        def __get__(Simulation self):
            return self.data.Omega_Lambda
-        
+
    property positions:
        def __get__(Simulation self):
            return self.positions
-        
+
    property velocities:
        def __get__(Simulation self):
            return self.velocities
@ -216,7 +216,7 @@ cdef class Simulation:
    property masses:
        def __get__(Simulation self):
            return self.masses
-        
+
    property numParticles:
        def __get__(Simulation self):
            return self.data.NumPart
@ -228,7 +228,7 @@ cdef class Simulation:

    def __cinit__(Simulation self):
        self.data = <SimuData *>0
-  
+
    def __dealloc__(Simulation self):
        if self.data != <SimuData *>0:
          del self.data
@ -237,40 +237,43 @@ cdef class Simulation:
 class PySimulationAdaptor(PySimulationBase):
  """
  PySimulationAdaptor(PySimulationBase_)
-  
+
  This class is an adaptor for an internal type to the loader. It defines
  all the methods of PySimulationBase.
-  
+
  Attributes:
    simu: a Simulation_ object
  """
  def __init__(self,sim):
    self.simu = sim

+  def getNumParticles(self):
+    return self.simu.numParticles
+
  def getBoxsize(self):
    return self.simu.BoxSize
-    
+
  def getPositions(self):
    return self.simu.positions

  def getTypes(self):
    return self.simu.types
-    
+
  def getVelocities(self):
    return self.simu.velocities
-    
+
  def getIdentifiers(self):
    return self.simu.identifiers

  def getTime(self):
    return self.simu.time
-    
+
  def getHubble(self):
    return self.simu.Hubble
-    
+
  def getOmega_M(self):
    return self.simu.Omega_M
-    
+
  def getOmega_Lambda(self):
    return self.simu.Omega_Lambda

@ -281,7 +284,7 @@ cdef class ArrayWrapper:
    cdef void* data_ptr
    cdef np.uint64_t size
    cdef int type_array
-    
+
    cdef set_data(self, np.uint64_t size, int type_array, void* data_ptr):
        """ Set the data of the array
            This cannot be done in the constructor as it must recieve C-level
@ -294,22 +297,22 @@ cdef class ArrayWrapper:
        self.data_ptr = data_ptr
        self.size = size
        self.type_array = type_array
-     
+
    def __array__(self):
        """ Here we use the __array__ method, that is called when numpy
 tries to get an array from the object."""
        cdef np.npy_intp shape[1]
-        
+
        shape[0] = <np.npy_intp> self.size
        # Create a 1D array, of length 'size'
        ndarray = np.PyArray_SimpleNewFromData(1, shape, self.type_array, self.data_ptr)
        return ndarray
-     
+
    def __dealloc__(self):
        """ Frees the array. This is called by Python when all the
 references to the object are gone. """
        pass
- 
+
 cdef object wrap_array(void *p, np.uint64_t s, int typ):
    cdef np.ndarray ndarray
    cdef ArrayWrapper wrapper
@ -319,7 +322,7 @@ cdef object wrap_array(void *p, np.uint64_t s, int typ):
    ndarray = np.array(wrapper, copy=False)
    ndarray.base = <PyObject*> wrapper
    Py_INCREF(wrapper)
-    
+
    return ndarray


@ -368,7 +371,7 @@ def loadGadget(str filename, int snapshot_id, int gadgetFormat = 1, bool loadPos
    """loadGadget(filename, snapshot_id, gadgetFormat = 1, loadPosition=True, loadVelocity=False, loadId=False, loadType=False)

    This function loads Gadget-1 snapshot format.
-    
+
    If snapshot_id is negative then the snapshot is considered not to be part of
    a set of snapshots written by different cpu. Otherwise the filename is modified
    to reflect the indicated snapshot_id.
@ -376,16 +379,16 @@ def loadGadget(str filename, int snapshot_id, int gadgetFormat = 1, bool loadPos
    Arguments:
        filename (str): input filename
        snapshot_id (int): identifier of the gadget file if it is a multi-file snapshot
-    
+
    Keyword arguments:
        loadPosition (bool): whether to load positions
        loadVelocity (bool): whether to load velocities
        loadId (bool): whether to load unique identifiers
        loadType (bool): whether to set types to particles
        loadMass (bool): whether to set the mass of particles
-    
+
    Returns:
-        an PySimulationAdaptor instance. 
+        an PySimulationAdaptor instance.
 """

    cdef int flags
@ -419,13 +422,13 @@ def loadParallelGadget(object filename_list, int gadgetFormat = 1, bool loadPosi

  Arguments:
    filename (list): a list or tuple of filenames to load in parallel
-  
+
  Keyword arguments:
    loadPosition (bool): indicate to load positions
    loadVelocity (bool): indicate to load velocities
    loadId (bool): indicate to load id
    loadType (bool): indicate to load particle types
-    
+
  Returns:
    It loads a gadget-1 snapshot and return a cosmotool.PySimulationBase_ object.
  """
@ -453,16 +456,16 @@ def loadParallelGadget(object filename_list, int gadgetFormat = 1, bool loadPosi
  data = alloc_simudata(num_files)
  for i,l in enumerate(filename_list):
    filenames[i] = l.encode('utf-8')
-  
+
  with nogil:
    for i in prange(num_files):
      local_data = loadGadgetMulti_safe(filenames[i], flags, gadgetFormat)
      data[i] = local_data
 #      data[i] = loadGadgetMulti(filenames[i].c_str(), -1, flags)
-  
+
  out_arrays = []
  for i in xrange(num_files):
-    if data[i] == <SimuData*>0:        
+    if data[i] == <SimuData*>0:
      out_arrays.append(None)
    else:
      out_arrays.append(PySimulationAdaptor(wrap_simudata(data[i], flags)))
@ -473,10 +476,10 @@ def loadParallelGadget(object filename_list, int gadgetFormat = 1, bool loadPosi

 def writeGadget(str filename, object simulation):
  """writeGadget(filename, simulation)
-  
-  This function attempts to write the content of the simulation object into 
+
+  This function attempts to write the content of the simulation object into
  a file named `filename` using a Gadget-1 file format.
-  
+
  Arguments:
    filename (str): output filename
    simulation (PySimulationBase): a simulation object
@ -486,23 +489,23 @@ def writeGadget(str filename, object simulation):
  cdef np.ndarray[np.int64_t, ndim=1] ids
  cdef np.int64_t NumPart
  cdef int j
-  
+
  if not isinstance(simulation,PySimulationBase):
    raise TypeError("Second argument must be of type SimulationBase")
-  
+
  NumPart = simulation.positions[0].size
  simdata.noAuto = True
-  
+
  for j in xrange(3):
    pos = simulation.getPositions()[j]
    vel = simulation.getVelocities()[j]
-    
+
    if pos.size != NumPart or vel.size != NumPart:
      raise ValueError("Invalid number of particles")
-    
+
    simdata.Pos[j] = <float *>pos.data
    simdata.Vel[j] = <float *>vel.data
-  
+
  ids = simulation.getIdentifiers()
  simdata.Id = <int64_t *>ids.data
  simdata.BoxSize = simulation.getBoxsize()
@ -519,21 +522,21 @@ def writeGadget(str filename, object simulation):
 def loadRamses(str basepath, int snapshot_id, int cpu_id, bool doublePrecision = False, bool loadPosition = True, bool loadVelocity = False, bool loadId = False, bool loadMass = False):
  """ loadRamses(basepath, snapshot_id, cpu_id, doublePrecision = False, loadPosition = True, loadVelocity = False)
  Loads the indicated snapshot based on the cpu id, snapshot id and basepath. It is important to specify the correct precision in doublePrecision otherwise the loading will fail. There is no way of auto-detecting properly the precision of the snapshot file.
-  
+
  Args:
    basepath (str): the base directory of the snapshot
    snapshot_id (int): the snapshot id
    cpu_id (int): the cpu id of the file to load
-  
+
  Keyword args:
    doublePrecision (bool): By default it is False, thus singlePrecision
    loadPosition (bool): Whether to load positions
    loadVelocity (bool): Whether to load velocities
    loadId (bool): Whether to load identifiers
-    loadMass (bool): Whether to load mass value 
-    
+    loadMass (bool): Whether to load mass value
+
  Returns:
-    An object derived from PySimulationBase_. 
+    An object derived from PySimulationBase_.
 """
  cdef int flags
  cdef SimuData *data
@ -549,7 +552,7 @@ def loadRamses(str basepath, int snapshot_id, int cpu_id, bool doublePrecision =
  if loadMass:
     flags |= NEED_MASS

-  encpath = basepath.encode('utf-8') 
+  encpath = basepath.encode('utf-8')
  try:
    data = loadRamsesSimu(encpath, snapshot_id, cpu_id, doublePrecision, flags)
    if data == <SimuData*>0:
@ -558,4 +561,4 @@ def loadRamses(str basepath, int snapshot_id, int cpu_id, bool doublePrecision =
    raise RuntimeError(str(e) + ' (check the float precision in snapshot)')

  return PySimulationAdaptor(wrap_simudata(data, flags))
-  
+
--- a/python/_project.pyx
+++ b/python/_project.pyx
@ -25,8 +25,8 @@ cdef extern from "openmp.hpp" namespace "CosmoTool":
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
-cdef void interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y, 
-                                        DTYPE_t z, 
+cdef void interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
+                                        DTYPE_t z,
                                        DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil:

    cdef int Ngrid = d.shape[0]
@ -84,8 +84,8 @@ cdef void interp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
-cdef void ngp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y, 
-                                 DTYPE_t z, 
+cdef void ngp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
+                                 DTYPE_t z,
                                 DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval) nogil:

    cdef int Ngrid = d.shape[0]
@ -108,14 +108,14 @@ cdef void ngp3d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
    iy = iy%Ngrid
    iz = iz%Ngrid

-    retval[0] = d[ix ,iy ,iz ] 
+    retval[0] = d[ix ,iy ,iz ]


@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
-cdef void ngp3d_INTERNAL(DTYPE_t x, DTYPE_t y, 
-                         DTYPE_t z, 
+cdef void ngp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
+                         DTYPE_t z,
                         DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval, DTYPE_t inval) nogil:

    cdef int Ngrid = d.shape[0]
@ -137,16 +137,16 @@ cdef void ngp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
      retval[0] = inval
      return

-    retval[0] = d[ix ,iy ,iz ] 
+    retval[0] = d[ix ,iy ,iz ]


@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
-cdef void interp3d_INTERNAL(DTYPE_t x, DTYPE_t y, 
-                               DTYPE_t z, 
+cdef void interp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
+                               DTYPE_t z,
                               DTYPE_t[:,:,:] d, DTYPE_t Lbox, DTYPE_t *retval, DTYPE_t inval) nogil:
-    
+
    cdef int Ngrid = d.shape[0]
    cdef DTYPE_t inv_delta = Ngrid/Lbox
    cdef int ix, iy, iz
@ -193,13 +193,13 @@ cdef void interp3d_INTERNAL(DTYPE_t x, DTYPE_t y,
        d[ix+1,iy+1,iz+1] * f[1][1][1]

@cython.boundscheck(False)
-def interp3d(x not None, y not None, 
+def interp3d(x not None, y not None,
             z not None,
             npx.ndarray[DTYPE_t, ndim=3] d not None, DTYPE_t Lbox,
             bool periodic=False, bool centered=True, bool ngp=False, DTYPE_t inval = 0):
    """ interp3d(x,y,z,d,Lbox,periodic=False,centered=True,ngp=False) -> interpolated values
-    
-    Compute the tri-linear interpolation of the given field (d) at the given position (x,y,z). It assumes that they are box-centered coordinates. So (x,y,z) == (0,0,0) is equivalent to the pixel at (Nx/2,Ny/2,Nz/2) with Nx,Ny,Nz = d.shape. If periodic is set, it assumes the box is periodic 
+
+    Compute the tri-linear interpolation of the given field (d) at the given position (x,y,z). It assumes that they are box-centered coordinates. So (x,y,z) == (0,0,0) is equivalent to the pixel at (Nx/2,Ny/2,Nz/2) with Nx,Ny,Nz = d.shape. If periodic is set, it assumes the box is periodic
 """
    cdef npx.ndarray[DTYPE_t] out
    cdef DTYPE_t[:] out_slice
@ -227,12 +227,12 @@ def interp3d(x not None, y not None,
    if type(x) == np.ndarray or type(y) == np.ndarray or type(z) == np.ndarray:
        if type(x) != np.ndarray or type(y) != np.ndarray or type(z) != np.ndarray:
            raise ValueError("All or no array. No partial arguments")
-        
+
        ax = x
        ay = y
        az = z
        assert ax.size == ay.size and ax.size == az.size
-        
+
        out = np.empty(x.shape, dtype=DTYPE)
        out_slice = out
        in_slice = d
@ -280,10 +280,10 @@ cdef DTYPE_t interp2d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,

    rx = (inv_delta*x + Ngrid/2)
    ry = (inv_delta*y + Ngrid/2)
-    
+
    ix = int(floor(rx))
    iy = int(floor(ry))
-    
+
    rx -= ix
    ry -= iy

@ -291,13 +291,13 @@ cdef DTYPE_t interp2d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
      ix += Ngrid
    while iy < 0:
      iy += Ngrid
-    
+
    jx = (ix+1)%Ngrid
    jy = (iy+1)%Ngrid
-    
+
    assert ((ix >= 0) and ((jx) < Ngrid))
    assert ((iy >= 0) and ((jy) < Ngrid))
-    
+
    f[0][0] = (1-rx)*(1-ry)
    f[1][0] = (  rx)*(1-ry)
    f[0][1] = (1-rx)*(  ry)
@ -314,7 +314,7 @@ cdef DTYPE_t interp2d_INTERNAL_periodic(DTYPE_t x, DTYPE_t y,
@cython.cdivision(True)
 cdef DTYPE_t interp2d_INTERNAL(DTYPE_t x, DTYPE_t y,
                               npx.ndarray[DTYPE_t, ndim=2] d, DTYPE_t Lbox) except? 0:
-    
+
    cdef int Ngrid = d.shape[0]
    cdef DTYPE_t inv_delta = Ngrid/Lbox
    cdef int ix, iy
@ -348,7 +348,7 @@ cdef DTYPE_t interp2d_INTERNAL(DTYPE_t x, DTYPE_t y,
        d[ix+1,iy  ] * f[1][0] + \
        d[ix  ,iy+1] * f[0][1] + \
        d[ix+1,iy+1] * f[1][1]
-        
+
 def interp2d(x not None, y not None,
             npx.ndarray[DTYPE_t, ndim=2] d not None, DTYPE_t Lbox,
             bool periodic=False):
@ -362,11 +362,11 @@ def interp2d(x not None, y not None,
    if type(x) == np.ndarray or type(y) == np.ndarray:
        if type(x) != np.ndarray or type(y) != np.ndarray:
            raise ValueError("All or no array. No partial arguments")
-        
+
        ax = x
        ay = y
-        assert ax.size == ay.size 
-        
+        assert ax.size == ay.size
+
        out = np.empty(x.shape, dtype=DTYPE)
        if periodic:
          for i in range(ax.size):
@ -381,8 +381,8 @@ def interp2d(x not None, y not None,
        return interp2d_INTERNAL_periodic(x, y, d, Lbox)
      else:
        return interp2d_INTERNAL(x, y, d, Lbox)
-        
- 
+
+
@cython.boundscheck(False)
@cython.cdivision(True)
 cdef void INTERNAL_project_cic_no_mass(DTYPE_t[:,:,:] g,
@ -450,7 +450,7 @@ cdef void INTERNAL_project_cic_no_mass_periodic(DTYPE_t[:,:,:] g,
        ag[b1[0],b[1],b[2]] += a[0]*c[1]*c[2]
        ag[b[0],b1[1],b[2]] += c[0]*a[1]*c[2]
        ag[b1[0],b1[1],b[2]] += a[0]*a[1]*c[2]
-        
+
        ag[b[0],b[1],b1[2]] += c[0]*c[1]*a[2]
        ag[b1[0],b[1],b1[2]] += a[0]*c[1]*a[2]
        ag[b[0],b1[1],b1[2]] += c[0]*a[1]*a[2]
@ -525,20 +525,21 @@ cdef void INTERNAL_project_cic_with_mass_periodic(DTYPE_t[:,:,:] g,
        g[b1[0],b[1],b[2]] += a[0]*c[1]*c[2]*m0
        g[b[0],b1[1],b[2]] += c[0]*a[1]*c[2]*m0
        g[b1[0],b1[1],b[2]] += a[0]*a[1]*c[2]*m0
-        
+
        g[b[0],b[1],b1[2]] += c[0]*c[1]*a[2]*m0
        g[b1[0],b[1],b1[2]] += a[0]*c[1]*a[2]*m0
        g[b[0],b1[1],b1[2]] += c[0]*a[1]*a[2]*m0
        g[b1[0],b1[1],b1[2]] += a[0]*a[1]*a[2]*m0

-       
-def project_cic(npx.ndarray[DTYPE_t, ndim=2] x not None, npx.ndarray[DTYPE_t, ndim=1] mass, int Ngrid,
-                double Lbox, bool periodic = False, centered=True):
-    """
-    project_cic(x array (N,3), mass (may be None), Ngrid, Lbox, periodict, centered=True)

-    This function does a Cloud-In-Cell projection of a 3d unstructured dataset. First argument is a Nx3 array of coordinates. 
+def project_cic(npx.ndarray[DTYPE_t, ndim=2] x not None, npx.ndarray[DTYPE_t, ndim=1] mass, int Ngrid,
+                double Lbox, bool periodic = False, centered=True, output=None):
+    """
+    project_cic(x array (N,3), mass (may be None), Ngrid, Lbox, periodict, centered=True, output=None)
+
+    This function does a Cloud-In-Cell projection of a 3d unstructured dataset. First argument is a Nx3 array of coordinates.
    Second argument is an optinal mass. Ngrid is the size output grid and Lbox is the physical size of the grid.
+    if output is not None, it must be a numpy array with dimension NgridxNgridxNgrid. The result will be accumulated in that array.
    """
    cdef npx.ndarray[DTYPE_t, ndim=3] g
    cdef double shifter
@ -558,7 +559,13 @@ def project_cic(npx.ndarray[DTYPE_t, ndim=2] x not None, npx.ndarray[DTYPE_t, nd
    if mass is not None and mass.shape[0] != x.shape[0]:
        raise ValueError("Mass array and coordinate array must have the same number of elements")

-    g = np.zeros((Ngrid,Ngrid,Ngrid),dtype=DTYPE)
+    if output is None:
+        g = np.zeros((Ngrid,Ngrid,Ngrid),dtype=DTYPE)
+    else:
+        if type(output) != np.ndarray:
+            raise ValueError("Invalid array type")
+        g = output
+
    cdef DTYPE_t[:,:,:] d_g = g
    cdef DTYPE_t[:,:] d_x = x

@ -569,7 +576,7 @@ def project_cic(npx.ndarray[DTYPE_t, ndim=2] x not None, npx.ndarray[DTYPE_t, nd
       else:
         d_mass = mass
         with nogil:
-           INTERNAL_project_cic_with_mass(d_g, d_x, d_mass, Ngrid, Lbox, shifter)       
+           INTERNAL_project_cic_with_mass(d_g, d_x, d_mass, Ngrid, Lbox, shifter)
    else:
       if mass is None:
         with nogil:
@ -577,13 +584,13 @@ def project_cic(npx.ndarray[DTYPE_t, ndim=2] x not None, npx.ndarray[DTYPE_t, nd
       else:
         d_mass = mass
         with nogil:
-          INTERNAL_project_cic_with_mass_periodic(d_g, d_x, d_mass, Ngrid, Lbox, shifter)       
-         
+          INTERNAL_project_cic_with_mass_periodic(d_g, d_x, d_mass, Ngrid, Lbox, shifter)
+
    return g

 def tophat_fourier_internal(npx.ndarray[DTYPE_t, ndim=1] x not None):
    cdef int i
-    cdef npx.ndarray[DTYPE_t] y    
+    cdef npx.ndarray[DTYPE_t] y
    cdef DTYPE_t x0

    y = np.empty(x.size, dtype=DTYPE)
@ -609,7 +616,7 @@ def tophat_fourier(x not None):

    return b.reshape(x.shape)

-    
+

@cython.boundscheck(False)
@cython.cdivision(True)
@ -659,25 +666,25 @@ cdef DTYPE_t cube_integral_trilin(DTYPE_t u[3], DTYPE_t u0[3], int r[1], DTYPE_t
        if tmp_a < alpha_max:
            alpha_max = tmp_a
            j = i
-     
+
    I = compute_projection(vertex_value, u, u0, alpha_max)
-    
+
    for i in xrange(3):
        u0[i] += u[i]*alpha_max

    # alpha_max is the integration length
    # we integrate between 0 and alpha_max (curvilinear coordinates)
    r[0] = j
-    
+
    return I

@cython.boundscheck(False)
 cdef DTYPE_t integrator0(DTYPE_t[:,:,:] density,
                         DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], DTYPE_t alpha_max) nogil:
    cdef DTYPE_t d
-    
+
    d = density[iu0[0], iu0[1], iu0[2]]
-    
+
    return cube_integral(u, u0, jumper, alpha_max)*d

@cython.boundscheck(False)
@ -687,7 +694,7 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density,
    cdef DTYPE_t d
    cdef int a[3][2]
    cdef int i
-    
+
    for i in xrange(3):
      a[i][0] = iu0[i]
      a[i][1] = iu0[i]+1
@ -705,14 +712,14 @@ cdef DTYPE_t integrator1(DTYPE_t[:,:,:] density,
    return cube_integral_trilin(u, u0, jumper, vertex_value, alpha_max)


-    
+
@cython.boundscheck(False)
 cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
                             DTYPE_t a_u[3],
                             DTYPE_t min_distance,
                             DTYPE_t max_distance, DTYPE_t[:] shifter, int integrator_id) nogil except? 0:

-    cdef DTYPE_t u[3] 
+    cdef DTYPE_t u[3]
    cdef DTYPE_t ifu0[3]
    cdef DTYPE_t u0[3]
    cdef DTYPE_t utot[3]
@ -724,7 +731,7 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
    cdef int completed
    cdef DTYPE_t I0, d, dist2, delta, s, max_distance2
    cdef int jumper[1]
-    
+
    cdef DTYPE_t (*integrator)(DTYPE_t[:,:,:],
                               DTYPE_t u[3], DTYPE_t u0[3], int u_delta[3], int iu0[3], int jumper[1], DTYPE_t alpha_max) nogil

@ -747,7 +754,7 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
        if (not ((iu0[i]>= 0) and (iu0[i] < N))):
          with gil:
            raise RuntimeError("iu0[%d] = %d !!" % (i,iu0[i]))
-            
+
        if (not (u0[i]>=0 and u0[i]<=1)):
          with gil:
            raise RuntimeError("u0[%d] = %g !" % (i,u0[i]))
@ -756,7 +763,7 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
    if ((iu0[0] >= N-1) or (iu0[0] <= 0) or
        (iu0[1] >= N-1) or (iu0[1] <= 0) or
        (iu0[2] >= N-1) or (iu0[2] <= 0)):
-      completed = 1 
+      completed = 1

    I0 = 0
    jumper[0] = 0
@ -771,8 +778,8 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
            iu0[jumper[0]] += 1
            u0[jumper[0]] = 0

-            
-        if ((iu0[0] >= N-1) or (iu0[0] <= 0) or 
+
+        if ((iu0[0] >= N-1) or (iu0[0] <= 0) or
            (iu0[1] >= N-1) or (iu0[1] <= 0) or
            (iu0[2] >= N-1) or (iu0[2] <= 0)):
            completed = 1
@ -787,7 +794,7 @@ cdef DTYPE_t C_line_of_sight_projection(DTYPE_t[:,:,:] density,
            #delta = sqrt(dist2) - max_distance
            #I0 -= d*delta
            completed = 1
-            
+
    return I0

 def line_of_sight_projection(DTYPE_t[:,:,:] density not None,
@ -795,18 +802,18 @@ def line_of_sight_projection(DTYPE_t[:,:,:] density not None,
                             DTYPE_t min_distance,
                             DTYPE_t max_distance, DTYPE_t[:] shifter not None, int integrator_id=0):
  cdef DTYPE_t u[3]
-  
+
  u[0] = a_u[0]
  u[1] = a_u[1]
  u[2] = a_u[2]
-  
+
  return C_line_of_sight_projection(density,
                             u,
                             min_distance,
                             max_distance, shifter, integrator_id)

-cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density, 
-                double min_distance, double max_distance, 
+cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density,
+                double min_distance, double max_distance,
                DTYPE_t[:] shifter, int integrator_id) nogil:
  cdef DTYPE_t u0[3]

@ -814,7 +821,7 @@ cdef double _spherical_projloop(double theta, double phi, DTYPE_t[:,:,:] density
  u0[0] = cos(phi)*stheta
  u0[1] = sin(phi)*stheta
  u0[2] = cos(theta)
- 
+
  return C_line_of_sight_projection(density, u0, min_distance, max_distance, shifter, integrator_id)


@ -825,20 +832,20 @@ def spherical_projection(int Nside,
                         DTYPE_t max_distance, int progress=1, int integrator_id=0, DTYPE_t[:] shifter = None, int booster=-1):
    """
    spherical_projection(Nside, density, min_distance, max_distance, progress=1, integrator_id=0, shifter=None, booster=-1)
-    
+
    Keyword arguments:
      progress (int): show progress if it is equal to 1
      integrator_id (int): specify the order of integration along the line of shift
      shifter (DTYPE_t array): this is an array of size 3. It specifies the amount of shift to apply to the center, in unit of voxel
      booster (int): what is the frequency of refreshment of the progress bar. Small number decreases performance by locking the GIL.
-      
+

    Arguments:
      Nside (int): Nside of the returned map
      density (NxNxN array): this is the density field, expressed as a cubic array
      min_distance (float): lower bound of the integration
      max_distance (float): upper bound of the integration
-      
+
    Returns:
      an healpix map, as a 1-dimensional array.
    """
@ -853,11 +860,11 @@ def spherical_projection(int Nside,
    cdef long N, N0
    cdef double stheta
    cdef int tid
-    
+
    if shifter is None:
        shifter = view.array(shape=(3,), format=FORMAT_DTYPE, itemsize=sizeof(DTYPE_t))
        shifter[:] = 0
-        
+
    print("allocating map")
    outm_array = np.empty(hp.nside2npix(Nside),dtype=DTYPE)
    print("initializing views")
@ -870,10 +877,10 @@ def spherical_projection(int Nside,

    N = smp_get_max_threads()
    N0 = outm.size
-    
+
    if booster < 0:
      booster = 1#000
-    
+
    job_done = view.array(shape=(N,), format="i", itemsize=sizeof(int))
    job_done[:] = 0
    theta,phi = hp.pix2ang(Nside, np.arange(N0))