Use PYFFTW for ICgen

2014-07-03 15:36:58 +02:00 · 2014-07-03 15:36:58 +02:00 · 33806a690f
commit 33806a690f
parent f2b81d863f
4 changed files with 101 additions and 38 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -47,31 +47,7 @@ find_program(CYTHON cython)
 find_package(Boost 1.53)
-if (ENABLE_SHARP)
+include(${CMAKE_SOURCE_DIR}/external/external_build.cmake)
  SET(SHARP_SOURCE ${CMAKE_SOURCE_DIR}/external/sharp)
  SET(DEP_BUILD ${CMAKE_SOURCE_DIR}/external/sharp/auto)
  ExternalProject_Add(sharp
    SOURCE_DIR ${SHARP_SOURCE}
    BUILD_IN_SOURCE 1 
    CONFIGURE_COMMAND ${SHARP_SOURCE}/configure "CC=${CMAKE_C_COMPILER}" "CXX=${CMAKE_CXX_COMPILER}" --prefix=${DEP_BUILD}
    BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
    INSTALL_COMMAND echo "No install"
  )
  SET(CUTILS_LIBRARY ${DEP_BUILD}/lib/libc_utils.a)
  SET(FFTPACK_LIBRARY ${DEP_BUILD}/lib/libfftpack.a)
  SET(SHARP_LIBRARY ${DEP_BUILD}/lib/libsharp.a)
  SET(SHARP_LIBRARIES ${SHARP_LIBRARY} ${FFTPACK_LIBRARY} ${CUTILS_LIBRARY})
  SET(SHARP_INCLUDE_PATH ${DEP_BUILD}/include)
 endif (ENABLE_SHARP)
 SET(OMPTL_BUILD_DIR ${CMAKE_BINARY_DIR}/omptl-prefix/src/omptl)
 ExternalProject_Add(omptl
  URL ${CMAKE_SOURCE_DIR}/external/omptl-20120422.tar.bz2
  CONFIGURE_COMMAND echo "No configure"
  BUILD_COMMAND echo "No build"
  INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${OMPTL_BUILD_DIR} ${CMAKE_BINARY_DIR}/external/stage/include/omptl
 )
 include_directories(${OMPTL_BUILD_DIR}/..)
 set(HDF5_FIND_COMPONENTS HL CXX)
 if(HDF5_ROOTDIR)
--- a/external/external_build.cmake
+++ b/external/external_build.cmake
@ -0,0 +1,29 @@
 if (ENABLE_SHARP)
  SET(SHARP_SOURCE ${CMAKE_SOURCE_DIR}/external/sharp)
  SET(DEP_BUILD ${CMAKE_SOURCE_DIR}/external/sharp/auto)
  ExternalProject_Add(sharp
    SOURCE_DIR ${SHARP_SOURCE}
    BUILD_IN_SOURCE 1 
    CONFIGURE_COMMAND ${SHARP_SOURCE}/configure "CC=${CMAKE_C_COMPILER}" "CXX=${CMAKE_CXX_COMPILER}" --prefix=${DEP_BUILD}
    BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
    INSTALL_COMMAND echo "No install"
  )
  SET(CUTILS_LIBRARY ${DEP_BUILD}/lib/libc_utils.a)
  SET(FFTPACK_LIBRARY ${DEP_BUILD}/lib/libfftpack.a)
  SET(SHARP_LIBRARY ${DEP_BUILD}/lib/libsharp.a)
  SET(SHARP_LIBRARIES ${SHARP_LIBRARY} ${FFTPACK_LIBRARY} ${CUTILS_LIBRARY})
  SET(SHARP_INCLUDE_PATH ${DEP_BUILD}/include)
 endif (ENABLE_SHARP)
 SET(OMPTL_BUILD_DIR ${CMAKE_BINARY_DIR}/omptl-prefix/src/omptl)
 ExternalProject_Add(omptl
  URL ${CMAKE_SOURCE_DIR}/external/omptl-20120422.tar.bz2
  CONFIGURE_COMMAND echo "No configure"
  BUILD_COMMAND echo "No build"
  INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${OMPTL_BUILD_DIR} ${CMAKE_BINARY_DIR}/external/stage/include/omptl
 )
 include_directories(${OMPTL_BUILD_DIR}/..)
--- a/python_sample/icgen/borgicgen.py
+++ b/python_sample/icgen/borgicgen.py
@ -7,7 +7,7 @@ import borgadaptor as ba
 def gen_posgrid(N, L):
    """ Generate an ordered lagrangian grid"""
-    ix = np.arange(N)*L/N
+    ix = (np.arange(N)*L/N).astype(np.float32)
    x = ix[:,None,None].repeat(N, axis=1).repeat(N, axis=2)
    y = ix[None,:,None].repeat(N, axis=0).repeat(N, axis=2)
@ -52,7 +52,7 @@ def compute_ref_power(L, N, cosmo, bins=10, range=(0,1), func='HU_WIGGLES'):
  p.normalize(cosmo['SIGMA8'])
  return bin_power(p.compute(k)*cosmo['h']**3, L, bins=bins, range=range)
-
+                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
 def run_generation(input_borg, a_borg, a_ic, cosmo, supersample=1, do_lpt2=True, shiftPixel=False, needvel=True):
    """ Generate particles and velocities from a BORG snapshot. Returns a tuple of 
    (positions,velocities,N,BoxSize,scale_factor)."""
@ -95,7 +95,9 @@ def run_generation(input_borg, a_borg, a_ic, cosmo, supersample=1, do_lpt2=True,
    print("velmul=%lg" % (cosmo['h']*velmul))
-    density = cgrowth.D(1)/cgrowth.D(a_borg)*np.fft.irfftn(lpt.dhat)*(supersample*N/L)**3
+    lpt.cube.dhat = lpt.dhat
    density = lpt.cube.irfft()    
    density *= (cgrowth.D(1)/cgrowth.D(a_borg))
    return posx,vel,density,N*supersample,L,a_ic,cosmo
@ -117,7 +119,9 @@ def whitify(density, L, cosmo, supergenerate=1, func='HU_WIGGLES'):
  Pk = build_Pk()
  Pk[0,0,0]=1
-  density_hat = np.fft.rfftn(density)*(L/N)**3
+  cube = CubeFT(N, L)
  cube.density = density
  density_hat = cube.rfft()
  density_hat /= np.sqrt(Pk)
  Ns = N*supergenerate
@ -152,6 +156,8 @@ def whitify(density, L, cosmo, supergenerate=1, func='HU_WIGGLES'):
    print np.where(np.isnan(density_hat_super))[0].size
  cube = CubeFT(Ns, L)
  cube.dhat = density_hat_super
  return np.fft.irfftn(density_hat_super)*Ns**1.5
--- a/python_sample/icgen/cosmogrowth.py
+++ b/python_sample/icgen/cosmogrowth.py
@ -1,7 +1,40 @@
 import multiprocessing
 import pyfftw
 import weakref
 import numpy as np
 import cosmolopy as cpy
 class CubeFT(object):
  def __init__(self, L, N, max_cpu=-1):
      self.N = N
      self.align = pyfftw.simd_alignment
      self.L = L
      self.max_cpu = multiprocessing.cpu_count() if max_cpu < 0 else max_cpu
      self._dhat = pyfftw.n_byte_align_empty((self.N,self.N,self.N/2+1), self.align, dtype='complex64')
      self._density = pyfftw.n_byte_align_empty((self.N,self.N,self.N), self.align, dtype='float32')
      self.irfft = pyfftw.FFTW(self._dhat, self._density, axes=(0,1,2), direction='FFTW_BACKWARD', threads=self.max_cpu, normalize_idft=False)
      self.rfft = pyfftw.FFTW(self._density, self._dhat, axes=(0,1,2), threads=self.max_cpu, normalize_idft=False)
  def rfft(self):
      return self.rfft()*(self.L/self.N)**3
  def irfft(self):
      return self.irfft()/self.L**3
  def get_dhat(self):
      return self._dhat
  def set_dhat(self, in_dhat):
      self._dhat[:] = in_dhat 
  dhat = property(get_dhat, set_dhat, None)
  def get_density(self):
      return self._density      
  def set_density(self, d):
      self._density[:] = d 
  density = property(get_density, set_density, None)
 class CosmoGrowth(object):
  def __init__(self, **cosmo):
@ -42,11 +75,20 @@ class CosmoGrowth(object):
 class LagrangianPerturbation(object):
-    def __init__(self,density,L, fourier=False, supersample=1):
+    def __init__(self,density,L, fourier=False, supersample=1, max_cpu=-1):
        self.L = L
        self.N = density.shape[0]
-        self.dhat = np.fft.rfftn(density)*(L/self.N)**3 if not fourier else density
+        
        self.max_cpu = max_cpu
        self.cube = CubeFT(self.L, self.N, max_cpu=max_cpu)
        if not fourier:
          self.cube.density = density
          self.dhat = self.cube.rfft().copy()
        else:
          self.dhat = density.copy()
        if supersample > 1:
          self.upgrade_sampling(supersample)
        self.ik = np.fft.fftfreq(self.N, d=L/self.N)*2*np.pi
@ -65,9 +107,11 @@ class LagrangianPerturbation(object):
        self.dhat = dhat_new
        self.N = N2
        self.cube = CubeFT(self.L, self.N, max_cpu=self.max_cpu)
    def _gradient(self, phi, direction):
-        return np.fft.irfftn(self._kdir(direction)*1j*phi)*(self.N/self.L)**3
+        self.cube.dhat = self._kdir(direction)*1j*phi 
        return self.cube.irfft()
    def lpt1(self, direction=0):
        k2 = self._get_k2()
@ -95,6 +139,14 @@ class LagrangianPerturbation(object):
        self.cache['k2'] = k2
        return k2
    def _do_irfft(self, array):
        self.cube.dhat = array
        return self.cube.irfft()
    def _do_rfft(self, array):
        self.cube.density = array
        return self.cube.rfft()
    def lpt2(self, direction=0):
        k2 = self._get_k2()
@ -104,14 +156,14 @@ class LagrangianPerturbation(object):
            print("Rebuilding potential...")
            div_phi2 = np.zeros((self.N,self.N,self.N), dtype=np.float64)
            for j in xrange(3):
-                q = np.fft.irfftn( self._kdir(j)**2*self.dhat / k2 )
+                q = self._do_irfft( self._kdir(j)**2*self.dhat / k2 ).copy()
                for i in xrange(j+1, 3):
-                    div_phi2 += q * np.fft.irfftn( self._kdir(i)**2*self.dhat / k2 )
+                    div_phi2 += q * self._do_irfft( self._kdir(i)**2*self.dhat / k2 )
-                    div_phi2 -= (np.fft.irfftn( self._kdir(j)*self._kdir(i)*self.dhat / k2 ))**2
+                    div_phi2 -= (self._do_irfft(self._kdir(j)*self._kdir(i)*self.dhat / k2 ) )**2
-            div_phi2 *= (self.N/self.L)**6
+            div_phi2 *= 1/self.L**6
-            phi2_hat = -np.fft.rfftn(div_phi2) * ((self.L/self.N)**3) / k2
+            phi2_hat = -self._do_rfft(div_phi2) / k2
-            self.cache['lpt2_potential'] = phi2_hat
+            #self.cache['lpt2_potential'] = phi2_hat
            del div_phi2
        else:
            phi2_hat = self.cache['lpt2_potential']