Use PYFFTW for ICgen

2014-07-03 15:36:58 +02:00 · 2014-07-03 15:36:58 +02:00 · 33806a690f
commit 33806a690f
parent f2b81d863f
4 changed files with 101 additions and 38 deletions
--- a/python_sample/icgen/cosmogrowth.py
+++ b/python_sample/icgen/cosmogrowth.py
@ -1,7 +1,40 @@
+import multiprocessing
+import pyfftw
 import weakref
 import numpy as np
 import cosmolopy as cpy

+class CubeFT(object):
+  def __init__(self, L, N, max_cpu=-1):
+  
+      self.N = N
+      self.align = pyfftw.simd_alignment
+      self.L = L
+      self.max_cpu = multiprocessing.cpu_count() if max_cpu < 0 else max_cpu
+      self._dhat = pyfftw.n_byte_align_empty((self.N,self.N,self.N/2+1), self.align, dtype='complex64')
+      self._density = pyfftw.n_byte_align_empty((self.N,self.N,self.N), self.align, dtype='float32')
+      self.irfft = pyfftw.FFTW(self._dhat, self._density, axes=(0,1,2), direction='FFTW_BACKWARD', threads=self.max_cpu, normalize_idft=False)
+      self.rfft = pyfftw.FFTW(self._density, self._dhat, axes=(0,1,2), threads=self.max_cpu, normalize_idft=False)
+
+  def rfft(self):
+      return self.rfft()*(self.L/self.N)**3
+      
+  def irfft(self):
+      return self.irfft()/self.L**3
+   
+  def get_dhat(self):
+      return self._dhat
+  def set_dhat(self, in_dhat):
+      self._dhat[:] = in_dhat 
+  dhat = property(get_dhat, set_dhat, None)
+      
+  def get_density(self):
+      return self._density      
+  def set_density(self, d):
+      self._density[:] = d 
+  density = property(get_density, set_density, None)
+      
+
 class CosmoGrowth(object):

  def __init__(self, **cosmo):
@ -42,11 +75,20 @@ class CosmoGrowth(object):

 class LagrangianPerturbation(object):

-    def __init__(self,density,L, fourier=False, supersample=1):
+    def __init__(self,density,L, fourier=False, supersample=1, max_cpu=-1):
    
        self.L = L
        self.N = density.shape[0]
-        self.dhat = np.fft.rfftn(density)*(L/self.N)**3 if not fourier else density
+        
+        self.max_cpu = max_cpu
+        self.cube = CubeFT(self.L, self.N, max_cpu=max_cpu)
+        
+        if not fourier:
+          self.cube.density = density
+          self.dhat = self.cube.rfft().copy()
+        else:
+          self.dhat = density.copy()
+
        if supersample > 1:
          self.upgrade_sampling(supersample)
        self.ik = np.fft.fftfreq(self.N, d=L/self.N)*2*np.pi
@ -65,9 +107,11 @@ class LagrangianPerturbation(object):

        self.dhat = dhat_new
        self.N = N2
+        self.cube = CubeFT(self.L, self.N, max_cpu=self.max_cpu)
        
    def _gradient(self, phi, direction):
-        return np.fft.irfftn(self._kdir(direction)*1j*phi)*(self.N/self.L)**3
+        self.cube.dhat = self._kdir(direction)*1j*phi 
+        return self.cube.irfft()
            
    def lpt1(self, direction=0):
        k2 = self._get_k2()
@ -95,6 +139,14 @@ class LagrangianPerturbation(object):
            
        self.cache['k2'] = k2
        return k2
+        
+    def _do_irfft(self, array):
+        self.cube.dhat = array
+        return self.cube.irfft()
+        
+    def _do_rfft(self, array):
+        self.cube.density = array
+        return self.cube.rfft()

    def lpt2(self, direction=0):
        k2 = self._get_k2()
@ -104,14 +156,14 @@ class LagrangianPerturbation(object):
            print("Rebuilding potential...")
            div_phi2 = np.zeros((self.N,self.N,self.N), dtype=np.float64)
            for j in xrange(3):
-                q = np.fft.irfftn( self._kdir(j)**2*self.dhat / k2 )
+                q = self._do_irfft( self._kdir(j)**2*self.dhat / k2 ).copy()
                for i in xrange(j+1, 3):
-                    div_phi2 += q * np.fft.irfftn( self._kdir(i)**2*self.dhat / k2 )
-                    div_phi2 -= (np.fft.irfftn( self._kdir(j)*self._kdir(i)*self.dhat / k2 ))**2
+                    div_phi2 += q * self._do_irfft( self._kdir(i)**2*self.dhat / k2 )
+                    div_phi2 -= (self._do_irfft(self._kdir(j)*self._kdir(i)*self.dhat / k2 ) )**2

-            div_phi2 *= (self.N/self.L)**6
-            phi2_hat = -np.fft.rfftn(div_phi2) * ((self.L/self.N)**3) / k2
-            self.cache['lpt2_potential'] = phi2_hat
+            div_phi2 *= 1/self.L**6
+            phi2_hat = -self._do_rfft(div_phi2) / k2
+            #self.cache['lpt2_potential'] = phi2_hat
            del div_phi2
        else:
            phi2_hat = self.cache['lpt2_potential']