Fixed error message. Imported libsharp

This commit is contained in:
Guilhem Lavaux 2012-11-10 08:59:10 -05:00
parent 3aa898e636
commit bddd26a5ca
65 changed files with 18489 additions and 1 deletions

34
external/sharp/libfftpack/README vendored Normal file
View file

@ -0,0 +1,34 @@
ls_fft description:
This package is intended to calculate one-dimensional real or complex FFTs
with high accuracy and good efficiency even for lengths containing large
prime factors.
The code is written in C, but a Fortran wrapper exists as well.
Before any FFT is executed, a plan must be generated for it. Plan creation
is designed to be fast, so that there is no significant overhead if the
plan is only used once or a few times.
The main component of the code is based on Paul N. Swarztrauber's FFTPACK in the
double precision incarnation by Hugh C. Pumphrey
(http://www.netlib.org/fftpack/dp.tgz).
I replaced the iterative sine and cosine calculations in radfg() and radbg()
by an exact calculation, which slightly improves the transform accuracy for
real FFTs with lengths containing large prime factors.
Since FFTPACK becomes quite slow for FFT lengths with large prime factors
(in the worst case of prime lengths it reaches O(n*n) complexity), I
implemented Bluestein's algorithm, which computes a FFT of length n by
several FFTs of length n2>=2*n-1 and a convolution. Since n2 can be chosen
to be highly composite, this algorithm is more efficient if n has large
prime factors. The longer FFTs themselves are then computed using the FFTPACK
routines.
Bluestein's algorithm was implemented according to the description at
http://en.wikipedia.org/wiki/Bluestein's_FFT_algorithm.
Thread-safety:
All routines can be called concurrently; all information needed by ls_fft
is stored in the plan variable. However, using the same plan variable on
multiple threads simultaneously is not supported and will lead to data
corruption.

173
external/sharp/libfftpack/bluestein.c vendored Normal file
View file

@ -0,0 +1,173 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
* Copyright (C) 2005, 2006, 2007, 2008 Max-Planck-Society
* \author Martin Reinecke
*/
#include <math.h>
#include <stdlib.h>
#include "fftpack.h"
#include "bluestein.h"
/* returns the sum of all prime factors of n */
size_t prime_factor_sum (size_t n)
{
size_t result=0,x,limit,tmp;
while (((tmp=(n>>1))<<1)==n)
{ result+=2; n=tmp; }
limit=(size_t)sqrt(n+0.01);
for (x=3; x<=limit; x+=2)
while ((tmp=(n/x))*x==n)
{
result+=x;
n=tmp;
limit=(size_t)sqrt(n+0.01);
}
if (n>1) result+=n;
return result;
}
/* returns the smallest composite of 2, 3 and 5 which is >= n */
static size_t good_size(size_t n)
{
size_t f2, f23, f235, bestfac=2*n;
if (n<=6) return n;
for (f2=1; f2<bestfac; f2*=2)
for (f23=f2; f23<bestfac; f23*=3)
for (f235=f23; f235<bestfac; f235*=5)
if (f235>=n) bestfac=f235;
return bestfac;
}
void bluestein_i (size_t n, double **tstorage, size_t *worksize)
{
static const double pi=3.14159265358979323846;
size_t n2=good_size(n*2-1);
size_t m, coeff;
double angle, xn2;
double *bk, *bkf, *work;
double pibyn=pi/n;
*worksize=2+2*n+8*n2+16;
*tstorage = RALLOC(double,2+2*n+8*n2+16);
((size_t *)(*tstorage))[0]=n2;
bk = *tstorage+2;
bkf = *tstorage+2+2*n;
work= *tstorage+2+2*(n+n2);
/* initialize b_k */
bk[0] = 1;
bk[1] = 0;
coeff=0;
for (m=1; m<n; ++m)
{
coeff+=2*m-1;
if (coeff>=2*n) coeff-=2*n;
angle = pibyn*coeff;
bk[2*m] = cos(angle);
bk[2*m+1] = sin(angle);
}
/* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */
xn2 = 1./n2;
bkf[0] = bk[0]*xn2;
bkf[1] = bk[1]*xn2;
for (m=2; m<2*n; m+=2)
{
bkf[m] = bkf[2*n2-m] = bk[m] *xn2;
bkf[m+1] = bkf[2*n2-m+1] = bk[m+1] *xn2;
}
for (m=2*n;m<=(2*n2-2*n+1);++m)
bkf[m]=0.;
cffti (n2,work);
cfftf (n2,bkf,work);
}
void bluestein (size_t n, double *data, double *tstorage, int isign)
{
size_t n2=*((size_t *)tstorage);
size_t m;
double *bk, *bkf, *akf, *work;
bk = tstorage+2;
bkf = tstorage+2+2*n;
work= tstorage+2+2*(n+n2);
akf = tstorage+2+2*n+6*n2+16;
/* initialize a_k and FFT it */
if (isign>0)
for (m=0; m<2*n; m+=2)
{
akf[m] = data[m]*bk[m] - data[m+1]*bk[m+1];
akf[m+1] = data[m]*bk[m+1] + data[m+1]*bk[m];
}
else
for (m=0; m<2*n; m+=2)
{
akf[m] = data[m]*bk[m] + data[m+1]*bk[m+1];
akf[m+1] =-data[m]*bk[m+1] + data[m+1]*bk[m];
}
for (m=2*n; m<2*n2; ++m)
akf[m]=0;
cfftf (n2,akf,work);
/* do the convolution */
if (isign>0)
for (m=0; m<2*n2; m+=2)
{
double im = -akf[m]*bkf[m+1] + akf[m+1]*bkf[m];
akf[m ] = akf[m]*bkf[m] + akf[m+1]*bkf[m+1];
akf[m+1] = im;
}
else
for (m=0; m<2*n2; m+=2)
{
double im = akf[m]*bkf[m+1] + akf[m+1]*bkf[m];
akf[m ] = akf[m]*bkf[m] - akf[m+1]*bkf[m+1];
akf[m+1] = im;
}
/* inverse FFT */
cfftb (n2,akf,work);
/* multiply by b_k* */
if (isign>0)
for (m=0; m<2*n; m+=2)
{
data[m] = bk[m] *akf[m] - bk[m+1]*akf[m+1];
data[m+1] = bk[m+1]*akf[m] + bk[m] *akf[m+1];
}
else
for (m=0; m<2*n; m+=2)
{
data[m] = bk[m] *akf[m] + bk[m+1]*akf[m+1];
data[m+1] =-bk[m+1]*akf[m] + bk[m] *akf[m+1];
}
}

48
external/sharp/libfftpack/bluestein.h vendored Normal file
View file

@ -0,0 +1,48 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
* Copyright (C) 2005 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_BLUESTEIN_H
#define PLANCK_BLUESTEIN_H
#include "c_utils.h"
#ifdef __cplusplus
extern "C" {
#endif
size_t prime_factor_sum (size_t n);
void bluestein_i (size_t n, double **tstorage, size_t *worksize);
void bluestein (size_t n, double *data, double *tstorage, int isign);
#ifdef __cplusplus
}
#endif
#endif

833
external/sharp/libfftpack/fftpack.c vendored Normal file
View file

@ -0,0 +1,833 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
fftpack.c : A set of FFT routines in C.
Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
(Version 4, 1985).
C port by Martin Reinecke (2010)
*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "fftpack.h"
#define WA(x,i) wa[(i)+(x)*ido]
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
#define PM(a,b,c,d) { a=c+d; b=c-d; }
#define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
#define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; }
#define SCALEC(a,b) { a.r*=b; a.i*=b; }
#define CONJFLIPC(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; }
/* (a+ib) = conj(c+id) * (e+if) */
#define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; }
typedef struct {
double r,i;
} cmplx;
#define CONCAT(a,b) a ## b
#define X(arg) CONCAT(passb,arg)
#define BACKWARD
#include "fftpack_inc.c"
#undef BACKWARD
#undef X
#define X(arg) CONCAT(passf,arg)
#include "fftpack_inc.c"
#undef X
#undef CC
#undef CH
#define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))]
#define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))]
static void radf2 (size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=2;
size_t i, k, ic;
double ti2, tr2;
for (k=0; k<l1; k++)
PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1))
if ((ido&1)==0)
for (k=0; k<l1; k++)
{
CH( 0,1,k) = -CC(ido-1,k,1);
CH(ido-1,0,k) = CC(ido-1,k,0);
}
if (ido<=2) return;
for (k=0; k<l1; k++)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
MULPM (tr2,ti2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
PM (CH(i-1,0,k),CH(ic-1,1,k),CC(i-1,k,0),tr2)
PM (CH(i ,0,k),CH(ic ,1,k),ti2,CC(i ,k,0))
}
}
static void radf3(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=3;
static const double taur=-0.5, taui=0.86602540378443864676;
size_t i, k, ic;
double ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3;
for (k=0; k<l1; k++)
{
cr2=CC(0,k,1)+CC(0,k,2);
CH(0,0,k) = CC(0,k,0)+cr2;
CH(0,2,k) = taui*(CC(0,k,2)-CC(0,k,1));
CH(ido-1,1,k) = CC(0,k,0)+taur*cr2;
}
if (ido==1) return;
for (k=0; k<l1; k++)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
cr2=dr2+dr3;
ci2=di2+di3;
CH(i-1,0,k) = CC(i-1,k,0)+cr2;
CH(i ,0,k) = CC(i ,k,0)+ci2;
tr2 = CC(i-1,k,0)+taur*cr2;
ti2 = CC(i ,k,0)+taur*ci2;
tr3 = taui*(di2-di3);
ti3 = taui*(dr3-dr2);
PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr3)
PM(CH(i ,2,k),CH(ic ,1,k),ti3,ti2)
}
}
static void radf4(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=4;
static const double hsqt2=0.70710678118654752440;
size_t i, k, ic;
double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
for (k=0; k<l1; k++)
{
PM (tr1,CH(0,2,k),CC(0,k,3),CC(0,k,1))
PM (tr2,CH(ido-1,1,k),CC(0,k,0),CC(0,k,2))
PM (CH(0,0,k),CH(ido-1,3,k),tr2,tr1)
}
if ((ido&1)==0)
for (k=0; k<l1; k++)
{
ti1=-hsqt2*(CC(ido-1,k,1)+CC(ido-1,k,3));
tr1= hsqt2*(CC(ido-1,k,1)-CC(ido-1,k,3));
PM (CH(ido-1,0,k),CH(ido-1,2,k),CC(ido-1,k,0),tr1)
PM (CH( 0,3,k),CH( 0,1,k),ti1,CC(ido-1,k,2))
}
if (ido<=2) return;
for (k=0; k<l1; k++)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
MULPM(cr2,ci2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
MULPM(cr3,ci3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
MULPM(cr4,ci4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
PM(tr1,tr4,cr4,cr2)
PM(ti1,ti4,ci2,ci4)
PM(tr2,tr3,CC(i-1,k,0),cr3)
PM(ti2,ti3,CC(i ,k,0),ci3)
PM(CH(i-1,0,k),CH(ic-1,3,k),tr2,tr1)
PM(CH(i ,0,k),CH(ic ,3,k),ti1,ti2)
PM(CH(i-1,2,k),CH(ic-1,1,k),tr3,ti4)
PM(CH(i ,2,k),CH(ic ,1,k),tr4,ti3)
}
}
static void radf5(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=5;
static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
size_t i, k, ic;
double ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3,
dr4, dr5, cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
for (k=0; k<l1; k++)
{
PM (cr2,ci5,CC(0,k,4),CC(0,k,1))
PM (cr3,ci4,CC(0,k,3),CC(0,k,2))
CH(0,0,k)=CC(0,k,0)+cr2+cr3;
CH(ido-1,1,k)=CC(0,k,0)+tr11*cr2+tr12*cr3;
CH(0,2,k)=ti11*ci5+ti12*ci4;
CH(ido-1,3,k)=CC(0,k,0)+tr12*cr2+tr11*cr3;
CH(0,4,k)=ti12*ci5-ti11*ci4;
}
if (ido==1) return;
for (k=0; k<l1;++k)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
MULPM (dr4,di4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
MULPM (dr5,di5,WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4))
PM(cr2,ci5,dr5,dr2)
PM(ci2,cr5,di2,di5)
PM(cr3,ci4,dr4,dr3)
PM(ci3,cr4,di3,di4)
CH(i-1,0,k)=CC(i-1,k,0)+cr2+cr3;
CH(i ,0,k)=CC(i ,k,0)+ci2+ci3;
tr2=CC(i-1,k,0)+tr11*cr2+tr12*cr3;
ti2=CC(i ,k,0)+tr11*ci2+tr12*ci3;
tr3=CC(i-1,k,0)+tr12*cr2+tr11*cr3;
ti3=CC(i ,k,0)+tr12*ci2+tr11*ci3;
MULPM(tr5,tr4,cr5,cr4,ti11,ti12)
MULPM(ti5,ti4,ci5,ci4,ti11,ti12)
PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr5)
PM(CH(i ,2,k),CH(ic ,1,k),ti5,ti2)
PM(CH(i-1,4,k),CH(ic-1,3,k),tr3,tr4)
PM(CH(i ,4,k),CH(ic ,3,k),ti4,ti3)
}
}
#undef CH
#undef CC
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
#define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))]
#define C2(a,b) cc[(a)+idl1*(b)]
#define CH2(a,b) ch[(a)+idl1*(b)]
static void radfg(size_t ido, size_t ip, size_t l1, size_t idl1,
double *cc, double *ch, const double *wa)
{
const size_t cdim=ip;
static const double twopi=6.28318530717958647692;
size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
double ai1, ai2, ar1, ar2, arg;
double *csarr;
size_t aidx;
ipph=(ip+1)/ 2;
if(ido!=1)
{
memcpy(ch,cc,idl1*sizeof(double));
for(j=1; j<ip; j++)
for(k=0; k<l1; k++)
{
CH(0,k,j)=C1(0,k,j);
idij=(j-1)*ido+1;
for(i=2; i<ido; i+=2,idij+=2)
MULPM(CH(i-1,k,j),CH(i,k,j),wa[idij-1],wa[idij],C1(i-1,k,j),C1(i,k,j))
}
for(j=1,jc=ip-1; j<ipph; j++,jc--)
for(k=0; k<l1; k++)
for(i=2; i<ido; i+=2)
{
PM(C1(i-1,k,j),C1(i ,k,jc),CH(i-1,k,jc),CH(i-1,k,j ))
PM(C1(i ,k,j),C1(i-1,k,jc),CH(i ,k,j ),CH(i ,k,jc))
}
}
else
memcpy(cc,ch,idl1*sizeof(double));
for(j=1,jc=ip-1; j<ipph; j++,jc--)
for(k=0; k<l1; k++)
PM(C1(0,k,j),C1(0,k,jc),CH(0,k,jc),CH(0,k,j))
csarr=RALLOC(double,2*ip);
arg=twopi / ip;
csarr[0]=1.;
csarr[1]=0.;
csarr[2]=csarr[2*ip-2]=cos(arg);
csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
for (i=2; i<=ip/2; ++i)
{
csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
csarr[2*i+1]=sin(i*arg);
csarr[2*ip-2*i+1]=-csarr[2*i+1];
}
for(l=1,lc=ip-1; l<ipph; l++,lc--)
{
ar1=csarr[2*l];
ai1=csarr[2*l+1];
for(ik=0; ik<idl1; ik++)
{
CH2(ik,l)=C2(ik,0)+ar1*C2(ik,1);
CH2(ik,lc)=ai1*C2(ik,ip-1);
}
aidx=2*l;
for(j=2,jc=ip-2; j<ipph; j++,jc--)
{
aidx+=2*l;
if (aidx>=2*ip) aidx-=2*ip;
ar2=csarr[aidx];
ai2=csarr[aidx+1];
for(ik=0; ik<idl1; ik++)
{
CH2(ik,l )+=ar2*C2(ik,j );
CH2(ik,lc)+=ai2*C2(ik,jc);
}
}
}
DEALLOC(csarr);
for(j=1; j<ipph; j++)
for(ik=0; ik<idl1; ik++)
CH2(ik,0)+=C2(ik,j);
for(k=0; k<l1; k++)
memcpy(&CC(0,0,k),&CH(0,k,0),ido*sizeof(double));
for(j=1; j<ipph; j++)
{
jc=ip-j;
j2=2*j;
for(k=0; k<l1; k++)
{
CC(ido-1,j2-1,k) = CH(0,k,j );
CC(0 ,j2 ,k) = CH(0,k,jc);
}
}
if(ido==1) return;
for(j=1; j<ipph; j++)
{
jc=ip-j;
j2=2*j;
for(k=0; k<l1; k++)
for(i=2; i<ido; i+=2)
{
ic=ido-i;
PM (CC(i-1,j2,k),CC(ic-1,j2-1,k),CH(i-1,k,j ),CH(i-1,k,jc))
PM (CC(i ,j2,k),CC(ic ,j2-1,k),CH(i ,k,jc),CH(i ,k,j ))
}
}
}
#undef CC
#undef CH
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
static void radb2(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=2;
size_t i, k, ic;
double ti2, tr2;
for (k=0; k<l1; k++)
PM (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(ido-1,1,k))
if ((ido&1)==0)
for (k=0; k<l1; k++)
{
CH(ido-1,k,0) = 2*CC(ido-1,0,k);
CH(ido-1,k,1) = -2*CC(0 ,1,k);
}
if (ido<=2) return;
for (k=0; k<l1;++k)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
PM (CH(i-1,k,0),tr2,CC(i-1,0,k),CC(ic-1,1,k))
PM (ti2,CH(i ,k,0),CC(i ,0,k),CC(ic ,1,k))
MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ti2,tr2)
}
}
static void radb3(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=3;
static const double taur=-0.5, taui=0.86602540378443864676;
size_t i, k, ic;
double ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
for (k=0; k<l1; k++)
{
tr2=2*CC(ido-1,1,k);
cr2=CC(0,0,k)+taur*tr2;
CH(0,k,0)=CC(0,0,k)+tr2;
ci3=2*taui*CC(0,2,k);
PM (CH(0,k,2),CH(0,k,1),cr2,ci3);
}
if (ido==1) return;
for (k=0; k<l1; k++)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
tr2=CC(i-1,2,k)+CC(ic-1,1,k);
ti2=CC(i ,2,k)-CC(ic ,1,k);
cr2=CC(i-1,0,k)+taur*tr2;
ci2=CC(i ,0,k)+taur*ti2;
CH(i-1,k,0)=CC(i-1,0,k)+tr2;
CH(i ,k,0)=CC(i ,0,k)+ti2;
cr3=taui*(CC(i-1,2,k)-CC(ic-1,1,k));
ci3=taui*(CC(i ,2,k)+CC(ic ,1,k));
PM(dr3,dr2,cr2,ci3)
PM(di2,di3,ci2,cr3)
MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
}
}
static void radb4(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=4;
static const double sqrt2=1.41421356237309504880;
size_t i, k, ic;
double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
for (k=0; k<l1; k++)
{
PM (tr2,tr1,CC(0,0,k),CC(ido-1,3,k))
tr3=2*CC(ido-1,1,k);
tr4=2*CC(0,2,k);
PM (CH(0,k,0),CH(0,k,2),tr2,tr3)
PM (CH(0,k,3),CH(0,k,1),tr1,tr4)
}
if ((ido&1)==0)
for (k=0; k<l1; k++)
{
PM (ti1,ti2,CC(0 ,3,k),CC(0 ,1,k))
PM (tr2,tr1,CC(ido-1,0,k),CC(ido-1,2,k))
CH(ido-1,k,0)=tr2+tr2;
CH(ido-1,k,1)=sqrt2*(tr1-ti1);
CH(ido-1,k,2)=ti2+ti2;
CH(ido-1,k,3)=-sqrt2*(tr1+ti1);
}
if (ido<=2) return;
for (k=0; k<l1;++k)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
PM (tr2,tr1,CC(i-1,0,k),CC(ic-1,3,k))
PM (ti1,ti2,CC(i ,0,k),CC(ic ,3,k))
PM (tr4,ti3,CC(i ,2,k),CC(ic ,1,k))
PM (tr3,ti4,CC(i-1,2,k),CC(ic-1,1,k))
PM (CH(i-1,k,0),cr3,tr2,tr3)
PM (CH(i ,k,0),ci3,ti2,ti3)
PM (cr4,cr2,tr1,tr4)
PM (ci2,ci4,ti1,ti4)
MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ci2,cr2)
MULPM (CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),ci3,cr3)
MULPM (CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),ci4,cr4)
}
}
static void radb5(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=5;
static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
size_t i, k, ic;
double ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4,
ti2, ti3, ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
for (k=0; k<l1; k++)
{
ti5=2*CC(0,2,k);
ti4=2*CC(0,4,k);
tr2=2*CC(ido-1,1,k);
tr3=2*CC(ido-1,3,k);
CH(0,k,0)=CC(0,0,k)+tr2+tr3;
cr2=CC(0,0,k)+tr11*tr2+tr12*tr3;
cr3=CC(0,0,k)+tr12*tr2+tr11*tr3;
MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
PM(CH(0,k,4),CH(0,k,1),cr2,ci5)
PM(CH(0,k,3),CH(0,k,2),cr3,ci4)
}
if (ido==1) return;
for (k=0; k<l1;++k)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
PM(tr2,tr5,CC(i-1,2,k),CC(ic-1,1,k))
PM(ti5,ti2,CC(i ,2,k),CC(ic ,1,k))
PM(tr3,tr4,CC(i-1,4,k),CC(ic-1,3,k))
PM(ti4,ti3,CC(i ,4,k),CC(ic ,3,k))
CH(i-1,k,0)=CC(i-1,0,k)+tr2+tr3;
CH(i ,k,0)=CC(i ,0,k)+ti2+ti3;
cr2=CC(i-1,0,k)+tr11*tr2+tr12*tr3;
ci2=CC(i ,0,k)+tr11*ti2+tr12*ti3;
cr3=CC(i-1,0,k)+tr12*tr2+tr11*tr3;
ci3=CC(i ,0,k)+tr12*ti2+tr11*ti3;
MULPM(cr5,cr4,tr5,tr4,ti11,ti12)
MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
PM(dr4,dr3,cr3,ci4)
PM(di3,di4,ci3,cr4)
PM(dr5,dr2,cr2,ci5)
PM(di2,di5,ci2,cr5)
MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
MULPM(CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),di4,dr4)
MULPM(CH(i,k,4),CH(i-1,k,4),WA(3,i-2),WA(3,i-1),di5,dr5)
}
}
static void radbg(size_t ido, size_t ip, size_t l1, size_t idl1,
double *cc, double *ch, const double *wa)
{
const size_t cdim=ip;
static const double twopi=6.28318530717958647692;
size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
double ai1, ai2, ar1, ar2, arg;
double *csarr;
size_t aidx;
ipph=(ip+1)/ 2;
for(k=0; k<l1; k++)
memcpy(&CH(0,k,0),&CC(0,0,k),ido*sizeof(double));
for(j=1; j<ipph; j++)
{
jc=ip-j;
j2=2*j;
for(k=0; k<l1; k++)
{
CH(0,k,j )=2*CC(ido-1,j2-1,k);
CH(0,k,jc)=2*CC(0 ,j2 ,k);
}
}
if(ido!=1)
for(j=1,jc=ip-1; j<ipph; j++,jc--)
for(k=0; k<l1; k++)
for(i=2; i<ido; i+=2)
{
ic=ido-i;
PM (CH(i-1,k,j ),CH(i-1,k,jc),CC(i-1,2*j,k),CC(ic-1,2*j-1,k))
PM (CH(i ,k,jc),CH(i ,k,j ),CC(i ,2*j,k),CC(ic ,2*j-1,k))
}
csarr=RALLOC(double,2*ip);
arg=twopi/ip;
csarr[0]=1.;
csarr[1]=0.;
csarr[2]=csarr[2*ip-2]=cos(arg);
csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
for (i=2; i<=ip/2; ++i)
{
csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
csarr[2*i+1]=sin(i*arg);
csarr[2*ip-2*i+1]=-csarr[2*i+1];
}
for(l=1; l<ipph; l++)
{
lc=ip-l;
ar1=csarr[2*l];
ai1=csarr[2*l+1];
for(ik=0; ik<idl1; ik++)
{
C2(ik,l)=CH2(ik,0)+ar1*CH2(ik,1);
C2(ik,lc)=ai1*CH2(ik,ip-1);
}
aidx=2*l;
for(j=2; j<ipph; j++)
{
jc=ip-j;
aidx+=2*l;
if (aidx>=2*ip) aidx-=2*ip;
ar2=csarr[aidx];
ai2=csarr[aidx+1];
for(ik=0; ik<idl1; ik++)
{
C2(ik,l )+=ar2*CH2(ik,j );
C2(ik,lc)+=ai2*CH2(ik,jc);
}
}
}
DEALLOC(csarr);
for(j=1; j<ipph; j++)
for(ik=0; ik<idl1; ik++)
CH2(ik,0)+=CH2(ik,j);
for(j=1,jc=ip-1; j<ipph; j++,jc--)
for(k=0; k<l1; k++)
PM (CH(0,k,jc),CH(0,k,j),C1(0,k,j),C1(0,k,jc))
if(ido==1)
return;
for(j=1,jc=ip-1; j<ipph; j++,jc--)
for(k=0; k<l1; k++)
for(i=2; i<ido; i+=2)
{
PM (CH(i-1,k,jc),CH(i-1,k,j ),C1(i-1,k,j),C1(i ,k,jc))
PM (CH(i ,k,j ),CH(i ,k,jc),C1(i ,k,j),C1(i-1,k,jc))
}
memcpy(cc,ch,idl1*sizeof(double));
for(j=1; j<ip; j++)
for(k=0; k<l1; k++)
{
C1(0,k,j)=CH(0,k,j);
idij=(j-1)*ido+1;
for(i=2; i<ido; i+=2,idij+=2)
MULPM (C1(i,k,j),C1(i-1,k,j),wa[idij-1],wa[idij],CH(i,k,j),CH(i-1,k,j))
}
}
#undef CC
#undef CH
#undef PM
#undef MULPM
/*----------------------------------------------------------------------
cfftf1, cfftb1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
----------------------------------------------------------------------*/
static void cfft1(size_t n, cmplx c[], cmplx ch[], const cmplx wa[],
const size_t ifac[], int isign)
{
size_t k1, l1=1, nf=ifac[1], iw=0;
cmplx *p1=c, *p2=ch;
for(k1=0; k1<nf; k1++)
{
size_t ip=ifac[k1+2];
size_t l2=ip*l1;
size_t ido = n/l2;
if(ip==4)
(isign>0) ? passb4(ido, l1, p1, p2, wa+iw)
: passf4(ido, l1, p1, p2, wa+iw);
else if(ip==2)
(isign>0) ? passb2(ido, l1, p1, p2, wa+iw)
: passf2(ido, l1, p1, p2, wa+iw);
else if(ip==3)
(isign>0) ? passb3(ido, l1, p1, p2, wa+iw)
: passf3(ido, l1, p1, p2, wa+iw);
else if(ip==5)
(isign>0) ? passb5(ido, l1, p1, p2, wa+iw)
: passf5(ido, l1, p1, p2, wa+iw);
else if(ip==6)
(isign>0) ? passb6(ido, l1, p1, p2, wa+iw)
: passf6(ido, l1, p1, p2, wa+iw);
else
(isign>0) ? passbg(ido, ip, l1, p1, p2, wa+iw)
: passfg(ido, ip, l1, p1, p2, wa+iw);
SWAP(p1,p2,cmplx *);
l1=l2;
iw+=(ip-1)*ido;
}
if (p1!=c)
memcpy (c,p1,n*sizeof(cmplx));
}
void cfftf(size_t n, double c[], double wsave[])
{
if (n!=1)
cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
(size_t*)(wsave+4*n),-1);
}
void cfftb(size_t n, double c[], double wsave[])
{
if (n!=1)
cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
(size_t*)(wsave+4*n),+1);
}
static void factorize (size_t n, const size_t *pf, size_t npf, size_t *ifac)
{
size_t nl=n, nf=0, ntry=0, j=0, i;
startloop:
j++;
ntry = (j<=npf) ? pf[j-1] : ntry+2;
do
{
size_t nq=nl / ntry;
size_t nr=nl-ntry*nq;
if (nr!=0)
goto startloop;
nf++;
ifac[nf+1]=ntry;
nl=nq;
if ((ntry==2) && (nf!=1))
{
for (i=nf+1; i>2; --i)
ifac[i]=ifac[i-1];
ifac[2]=2;
}
}
while(nl!=1);
ifac[0]=n;
ifac[1]=nf;
}
static void cffti1(size_t n, double wa[], size_t ifac[])
{
static const size_t ntryh[5]={4,6,3,2,5};
static const double twopi=6.28318530717958647692;
size_t j, k, fi;
double argh=twopi/n;
size_t i=0, l1=1;
factorize (n,ntryh,5,ifac);
for(k=1; k<=ifac[1]; k++)
{
size_t ip=ifac[k+1];
size_t ido=n/(l1*ip);
for(j=1; j<ip; j++)
{
size_t is = i;
double argld=j*l1*argh;
wa[i ]=1;
wa[i+1]=0;
for(fi=1; fi<=ido; fi++)
{
double arg=fi*argld;
i+=2;
wa[i ]=cos(arg);
wa[i+1]=sin(arg);
}
if(ip>6)
{
wa[is ]=wa[i ];
wa[is+1]=wa[i+1];
}
}
l1*=ip;
}
}
void cffti(size_t n, double wsave[])
{ if (n!=1) cffti1(n, wsave+2*n,(size_t*)(wsave+4*n)); }
/*----------------------------------------------------------------------
rfftf1, rfftb1, rfftf, rfftb, rffti1, rffti. Real FFTs.
----------------------------------------------------------------------*/
static void rfftf1(size_t n, double c[], double ch[], const double wa[],
const size_t ifac[])
{
size_t k1, l1=n, nf=ifac[1], iw=n-1;
double *p1=ch, *p2=c;
for(k1=1; k1<=nf;++k1)
{
size_t ip=ifac[nf-k1+2];
size_t ido=n / l1;
l1 /= ip;
iw-=(ip-1)*ido;
SWAP (p1,p2,double *);
if(ip==4)
radf4(ido, l1, p1, p2, wa+iw);
else if(ip==2)
radf2(ido, l1, p1, p2, wa+iw);
else if(ip==3)
radf3(ido, l1, p1, p2, wa+iw);
else if(ip==5)
radf5(ido, l1, p1, p2, wa+iw);
else
{
if (ido==1)
SWAP (p1,p2,double *);
radfg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
SWAP (p1,p2,double *);
}
}
if (p1==c)
memcpy (c,ch,n*sizeof(double));
}
static void rfftb1(size_t n, double c[], double ch[], const double wa[],
const size_t ifac[])
{
size_t k1, l1=1, nf=ifac[1], iw=0;
double *p1=c, *p2=ch;
for(k1=1; k1<=nf; k1++)
{
size_t ip = ifac[k1+1],
ido= n/(ip*l1);
if(ip==4)
radb4(ido, l1, p1, p2, wa+iw);
else if(ip==2)
radb2(ido, l1, p1, p2, wa+iw);
else if(ip==3)
radb3(ido, l1, p1, p2, wa+iw);
else if(ip==5)
radb5(ido, l1, p1, p2, wa+iw);
else
{
radbg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
if (ido!=1)
SWAP (p1,p2,double *);
}
SWAP (p1,p2,double *);
l1*=ip;
iw+=(ip-1)*ido;
}
if (p1!=c)
memcpy (c,ch,n*sizeof(double));
}
void rfftf(size_t n, double r[], double wsave[])
{ if(n!=1) rfftf1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
void rfftb(size_t n, double r[], double wsave[])
{ if(n!=1) rfftb1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
static void rffti1(size_t n, double wa[], size_t ifac[])
{
static const size_t ntryh[4]={4,2,3,5};
static const double twopi=6.28318530717958647692;
size_t i, j, k, fi;
double argh=twopi/n;
size_t is=0, l1=1;
factorize (n,ntryh,4,ifac);
for (k=1; k<ifac[1]; k++)
{
size_t ip=ifac[k+1],
ido=n/(l1*ip);
for (j=1; j<ip; ++j)
{
double argld=j*l1*argh;
for(i=is,fi=1; i<=ido+is-3; i+=2,++fi)
{
double arg=fi*argld;
wa[i ]=cos(arg);
wa[i+1]=sin(arg);
}
is+=ido;
}
l1*=ip;
}
}
void rffti(size_t n, double wsave[])
{ if (n!=1) rffti1(n, wsave+n,(size_t*)(wsave+2*n)); }

64
external/sharp/libfftpack/fftpack.h vendored Normal file
View file

@ -0,0 +1,64 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
fftpack.h : function declarations for fftpack.c
Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
(Version 4, 1985).
Pekka Janhunen 23.2.1995
(reformatted by joerg arndt)
reformatted and slightly enhanced by Martin Reinecke (2004)
*/
#ifndef PLANCK_FFTPACK_H
#define PLANCK_FFTPACK_H
#include "c_utils.h"
#ifdef __cplusplus
extern "C" {
#endif
/*! forward complex transform */
void cfftf(size_t N, double complex_data[], double wrk[]);
/*! backward complex transform */
void cfftb(size_t N, double complex_data[], double wrk[]);
/*! initializer for complex transforms */
void cffti(size_t N, double wrk[]);
/*! forward real transform */
void rfftf(size_t N, double data[], double wrk[]);
/*! backward real transform */
void rfftb(size_t N, double data[], double wrk[]);
/*! initializer for real transforms */
void rffti(size_t N, double wrk[]);
#ifdef __cplusplus
}
#endif
#endif

306
external/sharp/libfftpack/fftpack_inc.c vendored Normal file
View file

@ -0,0 +1,306 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
fftpack.c : A set of FFT routines in C.
Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
(Version 4, 1985).
C port by Martin Reinecke (2010)
*/
#ifdef BACKWARD
#define PSIGN +
#define PMSIGNC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
/* a = b*c */
#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; }
#else
#define PSIGN -
#define PMSIGNC(a,b,c,d) { a.r=c.r-d.r; a.i=c.i-d.i; b.r=c.r+d.r; b.i=c.i+d.i; }
/* a = conj(b)*c */
#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; }
#endif
static void X(2) (size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=2;
size_t k,i;
cmplx t;
if (ido==1)
for (k=0;k<l1;++k)
PMC (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(0,1,k))
else
for (k=0;k<l1;++k)
for (i=0;i<ido;++i)
{
PMC (CH(i,k,0),t,CC(i,0,k),CC(i,1,k))
MULPMSIGNC (CH(i,k,1),WA(0,i),t)
}
}
static void X(3)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=3;
static const double taur=-0.5, taui= PSIGN 0.86602540378443864676;
size_t i, k;
cmplx c2, c3, d2, d3, t2;
if (ido==1)
for (k=0; k<l1; ++k)
{
PMC (t2,c3,CC(0,1,k),CC(0,2,k))
ADDC (CH(0,k,0),t2,CC(0,0,k))
SCALEC(t2,taur)
ADDC(c2,CC(0,0,k),t2)
SCALEC(c3,taui)
CONJFLIPC(c3)
PMC(CH(0,k,1),CH(0,k,2),c2,c3)
}
else
for (k=0; k<l1; ++k)
for (i=0; i<ido; ++i)
{
PMC (t2,c3,CC(i,1,k),CC(i,2,k))
ADDC (CH(i,k,0),t2,CC(i,0,k))
SCALEC(t2,taur)
ADDC(c2,CC(i,0,k),t2)
SCALEC(c3,taui)
CONJFLIPC(c3)
PMC(d2,d3,c2,c3)
MULPMSIGNC(CH(i,k,1),WA(0,i),d2)
MULPMSIGNC(CH(i,k,2),WA(1,i),d3)
}
}
static void X(4)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=4;
size_t i, k;
cmplx c2, c3, c4, t1, t2, t3, t4;
if (ido==1)
for (k=0; k<l1; ++k)
{
PMC(t2,t1,CC(0,0,k),CC(0,2,k))
PMC(t3,t4,CC(0,1,k),CC(0,3,k))
CONJFLIPC(t4)
PMC(CH(0,k,0),CH(0,k,2),t2,t3)
PMSIGNC (CH(0,k,1),CH(0,k,3),t1,t4)
}
else
for (k=0; k<l1; ++k)
for (i=0; i<ido; ++i)
{
PMC(t2,t1,CC(i,0,k),CC(i,2,k))
PMC(t3,t4,CC(i,1,k),CC(i,3,k))
CONJFLIPC(t4)
PMC(CH(i,k,0),c3,t2,t3)
PMSIGNC (c2,c4,t1,t4)
MULPMSIGNC (CH(i,k,1),WA(0,i),c2)
MULPMSIGNC (CH(i,k,2),WA(1,i),c3)
MULPMSIGNC (CH(i,k,3),WA(2,i),c4)
}
}
static void X(5)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=5;
static const double tr11= 0.3090169943749474241,
ti11= PSIGN 0.95105651629515357212,
tr12=-0.8090169943749474241,
ti12= PSIGN 0.58778525229247312917;
size_t i, k;
cmplx c2, c3, c4, c5, d2, d3, d4, d5, t2, t3, t4, t5;
if (ido==1)
for (k=0; k<l1; ++k)
{
PMC (t2,t5,CC(0,1,k),CC(0,4,k))
PMC (t3,t4,CC(0,2,k),CC(0,3,k))
CH(0,k,0).r=CC(0,0,k).r+t2.r+t3.r;
CH(0,k,0).i=CC(0,0,k).i+t2.i+t3.i;
c2.r=CC(0,0,k).r+tr11*t2.r+tr12*t3.r;
c2.i=CC(0,0,k).i+tr11*t2.i+tr12*t3.i;
c3.r=CC(0,0,k).r+tr12*t2.r+tr11*t3.r;
c3.i=CC(0,0,k).i+tr12*t2.i+tr11*t3.i;
c5.r=ti11*t5.r+ti12*t4.r;
c5.i=ti11*t5.i+ti12*t4.i;
c4.r=ti12*t5.r-ti11*t4.r;
c4.i=ti12*t5.i-ti11*t4.i;
CONJFLIPC(c5)
PMC(CH(0,k,1),CH(0,k,4),c2,c5)
CONJFLIPC(c4)
PMC(CH(0,k,2),CH(0,k,3),c3,c4)
}
else
for (k=0; k<l1; ++k)
for (i=0; i<ido; ++i)
{
PMC (t2,t5,CC(i,1,k),CC(i,4,k))
PMC (t3,t4,CC(i,2,k),CC(i,3,k))
CH(i,k,0).r=CC(i,0,k).r+t2.r+t3.r;
CH(i,k,0).i=CC(i,0,k).i+t2.i+t3.i;
c2.r=CC(i,0,k).r+tr11*t2.r+tr12*t3.r;
c2.i=CC(i,0,k).i+tr11*t2.i+tr12*t3.i;
c3.r=CC(i,0,k).r+tr12*t2.r+tr11*t3.r;
c3.i=CC(i,0,k).i+tr12*t2.i+tr11*t3.i;
c5.r=ti11*t5.r+ti12*t4.r;
c5.i=ti11*t5.i+ti12*t4.i;
c4.r=ti12*t5.r-ti11*t4.r;
c4.i=ti12*t5.i-ti11*t4.i;
CONJFLIPC(c5)
PMC(d2,d5,c2,c5)
CONJFLIPC(c4)
PMC(d3,d4,c3,c4)
MULPMSIGNC (CH(i,k,1),WA(0,i),d2)
MULPMSIGNC (CH(i,k,2),WA(1,i),d3)
MULPMSIGNC (CH(i,k,3),WA(2,i),d4)
MULPMSIGNC (CH(i,k,4),WA(3,i),d5)
}
}
static void X(6)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=6;
static const double taui= PSIGN 0.86602540378443864676;
cmplx ta1,ta2,ta3,a0,a1,a2,tb1,tb2,tb3,b0,b1,b2,d1,d2,d3,d4,d5;
size_t i, k;
if (ido==1)
for (k=0; k<l1; ++k)
{
PMC(ta1,ta3,CC(0,2,k),CC(0,4,k))
ta2.r = CC(0,0,k).r - .5*ta1.r;
ta2.i = CC(0,0,k).i - .5*ta1.i;
SCALEC(ta3,taui)
ADDC(a0,CC(0,0,k),ta1)
CONJFLIPC(ta3)
PMC(a1,a2,ta2,ta3)
PMC(tb1,tb3,CC(0,5,k),CC(0,1,k))
tb2.r = CC(0,3,k).r - .5*tb1.r;
tb2.i = CC(0,3,k).i - .5*tb1.i;
SCALEC(tb3,taui)
ADDC(b0,CC(0,3,k),tb1)
CONJFLIPC(tb3)
PMC(b1,b2,tb2,tb3)
PMC(CH(0,k,0),CH(0,k,3),a0,b0)
PMC(CH(0,k,4),CH(0,k,1),a1,b1)
PMC(CH(0,k,2),CH(0,k,5),a2,b2)
}
else
for (k=0; k<l1; ++k)
for (i=0; i<ido; ++i)
{
PMC(ta1,ta3,CC(i,2,k),CC(i,4,k))
ta2.r = CC(i,0,k).r - .5*ta1.r;
ta2.i = CC(i,0,k).i - .5*ta1.i;
SCALEC(ta3,taui)
ADDC(a0,CC(i,0,k),ta1)
CONJFLIPC(ta3)
PMC(a1,a2,ta2,ta3)
PMC(tb1,tb3,CC(i,5,k),CC(i,1,k))
tb2.r = CC(i,3,k).r - .5*tb1.r;
tb2.i = CC(i,3,k).i - .5*tb1.i;
SCALEC(tb3,taui)
ADDC(b0,CC(i,3,k),tb1)
CONJFLIPC(tb3)
PMC(b1,b2,tb2,tb3)
PMC(CH(i,k,0),d3,a0,b0)
PMC(d4,d1,a1,b1)
PMC(d2,d5,a2,b2)
MULPMSIGNC (CH(i,k,1),WA(0,i),d1)
MULPMSIGNC (CH(i,k,2),WA(1,i),d2)
MULPMSIGNC (CH(i,k,3),WA(2,i),d3)
MULPMSIGNC (CH(i,k,4),WA(3,i),d4)
MULPMSIGNC (CH(i,k,5),WA(4,i),d5)
}
}
static void X(g)(size_t ido, size_t ip, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=ip;
cmplx *tarr=RALLOC(cmplx,2*ip);
cmplx *ccl=tarr, *wal=tarr+ip;
size_t i,j,k,l,jc,lc;
size_t ipph = (ip+1)/2;
for (i=1; i<ip; ++i)
wal[i]=wa[ido*(i-1)];
for (k=0; k<l1; ++k)
for (i=0; i<ido; ++i)
{
cmplx s=CC(i,0,k);
ccl[0] = CC(i,0,k);
for(j=1,jc=ip-1; j<ipph; ++j,--jc)
{
PMC (ccl[j],ccl[jc],CC(i,j,k),CC(i,jc,k))
ADDC (s,s,ccl[j])
}
CH(i,k,0) = s;
for (j=1, jc=ip-1; j<=ipph; ++j,--jc)
{
cmplx abr=ccl[0], abi={0.,0.};
size_t iang=0;
for (l=1,lc=ip-1; l<ipph; ++l,--lc)
{
iang+=j;
if (iang>ip) iang-=ip;
abr.r += ccl[l ].r*wal[iang].r;
abr.i += ccl[l ].i*wal[iang].r;
abi.r += ccl[lc].r*wal[iang].i;
abi.i += ccl[lc].i*wal[iang].i;
}
#ifndef BACKWARD
{ abi.i=-abi.i; abi.r=-abi.r; }
#endif
CONJFLIPC(abi)
PMC(CH(i,k,j),CH(i,k,jc),abr,abi)
}
}
DEALLOC(tarr);
if (ido==1) return;
for (j=1; j<ip; ++j)
for (k=0; k<l1; ++k)
{
size_t idij=(j-1)*ido+1;
for(i=1; i<ido; ++i, ++idij)
{
cmplx t=CH(i,k,j);
MULPMSIGNC (CH(i,k,j),wa[idij],t)
}
}
}
#undef PSIGN
#undef PMSIGNC
#undef MULPMSIGNC

View file

@ -0,0 +1,5 @@
/*! \mainpage Libfftpack documentation
<ul>
<li>\ref fftgroup "Programming interface"
</ul>
*/

291
external/sharp/libfftpack/ls_fft.c vendored Normal file
View file

@ -0,0 +1,291 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
* Copyright (C) 2005 Max-Planck-Society
* \author Martin Reinecke
*/
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include "bluestein.h"
#include "fftpack.h"
#include "ls_fft.h"
complex_plan make_complex_plan (size_t length)
{
complex_plan plan = RALLOC(complex_plan_i,1);
size_t pfsum = prime_factor_sum(length);
double comp1 = (double)(length*pfsum);
double comp2 = 2*3*length*log(3.*length);
comp2*=3.; /* fudge factor that appears to give good overall performance */
plan->length=length;
plan->bluestein = (comp2<comp1);
if (plan->bluestein)
bluestein_i (length,&(plan->work),&(plan->worksize));
else
{
plan->worksize=4*length+15;
plan->work=RALLOC(double,4*length+15);
cffti(length, plan->work);
}
return plan;
}
complex_plan copy_complex_plan (complex_plan plan)
{
if (!plan) return NULL;
{
complex_plan newplan = RALLOC(complex_plan_i,1);
*newplan = *plan;
newplan->work=RALLOC(double,newplan->worksize);
memcpy(newplan->work,plan->work,sizeof(double)*newplan->worksize);
return newplan;
}
}
void kill_complex_plan (complex_plan plan)
{
DEALLOC(plan->work);
DEALLOC(plan);
}
void complex_plan_forward (complex_plan plan, double *data)
{
if (plan->bluestein)
bluestein (plan->length, data, plan->work, -1);
else
cfftf (plan->length, data, plan->work);
}
void complex_plan_backward (complex_plan plan, double *data)
{
if (plan->bluestein)
bluestein (plan->length, data, plan->work, 1);
else
cfftb (plan->length, data, plan->work);
}
real_plan make_real_plan (size_t length)
{
real_plan plan = RALLOC(real_plan_i,1);
size_t pfsum = prime_factor_sum(length);
double comp1 = .5*length*pfsum;
double comp2 = 2*3*length*log(3.*length);
comp2*=3; /* fudge factor that appears to give good overall performance */
plan->length=length;
plan->bluestein = (comp2<comp1);
if (plan->bluestein)
bluestein_i (length,&(plan->work),&(plan->worksize));
else
{
plan->worksize=2*length+15;
plan->work=RALLOC(double,2*length+15);
rffti(length, plan->work);
}
return plan;
}
real_plan copy_real_plan (real_plan plan)
{
if (!plan) return NULL;
{
real_plan newplan = RALLOC(real_plan_i,1);
*newplan = *plan;
newplan->work=RALLOC(double,newplan->worksize);
memcpy(newplan->work,plan->work,sizeof(double)*newplan->worksize);
return newplan;
}
}
void kill_real_plan (real_plan plan)
{
DEALLOC(plan->work);
DEALLOC(plan);
}
void real_plan_forward_fftpack (real_plan plan, double *data)
{
if (plan->bluestein)
{
size_t m;
size_t n=plan->length;
double *tmp = RALLOC(double,2*n);
for (m=0; m<n; ++m)
{
tmp[2*m] = data[m];
tmp[2*m+1] = 0.;
}
bluestein(n,tmp,plan->work,-1);
data[0] = tmp[0];
memcpy (data+1, tmp+2, (n-1)*sizeof(double));
DEALLOC(tmp);
}
else
rfftf (plan->length, data, plan->work);
}
static void fftpack2halfcomplex (double *data, size_t n)
{
size_t m;
double *tmp = RALLOC(double,n);
tmp[0]=data[0];
for (m=1; m<(n+1)/2; ++m)
{
tmp[m]=data[2*m-1];
tmp[n-m]=data[2*m];
}
if (!(n&1))
tmp[n/2]=data[n-1];
memcpy (data,tmp,n*sizeof(double));
DEALLOC(tmp);
}
static void halfcomplex2fftpack (double *data, size_t n)
{
size_t m;
double *tmp = RALLOC(double,n);
tmp[0]=data[0];
for (m=1; m<(n+1)/2; ++m)
{
tmp[2*m-1]=data[m];
tmp[2*m]=data[n-m];
}
if (!(n&1))
tmp[n-1]=data[n/2];
memcpy (data,tmp,n*sizeof(double));
DEALLOC(tmp);
}
void real_plan_forward_fftw (real_plan plan, double *data)
{
real_plan_forward_fftpack (plan, data);
fftpack2halfcomplex (data,plan->length);
}
void real_plan_backward_fftpack (real_plan plan, double *data)
{
if (plan->bluestein)
{
size_t m;
size_t n=plan->length;
double *tmp = RALLOC(double,2*n);
tmp[0]=data[0];
tmp[1]=0.;
memcpy (tmp+2,data+1, (n-1)*sizeof(double));
if ((n&1)==0) tmp[n+1]=0.;
for (m=2; m<n; m+=2)
{
tmp[2*n-m]=tmp[m];
tmp[2*n-m+1]=-tmp[m+1];
}
bluestein (n, tmp, plan->work, 1);
for (m=0; m<n; ++m)
data[m] = tmp[2*m];
DEALLOC(tmp);
}
else
rfftb (plan->length, data, plan->work);
}
void real_plan_backward_fftw (real_plan plan, double *data)
{
halfcomplex2fftpack (data,plan->length);
real_plan_backward_fftpack (plan, data);
}
void real_plan_forward_c (real_plan plan, double *data)
{
size_t m;
size_t n=plan->length;
if (plan->bluestein)
{
for (m=1; m<2*n; m+=2)
data[m]=0;
bluestein (plan->length, data, plan->work, -1);
data[1]=0;
for (m=2; m<n; m+=2)
{
double avg;
avg = 0.5*(data[2*n-m]+data[m]);
data[2*n-m] = data[m] = avg;
avg = 0.5*(data[2*n-m+1]-data[m+1]);
data[2*n-m+1] = avg;
data[m+1] = -avg;
}
if ((n&1)==0) data[n+1] = 0.;
}
else
{
/* using "m+m" instead of "2*m" to avoid a nasty bug in Intel's compiler */
for (m=0; m<n; ++m) data[m+1] = data[m+m];
rfftf (n, data+1, plan->work);
data[0] = data[1];
data[1] = 0;
for (m=2; m<n; m+=2)
{
data[2*n-m] = data[m];
data[2*n-m+1] = -data[m+1];
}
if ((n&1)==0) data[n+1] = 0.;
}
}
void real_plan_backward_c (real_plan plan, double *data)
{
size_t n=plan->length;
if (plan->bluestein)
{
size_t m;
data[1]=0;
for (m=2; m<n; m+=2)
{
double avg;
avg = 0.5*(data[2*n-m]+data[m]);
data[2*n-m] = data[m] = avg;
avg = 0.5*(data[2*n-m+1]-data[m+1]);
data[2*n-m+1] = avg;
data[m+1] = -avg;
}
if ((n&1)==0) data[n+1] = 0.;
bluestein (plan->length, data, plan->work, 1);
for (m=1; m<2*n; m+=2)
data[m]=0;
}
else
{
ptrdiff_t m;
data[1] = data[0];
rfftb (n, data+1, plan->work);
for (m=n-1; m>=0; --m)
{
data[2*m] = data[m+1];
data[2*m+1] = 0.;
}
}
}

162
external/sharp/libfftpack/ls_fft.h vendored Normal file
View file

@ -0,0 +1,162 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file ls_fft.h
* Interface for the LevelS FFT package.
*
* Copyright (C) 2004 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_LS_FFT_H
#define PLANCK_LS_FFT_H
#include "c_utils.h"
#ifdef __cplusplus
extern "C" {
#endif
/*!\defgroup fftgroup FFT interface
This package is intended to calculate one-dimensional real or complex FFTs
with high accuracy and good efficiency even for lengths containing large
prime factors.
The code is written in C, but a Fortran wrapper exists as well.
Before any FFT is executed, a plan must be generated for it. Plan creation
is designed to be fast, so that there is no significant overhead if the
plan is only used once or a few times.
The main component of the code is based on Paul N. Swarztrauber's FFTPACK in the
double precision incarnation by Hugh C. Pumphrey
(http://www.netlib.org/fftpack/dp.tgz).
I replaced the iterative sine and cosine calculations in radfg() and radbg()
by an exact calculation, which slightly improves the transform accuracy for
real FFTs with lengths containing large prime factors.
Since FFTPACK becomes quite slow for FFT lengths with large prime factors
(in the worst case of prime lengths it reaches \f$\mathcal{O}(n^2)\f$
complexity), I implemented Bluestein's algorithm, which computes a FFT of length
\f$n\f$ by several FFTs of length \f$n_2\ge 2n-1\f$ and a convolution. Since
\f$n_2\f$ can be chosen to be highly composite, this algorithm is more efficient
if \f$n\f$ has large prime factors. The longer FFTs themselves are then computed
using the FFTPACK routines.
Bluestein's algorithm was implemented according to the description on Wikipedia
(<a href="http://en.wikipedia.org/wiki/Bluestein%27s_FFT_algorithm">
http://en.wikipedia.org/wiki/Bluestein%27s_FFT_algorithm</a>).
\b Thread-safety:
All routines can be called concurrently; all information needed by
<tt>ls_fft</tt> is stored in the plan variable. However, using the same plan
variable on multiple threads simultaneously is not supported and will lead to
data corruption.
*/
/*! \{ */
typedef struct
{
double *work;
size_t length, worksize;
int bluestein;
} complex_plan_i;
/*! The opaque handle type for complex-FFT plans. */
typedef complex_plan_i * complex_plan;
/*! Returns a plan for a complex FFT with \a length elements. */
complex_plan make_complex_plan (size_t length);
/*! Constructs a copy of \a plan. */
complex_plan copy_complex_plan (complex_plan plan);
/*! Destroys a plan for a complex FFT. */
void kill_complex_plan (complex_plan plan);
/*! Computes a complex forward FFT on \a data, using \a plan.
\a Data has the form <tt>r0, i0, r1, i1, ...,
r[length-1], i[length-1]</tt>. */
void complex_plan_forward (complex_plan plan, double *data);
/*! Computes a complex backward FFT on \a data, using \a plan.
\a Data has the form <tt>r0, i0, r1, i1, ...,
r[length-1], i[length-1]</tt>. */
void complex_plan_backward (complex_plan plan, double *data);
typedef struct
{
double *work;
size_t length, worksize;
int bluestein;
} real_plan_i;
/*! The opaque handle type for real-FFT plans. */
typedef real_plan_i * real_plan;
/*! Returns a plan for a real FFT with \a length elements. */
real_plan make_real_plan (size_t length);
/*! Constructs a copy of \a plan. */
real_plan copy_real_plan (real_plan plan);
/*! Destroys a plan for a real FFT. */
void kill_real_plan (real_plan plan);
/*! Computes a real forward FFT on \a data, using \a plan
and assuming the FFTPACK storage scheme:
- on entry, \a data has the form <tt>r0, r1, ..., r[length-1]</tt>;
- on exit, it has the form <tt>r0, r1, i1, r2, i2, ...</tt>
(a total of \a length values). */
void real_plan_forward_fftpack (real_plan plan, double *data);
/*! Computes a real forward FFT on \a data, using \a plan
and assuming the FFTPACK storage scheme:
- on entry, \a data has the form <tt>r0, r1, i1, r2, i2, ...</tt>
(a total of \a length values);
- on exit, it has the form <tt>r0, r1, ..., r[length-1]</tt>. */
void real_plan_backward_fftpack (real_plan plan, double *data);
/*! Computes a real forward FFT on \a data, using \a plan
and assuming the FFTW halfcomplex storage scheme:
- on entry, \a data has the form <tt>r0, r1, ..., r[length-1]</tt>;
- on exit, it has the form <tt>r0, r1, r2, ..., i2, i1</tt>. */
void real_plan_forward_fftw (real_plan plan, double *data);
/*! Computes a real backward FFT on \a data, using \a plan
and assuming the FFTW halfcomplex storage scheme:
- on entry, \a data has the form <tt>r0, r1, r2, ..., i2, i1</tt>.
- on exit, it has the form <tt>r0, r1, ..., r[length-1]</tt>. */
void real_plan_backward_fftw (real_plan plan, double *data);
/*! Computes a real forward FFT on \a data, using \a plan
and assuming a full-complex storage scheme:
- on entry, \a data has the form <tt>r0, [ignored], r1, [ignored], ...,
r[length-1], [ignored]</tt>;
- on exit, it has the form <tt>r0, i0, r1, i1, ...,
r[length-1], i[length-1]</tt>.
*/
void real_plan_forward_c (real_plan plan, double *data);
/*! Computes a real backward FFT on \a data, using \a plan
and assuming a full-complex storage scheme:
- on entry, \a data has the form <tt>r0, i0, r1, i1, ...,
r[length-1], i[length-1]</tt>;
- on exit, it has the form <tt>r0, 0, r1, 0, ..., r[length-1], 0</tt>. */
void real_plan_backward_c (real_plan plan, double *data);
/*! \} */
#ifdef __cplusplus
}
#endif
#endif

21
external/sharp/libfftpack/planck.make vendored Normal file
View file

@ -0,0 +1,21 @@
PKG:=libfftpack
SD:=$(SRCROOT)/$(PKG)
OD:=$(BLDROOT)/$(PKG)
FULL_INCLUDE+= -I$(SD)
HDR_$(PKG):=$(SD)/*.h
LIB_$(PKG):=$(LIBDIR)/libfftpack.a
OBJ:=fftpack.o bluestein.o ls_fft.o
OBJ:=$(OBJ:%=$(OD)/%)
ODEP:=$(HDR_$(PKG)) $(HDR_c_utils)
$(OD)/fftpack.o: $(SD)/fftpack_inc.c
$(OBJ): $(ODEP) | $(OD)_mkdir
$(LIB_$(PKG)): $(OBJ)
all_hdr+=$(HDR_$(PKG))
all_lib+=$(LIB_$(PKG))