This commit is contained in:
Martin Reinecke 2018-12-19 12:10:12 +01:00
parent c118c4fd61
commit b545c1cfe0
3 changed files with 49 additions and 60 deletions

View file

@ -800,16 +800,16 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
{
if (job->spin==0)
{
//adjust the a_lm for the new algorithm
dcmplx * restrict alm=job->almtmp;
for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
{
dcmplx al = alm[l];
dcmplx al1 = (l+1>gen->lmax) ? 0. : alm[l+1];
dcmplx al2 = (l+2>gen->lmax) ? 0. : alm[l+2];
alm[l ] = gen->alpha[il]*(gen->eps[l+1]*al + gen->eps[l+2]*al2);
alm[l+1] = gen->alpha[il]*al1;
}
//adjust the a_lm for the new algorithm
dcmplx * restrict alm=job->almtmp;
for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
{
dcmplx al = alm[l];
dcmplx al1 = (l+1>gen->lmax) ? 0. : alm[l+1];
dcmplx al2 = (l+2>gen->lmax) ? 0. : alm[l+2];
alm[l ] = gen->alpha[il]*(gen->eps[l+1]*al + gen->eps[l+2]*al2);
alm[l+1] = gen->alpha[il]*al1;
}
const int nval=nv0*VLEN;
int ith=0;
@ -848,9 +848,9 @@ for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
for (int i=0; i<nth; ++i)
{
int tgt=itgt[i];
//adjust for new algorithm
d.s.p2r[i]*=cth_[tgt];
d.s.p2i[i]*=cth_[tgt];
//adjust for new algorithm
d.s.p2r[i]*=cth_[tgt];
d.s.p2i[i]*=cth_[tgt];
int phas_idx = tgt*job->s_th + mi*job->s_m;
complex double r1 = d.s.p1r[i] + d.s.p1i[i]*_Complex_I,
r2 = d.s.p2r[i] + d.s.p2i[i]*_Complex_I;
@ -963,9 +963,9 @@ NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair,
dcmplx ph2=ispair[ith] ? job->phase[phas_idx+1] : 0.;
d.s.p1r[nth]=creal(ph1+ph2); d.s.p1i[nth]=cimag(ph1+ph2);
d.s.p2r[nth]=creal(ph1-ph2); d.s.p2i[nth]=cimag(ph1-ph2);
//adjust for new algorithm
d.s.p2r[nth]*=cth_[ith];
d.s.p2i[nth]*=cth_[ith];
//adjust for new algorithm
d.s.p2r[nth]*=cth_[ith];
d.s.p2i[nth]*=cth_[ith];
++nth;
}
++ith;
@ -982,19 +982,19 @@ d.s.p2i[nth]*=cth_[ith];
calc_map2alm (job, gen, &d.v, nth);
}
}
//adjust the a_lm for the new algorithm
dcmplx * restrict alm=job->almtmp;
dcmplx alm2 = 0.;
double alold=0;
for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
{
dcmplx al = alm[l];
dcmplx al1 = (l+1>gen->lmax) ? 0. : alm[l+1];
alm[l ] = gen->alpha[il]*gen->eps[l+1]*al + alold*gen->eps[l]*alm2;
alm[l+1] = gen->alpha[il]*al1;
alm2=al;
alold=gen->alpha[il];
}
//adjust the a_lm for the new algorithm
dcmplx * restrict alm=job->almtmp;
dcmplx alm2 = 0.;
double alold=0;
for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
{
dcmplx al = alm[l];
dcmplx al1 = (l+1>gen->lmax) ? 0. : alm[l+1];
alm[l ] = gen->alpha[il]*gen->eps[l+1]*al + alold*gen->eps[l]*alm2;
alm[l+1] = gen->alpha[il]*al1;
alm2=al;
alold=gen->alpha[il];
}
}
else
{

View file

@ -69,7 +69,6 @@ void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
gen->m = -1;
if (spin==0)
{
gen->rf = RALLOC(sharp_ylmgen_dbl2,gen->lmax+2);
gen->mfac = RALLOC(double,gen->mmax+1);
gen->mfac[0] = inv_sqrt4pi;
for (int m=1; m<=gen->mmax; ++m)
@ -81,9 +80,9 @@ void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
gen->root[m] = sqrt(m);
gen->iroot[m] = (m==0) ? 0. : 1./gen->root[m];
}
gen->eps=RALLOC(double, gen->lmax+10);
gen->alpha=RALLOC(double, gen->lmax/2+10);
gen->ab=RALLOC(sharp_ylmgen_dbl2, gen->lmax/2+10);
gen->eps=RALLOC(double, gen->lmax+10);
gen->alpha=RALLOC(double, gen->lmax/2+10);
gen->ab=RALLOC(sharp_ylmgen_dbl2, gen->lmax/2+10);
}
else
{
@ -136,13 +135,12 @@ void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen)
DEALLOC(gen->powlimit);
if (gen->s==0)
{
DEALLOC(gen->rf);
DEALLOC(gen->mfac);
DEALLOC(gen->root);
DEALLOC(gen->iroot);
DEALLOC(gen->eps);
DEALLOC(gen->alpha);
DEALLOC(gen->ab);
DEALLOC(gen->eps);
DEALLOC(gen->alpha);
DEALLOC(gen->ab);
}
else
{
@ -163,26 +161,20 @@ void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m)
if (gen->s==0)
{
gen->rf[m].f[0] = gen->root[2*m+3];
gen->rf[m].f[1] = 0.;
for (int l=m+1; l<=gen->lmax+1; ++l)
for (int l=m; l<gen->lmax+10; ++l)
gen->eps[l] = gen->root[l+m]*gen->root[l-m]
*gen->iroot[2*l+1]*gen->iroot[2*l-1];
gen->alpha[0] = 1./gen->eps[m+1];
gen->alpha[1] = gen->eps[m+1]/(gen->eps[m+2]*gen->eps[m+3]);
for (int il=1, l=m+2; l<gen->lmax+5; ++il, l+=2)
gen->alpha[il+1]= ((il&1) ? -1 : 1)
/(gen->eps[l+2]*gen->eps[l+3]*gen->alpha[il]);
for (int il=0, l=m; l<gen->lmax+5; ++il, l+=2)
{
double tmp=gen->root[2*l+3]*gen->iroot[l+1+m]*gen->iroot[l+1-m];
gen->rf[l].f[0] = tmp*gen->root[2*l+1];
gen->rf[l].f[1] = tmp*gen->root[l+m]*gen->root[l-m]*gen->iroot[2*l-1];
gen->ab[il].f[0] = ((il&1) ? -1 : 1)*gen->alpha[il]*gen->alpha[il];
double t1 = gen->eps[l+2], t2 = gen->eps[l+1];
gen->ab[il].f[1] = -gen->ab[il].f[0]*(t1*t1+t2*t2);
}
for (int l=m; l<gen->lmax+10; ++l)
gen->eps[l] = sqrt((l*l-m*m)/(4.*l*l-1));
gen->alpha[0] = 1./gen->eps[m+1];
gen->alpha[1] = gen->eps[m+1]/(gen->eps[m+2]*gen->eps[m+3]);
for (int il=1, l=m+2; l<gen->lmax+5; ++il, l+=2)
gen->alpha[il+1]= ((il&1) ? -1 : 1)/(gen->eps[l+2]*gen->eps[l+3]*gen->alpha[il]);
for (int il=0, l=m; l<gen->lmax+5; ++il, l+=2)
{
gen->ab[il].f[0] = ((il&1) ? -1 : 1)*gen->alpha[il]*gen->alpha[il];
double t1 = gen->eps[l+2], t2 = gen->eps[l+1];
gen->ab[il].f[1] = -gen->ab[il].f[0]*(t1*t1+t2*t2);
}
}
else
{

View file

@ -55,11 +55,8 @@ typedef struct
int m;
/* used if s==0 */
double *mfac;
sharp_ylmgen_dbl2 *rf;
double *eps, *alpha;
sharp_ylmgen_dbl2 *ab;
double *mfac, *eps, *alpha;
sharp_ylmgen_dbl2 *ab;
/* used if s!=0 */
int sinPow, cosPow, preMinus_p, preMinus_m;