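Simplify Ylm data structures: merge the `ab` (spin 0) and `fx` (spin != 0) recursion-coefficient arrays into a single `coef` member, and make `sharp_ylmgen_dbl2` a plain `double[2]` typedef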

This commit is contained in:
Martin Reinecke 2019-01-11 09:27:04 +01:00
parent b0b0875def
commit 253b253467
3 changed files with 52 additions and 55 deletions

View file

@ -190,8 +190,8 @@ NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * restrict gen,
{ {
if (l+4>gen->lmax) {*l_=gen->lmax+1;return;} if (l+4>gen->lmax) {*l_=gen->lmax+1;return;}
below_limit=1; below_limit=1;
Tv a1=vload(gen->ab[il ].f[0]), b1=vload(gen->ab[il ].f[1]); Tv a1=vload(gen->coef[il ][0]), b1=vload(gen->coef[il ][1]);
Tv a2=vload(gen->ab[il+1].f[0]), b2=vload(gen->ab[il+1].f[1]); Tv a2=vload(gen->coef[il+1][0]), b2=vload(gen->coef[il+1][1]);
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
d->lam1[i] = (a1*d->csq[i] + b1)*d->lam2[i] + d->lam1[i]; d->lam1[i] = (a1*d->csq[i] + b1)*d->lam2[i] + d->lam1[i];
@ -205,7 +205,7 @@ NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * restrict gen,
} }
NOINLINE static void alm2map_kernel(s0data_v * restrict d, NOINLINE static void alm2map_kernel(s0data_v * restrict d,
const sharp_ylmgen_dbl2 * restrict ab, const dcmplx * restrict alm, const sharp_ylmgen_dbl2 * restrict coef, const dcmplx * restrict alm,
int l, int il, int lmax, int nv2) int l, int il, int lmax, int nv2)
{ {
if (nv2==nv0) if (nv2==nv0)
@ -216,8 +216,8 @@ NOINLINE static void alm2map_kernel(s0data_v * restrict d,
Tv ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1])); Tv ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1]));
Tv ar3=vload(creal(alm[l+2])), ai3=vload(cimag(alm[l+2])); Tv ar3=vload(creal(alm[l+2])), ai3=vload(cimag(alm[l+2]));
Tv ar4=vload(creal(alm[l+3])), ai4=vload(cimag(alm[l+3])); Tv ar4=vload(creal(alm[l+3])), ai4=vload(cimag(alm[l+3]));
Tv a1=vload(ab[il ].f[0]), b1=vload(ab[il ].f[1]); Tv a1=vload(coef[il ][0]), b1=vload(coef[il ][1]);
Tv a2=vload(ab[il+1].f[0]), b2=vload(ab[il+1].f[1]); Tv a2=vload(coef[il+1][0]), b2=vload(coef[il+1][1]);
for (int i=0; i<nv0; ++i) for (int i=0; i<nv0; ++i)
{ {
d->p1r[i] += d->lam2[i]*ar1; d->p1r[i] += d->lam2[i]*ar1;
@ -241,8 +241,8 @@ NOINLINE static void alm2map_kernel(s0data_v * restrict d,
Tv ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1])); Tv ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1]));
Tv ar3=vload(creal(alm[l+2])), ai3=vload(cimag(alm[l+2])); Tv ar3=vload(creal(alm[l+2])), ai3=vload(cimag(alm[l+2]));
Tv ar4=vload(creal(alm[l+3])), ai4=vload(cimag(alm[l+3])); Tv ar4=vload(creal(alm[l+3])), ai4=vload(cimag(alm[l+3]));
Tv a1=vload(ab[il ].f[0]), b1=vload(ab[il ].f[1]); Tv a1=vload(coef[il ][0]), b1=vload(coef[il ][1]);
Tv a2=vload(ab[il+1].f[0]), b2=vload(ab[il+1].f[1]); Tv a2=vload(coef[il+1][0]), b2=vload(coef[il+1][1]);
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
d->p1r[i] += d->lam2[i]*ar1; d->p1r[i] += d->lam2[i]*ar1;
@ -262,7 +262,7 @@ NOINLINE static void alm2map_kernel(s0data_v * restrict d,
{ {
Tv ar1=vload(creal(alm[l ])), ai1=vload(cimag(alm[l ])); Tv ar1=vload(creal(alm[l ])), ai1=vload(cimag(alm[l ]));
Tv ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1])); Tv ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1]));
Tv a=vload(ab[il].f[0]), b=vload(ab[il].f[1]); Tv a=vload(coef[il][0]), b=vload(coef[il][1]);
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
d->p1r[i] += d->lam2[i]*ar1; d->p1r[i] += d->lam2[i]*ar1;
@ -286,7 +286,7 @@ NOINLINE static void calc_alm2map (sharp_job * restrict job,
if (l>lmax) return; if (l>lmax) return;
job->opcnt += (lmax+1-l) * 6*nth; job->opcnt += (lmax+1-l) * 6*nth;
const sharp_ylmgen_dbl2 * restrict ab = gen->ab; const sharp_ylmgen_dbl2 * restrict coef = gen->coef;
const dcmplx * restrict alm=job->almtmp; const dcmplx * restrict alm=job->almtmp;
int full_ieee=1; int full_ieee=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
@ -299,7 +299,7 @@ NOINLINE static void calc_alm2map (sharp_job * restrict job,
{ {
Tv ar1=vload(creal(alm[l ])), ai1=vload(cimag(alm[l ])); Tv ar1=vload(creal(alm[l ])), ai1=vload(cimag(alm[l ]));
Tv ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1])); Tv ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1]));
Tv a=vload(ab[il].f[0]), b=vload(ab[il].f[1]); Tv a=vload(coef[il][0]), b=vload(coef[il][1]);
full_ieee=1; full_ieee=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
@ -323,17 +323,17 @@ NOINLINE static void calc_alm2map (sharp_job * restrict job,
d->lam1[i] *= d->corfac[i]; d->lam1[i] *= d->corfac[i];
d->lam2[i] *= d->corfac[i]; d->lam2[i] *= d->corfac[i];
} }
alm2map_kernel(d, ab, alm, l, il, lmax, nv2); alm2map_kernel(d, coef, alm, l, il, lmax, nv2);
} }
NOINLINE static void map2alm_kernel(s0data_v * restrict d, NOINLINE static void map2alm_kernel(s0data_v * restrict d,
const sharp_ylmgen_dbl2 * restrict ab, dcmplx * restrict alm, int l, const sharp_ylmgen_dbl2 * restrict coef, dcmplx * restrict alm, int l,
int il, int lmax, int nv2) int il, int lmax, int nv2)
{ {
for (; l<=lmax-2; il+=2, l+=4) for (; l<=lmax-2; il+=2, l+=4)
{ {
Tv a1=vload(ab[il ].f[0]), b1=vload(ab[il ].f[1]); Tv a1=vload(coef[il ][0]), b1=vload(coef[il ][1]);
Tv a2=vload(ab[il+1].f[0]), b2=vload(ab[il+1].f[1]); Tv a2=vload(coef[il+1][0]), b2=vload(coef[il+1][1]);
Tv atmp1[4] = {vzero, vzero, vzero, vzero}; Tv atmp1[4] = {vzero, vzero, vzero, vzero};
Tv atmp2[4] = {vzero, vzero, vzero, vzero}; Tv atmp2[4] = {vzero, vzero, vzero, vzero};
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
@ -354,7 +354,7 @@ NOINLINE static void map2alm_kernel(s0data_v * restrict d,
} }
for (; l<=lmax; ++il, l+=2) for (; l<=lmax; ++il, l+=2)
{ {
Tv a=vload(ab[il].f[0]), b=vload(ab[il].f[1]); Tv a=vload(coef[il][0]), b=vload(coef[il][1]);
Tv atmp[4] = {vzero, vzero, vzero, vzero}; Tv atmp[4] = {vzero, vzero, vzero, vzero};
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
@ -380,7 +380,7 @@ NOINLINE static void calc_map2alm (sharp_job * restrict job,
if (l>lmax) return; if (l>lmax) return;
job->opcnt += (lmax+1-l) * 6*nth; job->opcnt += (lmax+1-l) * 6*nth;
const sharp_ylmgen_dbl2 * restrict ab = gen->ab; const sharp_ylmgen_dbl2 * restrict coef = gen->coef;
dcmplx * restrict alm=job->almtmp; dcmplx * restrict alm=job->almtmp;
int full_ieee=1; int full_ieee=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
@ -391,7 +391,7 @@ NOINLINE static void calc_map2alm (sharp_job * restrict job,
while((!full_ieee) && (l<=lmax)) while((!full_ieee) && (l<=lmax))
{ {
Tv a=vload(ab[il].f[0]), b=vload(ab[il].f[1]); Tv a=vload(coef[il][0]), b=vload(coef[il][1]);
Tv atmp[4] = {vzero, vzero, vzero, vzero}; Tv atmp[4] = {vzero, vzero, vzero, vzero};
full_ieee=1; full_ieee=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
@ -417,13 +417,13 @@ NOINLINE static void calc_map2alm (sharp_job * restrict job,
d->lam1[i] *= d->corfac[i]; d->lam1[i] *= d->corfac[i];
d->lam2[i] *= d->corfac[i]; d->lam2[i] *= d->corfac[i];
} }
map2alm_kernel(d, ab, alm, l, il, lmax, nv2); map2alm_kernel(d, coef, alm, l, il, lmax, nv2);
} }
NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * restrict gen, NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * restrict gen,
sxdata_v * restrict d, int * restrict l_, int nv2) sxdata_v * restrict d, int * restrict l_, int nv2)
{ {
const sharp_ylmgen_dbl2 * restrict fx = gen->fx; const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
Tv prefac=vload(gen->prefac[gen->m]), Tv prefac=vload(gen->prefac[gen->m]),
prescale=vload(gen->fscale[gen->m]); prescale=vload(gen->fscale[gen->m]);
Tv limscale=vload(sharp_limscale); Tv limscale=vload(sharp_limscale);
@ -474,8 +474,8 @@ NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * restrict gen,
{ {
if (l+2>gen->lmax) {*l_=gen->lmax+1;return;} if (l+2>gen->lmax) {*l_=gen->lmax+1;return;}
below_limit=1; below_limit=1;
Tv fx10=vload(fx[l+1].f[0]),fx11=vload(fx[l+1].f[1]); Tv fx10=vload(fx[l+1][0]),fx11=vload(fx[l+1][1]);
Tv fx20=vload(fx[l+2].f[0]),fx21=vload(fx[l+2].f[1]); Tv fx20=vload(fx[l+2][0]),fx21=vload(fx[l+2][1]);
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
d->l1p[i] = (d->cth[i]*fx10 - fx11)*d->l2p[i] - d->l1p[i]; d->l1p[i] = (d->cth[i]*fx10 - fx11)*d->l2p[i] - d->l1p[i];
@ -500,8 +500,8 @@ NOINLINE static void alm2map_spin_kernel(sxdata_v * restrict d,
int lsave = l; int lsave = l;
while (l<=lmax) while (l<=lmax)
{ {
Tv fx10=vload(fx[l+1].f[0]),fx11=vload(fx[l+1].f[1]); Tv fx10=vload(fx[l+1][0]),fx11=vload(fx[l+1][1]);
Tv fx20=vload(fx[l+2].f[0]),fx21=vload(fx[l+2].f[1]); Tv fx20=vload(fx[l+2][0]),fx21=vload(fx[l+2][1]);
Tv agr1=vload(creal(alm[2*l ])), agi1=vload(cimag(alm[2*l ])), Tv agr1=vload(creal(alm[2*l ])), agi1=vload(cimag(alm[2*l ])),
acr1=vload(creal(alm[2*l+1])), aci1=vload(cimag(alm[2*l+1])); acr1=vload(creal(alm[2*l+1])), aci1=vload(cimag(alm[2*l+1]));
Tv agr2=vload(creal(alm[2*l+2])), agi2=vload(cimag(alm[2*l+2])), Tv agr2=vload(creal(alm[2*l+2])), agi2=vload(cimag(alm[2*l+2])),
@ -525,8 +525,8 @@ NOINLINE static void alm2map_spin_kernel(sxdata_v * restrict d,
l=lsave; l=lsave;
while (l<=lmax) while (l<=lmax)
{ {
Tv fx10=vload(fx[l+1].f[0]),fx11=vload(fx[l+1].f[1]); Tv fx10=vload(fx[l+1][0]),fx11=vload(fx[l+1][1]);
Tv fx20=vload(fx[l+2].f[0]),fx21=vload(fx[l+2].f[1]); Tv fx20=vload(fx[l+2][0]),fx21=vload(fx[l+2][1]);
Tv agr1=vload(creal(alm[2*l ])), agi1=vload(cimag(alm[2*l ])), Tv agr1=vload(creal(alm[2*l ])), agi1=vload(cimag(alm[2*l ])),
acr1=vload(creal(alm[2*l+1])), aci1=vload(cimag(alm[2*l+1])); acr1=vload(creal(alm[2*l+1])), aci1=vload(cimag(alm[2*l+1]));
Tv agr2=vload(creal(alm[2*l+2])), agi2=vload(cimag(alm[2*l+2])), Tv agr2=vload(creal(alm[2*l+2])), agi2=vload(cimag(alm[2*l+2])),
@ -559,7 +559,7 @@ NOINLINE static void calc_alm2map_spin (sharp_job * restrict job,
if (l>lmax) return; if (l>lmax) return;
job->opcnt += (lmax+1-l) * 23*nth; job->opcnt += (lmax+1-l) * 23*nth;
const sharp_ylmgen_dbl2 * restrict fx = gen->fx; const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
const dcmplx * restrict alm=job->almtmp; const dcmplx * restrict alm=job->almtmp;
int full_ieee=1; int full_ieee=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
@ -572,8 +572,8 @@ NOINLINE static void calc_alm2map_spin (sharp_job * restrict job,
while((!full_ieee) && (l<=lmax)) while((!full_ieee) && (l<=lmax))
{ {
Tv fx10=vload(fx[l+1].f[0]),fx11=vload(fx[l+1].f[1]); Tv fx10=vload(fx[l+1][0]),fx11=vload(fx[l+1][1]);
Tv fx20=vload(fx[l+2].f[0]),fx21=vload(fx[l+2].f[1]); Tv fx20=vload(fx[l+2][0]),fx21=vload(fx[l+2][1]);
Tv agr1=vload(creal(alm[2*l ])), agi1=vload(cimag(alm[2*l ])), Tv agr1=vload(creal(alm[2*l ])), agi1=vload(cimag(alm[2*l ])),
acr1=vload(creal(alm[2*l+1])), aci1=vload(cimag(alm[2*l+1])); acr1=vload(creal(alm[2*l+1])), aci1=vload(cimag(alm[2*l+1]));
Tv agr2=vload(creal(alm[2*l+2])), agi2=vload(cimag(alm[2*l+2])), Tv agr2=vload(creal(alm[2*l+2])), agi2=vload(cimag(alm[2*l+2])),
@ -636,8 +636,8 @@ NOINLINE static void map2alm_spin_kernel(sxdata_v * restrict d,
int lsave=l; int lsave=l;
while (l<=lmax) while (l<=lmax)
{ {
Tv fx10=vload(fx[l+1].f[0]),fx11=vload(fx[l+1].f[1]); Tv fx10=vload(fx[l+1][0]),fx11=vload(fx[l+1][1]);
Tv fx20=vload(fx[l+2].f[0]),fx21=vload(fx[l+2].f[1]); Tv fx20=vload(fx[l+2][0]),fx21=vload(fx[l+2][1]);
Tv agr1=vzero, agi1=vzero, acr1=vzero, aci1=vzero; Tv agr1=vzero, agi1=vzero, acr1=vzero, aci1=vzero;
Tv agr2=vzero, agi2=vzero, acr2=vzero, aci2=vzero; Tv agr2=vzero, agi2=vzero, acr2=vzero, aci2=vzero;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
@ -660,8 +660,8 @@ NOINLINE static void map2alm_spin_kernel(sxdata_v * restrict d,
l=lsave; l=lsave;
while (l<=lmax) while (l<=lmax)
{ {
Tv fx10=vload(fx[l+1].f[0]),fx11=vload(fx[l+1].f[1]); Tv fx10=vload(fx[l+1][0]),fx11=vload(fx[l+1][1]);
Tv fx20=vload(fx[l+2].f[0]),fx21=vload(fx[l+2].f[1]); Tv fx20=vload(fx[l+2][0]),fx21=vload(fx[l+2][1]);
Tv agr1=vzero, agi1=vzero, acr1=vzero, aci1=vzero; Tv agr1=vzero, agi1=vzero, acr1=vzero, aci1=vzero;
Tv agr2=vzero, agi2=vzero, acr2=vzero, aci2=vzero; Tv agr2=vzero, agi2=vzero, acr2=vzero, aci2=vzero;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
@ -693,7 +693,7 @@ NOINLINE static void calc_map2alm_spin (sharp_job * restrict job,
if (l>lmax) return; if (l>lmax) return;
job->opcnt += (lmax+1-l) * 23*nth; job->opcnt += (lmax+1-l) * 23*nth;
const sharp_ylmgen_dbl2 * restrict fx = gen->fx; const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
dcmplx * restrict alm=job->almtmp; dcmplx * restrict alm=job->almtmp;
int full_ieee=1; int full_ieee=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
@ -714,8 +714,8 @@ NOINLINE static void calc_map2alm_spin (sharp_job * restrict job,
while((!full_ieee) && (l<=lmax)) while((!full_ieee) && (l<=lmax))
{ {
Tv fx10=vload(fx[l+1].f[0]),fx11=vload(fx[l+1].f[1]); Tv fx10=vload(fx[l+1][0]),fx11=vload(fx[l+1][1]);
Tv fx20=vload(fx[l+2].f[0]),fx21=vload(fx[l+2].f[1]); Tv fx20=vload(fx[l+2][0]),fx21=vload(fx[l+2][1]);
Tv agr1=vzero, agi1=vzero, acr1=vzero, aci1=vzero; Tv agr1=vzero, agi1=vzero, acr1=vzero, aci1=vzero;
Tv agr2=vzero, agi2=vzero, acr2=vzero, aci2=vzero; Tv agr2=vzero, agi2=vzero, acr2=vzero, aci2=vzero;
full_ieee=1; full_ieee=1;
@ -766,8 +766,8 @@ NOINLINE static void alm2map_deriv1_kernel(sxdata_v * restrict d,
{ {
while (l<=lmax) while (l<=lmax)
{ {
Tv fx10=vload(fx[l+1].f[0]),fx11=vload(fx[l+1].f[1]); Tv fx10=vload(fx[l+1][0]),fx11=vload(fx[l+1][1]);
Tv fx20=vload(fx[l+2].f[0]),fx21=vload(fx[l+2].f[1]); Tv fx20=vload(fx[l+2][0]),fx21=vload(fx[l+2][1]);
Tv ar1=vload(creal(alm[l ])), ai1=vload(cimag(alm[l ])), Tv ar1=vload(creal(alm[l ])), ai1=vload(cimag(alm[l ])),
ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1])); ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1]));
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
@ -803,7 +803,7 @@ NOINLINE static void calc_alm2map_deriv1(sharp_job * restrict job,
if (l>lmax) return; if (l>lmax) return;
job->opcnt += (lmax+1-l) * 17*nth; job->opcnt += (lmax+1-l) * 17*nth;
const sharp_ylmgen_dbl2 * restrict fx = gen->fx; const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
const dcmplx * restrict alm=job->almtmp; const dcmplx * restrict alm=job->almtmp;
int full_ieee=1; int full_ieee=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
@ -816,8 +816,8 @@ NOINLINE static void calc_alm2map_deriv1(sharp_job * restrict job,
while((!full_ieee) && (l<=lmax)) while((!full_ieee) && (l<=lmax))
{ {
Tv fx10=vload(fx[l+1].f[0]),fx11=vload(fx[l+1].f[1]); Tv fx10=vload(fx[l+1][0]),fx11=vload(fx[l+1][1]);
Tv fx20=vload(fx[l+2].f[0]),fx21=vload(fx[l+2].f[1]); Tv fx20=vload(fx[l+2][0]),fx21=vload(fx[l+2][1]);
Tv ar1=vload(creal(alm[l ])), ai1=vload(cimag(alm[l ])), Tv ar1=vload(creal(alm[l ])), ai1=vload(cimag(alm[l ])),
ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1])); ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1]));
full_ieee=1; full_ieee=1;

View file

@ -82,14 +82,14 @@ void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
} }
gen->eps=RALLOC(double, gen->lmax+4); gen->eps=RALLOC(double, gen->lmax+4);
gen->alpha=RALLOC(double, gen->lmax/2+2); gen->alpha=RALLOC(double, gen->lmax/2+2);
gen->ab=RALLOC(sharp_ylmgen_dbl2, gen->lmax/2+2); gen->coef=RALLOC(sharp_ylmgen_dbl2, gen->lmax/2+2);
} }
else else
{ {
gen->m=gen->mlo=gen->mhi=-1234567890; gen->m=gen->mlo=gen->mhi=-1234567890;
ALLOC(gen->fx,sharp_ylmgen_dbl2,gen->lmax+3); ALLOC(gen->coef,sharp_ylmgen_dbl2,gen->lmax+3);
for (int m=0; m<gen->lmax+3; ++m) for (int m=0; m<gen->lmax+3; ++m)
gen->fx[m].f[0]=gen->fx[m].f[1]=0.; gen->coef[m][0]=gen->coef[m][1]=0.;
ALLOC(gen->alpha,double,gen->lmax+3); ALLOC(gen->alpha,double,gen->lmax+3);
ALLOC(gen->inv,double,gen->lmax+2); ALLOC(gen->inv,double,gen->lmax+2);
gen->inv[0]=0; gen->inv[0]=0;
@ -134,19 +134,17 @@ void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen)
{ {
DEALLOC(gen->cf); DEALLOC(gen->cf);
DEALLOC(gen->powlimit); DEALLOC(gen->powlimit);
DEALLOC(gen->alpha);
DEALLOC(gen->coef);
if (gen->s==0) if (gen->s==0)
{ {
DEALLOC(gen->mfac); DEALLOC(gen->mfac);
DEALLOC(gen->root); DEALLOC(gen->root);
DEALLOC(gen->iroot); DEALLOC(gen->iroot);
DEALLOC(gen->eps); DEALLOC(gen->eps);
DEALLOC(gen->alpha);
DEALLOC(gen->ab);
} }
else else
{ {
DEALLOC(gen->fx);
DEALLOC(gen->alpha);
DEALLOC(gen->prefac); DEALLOC(gen->prefac);
DEALLOC(gen->fscale); DEALLOC(gen->fscale);
DEALLOC(gen->flm1); DEALLOC(gen->flm1);
@ -174,9 +172,9 @@ void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m)
/(gen->eps[l+2]*gen->eps[l+3]*gen->alpha[il]); /(gen->eps[l+2]*gen->eps[l+3]*gen->alpha[il]);
for (int il=0, l=m; l<gen->lmax+2; ++il, l+=2) for (int il=0, l=m; l<gen->lmax+2; ++il, l+=2)
{ {
gen->ab[il].f[0] = ((il&1) ? -1 : 1)*gen->alpha[il]*gen->alpha[il]; gen->coef[il][0] = ((il&1) ? -1 : 1)*gen->alpha[il]*gen->alpha[il];
double t1 = gen->eps[l+2], t2 = gen->eps[l+1]; double t1 = gen->eps[l+2], t2 = gen->eps[l+1];
gen->ab[il].f[1] = -gen->ab[il].f[0]*(t1*t1+t2*t2); gen->coef[il][1] = -gen->coef[il][0]*(t1*t1+t2*t2);
} }
} }
else else
@ -190,7 +188,7 @@ void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m)
if (!ms_similar) if (!ms_similar)
{ {
gen->alpha[gen->mhi] = 1.; gen->alpha[gen->mhi] = 1.;
gen->fx[gen->mhi].f[0] = gen->fx[gen->mhi].f[1] = 0.; gen->coef[gen->mhi][0] = gen->coef[gen->mhi][1] = 0.;
for (int l=gen->mhi; l<gen->lmax+1; ++l) for (int l=gen->mhi; l<gen->lmax+1; ++l)
{ {
double t = gen->flm1[l+gen->m]*gen->flm1[l-gen->m] double t = gen->flm1[l+gen->m]*gen->flm1[l-gen->m]
@ -206,8 +204,8 @@ void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m)
gen->alpha[l+1] = gen->alpha[l-1]*flp12; gen->alpha[l+1] = gen->alpha[l-1]*flp12;
else else
gen->alpha[l+1] = 1.; gen->alpha[l+1] = 1.;
gen->fx[l+1].f[0] = flp10*gen->alpha[l]/gen->alpha[l+1]; gen->coef[l+1][0] = flp10*gen->alpha[l]/gen->alpha[l+1];
gen->fx[l+1].f[1] = flp11*gen->fx[l+1].f[0]; gen->coef[l+1][1] = flp11*gen->coef[l+1][0];
} }
} }

View file

@ -41,7 +41,7 @@ static const double sharp_fbig=0x1p+800,sharp_fsmall=0x1p-800;
static const double sharp_ftol=0x1p-60; static const double sharp_ftol=0x1p-60;
static const double sharp_fbighalf=0x1p+400; static const double sharp_fbighalf=0x1p+400;
typedef struct { double f[2]; } sharp_ylmgen_dbl2; typedef double sharp_ylmgen_dbl2[2];
typedef struct typedef struct
{ {
@ -54,16 +54,15 @@ typedef struct
int m; int m;
double *alpha; double *alpha;
sharp_ylmgen_dbl2 *coef;
/* used if s==0 */ /* used if s==0 */
double *mfac, *eps; double *mfac, *eps;
sharp_ylmgen_dbl2 *ab;
/* used if s!=0 */ /* used if s!=0 */
int sinPow, cosPow, preMinus_p, preMinus_m; int sinPow, cosPow, preMinus_p, preMinus_m;
double *prefac; double *prefac;
int *fscale; int *fscale;
sharp_ylmgen_dbl2 *fx;
/* internal usage only */ /* internal usage only */
/* used if s==0 */ /* used if s==0 */