fix SHTs for colatitudes with sin(theta)<0

This commit is contained in:
Martin Reinecke 2013-02-24 08:26:21 +01:00
parent 515ef8a4aa
commit 44cf6c2339
2 changed files with 28 additions and 17 deletions

View file

@ -220,7 +220,7 @@ static inline void Y(rec_step) (Tb * restrict rxp, Tb * restrict rxm,
}
}
static void Y(iter_to_ieee_spin) (const Tb cth, int *l_,
static void Y(iter_to_ieee_spin) (const Tb cth, const Tb sth, int *l_,
Tb * rec1p_, Tb * rec1m_, Tb * rec2p_, Tb * rec2m_,
Tb * scalep_, Tb * scalem_, const sharp_Ylmgen_C * restrict gen)
{
@ -232,6 +232,11 @@ static void Y(iter_to_ieee_spin) (const Tb cth, int *l_,
cth2.v[i]=vmax(cth2.v[i],vload(1e-15));
sth2.v[i]=vsqrt(vmul(vsub(vone,cth.v[i]),vload(0.5)));
sth2.v[i]=vmax(sth2.v[i],vload(1e-15));
Tv mask=vlt(sth.v[i],vzero);
Tv cfct=vblend(vand(mask,vlt(cth.v[i],vzero)),vload(-1.),vone);
cth2.v[i]=vmul(cth2.v[i],cfct);
Tv sfct=vblend(vand(mask,vgt(cth.v[i],vzero)),vload(-1.),vone);
sth2.v[i]=vmul(sth2.v[i],sfct);
}
Tb ccp, ccps, ssp, ssps, csp, csps, scp, scps;

View file

@ -429,12 +429,14 @@ static void Z(map2alm_spin_kernel) (Tb cth, const Y(Tbqu) * restrict p1,
Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l] NJ2);
}
static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2 NJ1)
static void Z(calc_alm2map_spin) (const Tb cth, const Tb sth,
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbqu) * restrict p1,
Y(Tbqu) * restrict p2 NJ1)
{
int l, lmax=gen->lmax;
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
Y(iter_to_ieee_spin)
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
if (l>lmax) return;
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
@ -473,12 +475,14 @@ static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
lmax NJ2);
}
static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
sharp_job *job, const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2 NJ1)
static void Z(calc_map2alm_spin) (Tb cth, Tb sth,
const sharp_Ylmgen_C * restrict gen, sharp_job *job,
const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2 NJ1)
{
int l, lmax=gen->lmax;
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
Y(iter_to_ieee_spin)
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
if (l>lmax) return;
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
@ -568,12 +572,14 @@ static void Z(alm2map_deriv1_kernel) (Tb cth, Y(Tbqu) * restrict p1,
Z(saddstep_d)(p1, p2, rec2p, rec2m, &alm[njobs*l] NJ2);
}
static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2 NJ1)
static void Z(calc_alm2map_deriv1) (const Tb cth, const Tb sth,
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbqu) * restrict p1,
Y(Tbqu) * restrict p2 NJ1)
{
int l, lmax=gen->lmax;
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
Y(iter_to_ieee_spin)
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
if (l>lmax) return;
job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
@ -669,7 +675,7 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
for (int ith=0; ith<ulim-llim; ith+=nval)
{
Y(Tbuqu) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
Y(Tbu) cth;
Y(Tbu) cth, sth;
int skip=1;
for (int i=0; i<nval; ++i)
@ -677,14 +683,14 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
int itot=i+ith;
if (itot>=ulim-llim) itot=ulim-llim-1;
if (mlim[itot]>=m) skip=0;
cth.s[i]=cth_[itot];
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
}
if (!skip)
(job->type==SHARP_ALM2MAP) ?
Z(calc_alm2map_spin )
(cth.b,gen,job,&p1[0].b,&p2[0].b NJ2) :
(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2) :
Z(calc_alm2map_deriv1)
(cth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
for (int i=0; i<nval; ++i)
{
@ -753,7 +759,7 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
for (int ith=0; ith<ulim-llim; ith+=nval)
{
Y(Tbuqu) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
Y(Tbu) cth;
Y(Tbu) cth, sth;
int skip=1;
for (int i=0; i<nval; ++i)
@ -761,7 +767,7 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
int itot=i+ith;
if (itot>=ulim-llim) itot=ulim-llim-1;
if (mlim[itot]>=m) skip=0;
cth.s[i]=cth_[itot];
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
if (i+ith<ulim-llim)
{
for (int j=0; j<njobs; ++j)
@ -781,7 +787,7 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
}
}
if (!skip)
Z(calc_map2alm_spin) (cth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
Z(calc_map2alm_spin) (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
}
}
break;