switch to new method of polar optimization, which is safe for all spins
This commit is contained in:
parent
32ddcae2ec
commit
72e72ea06f
4 changed files with 145 additions and 163 deletions
|
@ -55,16 +55,17 @@ static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize)
|
||||||
*nchunks = (ndata+*chunksize-1) / *chunksize;
|
*nchunks = (ndata+*chunksize-1) / *chunksize;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct
|
static int sharp_get_mlim (int lmax, int spin, double sth, double cth,
|
||||||
|
double ofs)
|
||||||
{
|
{
|
||||||
double s;
|
double b = -2*spin*fabs(cth);
|
||||||
int i;
|
double t1 = lmax*sth+ofs;
|
||||||
} idxhelper;
|
double c = spin*spin-t1*t1;
|
||||||
|
double discr = b*b-4*c;
|
||||||
static int idx_compare (const void *xa, const void *xb)
|
if (discr<=0) return lmax;
|
||||||
{
|
double res=(-b+sqrt(discr))/2.;
|
||||||
const idxhelper *a=xa, *b=xb;
|
if (res>lmax) res=lmax;
|
||||||
return (a->s > b->s) ? -1 : (a->s < b->s) ? 1 : 0;
|
return (int)(res+0.5);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
|
@ -249,38 +250,34 @@ static void ringhelper_phase2ring (ringhelper *self,
|
||||||
|
|
||||||
ringhelper_update (self, nph, mmax, info->phi0);
|
ringhelper_update (self, nph, mmax, info->phi0);
|
||||||
self->work[0]=phase[0];
|
self->work[0]=phase[0];
|
||||||
SET_ARRAY(self->work,1,nph,0.);
|
|
||||||
|
|
||||||
#if 0
|
if (nph>=2*mmax+1)
|
||||||
if (self->norot)
|
{
|
||||||
for (int m=1; m<=mmax; ++m)
|
for (int m=1; m<=mmax; ++m)
|
||||||
{
|
{
|
||||||
int idx1 = m%nph;
|
dcmplx tmp = phase[m*pstride];
|
||||||
int idx2 = nph-1-((m-1)%nph);
|
if(!self->norot) tmp*=self->shiftarr[m];
|
||||||
self->work[idx1]+=phase[m*pstride];
|
self->work[m]=tmp;
|
||||||
self->work[idx2]+=conj(phase[m*pstride]);
|
self->work[nph-m]=conj(tmp);
|
||||||
}
|
}
|
||||||
|
for (int m=mmax+1; m<(nph-mmax); ++m)
|
||||||
|
self->work[m]=0.;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
SET_ARRAY(self->work,1,nph,0.);
|
||||||
|
|
||||||
|
int idx1=1, idx2=nph-1;
|
||||||
for (int m=1; m<=mmax; ++m)
|
for (int m=1; m<=mmax; ++m)
|
||||||
{
|
{
|
||||||
int idx1 = m%nph;
|
dcmplx tmp = phase[m*pstride];
|
||||||
int idx2 = nph-1-((m-1)%nph);
|
if(!self->norot) tmp*=self->shiftarr[m];
|
||||||
dcmplx tmp = phase[m*pstride]*self->shiftarr[m];
|
|
||||||
self->work[idx1]+=tmp;
|
self->work[idx1]+=tmp;
|
||||||
self->work[idx2]+=conj(tmp);
|
self->work[idx2]+=conj(tmp);
|
||||||
|
if (++idx1>=nph) idx1=0;
|
||||||
|
if (--idx2<0) idx2=nph-1;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
int idx1=1, idx2=nph-1;
|
|
||||||
for (int m=1; m<=mmax; ++m)
|
|
||||||
{
|
|
||||||
dcmplx tmp = phase[m*pstride];
|
|
||||||
if(!self->norot) tmp*=self->shiftarr[m];
|
|
||||||
self->work[idx1]+=tmp;
|
|
||||||
self->work[idx2]+=conj(tmp);
|
|
||||||
if (++idx1>=nph) idx1=0;
|
|
||||||
if (--idx2<0) idx2=nph-1;
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
real_plan_backward_c (self->plan, (double *)(self->work));
|
real_plan_backward_c (self->plan, (double *)(self->work));
|
||||||
double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1.;
|
double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1.;
|
||||||
if (flags&SHARP_REAL_HARMONICS)
|
if (flags&SHARP_REAL_HARMONICS)
|
||||||
|
@ -317,12 +314,24 @@ static void ringhelper_ring2phase (ringhelper *self,
|
||||||
|
|
||||||
real_plan_forward_c (self->plan, (double *)self->work);
|
real_plan_forward_c (self->plan, (double *)self->work);
|
||||||
|
|
||||||
if (self->norot)
|
if (maxidx<nph)
|
||||||
for (int m=0; m<=maxidx; ++m)
|
{
|
||||||
phase[m*pstride] = self->work[m%nph];
|
if (self->norot)
|
||||||
|
for (int m=0; m<=maxidx; ++m)
|
||||||
|
phase[m*pstride] = self->work[m];
|
||||||
|
else
|
||||||
|
for (int m=0; m<=maxidx; ++m)
|
||||||
|
phase[m*pstride]=self->work[m]*self->shiftarr[m];
|
||||||
|
}
|
||||||
else
|
else
|
||||||
for (int m=0; m<=maxidx; ++m)
|
{
|
||||||
phase[m*pstride]=self->work[m%nph]*self->shiftarr[m];
|
if (self->norot)
|
||||||
|
for (int m=0; m<=maxidx; ++m)
|
||||||
|
phase[m*pstride] = self->work[m%nph];
|
||||||
|
else
|
||||||
|
for (int m=0; m<=maxidx; ++m)
|
||||||
|
phase[m*pstride]=self->work[m%nph]*self->shiftarr[m];
|
||||||
|
}
|
||||||
|
|
||||||
for (int m=maxidx+1;m<=mmax; ++m)
|
for (int m=maxidx+1;m<=mmax; ++m)
|
||||||
phase[m*pstride]=0.;
|
phase[m*pstride]=0.;
|
||||||
|
@ -622,21 +631,15 @@ static void sharp_execute_job (sharp_job *job)
|
||||||
{
|
{
|
||||||
int llim=chunk*chunksize, ulim=IMIN(llim+chunksize,job->ginfo->npairs);
|
int llim=chunk*chunksize, ulim=IMIN(llim+chunksize,job->ginfo->npairs);
|
||||||
int *ispair = RALLOC(int,ulim-llim);
|
int *ispair = RALLOC(int,ulim-llim);
|
||||||
|
int *mlim = RALLOC(int,ulim-llim);
|
||||||
double *cth = RALLOC(double,ulim-llim), *sth = RALLOC(double,ulim-llim);
|
double *cth = RALLOC(double,ulim-llim), *sth = RALLOC(double,ulim-llim);
|
||||||
idxhelper *stmp = RALLOC(idxhelper,ulim-llim);
|
|
||||||
for (int i=0; i<ulim-llim; ++i)
|
for (int i=0; i<ulim-llim; ++i)
|
||||||
{
|
{
|
||||||
ispair[i] = job->ginfo->pair[i+llim].r2.nph>0;
|
ispair[i] = job->ginfo->pair[i+llim].r2.nph>0;
|
||||||
cth[i] = job->ginfo->pair[i+llim].r1.cth;
|
cth[i] = job->ginfo->pair[i+llim].r1.cth;
|
||||||
sth[i] = job->ginfo->pair[i+llim].r1.sth;
|
sth[i] = job->ginfo->pair[i+llim].r1.sth;
|
||||||
stmp[i].s=sth[i];
|
mlim[i] = sharp_get_mlim(lmax, job->spin, sth[i], cth[i], 100.);
|
||||||
stmp[i].i=i;
|
|
||||||
}
|
}
|
||||||
qsort (stmp,ulim-llim,sizeof(idxhelper),idx_compare);
|
|
||||||
int *idx = RALLOC(int,ulim-llim);
|
|
||||||
for (int i=0; i<ulim-llim; ++i)
|
|
||||||
idx[i]=stmp[i].i;
|
|
||||||
DEALLOC(stmp);
|
|
||||||
|
|
||||||
/* map->phase where necessary */
|
/* map->phase where necessary */
|
||||||
map2phase (job, mmax, llim, ulim);
|
map2phase (job, mmax, llim, ulim);
|
||||||
|
@ -655,7 +658,7 @@ static void sharp_execute_job (sharp_job *job)
|
||||||
/* alm->alm_tmp where necessary */
|
/* alm->alm_tmp where necessary */
|
||||||
alm2almtmp (&ljob, lmax, mi);
|
alm2almtmp (&ljob, lmax, mi);
|
||||||
|
|
||||||
inner_loop (&ljob, ispair, cth, sth, llim, ulim, &generator, mi, idx);
|
inner_loop (&ljob, ispair, cth, sth, llim, ulim, &generator, mi, mlim);
|
||||||
|
|
||||||
/* alm_tmp->alm where necessary */
|
/* alm_tmp->alm where necessary */
|
||||||
almtmp2alm (&ljob, lmax, mi);
|
almtmp2alm (&ljob, lmax, mi);
|
||||||
|
@ -672,9 +675,9 @@ static void sharp_execute_job (sharp_job *job)
|
||||||
phase2map (job, mmax, llim, ulim);
|
phase2map (job, mmax, llim, ulim);
|
||||||
|
|
||||||
DEALLOC(ispair);
|
DEALLOC(ispair);
|
||||||
|
DEALLOC(mlim);
|
||||||
DEALLOC(cth);
|
DEALLOC(cth);
|
||||||
DEALLOC(sth);
|
DEALLOC(sth);
|
||||||
DEALLOC(idx);
|
|
||||||
} /* end of chunk loop */
|
} /* end of chunk loop */
|
||||||
|
|
||||||
DEALLOC(job->norm_l);
|
DEALLOC(job->norm_l);
|
||||||
|
@ -693,7 +696,7 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
|
||||||
if (type==SHARP_Yt) type=SHARP_MAP2ALM;
|
if (type==SHARP_Yt) type=SHARP_MAP2ALM;
|
||||||
if (type==SHARP_WY) { type=SHARP_ALM2MAP; flags|=SHARP_USE_WEIGHTS; }
|
if (type==SHARP_WY) { type=SHARP_ALM2MAP; flags|=SHARP_USE_WEIGHTS; }
|
||||||
|
|
||||||
UTIL_ASSERT((spin>=0)&&(spin<=30), "bad spin");
|
UTIL_ASSERT((spin>=0)&&(spin<=alm_info->lmax), "bad spin");
|
||||||
job->type = type;
|
job->type = type;
|
||||||
job->spin = spin;
|
job->spin = spin;
|
||||||
job->norm_l = NULL;
|
job->norm_l = NULL;
|
||||||
|
@ -801,7 +804,7 @@ int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans)
|
||||||
if (type==SHARP_ALM2MAP_DERIV1) spin=1;
|
if (type==SHARP_ALM2MAP_DERIV1) spin=1;
|
||||||
UTIL_ASSERT(type<5,"bad type");
|
UTIL_ASSERT(type<5,"bad type");
|
||||||
UTIL_ASSERT((ntrans>0),"bad number of simultaneous transforms");
|
UTIL_ASSERT((ntrans>0),"bad number of simultaneous transforms");
|
||||||
UTIL_ASSERT((spin>=0)&&(spin<=30), "bad spin");
|
UTIL_ASSERT(spin>=0, "bad spin");
|
||||||
ntrans=IMIN(ntrans,maxtr);
|
ntrans=IMIN(ntrans,maxtr);
|
||||||
|
|
||||||
if (nv_opt[ntrans-1][spin!=0][type]==0)
|
if (nv_opt[ntrans-1][spin!=0][type]==0)
|
||||||
|
|
|
@ -75,7 +75,7 @@ typedef complex double dcmplx;
|
||||||
|
|
||||||
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||||
const int *idx)
|
const int *mlim)
|
||||||
{
|
{
|
||||||
int njobs=job->ntrans, nv=job->flags&SHARP_NVMAX;
|
int njobs=job->ntrans, nv=job->flags&SHARP_NVMAX;
|
||||||
if (njobs<=MAXJOB_SPECIAL)
|
if (njobs<=MAXJOB_SPECIAL)
|
||||||
|
@ -84,122 +84,122 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||||
{
|
{
|
||||||
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
|
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
|
||||||
case 0x11:
|
case 0x11:
|
||||||
CONCAT3(inner_loop,1,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,1,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x12:
|
case 0x12:
|
||||||
CONCAT3(inner_loop,2,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,2,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x13:
|
case 0x13:
|
||||||
CONCAT3(inner_loop,3,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,3,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x14:
|
case 0x14:
|
||||||
CONCAT3(inner_loop,4,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,4,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x15:
|
case 0x15:
|
||||||
CONCAT3(inner_loop,5,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,5,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x16:
|
case 0x16:
|
||||||
CONCAT3(inner_loop,6,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,6,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
#if ((MAXJOB_SPECIAL>=2)&&(SHARP_MAXTRANS>=2))
|
#if ((MAXJOB_SPECIAL>=2)&&(SHARP_MAXTRANS>=2))
|
||||||
case 0x21:
|
case 0x21:
|
||||||
CONCAT3(inner_loop,1,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,1,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x22:
|
case 0x22:
|
||||||
CONCAT3(inner_loop,2,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,2,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x23:
|
case 0x23:
|
||||||
CONCAT3(inner_loop,3,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,3,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x24:
|
case 0x24:
|
||||||
CONCAT3(inner_loop,4,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,4,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x25:
|
case 0x25:
|
||||||
CONCAT3(inner_loop,5,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,5,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x26:
|
case 0x26:
|
||||||
CONCAT3(inner_loop,6,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,6,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
#if ((MAXJOB_SPECIAL>=3)&&(SHARP_MAXTRANS>=3))
|
#if ((MAXJOB_SPECIAL>=3)&&(SHARP_MAXTRANS>=3))
|
||||||
case 0x31:
|
case 0x31:
|
||||||
CONCAT3(inner_loop,1,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,1,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x32:
|
case 0x32:
|
||||||
CONCAT3(inner_loop,2,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,2,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x33:
|
case 0x33:
|
||||||
CONCAT3(inner_loop,3,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,3,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x34:
|
case 0x34:
|
||||||
CONCAT3(inner_loop,4,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,4,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x35:
|
case 0x35:
|
||||||
CONCAT3(inner_loop,5,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,5,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x36:
|
case 0x36:
|
||||||
CONCAT3(inner_loop,6,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,6,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
#if ((MAXJOB_SPECIAL>=4)&&(SHARP_MAXTRANS>=4))
|
#if ((MAXJOB_SPECIAL>=4)&&(SHARP_MAXTRANS>=4))
|
||||||
case 0x41:
|
case 0x41:
|
||||||
CONCAT3(inner_loop,1,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,1,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x42:
|
case 0x42:
|
||||||
CONCAT3(inner_loop,2,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,2,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x43:
|
case 0x43:
|
||||||
CONCAT3(inner_loop,3,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,3,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x44:
|
case 0x44:
|
||||||
CONCAT3(inner_loop,4,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,4,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x45:
|
case 0x45:
|
||||||
CONCAT3(inner_loop,5,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,5,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x46:
|
case 0x46:
|
||||||
CONCAT3(inner_loop,6,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,6,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
#if ((MAXJOB_SPECIAL>=5)&&(SHARP_MAXTRANS>=5))
|
#if ((MAXJOB_SPECIAL>=5)&&(SHARP_MAXTRANS>=5))
|
||||||
case 0x51:
|
case 0x51:
|
||||||
CONCAT3(inner_loop,1,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,1,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x52:
|
case 0x52:
|
||||||
CONCAT3(inner_loop,2,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,2,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x53:
|
case 0x53:
|
||||||
CONCAT3(inner_loop,3,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,3,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x54:
|
case 0x54:
|
||||||
CONCAT3(inner_loop,4,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,4,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x55:
|
case 0x55:
|
||||||
CONCAT3(inner_loop,5,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,5,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x56:
|
case 0x56:
|
||||||
CONCAT3(inner_loop,6,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,6,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
#if ((MAXJOB_SPECIAL>=6)&&(SHARP_MAXTRANS>=6))
|
#if ((MAXJOB_SPECIAL>=6)&&(SHARP_MAXTRANS>=6))
|
||||||
case 0x61:
|
case 0x61:
|
||||||
CONCAT3(inner_loop,1,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,1,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x62:
|
case 0x62:
|
||||||
CONCAT3(inner_loop,2,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,2,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x63:
|
case 0x63:
|
||||||
CONCAT3(inner_loop,3,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,3,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x64:
|
case 0x64:
|
||||||
CONCAT3(inner_loop,4,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,4,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x65:
|
case 0x65:
|
||||||
CONCAT3(inner_loop,5,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,5,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
case 0x66:
|
case 0x66:
|
||||||
CONCAT3(inner_loop,6,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
CONCAT3(inner_loop,6,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -211,27 +211,27 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
CONCAT2(inner_loop,1)
|
CONCAT2(inner_loop,1)
|
||||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||||
return;
|
return;
|
||||||
case 2:
|
case 2:
|
||||||
CONCAT2(inner_loop,2)
|
CONCAT2(inner_loop,2)
|
||||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||||
return;
|
return;
|
||||||
case 3:
|
case 3:
|
||||||
CONCAT2(inner_loop,3)
|
CONCAT2(inner_loop,3)
|
||||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||||
return;
|
return;
|
||||||
case 4:
|
case 4:
|
||||||
CONCAT2(inner_loop,4)
|
CONCAT2(inner_loop,4)
|
||||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||||
return;
|
return;
|
||||||
case 5:
|
case 5:
|
||||||
CONCAT2(inner_loop,5)
|
CONCAT2(inner_loop,5)
|
||||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||||
return;
|
return;
|
||||||
case 6:
|
case 6:
|
||||||
CONCAT2(inner_loop,6)
|
CONCAT2(inner_loop,6)
|
||||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,7 +41,7 @@ extern "C" {
|
||||||
|
|
||||||
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||||
const int *idx);
|
const int *mlim);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -163,13 +163,13 @@ static void Z(map2alm_kernel) (const Tb cth, const Y(Tbri) * restrict p1,
|
||||||
|
|
||||||
static void Z(calc_alm2map) (const Tb cth, const Tb sth,
|
static void Z(calc_alm2map) (const Tb cth, const Tb sth,
|
||||||
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbri) * restrict p1,
|
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbri) * restrict p1,
|
||||||
Y(Tbri) * restrict p2 NJ1, int *done)
|
Y(Tbri) * restrict p2 NJ1)
|
||||||
{
|
{
|
||||||
int l,lmax=gen->lmax;
|
int l,lmax=gen->lmax;
|
||||||
Tb lam_1,lam_2,scale;
|
Tb lam_1,lam_2,scale;
|
||||||
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
|
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
|
||||||
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
|
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
|
||||||
if (l>lmax) { *done=1; return; }
|
if (l>lmax) return;
|
||||||
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
|
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
|
||||||
|
|
||||||
Tb corfac;
|
Tb corfac;
|
||||||
|
@ -213,7 +213,7 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth,
|
||||||
full_ieee = Y(TballGe)(scale,sharp_minscale);
|
full_ieee = Y(TballGe)(scale,sharp_minscale);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (l>lmax) { *done=1; return; }
|
if (l>lmax) return;
|
||||||
|
|
||||||
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
|
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
|
||||||
Z(alm2map_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax NJ2);
|
Z(alm2map_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax NJ2);
|
||||||
|
@ -221,14 +221,14 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth,
|
||||||
|
|
||||||
static void Z(calc_map2alm) (const Tb cth, const Tb sth,
|
static void Z(calc_map2alm) (const Tb cth, const Tb sth,
|
||||||
const sharp_Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1,
|
const sharp_Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1,
|
||||||
const Y(Tbri) * restrict p2 NJ1, int *done)
|
const Y(Tbri) * restrict p2 NJ1)
|
||||||
{
|
{
|
||||||
int lmax=gen->lmax;
|
int lmax=gen->lmax;
|
||||||
Tb lam_1,lam_2,scale;
|
Tb lam_1,lam_2,scale;
|
||||||
int l=gen->m;
|
int l=gen->m;
|
||||||
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
|
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
|
||||||
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
|
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
|
||||||
if (l>lmax) { *done=1; return; }
|
if (l>lmax) return;
|
||||||
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
|
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
|
||||||
|
|
||||||
const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
|
const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
|
||||||
|
@ -249,7 +249,7 @@ static void Z(calc_map2alm) (const Tb cth, const Tb sth,
|
||||||
}
|
}
|
||||||
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
||||||
}
|
}
|
||||||
if (++l>lmax) { *done=1; return; }
|
if (++l>lmax) return;
|
||||||
Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
|
Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
|
||||||
for (int i=0; i<nvec; ++i)
|
for (int i=0; i<nvec; ++i)
|
||||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||||
|
@ -264,7 +264,7 @@ static void Z(calc_map2alm) (const Tb cth, const Tb sth,
|
||||||
}
|
}
|
||||||
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
||||||
}
|
}
|
||||||
if (++l>lmax) { *done=1; return; }
|
if (++l>lmax) return;
|
||||||
r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
|
r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
|
||||||
for (int i=0; i<nvec; ++i)
|
for (int i=0; i<nvec; ++i)
|
||||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
|
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||||
|
@ -379,18 +379,8 @@ static void Z(alm2map_spin_kernel) (Tb cth, Y(Tbqu) * restrict p1,
|
||||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||||
vmul(fx2,rec1m.v[i]));
|
vmul(fx2,rec1m.v[i]));
|
||||||
}
|
}
|
||||||
#ifdef njobs
|
|
||||||
#if (njobs>1)
|
|
||||||
Z(saddstepb)(p1,p2,rec1p,rec1m,rec2p,rec2m,&alm[2*njobs*l],
|
Z(saddstepb)(p1,p2,rec1p,rec1m,rec2p,rec2m,&alm[2*njobs*l],
|
||||||
&alm[2*njobs*(l+1)] NJ2);
|
&alm[2*njobs*(l+1)] NJ2);
|
||||||
#else
|
|
||||||
Z(saddstep)(p1, p2, rec2p, rec2m, &alm[2*njobs*l] NJ2);
|
|
||||||
Z(saddstep)(p2, p1, rec1p, rec1m, &alm[2*njobs*(l+1)] NJ2);
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
Z(saddstepb)(p1,p2,rec1p,rec1m,rec2p,rec2m,&alm[2*njobs*l],
|
|
||||||
&alm[2*njobs*(l+1)] NJ2);
|
|
||||||
#endif
|
|
||||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||||
fx2=vload(fx[l+2].f[2]);
|
fx2=vload(fx[l+2].f[2]);
|
||||||
for (int i=0; i<nvec; ++i)
|
for (int i=0; i<nvec; ++i)
|
||||||
|
@ -440,15 +430,13 @@ static void Z(map2alm_spin_kernel) (Tb cth, const Y(Tbqu) * restrict p1,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
|
static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||||
sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2 NJ1,
|
sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2 NJ1)
|
||||||
int *done)
|
|
||||||
{
|
{
|
||||||
int l, lmax=gen->lmax;
|
int l, lmax=gen->lmax;
|
||||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||||
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||||
if (l>lmax)
|
if (l>lmax) return;
|
||||||
{ *done=1; return; }
|
|
||||||
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
|
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
|
||||||
|
|
||||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||||
|
@ -477,8 +465,7 @@ static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (l>lmax)
|
if (l>lmax) return;
|
||||||
{ *done=1; return; }
|
|
||||||
|
|
||||||
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
||||||
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
||||||
|
@ -487,14 +474,13 @@ static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
|
static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
|
||||||
sharp_job *job, const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2
|
sharp_job *job, const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2 NJ1)
|
||||||
NJ1, int *done)
|
|
||||||
{
|
{
|
||||||
int l, lmax=gen->lmax;
|
int l, lmax=gen->lmax;
|
||||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||||
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||||
if (l>lmax) { *done=1; return; }
|
if (l>lmax) return;
|
||||||
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
|
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
|
||||||
|
|
||||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||||
|
@ -508,11 +494,11 @@ static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
|
||||||
{
|
{
|
||||||
Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
|
Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
|
||||||
Z(saddstep2)(p1, p2, &t1, &t2, &alm[2*njobs*l] NJ2);
|
Z(saddstep2)(p1, p2, &t1, &t2, &alm[2*njobs*l] NJ2);
|
||||||
if (++l>lmax) { *done=1; return; }
|
if (++l>lmax) return;
|
||||||
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||||
t1=Y(Tbprod)(rec1p,corfacp); t2=Y(Tbprod)(rec1m,corfacm);
|
t1=Y(Tbprod)(rec1p,corfacp); t2=Y(Tbprod)(rec1m,corfacm);
|
||||||
Z(saddstep2)(p2, p1, &t1, &t2, &alm[2*njobs*l] NJ2);
|
Z(saddstep2)(p2, p1, &t1, &t2, &alm[2*njobs*l] NJ2);
|
||||||
if (++l>lmax) { *done=1; return; }
|
if (++l>lmax) return;
|
||||||
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||||
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
|
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
|
||||||
{
|
{
|
||||||
|
@ -583,15 +569,13 @@ static void Z(alm2map_deriv1_kernel) (Tb cth, Y(Tbqu) * restrict p1,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
|
static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||||
sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2 NJ1,
|
sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2 NJ1)
|
||||||
int *done)
|
|
||||||
{
|
{
|
||||||
int l, lmax=gen->lmax;
|
int l, lmax=gen->lmax;
|
||||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||||
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||||
if (l>lmax)
|
if (l>lmax) return;
|
||||||
{ *done=1; return; }
|
|
||||||
job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
|
job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
|
||||||
|
|
||||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||||
|
@ -620,8 +604,7 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (l>lmax)
|
if (l>lmax) return;
|
||||||
{ *done=1; return; }
|
|
||||||
|
|
||||||
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
||||||
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
||||||
|
@ -634,7 +617,7 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||||
|
|
||||||
static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
||||||
const double *cth_, const double *sth_, int llim, int ulim,
|
const double *cth_, const double *sth_, int llim, int ulim,
|
||||||
sharp_Ylmgen_C *gen, int mi, const int *idx NJ1)
|
sharp_Ylmgen_C *gen, int mi, const int *mlim NJ1)
|
||||||
{
|
{
|
||||||
const int nval=nvec*VLEN;
|
const int nval=nvec*VLEN;
|
||||||
const int m = job->ainfo->mval[mi];
|
const int m = job->ainfo->mval[mi];
|
||||||
|
@ -647,30 +630,27 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
||||||
{
|
{
|
||||||
if (job->spin==0)
|
if (job->spin==0)
|
||||||
{
|
{
|
||||||
int done=0;
|
|
||||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||||
{
|
{
|
||||||
Y(Tburi) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
|
Y(Tburi) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
|
||||||
if (!done)
|
Y(Tbu) cth, sth;
|
||||||
{
|
|
||||||
Y(Tbu) cth, sth;
|
|
||||||
|
|
||||||
for (int i=0; i<nval; ++i)
|
int skip=1;
|
||||||
{
|
for (int i=0; i<nval; ++i)
|
||||||
int itot=i+ith;
|
{
|
||||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
int itot=i+ith;
|
||||||
itot=idx[itot];
|
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
if (mlim[itot]>=m) skip=0;
|
||||||
}
|
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||||
Z(calc_alm2map) (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2,&done);
|
|
||||||
}
|
}
|
||||||
|
if (!skip)
|
||||||
|
Z(calc_alm2map) (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||||
|
|
||||||
for (int i=0; i<nval; ++i)
|
for (int i=0; i<nval; ++i)
|
||||||
{
|
{
|
||||||
int itot=i+ith;
|
int itot=i+ith;
|
||||||
if (itot<ulim-llim)
|
if (itot<ulim-llim)
|
||||||
{
|
{
|
||||||
itot=idx[itot];
|
|
||||||
for (int j=0; j<njobs; ++j)
|
for (int j=0; j<njobs; ++j)
|
||||||
{
|
{
|
||||||
int phas_idx = 2*(j+njobs*(itot*job->ainfo->nm+mi));
|
int phas_idx = 2*(j+njobs*(itot*job->ainfo->nm+mi));
|
||||||
|
@ -686,34 +666,31 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int done=0;
|
|
||||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||||
{
|
{
|
||||||
Y(Tbuqu) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
|
Y(Tbuqu) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
|
||||||
if (!done)
|
Y(Tbu) cth;
|
||||||
{
|
int skip=1;
|
||||||
Y(Tbu) cth;
|
|
||||||
|
|
||||||
for (int i=0; i<nval; ++i)
|
for (int i=0; i<nval; ++i)
|
||||||
{
|
{
|
||||||
int itot=i+ith;
|
int itot=i+ith;
|
||||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||||
itot=idx[itot];
|
if (mlim[itot]>=m) skip=0;
|
||||||
cth.s[i]=cth_[itot];
|
cth.s[i]=cth_[itot];
|
||||||
}
|
}
|
||||||
|
if (!skip)
|
||||||
(job->type==SHARP_ALM2MAP) ?
|
(job->type==SHARP_ALM2MAP) ?
|
||||||
Z(calc_alm2map_spin )
|
Z(calc_alm2map_spin )
|
||||||
(cth.b,gen,job,&p1[0].b,&p2[0].b NJ2,&done) :
|
(cth.b,gen,job,&p1[0].b,&p2[0].b NJ2) :
|
||||||
Z(calc_alm2map_deriv1)
|
Z(calc_alm2map_deriv1)
|
||||||
(cth.b,gen,job,&p1[0].b,&p2[0].b NJ2,&done);
|
(cth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||||
}
|
|
||||||
|
|
||||||
for (int i=0; i<nval; ++i)
|
for (int i=0; i<nval; ++i)
|
||||||
{
|
{
|
||||||
int itot=i+ith;
|
int itot=i+ith;
|
||||||
if (itot<ulim-llim)
|
if (itot<ulim-llim)
|
||||||
{
|
{
|
||||||
itot=idx[itot];
|
|
||||||
for (int j=0; j<njobs; ++j)
|
for (int j=0; j<njobs; ++j)
|
||||||
{
|
{
|
||||||
int phas_idx = 4*(j+njobs*(itot*job->ainfo->nm+mi));
|
int phas_idx = 4*(j+njobs*(itot*job->ainfo->nm+mi));
|
||||||
|
@ -743,19 +720,19 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
||||||
{
|
{
|
||||||
if (job->spin==0)
|
if (job->spin==0)
|
||||||
{
|
{
|
||||||
int done=0;
|
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||||
for (int ith=0; (ith<ulim-llim)&&(!done); ith+=nval)
|
|
||||||
{
|
{
|
||||||
Y(Tburi) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
|
Y(Tburi) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
|
||||||
Y(Tbu) cth, sth;
|
Y(Tbu) cth, sth;
|
||||||
|
int skip=1;
|
||||||
|
|
||||||
for (int i=0; i<nval; ++i)
|
for (int i=0; i<nval; ++i)
|
||||||
{
|
{
|
||||||
int itot=i+ith;
|
int itot=i+ith;
|
||||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||||
itot=idx[itot];
|
if (mlim[itot]>=m) skip=0;
|
||||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||||
if (i+ith<ulim-llim)
|
if ((i+ith<ulim-llim)&&(mlim[itot]>=m))
|
||||||
{
|
{
|
||||||
for (int j=0; j<njobs; ++j)
|
for (int j=0; j<njobs; ++j)
|
||||||
{
|
{
|
||||||
|
@ -767,22 +744,23 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Z(calc_map2alm)(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2,&done);
|
if (!skip)
|
||||||
|
Z(calc_map2alm)(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int done=0;
|
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||||
for (int ith=0; (ith<ulim-llim)&&(!done); ith+=nval)
|
|
||||||
{
|
{
|
||||||
Y(Tbuqu) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
|
Y(Tbuqu) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
|
||||||
Y(Tbu) cth;
|
Y(Tbu) cth;
|
||||||
|
int skip=1;
|
||||||
|
|
||||||
for (int i=0; i<nval; ++i)
|
for (int i=0; i<nval; ++i)
|
||||||
{
|
{
|
||||||
int itot=i+ith;
|
int itot=i+ith;
|
||||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||||
itot=idx[itot];
|
if (mlim[itot]>=m) skip=0;
|
||||||
cth.s[i]=cth_[itot];
|
cth.s[i]=cth_[itot];
|
||||||
if (i+ith<ulim-llim)
|
if (i+ith<ulim-llim)
|
||||||
{
|
{
|
||||||
|
@ -802,7 +780,8 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Z(calc_map2alm_spin) (cth.b,gen,job,&p1[0].b,&p2[0].b NJ2,&done);
|
if (!skip)
|
||||||
|
Z(calc_map2alm_spin) (cth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue