rework interface, put more stuff into flags
This commit is contained in:
parent
0a1a9e5716
commit
9f46084386
12 changed files with 113 additions and 160 deletions
123
libsharp/sharp.c
123
libsharp/sharp.c
|
@ -162,7 +162,7 @@ void sharp_destroy_alm_info (sharp_alm_info *info)
|
||||||
|
|
||||||
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
|
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
|
||||||
const int *stride, const double *phi0, const double *theta,
|
const int *stride, const double *phi0, const double *theta,
|
||||||
const double *weight, sharp_geom_info **geom_info)
|
const double *wgt_a2m, const double *wgt_m2a, sharp_geom_info **geom_info)
|
||||||
{
|
{
|
||||||
sharp_geom_info *info = RALLOC(sharp_geom_info,1);
|
sharp_geom_info *info = RALLOC(sharp_geom_info,1);
|
||||||
sharp_ringinfo *infos = RALLOC(sharp_ringinfo,nrings);
|
sharp_ringinfo *infos = RALLOC(sharp_ringinfo,nrings);
|
||||||
|
@ -177,7 +177,8 @@ void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
|
||||||
infos[m].theta = theta[m];
|
infos[m].theta = theta[m];
|
||||||
infos[m].cth = cos(theta[m]);
|
infos[m].cth = cos(theta[m]);
|
||||||
infos[m].sth = sin(theta[m]);
|
infos[m].sth = sin(theta[m]);
|
||||||
infos[m].weight = (weight != NULL) ? weight[m] : 1.;
|
infos[m].w_a2m = (wgt_a2m != NULL) ? wgt_a2m[m] : 1.;
|
||||||
|
infos[m].w_m2a = (wgt_m2a != NULL) ? wgt_m2a[m] : 1.;
|
||||||
infos[m].phi0 = phi0[m];
|
infos[m].phi0 = phi0[m];
|
||||||
infos[m].ofs = ofs[m];
|
infos[m].ofs = ofs[m];
|
||||||
infos[m].stride = stride[m];
|
infos[m].stride = stride[m];
|
||||||
|
@ -234,7 +235,7 @@ static int sharp_get_mmax (int *mval, int nm)
|
||||||
|
|
||||||
static void ringhelper_phase2ring (ringhelper *self,
|
static void ringhelper_phase2ring (ringhelper *self,
|
||||||
const sharp_ringinfo *info, void *data, int mmax, const dcmplx *phase,
|
const sharp_ringinfo *info, void *data, int mmax, const dcmplx *phase,
|
||||||
int pstride, sharp_fde fde, int flags)
|
int pstride, int flags)
|
||||||
{
|
{
|
||||||
int nph = info->nph;
|
int nph = info->nph;
|
||||||
int stride = info->stride;
|
int stride = info->stride;
|
||||||
|
@ -274,30 +275,18 @@ static void ringhelper_phase2ring (ringhelper *self,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
real_plan_backward_c (self->plan, (double *)(self->work));
|
real_plan_backward_c (self->plan, (double *)(self->work));
|
||||||
if (flags & SHARP_ALM2MAP_USE_WEIGHTS)
|
if (flags&SHARP_DP)
|
||||||
{
|
for (int m=0; m<nph; ++m)
|
||||||
if (fde==DOUBLE)
|
((double *)data)[m*stride+info->ofs]+=creal(self->work[m])*info->w_a2m;
|
||||||
for (int m=0; m<nph; ++m)
|
|
||||||
((double *)data)[m*stride+info->ofs]+=creal(self->work[m])*info->weight;
|
|
||||||
else
|
|
||||||
for (int m=0; m<nph; ++m)
|
|
||||||
((float *)data)[m*stride+info->ofs] +=
|
|
||||||
(float)(creal(self->work[m])*info->weight);
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
for (int m=0; m<nph; ++m)
|
||||||
if (fde==DOUBLE)
|
((float *)data)[m*stride+info->ofs] +=
|
||||||
for (int m=0; m<nph; ++m)
|
(float)(creal(self->work[m])*info->w_a2m);
|
||||||
((double *)data)[m*stride+info->ofs] += creal(self->work[m]);
|
|
||||||
else
|
|
||||||
for (int m=0; m<nph; ++m)
|
|
||||||
((float *)data)[m*stride+info->ofs] += (float)creal(self->work[m]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ringhelper_ring2phase (ringhelper *self,
|
static void ringhelper_ring2phase (ringhelper *self,
|
||||||
const sharp_ringinfo *info, const void *data, int mmax, dcmplx *phase,
|
const sharp_ringinfo *info, const void *data, int mmax, dcmplx *phase,
|
||||||
int pstride, sharp_fde fde, int flags)
|
int pstride, int flags)
|
||||||
{
|
{
|
||||||
int nph = info->nph;
|
int nph = info->nph;
|
||||||
#if 1
|
#if 1
|
||||||
|
@ -307,24 +296,12 @@ static void ringhelper_ring2phase (ringhelper *self,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ringhelper_update (self, nph, mmax, -info->phi0);
|
ringhelper_update (self, nph, mmax, -info->phi0);
|
||||||
if (flags & SHARP_MAP2ALM_IGNORE_WEIGHTS)
|
if (flags&SHARP_DP)
|
||||||
{
|
for (int m=0; m<nph; ++m)
|
||||||
if (fde==DOUBLE)
|
self->work[m] = ((double *)data)[info->ofs+m*info->stride]*info->w_m2a;
|
||||||
for (int m=0; m<nph; ++m)
|
|
||||||
self->work[m] = ((double *)data)[info->ofs+m*info->stride];
|
|
||||||
else
|
|
||||||
for (int m=0; m<nph; ++m)
|
|
||||||
self->work[m] = ((float *)data)[info->ofs+m*info->stride];
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
for (int m=0; m<nph; ++m)
|
||||||
if (fde==DOUBLE)
|
self->work[m] = ((float *)data)[info->ofs+m*info->stride]*info->w_m2a;
|
||||||
for (int m=0; m<nph; ++m)
|
|
||||||
self->work[m] = ((double *)data)[info->ofs+m*info->stride]*info->weight;
|
|
||||||
else
|
|
||||||
for (int m=0; m<nph; ++m)
|
|
||||||
self->work[m] = ((float *)data)[info->ofs+m*info->stride]*info->weight;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_plan_forward_c (self->plan, (double *)self->work);
|
real_plan_forward_c (self->plan, (double *)self->work);
|
||||||
|
|
||||||
|
@ -341,28 +318,28 @@ static void ringhelper_ring2phase (ringhelper *self,
|
||||||
|
|
||||||
static void ringhelper_pair2phase (ringhelper *self, int mmax,
|
static void ringhelper_pair2phase (ringhelper *self, int mmax,
|
||||||
const sharp_ringpair *pair, const void *data, dcmplx *phase1, dcmplx *phase2,
|
const sharp_ringpair *pair, const void *data, dcmplx *phase1, dcmplx *phase2,
|
||||||
int pstride, sharp_fde fde, int flags)
|
int pstride, int flags)
|
||||||
{
|
{
|
||||||
ringhelper_ring2phase (self,&(pair->r1),data,mmax,phase1,pstride,fde,flags);
|
ringhelper_ring2phase (self,&(pair->r1),data,mmax,phase1,pstride,flags);
|
||||||
if (pair->r2.nph>0)
|
if (pair->r2.nph>0)
|
||||||
ringhelper_ring2phase (self,&(pair->r2),data,mmax,phase2,pstride,fde,flags);
|
ringhelper_ring2phase (self,&(pair->r2),data,mmax,phase2,pstride,flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ringhelper_phase2pair (ringhelper *self, int mmax,
|
static void ringhelper_phase2pair (ringhelper *self, int mmax,
|
||||||
const dcmplx *phase1, const dcmplx *phase2, int pstride,
|
const dcmplx *phase1, const dcmplx *phase2, int pstride,
|
||||||
const sharp_ringpair *pair, void *data, sharp_fde fde, int flags)
|
const sharp_ringpair *pair, void *data, int flags)
|
||||||
{
|
{
|
||||||
ringhelper_phase2ring (self,&(pair->r1),data,mmax,phase1,pstride,fde,flags);
|
ringhelper_phase2ring (self,&(pair->r1),data,mmax,phase1,pstride,flags);
|
||||||
if (pair->r2.nph>0)
|
if (pair->r2.nph>0)
|
||||||
ringhelper_phase2ring (self,&(pair->r2),data,mmax,phase2,pstride,fde,flags);
|
ringhelper_phase2ring (self,&(pair->r2),data,mmax,phase2,pstride,flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
|
static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
|
||||||
sharp_fde fde)
|
int flags)
|
||||||
{
|
{
|
||||||
for (int j=0;j<ginfo->npairs;++j)
|
for (int j=0;j<ginfo->npairs;++j)
|
||||||
{
|
{
|
||||||
if (fde==DOUBLE)
|
if (flags&SHARP_DP)
|
||||||
{
|
{
|
||||||
for (int i=0;i<ginfo->pair[j].r1.nph;++i)
|
for (int i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||||
((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=value;
|
((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=value;
|
||||||
|
@ -382,9 +359,9 @@ static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void fill_alm (const sharp_alm_info *ainfo, void *alm, dcmplx value,
|
static void fill_alm (const sharp_alm_info *ainfo, void *alm, dcmplx value,
|
||||||
sharp_fde fde)
|
int flags)
|
||||||
{
|
{
|
||||||
if (fde==DOUBLE)
|
if (flags&SHARP_DP)
|
||||||
for (int mi=0;mi<ainfo->nm;++mi)
|
for (int mi=0;mi<ainfo->nm;++mi)
|
||||||
for (int l=ainfo->mval[mi];l<=ainfo->lmax;++l)
|
for (int l=ainfo->mval[mi];l<=ainfo->lmax;++l)
|
||||||
((dcmplx *)alm)[sharp_alm_index(ainfo,l,mi)] = value;
|
((dcmplx *)alm)[sharp_alm_index(ainfo,l,mi)] = value;
|
||||||
|
@ -396,13 +373,13 @@ static void fill_alm (const sharp_alm_info *ainfo, void *alm, dcmplx value,
|
||||||
|
|
||||||
static void init_output (sharp_job *job)
|
static void init_output (sharp_job *job)
|
||||||
{
|
{
|
||||||
if (job->add_output) return;
|
if (job->flags&SHARP_ADD) return;
|
||||||
if (job->type == SHARP_MAP2ALM)
|
if (job->type == SHARP_MAP2ALM)
|
||||||
for (int i=0; i<job->ntrans*job->nalm; ++i)
|
for (int i=0; i<job->ntrans*job->nalm; ++i)
|
||||||
fill_alm (job->ainfo,job->alm[i],0.,job->fde);
|
fill_alm (job->ainfo,job->alm[i],0.,job->flags);
|
||||||
else
|
else
|
||||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||||
fill_map (job->ginfo,job->map[i],0.,job->fde);
|
fill_map (job->ginfo,job->map[i],0.,job->flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void alloc_phase (sharp_job *job, int nm, int ntheta)
|
static void alloc_phase (sharp_job *job, int nm, int ntheta)
|
||||||
|
@ -426,8 +403,7 @@ static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
||||||
int dim2 = pstride*(ith-llim)*(mmax+1);
|
int dim2 = pstride*(ith-llim)*(mmax+1);
|
||||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||||
ringhelper_pair2phase(&helper,mmax,&job->ginfo->pair[ith], job->map[i],
|
ringhelper_pair2phase(&helper,mmax,&job->ginfo->pair[ith], job->map[i],
|
||||||
&job->phase[dim2+2*i], &job->phase[dim2+2*i+1], pstride, job->fde,
|
&job->phase[dim2+2*i], &job->phase[dim2+2*i+1], pstride, job->flags);
|
||||||
job->flags);
|
|
||||||
}
|
}
|
||||||
ringhelper_destroy(&helper);
|
ringhelper_destroy(&helper);
|
||||||
} /* end of parallel region */
|
} /* end of parallel region */
|
||||||
|
@ -447,7 +423,7 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
||||||
int stride=job->ainfo->stride;
|
int stride=job->ainfo->stride;
|
||||||
if (job->spin==0)
|
if (job->spin==0)
|
||||||
{
|
{
|
||||||
if (job->fde==DOUBLE)
|
if (job->flags&SHARP_DP)
|
||||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
||||||
for (int i=0; i<job->ntrans*job->nalm; ++i)
|
for (int i=0; i<job->ntrans*job->nalm; ++i)
|
||||||
job->almtmp[job->ntrans*job->nalm*l+i]
|
job->almtmp[job->ntrans*job->nalm*l+i]
|
||||||
|
@ -460,7 +436,7 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (job->fde==DOUBLE)
|
if (job->flags&SHARP_DP)
|
||||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
||||||
for (int i=0; i<job->ntrans*job->nalm; ++i)
|
for (int i=0; i<job->ntrans*job->nalm; ++i)
|
||||||
job->almtmp[job->ntrans*job->nalm*l+i]
|
job->almtmp[job->ntrans*job->nalm*l+i]
|
||||||
|
@ -484,7 +460,7 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
||||||
int stride=job->ainfo->stride;
|
int stride=job->ainfo->stride;
|
||||||
if (job->spin==0)
|
if (job->spin==0)
|
||||||
{
|
{
|
||||||
if (job->fde==DOUBLE)
|
if (job->flags&SHARP_DP)
|
||||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
||||||
for (int i=0;i<job->ntrans*job->nalm;++i)
|
for (int i=0;i<job->ntrans*job->nalm;++i)
|
||||||
((dcmplx *)job->alm[i])[ofs+l*stride] +=
|
((dcmplx *)job->alm[i])[ofs+l*stride] +=
|
||||||
|
@ -497,7 +473,7 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (job->fde==DOUBLE)
|
if (job->flags&SHARP_DP)
|
||||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
||||||
for (int i=0;i<job->ntrans*job->nalm;++i)
|
for (int i=0;i<job->ntrans*job->nalm;++i)
|
||||||
((dcmplx *)job->alm[i])[ofs+l*stride] +=
|
((dcmplx *)job->alm[i])[ofs+l*stride] +=
|
||||||
|
@ -525,7 +501,7 @@ static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
||||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||||
ringhelper_phase2pair(&helper,mmax,&job->phase[dim2+2*i],
|
ringhelper_phase2pair(&helper,mmax,&job->phase[dim2+2*i],
|
||||||
&job->phase[dim2+2*i+1],pstride,&job->ginfo->pair[ith],job->map[i],
|
&job->phase[dim2+2*i+1],pstride,&job->ginfo->pair[ith],job->map[i],
|
||||||
job->fde, job->flags);
|
job->flags);
|
||||||
}
|
}
|
||||||
ringhelper_destroy(&helper);
|
ringhelper_destroy(&helper);
|
||||||
} /* end of parallel region */
|
} /* end of parallel region */
|
||||||
|
@ -546,7 +522,8 @@ static void sharp_execute_job (sharp_job *job)
|
||||||
init_output (job);
|
init_output (job);
|
||||||
|
|
||||||
int nchunks, chunksize;
|
int nchunks, chunksize;
|
||||||
get_chunk_info(job->ginfo->npairs,job->nv*VLEN,&nchunks,&chunksize);
|
get_chunk_info(job->ginfo->npairs,(job->flags&SHARP_NVMAX)*VLEN,&nchunks,
|
||||||
|
&chunksize);
|
||||||
alloc_phase (job,mmax+1,chunksize);
|
alloc_phase (job,mmax+1,chunksize);
|
||||||
|
|
||||||
/* chunk loop */
|
/* chunk loop */
|
||||||
|
@ -615,9 +592,8 @@ static void sharp_execute_job (sharp_job *job)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
|
static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
|
||||||
int spin, int add_output, void *alm, void *map,
|
int spin, void *alm, void *map, const sharp_geom_info *geom_info,
|
||||||
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
|
const sharp_alm_info *alm_info, int ntrans, int flags)
|
||||||
int flags, int nv)
|
|
||||||
{
|
{
|
||||||
UTIL_ASSERT((ntrans>0)&&(ntrans<=SHARP_MAXTRANS),
|
UTIL_ASSERT((ntrans>0)&&(ntrans<=SHARP_MAXTRANS),
|
||||||
"bad number of simultaneous transforms");
|
"bad number of simultaneous transforms");
|
||||||
|
@ -628,28 +604,27 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
|
||||||
job->type = type;
|
job->type = type;
|
||||||
job->spin = spin;
|
job->spin = spin;
|
||||||
job->norm_l = NULL;
|
job->norm_l = NULL;
|
||||||
job->add_output = add_output;
|
|
||||||
job->nmaps = (type==SHARP_ALM2MAP_DERIV1) ? 2 : ((spin>0) ? 2 : 1);
|
job->nmaps = (type==SHARP_ALM2MAP_DERIV1) ? 2 : ((spin>0) ? 2 : 1);
|
||||||
job->nalm = (type==SHARP_ALM2MAP_DERIV1) ? 1 : ((spin>0) ? 2 : 1);
|
job->nalm = (type==SHARP_ALM2MAP_DERIV1) ? 1 : ((spin>0) ? 2 : 1);
|
||||||
job->ginfo = geom_info;
|
job->ginfo = geom_info;
|
||||||
job->ainfo = alm_info;
|
job->ainfo = alm_info;
|
||||||
job->nv = (nv==0) ? sharp_nv_oracle (type, spin, ntrans) : nv;
|
job->flags = flags;
|
||||||
|
if ((job->flags&SHARP_NVMAX)==0)
|
||||||
|
job->flags|=sharp_nv_oracle (type, spin, ntrans);
|
||||||
job->time = 0.;
|
job->time = 0.;
|
||||||
job->opcnt = 0;
|
job->opcnt = 0;
|
||||||
job->ntrans = ntrans;
|
job->ntrans = ntrans;
|
||||||
job->alm=alm;
|
job->alm=alm;
|
||||||
job->map=map;
|
job->map=map;
|
||||||
job->flags = flags;
|
|
||||||
job->fde=(flags & SHARP_DP) ? DOUBLE : FLOAT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void sharp_execute (sharp_jobtype type, int spin, int add_output, void *alm,
|
void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
|
||||||
void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info,
|
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
|
||||||
int ntrans, int flags, int nv, double *time, unsigned long long *opcnt)
|
int flags, double *time, unsigned long long *opcnt)
|
||||||
{
|
{
|
||||||
sharp_job job;
|
sharp_job job;
|
||||||
sharp_build_job_common (&job, type, spin, add_output, alm, map, geom_info,
|
sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
|
||||||
alm_info, ntrans, flags, nv);
|
ntrans, flags);
|
||||||
|
|
||||||
sharp_execute_job (&job);
|
sharp_execute_job (&job);
|
||||||
if (time!=NULL) *time = job.time;
|
if (time!=NULL) *time = job.time;
|
||||||
|
@ -701,8 +676,8 @@ static int sharp_oracle (sharp_jobtype type, int spin, int ntrans)
|
||||||
int ntries=0;
|
int ntries=0;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
sharp_execute(type,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,1,nv,&jtime,
|
sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,nv|SHARP_DP,
|
||||||
NULL);
|
&jtime,NULL);
|
||||||
|
|
||||||
if (jtime<time) { time=jtime; nvbest=nv; }
|
if (jtime<time) { time=jtime; nvbest=nv; }
|
||||||
time_acc+=jtime;
|
time_acc+=jtime;
|
||||||
|
|
|
@ -121,8 +121,8 @@ static void check_sign_scale(void)
|
||||||
for (int j=0; j<nalms; ++j)
|
for (int j=0; j<nalms; ++j)
|
||||||
alm[i][j]=1.+_Complex_I;
|
alm[i][j]=1.+_Complex_I;
|
||||||
|
|
||||||
sharp_execute(SHARP_ALM2MAP,0,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
sharp_execute(SHARP_ALM2MAP,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||||
0,NULL,NULL);
|
NULL,NULL);
|
||||||
for (int it=0; it<ntrans; ++it)
|
for (int it=0; it<ntrans; ++it)
|
||||||
{
|
{
|
||||||
UTIL_ASSERT(FAPPROX(map[it][0 ], 3.588246976618616912e+00,1e-12),
|
UTIL_ASSERT(FAPPROX(map[it][0 ], 3.588246976618616912e+00,1e-12),
|
||||||
|
@ -132,8 +132,8 @@ static void check_sign_scale(void)
|
||||||
UTIL_ASSERT(FAPPROX(map[it][npix-1],-1.234675107554816442e+01,1e-12),
|
UTIL_ASSERT(FAPPROX(map[it][npix-1],-1.234675107554816442e+01,1e-12),
|
||||||
"error");
|
"error");
|
||||||
}
|
}
|
||||||
sharp_execute(SHARP_ALM2MAP,1,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
sharp_execute(SHARP_ALM2MAP,1,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||||
0,NULL,NULL);
|
NULL,NULL);
|
||||||
for (int it=0; it<ntrans; ++it)
|
for (int it=0; it<ntrans; ++it)
|
||||||
{
|
{
|
||||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ], 2.750897760535633285e+00,1e-12),
|
UTIL_ASSERT(FAPPROX(map[2*it ][0 ], 2.750897760535633285e+00,1e-12),
|
||||||
|
@ -150,8 +150,8 @@ static void check_sign_scale(void)
|
||||||
"error");
|
"error");
|
||||||
}
|
}
|
||||||
|
|
||||||
sharp_execute(SHARP_ALM2MAP,2,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
sharp_execute(SHARP_ALM2MAP,2,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||||
0,NULL,NULL);
|
NULL,NULL);
|
||||||
for (int it=0; it<ntrans; ++it)
|
for (int it=0; it<ntrans; ++it)
|
||||||
{
|
{
|
||||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-1.398186224727334448e+00,1e-12),
|
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-1.398186224727334448e+00,1e-12),
|
||||||
|
@ -168,8 +168,8 @@ static void check_sign_scale(void)
|
||||||
"error");
|
"error");
|
||||||
}
|
}
|
||||||
|
|
||||||
sharp_execute(SHARP_ALM2MAP_DERIV1,1,0,&alm[0],&map[0],tinfo,alms,ntrans,
|
sharp_execute(SHARP_ALM2MAP_DERIV1,1,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||||
SHARP_DP,0,NULL,NULL);
|
SHARP_DP,NULL,NULL);
|
||||||
for (int it=0; it<ntrans; ++it)
|
for (int it=0; it<ntrans; ++it)
|
||||||
{
|
{
|
||||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-6.859393905369091105e-01,1e-11),
|
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-6.859393905369091105e-01,1e-11),
|
||||||
|
@ -215,10 +215,10 @@ static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
|
||||||
dcmplx **alm2;
|
dcmplx **alm2;
|
||||||
ALLOC2D(alm2,dcmplx,ncomp,nalms);
|
ALLOC2D(alm2,dcmplx,ncomp,nalms);
|
||||||
|
|
||||||
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,
|
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||||
SHARP_DP,nv,NULL,NULL);
|
SHARP_DP|nv,NULL,NULL);
|
||||||
sharp_execute(SHARP_MAP2ALM,spin,0,&alm2[0],&map[0],tinfo,alms,ntrans,
|
sharp_execute(SHARP_MAP2ALM,spin,&alm2[0],&map[0],tinfo,alms,ntrans,
|
||||||
SHARP_DP,nv,NULL,NULL);
|
SHARP_DP|nv,NULL,NULL);
|
||||||
measure_errors(alm,alm2,nalms,ncomp);
|
measure_errors(alm,alm2,nalms,ncomp);
|
||||||
|
|
||||||
DEALLOC2D(map);
|
DEALLOC2D(map);
|
||||||
|
|
|
@ -73,7 +73,7 @@ static void bench_sht (int spin, int nv, sharp_jobtype type,
|
||||||
{
|
{
|
||||||
double jtime;
|
double jtime;
|
||||||
unsigned long long jopcnt;
|
unsigned long long jopcnt;
|
||||||
sharp_execute(type,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,nv,
|
sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP|nv,
|
||||||
&jtime,&jopcnt);
|
&jtime,&jopcnt);
|
||||||
|
|
||||||
if (jopcnt<*opcnt) *opcnt=jopcnt;
|
if (jopcnt<*opcnt) *opcnt=jopcnt;
|
||||||
|
|
|
@ -188,8 +188,8 @@ int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
double ltime;
|
double ltime;
|
||||||
unsigned long long lopcnt;
|
unsigned long long lopcnt;
|
||||||
sharp_execute_mpi(MPI_COMM_WORLD,jtype,spin,0,&alm[0],&map[0],
|
sharp_execute_mpi(MPI_COMM_WORLD,jtype,spin,&alm[0],&map[0],
|
||||||
tinfo,alms,ntrans,SHARP_DP,0,&ltime,&lopcnt);
|
tinfo,alms,ntrans,SHARP_DP,&ltime,&lopcnt);
|
||||||
|
|
||||||
ltime=maxTime(ltime);
|
ltime=maxTime(ltime);
|
||||||
if (ltime<time) { time=ltime; opcnt=totalops(lopcnt); }
|
if (ltime<time) { time=ltime; opcnt=totalops(lopcnt); }
|
||||||
|
|
|
@ -77,10 +77,10 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||||
const int *idx)
|
const int *idx)
|
||||||
{
|
{
|
||||||
int njobs=job->ntrans;
|
int njobs=job->ntrans, nv=job->flags&SHARP_NVMAX;
|
||||||
if (njobs<=MAXJOB_SPECIAL)
|
if (njobs<=MAXJOB_SPECIAL)
|
||||||
{
|
{
|
||||||
switch (njobs*16+job->nv)
|
switch (njobs*16+nv)
|
||||||
{
|
{
|
||||||
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
|
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
|
||||||
case 0x11:
|
case 0x11:
|
||||||
|
@ -207,7 +207,7 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||||
#if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
|
#if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
switch (job->nv)
|
switch (nv)
|
||||||
{
|
{
|
||||||
case 1:
|
case 1:
|
||||||
CONCAT2(inner_loop,1)
|
CONCAT2(inner_loop,1)
|
||||||
|
|
|
@ -88,7 +88,7 @@ void sharp_make_weighted_healpix_geom_info (int nside, int stride,
|
||||||
weight_[m]=4.*pi/npix*weight[northring-1];
|
weight_[m]=4.*pi/npix*weight[northring-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, weight_,
|
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, NULL, weight_,
|
||||||
geom_info);
|
geom_info);
|
||||||
|
|
||||||
DEALLOC(theta);
|
DEALLOC(theta);
|
||||||
|
@ -174,7 +174,7 @@ void sharp_make_gauss_geom_info (int nrings, int nphi, int stride_lon,
|
||||||
weight[m]*=2*pi/nphi;
|
weight[m]*=2*pi/nphi;
|
||||||
}
|
}
|
||||||
|
|
||||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, weight,
|
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, NULL, weight,
|
||||||
geom_info);
|
geom_info);
|
||||||
|
|
||||||
DEALLOC(theta);
|
DEALLOC(theta);
|
||||||
|
@ -210,7 +210,7 @@ void sharp_make_ecp_geom_info (int nrings, int nphi, double phi0,
|
||||||
weight[m]*=2*pi/nphi;
|
weight[m]*=2*pi/nphi;
|
||||||
}
|
}
|
||||||
|
|
||||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, weight,
|
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, NULL, weight,
|
||||||
geom_info);
|
geom_info);
|
||||||
|
|
||||||
DEALLOC(theta);
|
DEALLOC(theta);
|
||||||
|
|
|
@ -38,9 +38,7 @@
|
||||||
|
|
||||||
#include "sharp.h"
|
#include "sharp.h"
|
||||||
|
|
||||||
#define SHARP_MAXTRANS 10
|
#define SHARP_MAXTRANS 100
|
||||||
|
|
||||||
typedef enum { FLOAT, DOUBLE } sharp_fde;
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
|
@ -49,7 +47,6 @@ typedef struct
|
||||||
int add_output;
|
int add_output;
|
||||||
int nmaps, nalm;
|
int nmaps, nalm;
|
||||||
int flags;
|
int flags;
|
||||||
sharp_fde fde;
|
|
||||||
void **map;
|
void **map;
|
||||||
void **alm;
|
void **alm;
|
||||||
complex double *phase;
|
complex double *phase;
|
||||||
|
@ -57,7 +54,6 @@ typedef struct
|
||||||
complex double *almtmp;
|
complex double *almtmp;
|
||||||
const sharp_geom_info *ginfo;
|
const sharp_geom_info *ginfo;
|
||||||
const sharp_alm_info *ainfo;
|
const sharp_alm_info *ainfo;
|
||||||
int nv;
|
|
||||||
double time;
|
double time;
|
||||||
int ntrans;
|
int ntrans;
|
||||||
unsigned long long opcnt;
|
unsigned long long opcnt;
|
||||||
|
|
|
@ -42,7 +42,7 @@ extern "C" {
|
||||||
Helper type containing information about a single ring. */
|
Helper type containing information about a single ring. */
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
double theta, phi0, weight, cth, sth;
|
double theta, phi0, w_a2m, w_m2a, cth, sth;
|
||||||
ptrdiff_t ofs;
|
ptrdiff_t ofs;
|
||||||
int nph, stride;
|
int nph, stride;
|
||||||
} sharp_ringinfo;
|
} sharp_ringinfo;
|
||||||
|
@ -123,14 +123,15 @@ void sharp_destroy_alm_info (sharp_alm_info *info);
|
||||||
\param stride the stride between consecutive pixels
|
\param stride the stride between consecutive pixels
|
||||||
\param phi0 the azimuth (in radians) of the first pixel in each ring
|
\param phi0 the azimuth (in radians) of the first pixel in each ring
|
||||||
\param theta the colatitude (in radians) of each ring
|
\param theta the colatitude (in radians) of each ring
|
||||||
\param weight the pixel weight to be used for the ring. Pass NULL to use
|
\param wgt_a2m the pixel weight to be used for the ring in alm2map
|
||||||
1.0 as weight for all rings. By default weights are used for map2alm
|
transforms. Pass NULL to use 1.0 as weight for all rings.
|
||||||
but not alm2map, but the execution flags can override this.
|
\param wgt_m2a the pixel weight to be used for the ring in map2alm
|
||||||
|
transforms. Pass NULL to use 1.0 as weight for all rings.
|
||||||
\param geom_info will hold a pointer to the newly created data structure
|
\param geom_info will hold a pointer to the newly created data structure
|
||||||
*/
|
*/
|
||||||
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
|
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
|
||||||
const int *stride, const double *phi0, const double *theta,
|
const int *stride, const double *phi0, const double *theta,
|
||||||
const double *weight, sharp_geom_info **geom_info);
|
const double *wgt_a2m, const double *wgt_m2a, sharp_geom_info **geom_info);
|
||||||
|
|
||||||
/*! Deallocates the geometry information in \a info. */
|
/*! Deallocates the geometry information in \a info. */
|
||||||
void sharp_destroy_geom_info (sharp_geom_info *info);
|
void sharp_destroy_geom_info (sharp_geom_info *info);
|
||||||
|
@ -147,28 +148,15 @@ typedef enum { SHARP_MAP2ALM, /*!< analysis */
|
||||||
} sharp_jobtype;
|
} sharp_jobtype;
|
||||||
|
|
||||||
/*! Job flags */
|
/*! Job flags */
|
||||||
typedef enum { SHARP_SP = 0, /*!< map and alm is in single precision */
|
typedef enum { SHARP_DP = 1<<4, /*!< map and alm is in double precision */
|
||||||
SHARP_DP = 1 << 1, /*!< map and alm is in double precision */
|
SHARP_ADD= 1<<5, /*!< results are added to the output data */
|
||||||
|
SHARP_NVMAX = (1<<4)-1 /* internal use only */
|
||||||
SHARP_ALM2MAP_USE_WEIGHTS = 1 << 2,
|
|
||||||
/*!< apply ring weights for alm2map */
|
|
||||||
SHARP_MAP2ALM_IGNORE_WEIGHTS = 1 << 3,
|
|
||||||
/*!< do not use ring weights for map2alm */
|
|
||||||
|
|
||||||
/* convenience flag combinations
|
|
||||||
(stable API even if the default changes) */
|
|
||||||
SHARP_USE_WEIGHTS = SHARP_ALM2MAP_USE_WEIGHTS,
|
|
||||||
/*!< use ring weights for both map2alm and alm2map */
|
|
||||||
SHARP_IGNORE_WEIGHTS = SHARP_MAP2ALM_IGNORE_WEIGHTS
|
|
||||||
/*!< do not use ring weights for either map2alm or map2alm */
|
|
||||||
} sharp_jobflags;
|
} sharp_jobflags;
|
||||||
|
|
||||||
/*! Performs a libsharp SHT job. The interface deliberately does not use
|
/*! Performs a libsharp SHT job. The interface deliberately does not use
|
||||||
the C99 "complex" data type, in order to be callable from C89 and C++.
|
the C99 "complex" data type, in order to be callable from C89 and C++.
|
||||||
\param type the type of SHT
|
\param type the type of SHT
|
||||||
\param spin the spin of the quantities to be transformed
|
\param spin the spin of the quantities to be transformed
|
||||||
\param add_output if 0, the output arrays will be overwritten,
|
|
||||||
else the result will be added to the output arrays.
|
|
||||||
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
|
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
|
||||||
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
|
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
|
||||||
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
|
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
|
||||||
|
@ -185,19 +173,17 @@ typedef enum { SHARP_SP = 0, /*!< map and alm is in single precision */
|
||||||
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
|
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
|
||||||
exactly once.
|
exactly once.
|
||||||
\param ntrans the number of simultaneous SHTs
|
\param ntrans the number of simultaneous SHTs
|
||||||
\param flags See sharp_jobflags. In particular, if SHARP_SP is set, then
|
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
|
||||||
\a alm is expected to have the type "complex float **" and \a map is
|
\a alm is expected to have the type "complex double **" and \a map is
|
||||||
expected to have the type "float **"; if SHARP_DP is set, the expected
|
expected to have the type "double **"; otherwise, the expected
|
||||||
types are "complex double **" and "double **", respectively.
|
types are "complex float **" and "float **", respectively.
|
||||||
\param nv Internally used SHT parameter. Set to 0 unless you know what you are
|
|
||||||
doing.
|
|
||||||
\param time If not NULL, the wall clock time required for this SHT
|
\param time If not NULL, the wall clock time required for this SHT
|
||||||
(in seconds) will be written here.
|
(in seconds) will be written here.
|
||||||
\param opcnt If not NULL, a conservative estimate of the total floating point
|
\param opcnt If not NULL, a conservative estimate of the total floating point
|
||||||
operation count for this SHT will be written here. */
|
operation count for this SHT will be written here. */
|
||||||
void sharp_execute (sharp_jobtype type, int spin, int add_output, void *alm,
|
void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
|
||||||
void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info,
|
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
|
||||||
int ntrans, int flags, int nv, double *time, unsigned long long *opcnt);
|
int flags, double *time, unsigned long long *opcnt);
|
||||||
|
|
||||||
void sharp_set_chunksize_min(int new_chunksize_min);
|
void sharp_set_chunksize_min(int new_chunksize_min);
|
||||||
void sharp_set_nchunks_max(int new_nchunks_max);
|
void sharp_set_nchunks_max(int new_nchunks_max);
|
||||||
|
|
|
@ -284,13 +284,13 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
|
||||||
}
|
}
|
||||||
|
|
||||||
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
|
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
|
||||||
int add_output, void *alm, void *map, const sharp_geom_info *geom_info,
|
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||||
const sharp_alm_info *alm_info, int ntrans, int flags, int nv, double *time,
|
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||||
unsigned long long *opcnt)
|
unsigned long long *opcnt)
|
||||||
{
|
{
|
||||||
sharp_job job;
|
sharp_job job;
|
||||||
sharp_build_job_common (&job, type, spin, add_output, alm, map, geom_info,
|
sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
|
||||||
alm_info, ntrans, flags, nv);
|
ntrans, flags);
|
||||||
|
|
||||||
sharp_execute_job_mpi (&job, comm);
|
sharp_execute_job_mpi (&job, comm);
|
||||||
if (time!=NULL) *time = job.time;
|
if (time!=NULL) *time = job.time;
|
||||||
|
|
|
@ -44,8 +44,6 @@ extern "C" {
|
||||||
\param comm the MPI communicator to be used for this SHT
|
\param comm the MPI communicator to be used for this SHT
|
||||||
\param type the type of SHT
|
\param type the type of SHT
|
||||||
\param spin the spin of the quantities to be transformed
|
\param spin the spin of the quantities to be transformed
|
||||||
\param add_output if 0, the output arrays will be overwritten,
|
|
||||||
else the result will be added to the output arrays.
|
|
||||||
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
|
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
|
||||||
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
|
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
|
||||||
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
|
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
|
||||||
|
@ -64,19 +62,17 @@ extern "C" {
|
||||||
exactly once in the union of all \a alm_info objects over the participating
|
exactly once in the union of all \a alm_info objects over the participating
|
||||||
MPI tasks.
|
MPI tasks.
|
||||||
\param ntrans the number of simultaneous SHTs
|
\param ntrans the number of simultaneous SHTs
|
||||||
\param flags See sharp_jobflags. In particular, if SHARP_SP is set, then
|
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
|
||||||
\a alm is expected to have the type "complex float **" and \a map is
|
\a alm is expected to have the type "complex double **" and \a map is
|
||||||
expected to have the type "float **"; if SHARP_DP is set, the expected
|
expected to have the type "double **"; otherwise, the expected
|
||||||
types are "complex double **" and "double **", respectively.
|
types are "complex float **" and "float **", respectively.
|
||||||
\param nv Internally used SHT parameter. Set to 0 unless you know what you are
|
|
||||||
doing.
|
|
||||||
\param time If not NULL, the wall clock time required for this SHT
|
\param time If not NULL, the wall clock time required for this SHT
|
||||||
(in seconds) will be written here.
|
(in seconds) will be written here.
|
||||||
\param opcnt If not NULL, a conservative estimate of the total floating point
|
\param opcnt If not NULL, a conservative estimate of the total floating point
|
||||||
operation count for this SHT will be written here. */
|
operation count for this SHT will be written here. */
|
||||||
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
|
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
|
||||||
int add_output, void *alm, void *map, const sharp_geom_info *geom_info,
|
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||||
const sharp_alm_info *alm_info, int ntrans, int flags, int nv, double *time,
|
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||||
unsigned long long *opcnt);
|
unsigned long long *opcnt);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
@ -119,8 +119,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
||||||
|
|
||||||
double time;
|
double time;
|
||||||
unsigned long long opcnt;
|
unsigned long long opcnt;
|
||||||
sharp_execute(SHARP_MAP2ALM,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,
|
sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||||
SHARP_DP,0,&time,&opcnt);
|
SHARP_DP,&time,&opcnt);
|
||||||
printf("wall time for map2alm: %fs\n",time);
|
printf("wall time for map2alm: %fs\n",time);
|
||||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||||
measure_errors(alm_orig,alm,nalms,ncomp);
|
measure_errors(alm_orig,alm,nalms,ncomp);
|
||||||
|
@ -130,16 +130,16 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
||||||
double **map2;
|
double **map2;
|
||||||
ALLOC2D(map2,double,ncomp,npix);
|
ALLOC2D(map2,double,ncomp,npix);
|
||||||
printf ("\niteration %i:\n", iter+1);
|
printf ("\niteration %i:\n", iter+1);
|
||||||
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map2[0],tinfo,alms,ntrans,
|
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map2[0],tinfo,alms,ntrans,
|
||||||
SHARP_DP,0,&time,&opcnt);
|
SHARP_DP,&time,&opcnt);
|
||||||
printf("wall time for alm2map: %fs\n",time);
|
printf("wall time for alm2map: %fs\n",time);
|
||||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||||
for (int i=0; i<ncomp; ++i)
|
for (int i=0; i<ncomp; ++i)
|
||||||
for (ptrdiff_t m=0; m<npix; ++m)
|
for (ptrdiff_t m=0; m<npix; ++m)
|
||||||
map2[i][m] = map[i][m]-map2[i][m];
|
map2[i][m] = map[i][m]-map2[i][m];
|
||||||
|
|
||||||
sharp_execute(SHARP_MAP2ALM,spin,1,&alm[0],&map2[0],tinfo,alms,ntrans,
|
sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map2[0],tinfo,alms,ntrans,
|
||||||
SHARP_DP,0,&time,&opcnt);
|
SHARP_DP|SHARP_ADD,&time,&opcnt);
|
||||||
printf("wall time for map2alm: %fs\n",time);
|
printf("wall time for map2alm: %fs\n",time);
|
||||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||||
DEALLOC2D(map2);
|
DEALLOC2D(map2);
|
||||||
|
@ -173,8 +173,8 @@ static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
|
||||||
double time;
|
double time;
|
||||||
unsigned long long opcnt;
|
unsigned long long opcnt;
|
||||||
printf ("\niteration 0:\n");
|
printf ("\niteration 0:\n");
|
||||||
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,
|
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||||
SHARP_DP,0,&time,&opcnt);
|
SHARP_DP,&time,&opcnt);
|
||||||
printf("wall time for alm2map: %fs\n",time);
|
printf("wall time for alm2map: %fs\n",time);
|
||||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||||
|
|
||||||
|
|
|
@ -200,8 +200,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
||||||
double jtime;
|
double jtime;
|
||||||
unsigned long long jopcnt;
|
unsigned long long jopcnt;
|
||||||
|
|
||||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,0,&alm[0],&map[0],
|
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,&alm[0],&map[0],
|
||||||
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
|
tinfo,alms,ntrans,SHARP_DP,&jtime,&jopcnt);
|
||||||
unsigned long long opcnt=totalops(jopcnt);
|
unsigned long long opcnt=totalops(jopcnt);
|
||||||
double timer=maxTime(jtime);
|
double timer=maxTime(jtime);
|
||||||
if (mytask==0) printf("wall time for map2alm: %fs\n",timer);
|
if (mytask==0) printf("wall time for map2alm: %fs\n",timer);
|
||||||
|
@ -213,8 +213,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
||||||
double **map2;
|
double **map2;
|
||||||
ALLOC2D(map2,double,ncomp,npix);
|
ALLOC2D(map2,double,ncomp,npix);
|
||||||
if (mytask==0) printf ("\niteration %i:\n", iter+1);
|
if (mytask==0) printf ("\niteration %i:\n", iter+1);
|
||||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,0,&alm[0],&map2[0],
|
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,&alm[0],&map2[0],
|
||||||
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
|
tinfo,alms,ntrans,SHARP_DP,&jtime,&jopcnt);
|
||||||
opcnt=totalops(jopcnt);
|
opcnt=totalops(jopcnt);
|
||||||
timer=maxTime(jtime);
|
timer=maxTime(jtime);
|
||||||
if (mytask==0) printf("wall time for alm2map: %fs\n",timer);
|
if (mytask==0) printf("wall time for alm2map: %fs\n",timer);
|
||||||
|
@ -223,8 +223,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
||||||
for (ptrdiff_t m=0; m<npix; ++m)
|
for (ptrdiff_t m=0; m<npix; ++m)
|
||||||
map2[i][m] = map[i][m]-map2[i][m];
|
map2[i][m] = map[i][m]-map2[i][m];
|
||||||
|
|
||||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,1,&alm[0],&map2[0],
|
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,&alm[0],&map2[0],
|
||||||
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
|
tinfo,alms,ntrans,SHARP_DP|SHARP_ADD,&jtime,&jopcnt);
|
||||||
opcnt=totalops(jopcnt);
|
opcnt=totalops(jopcnt);
|
||||||
timer=maxTime(jtime);
|
timer=maxTime(jtime);
|
||||||
if (mytask==0) printf("wall time for map2alm: %fs\n",wallTime()-timer);
|
if (mytask==0) printf("wall time for map2alm: %fs\n",wallTime()-timer);
|
||||||
|
@ -263,8 +263,8 @@ static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
|
||||||
ALLOC2D(alm2,dcmplx,ncomp,nalms);
|
ALLOC2D(alm2,dcmplx,ncomp,nalms);
|
||||||
|
|
||||||
if (mytask==0) printf ("\niteration 0:\n");
|
if (mytask==0) printf ("\niteration 0:\n");
|
||||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,0,&alm[0],&map[0],
|
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,&alm[0],&map[0],
|
||||||
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
|
tinfo,alms,ntrans,SHARP_DP,&jtime,&jopcnt);
|
||||||
unsigned long long opcnt=totalops(jopcnt);
|
unsigned long long opcnt=totalops(jopcnt);
|
||||||
double timer=maxTime(jtime);
|
double timer=maxTime(jtime);
|
||||||
if (mytask==0) printf("wall time for alm2map: %fs\n",timer);
|
if (mytask==0) printf("wall time for alm2map: %fs\n",timer);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue