rework interface, put mor stuff into flags
This commit is contained in:
parent
0a1a9e5716
commit
9f46084386
12 changed files with 113 additions and 160 deletions
123
libsharp/sharp.c
123
libsharp/sharp.c
|
@ -162,7 +162,7 @@ void sharp_destroy_alm_info (sharp_alm_info *info)
|
|||
|
||||
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
|
||||
const int *stride, const double *phi0, const double *theta,
|
||||
const double *weight, sharp_geom_info **geom_info)
|
||||
const double *wgt_a2m, const double *wgt_m2a, sharp_geom_info **geom_info)
|
||||
{
|
||||
sharp_geom_info *info = RALLOC(sharp_geom_info,1);
|
||||
sharp_ringinfo *infos = RALLOC(sharp_ringinfo,nrings);
|
||||
|
@ -177,7 +177,8 @@ void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
|
|||
infos[m].theta = theta[m];
|
||||
infos[m].cth = cos(theta[m]);
|
||||
infos[m].sth = sin(theta[m]);
|
||||
infos[m].weight = (weight != NULL) ? weight[m] : 1.;
|
||||
infos[m].w_a2m = (wgt_a2m != NULL) ? wgt_a2m[m] : 1.;
|
||||
infos[m].w_m2a = (wgt_m2a != NULL) ? wgt_m2a[m] : 1.;
|
||||
infos[m].phi0 = phi0[m];
|
||||
infos[m].ofs = ofs[m];
|
||||
infos[m].stride = stride[m];
|
||||
|
@ -234,7 +235,7 @@ static int sharp_get_mmax (int *mval, int nm)
|
|||
|
||||
static void ringhelper_phase2ring (ringhelper *self,
|
||||
const sharp_ringinfo *info, void *data, int mmax, const dcmplx *phase,
|
||||
int pstride, sharp_fde fde, int flags)
|
||||
int pstride, int flags)
|
||||
{
|
||||
int nph = info->nph;
|
||||
int stride = info->stride;
|
||||
|
@ -274,30 +275,18 @@ static void ringhelper_phase2ring (ringhelper *self,
|
|||
}
|
||||
#endif
|
||||
real_plan_backward_c (self->plan, (double *)(self->work));
|
||||
if (flags & SHARP_ALM2MAP_USE_WEIGHTS)
|
||||
{
|
||||
if (fde==DOUBLE)
|
||||
for (int m=0; m<nph; ++m)
|
||||
((double *)data)[m*stride+info->ofs]+=creal(self->work[m])*info->weight;
|
||||
else
|
||||
for (int m=0; m<nph; ++m)
|
||||
((float *)data)[m*stride+info->ofs] +=
|
||||
(float)(creal(self->work[m])*info->weight);
|
||||
}
|
||||
if (flags&SHARP_DP)
|
||||
for (int m=0; m<nph; ++m)
|
||||
((double *)data)[m*stride+info->ofs]+=creal(self->work[m])*info->w_a2m;
|
||||
else
|
||||
{
|
||||
if (fde==DOUBLE)
|
||||
for (int m=0; m<nph; ++m)
|
||||
((double *)data)[m*stride+info->ofs] += creal(self->work[m]);
|
||||
else
|
||||
for (int m=0; m<nph; ++m)
|
||||
((float *)data)[m*stride+info->ofs] += (float)creal(self->work[m]);
|
||||
}
|
||||
for (int m=0; m<nph; ++m)
|
||||
((float *)data)[m*stride+info->ofs] +=
|
||||
(float)(creal(self->work[m])*info->w_a2m);
|
||||
}
|
||||
|
||||
static void ringhelper_ring2phase (ringhelper *self,
|
||||
const sharp_ringinfo *info, const void *data, int mmax, dcmplx *phase,
|
||||
int pstride, sharp_fde fde, int flags)
|
||||
int pstride, int flags)
|
||||
{
|
||||
int nph = info->nph;
|
||||
#if 1
|
||||
|
@ -307,24 +296,12 @@ static void ringhelper_ring2phase (ringhelper *self,
|
|||
#endif
|
||||
|
||||
ringhelper_update (self, nph, mmax, -info->phi0);
|
||||
if (flags & SHARP_MAP2ALM_IGNORE_WEIGHTS)
|
||||
{
|
||||
if (fde==DOUBLE)
|
||||
for (int m=0; m<nph; ++m)
|
||||
self->work[m] = ((double *)data)[info->ofs+m*info->stride];
|
||||
else
|
||||
for (int m=0; m<nph; ++m)
|
||||
self->work[m] = ((float *)data)[info->ofs+m*info->stride];
|
||||
}
|
||||
if (flags&SHARP_DP)
|
||||
for (int m=0; m<nph; ++m)
|
||||
self->work[m] = ((double *)data)[info->ofs+m*info->stride]*info->w_m2a;
|
||||
else
|
||||
{
|
||||
if (fde==DOUBLE)
|
||||
for (int m=0; m<nph; ++m)
|
||||
self->work[m] = ((double *)data)[info->ofs+m*info->stride]*info->weight;
|
||||
else
|
||||
for (int m=0; m<nph; ++m)
|
||||
self->work[m] = ((float *)data)[info->ofs+m*info->stride]*info->weight;
|
||||
}
|
||||
for (int m=0; m<nph; ++m)
|
||||
self->work[m] = ((float *)data)[info->ofs+m*info->stride]*info->w_m2a;
|
||||
|
||||
real_plan_forward_c (self->plan, (double *)self->work);
|
||||
|
||||
|
@ -341,28 +318,28 @@ static void ringhelper_ring2phase (ringhelper *self,
|
|||
|
||||
static void ringhelper_pair2phase (ringhelper *self, int mmax,
|
||||
const sharp_ringpair *pair, const void *data, dcmplx *phase1, dcmplx *phase2,
|
||||
int pstride, sharp_fde fde, int flags)
|
||||
int pstride, int flags)
|
||||
{
|
||||
ringhelper_ring2phase (self,&(pair->r1),data,mmax,phase1,pstride,fde,flags);
|
||||
ringhelper_ring2phase (self,&(pair->r1),data,mmax,phase1,pstride,flags);
|
||||
if (pair->r2.nph>0)
|
||||
ringhelper_ring2phase (self,&(pair->r2),data,mmax,phase2,pstride,fde,flags);
|
||||
ringhelper_ring2phase (self,&(pair->r2),data,mmax,phase2,pstride,flags);
|
||||
}
|
||||
|
||||
static void ringhelper_phase2pair (ringhelper *self, int mmax,
|
||||
const dcmplx *phase1, const dcmplx *phase2, int pstride,
|
||||
const sharp_ringpair *pair, void *data, sharp_fde fde, int flags)
|
||||
const sharp_ringpair *pair, void *data, int flags)
|
||||
{
|
||||
ringhelper_phase2ring (self,&(pair->r1),data,mmax,phase1,pstride,fde,flags);
|
||||
ringhelper_phase2ring (self,&(pair->r1),data,mmax,phase1,pstride,flags);
|
||||
if (pair->r2.nph>0)
|
||||
ringhelper_phase2ring (self,&(pair->r2),data,mmax,phase2,pstride,fde,flags);
|
||||
ringhelper_phase2ring (self,&(pair->r2),data,mmax,phase2,pstride,flags);
|
||||
}
|
||||
|
||||
static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
|
||||
sharp_fde fde)
|
||||
int flags)
|
||||
{
|
||||
for (int j=0;j<ginfo->npairs;++j)
|
||||
{
|
||||
if (fde==DOUBLE)
|
||||
if (flags&SHARP_DP)
|
||||
{
|
||||
for (int i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||
((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=value;
|
||||
|
@ -382,9 +359,9 @@ static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
|
|||
}
|
||||
|
||||
static void fill_alm (const sharp_alm_info *ainfo, void *alm, dcmplx value,
|
||||
sharp_fde fde)
|
||||
int flags)
|
||||
{
|
||||
if (fde==DOUBLE)
|
||||
if (flags&SHARP_DP)
|
||||
for (int mi=0;mi<ainfo->nm;++mi)
|
||||
for (int l=ainfo->mval[mi];l<=ainfo->lmax;++l)
|
||||
((dcmplx *)alm)[sharp_alm_index(ainfo,l,mi)] = value;
|
||||
|
@ -396,13 +373,13 @@ static void fill_alm (const sharp_alm_info *ainfo, void *alm, dcmplx value,
|
|||
|
||||
static void init_output (sharp_job *job)
|
||||
{
|
||||
if (job->add_output) return;
|
||||
if (job->flags&SHARP_ADD) return;
|
||||
if (job->type == SHARP_MAP2ALM)
|
||||
for (int i=0; i<job->ntrans*job->nalm; ++i)
|
||||
fill_alm (job->ainfo,job->alm[i],0.,job->fde);
|
||||
fill_alm (job->ainfo,job->alm[i],0.,job->flags);
|
||||
else
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
fill_map (job->ginfo,job->map[i],0.,job->fde);
|
||||
fill_map (job->ginfo,job->map[i],0.,job->flags);
|
||||
}
|
||||
|
||||
static void alloc_phase (sharp_job *job, int nm, int ntheta)
|
||||
|
@ -426,8 +403,7 @@ static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
|||
int dim2 = pstride*(ith-llim)*(mmax+1);
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
ringhelper_pair2phase(&helper,mmax,&job->ginfo->pair[ith], job->map[i],
|
||||
&job->phase[dim2+2*i], &job->phase[dim2+2*i+1], pstride, job->fde,
|
||||
job->flags);
|
||||
&job->phase[dim2+2*i], &job->phase[dim2+2*i+1], pstride, job->flags);
|
||||
}
|
||||
ringhelper_destroy(&helper);
|
||||
} /* end of parallel region */
|
||||
|
@ -447,7 +423,7 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
|||
int stride=job->ainfo->stride;
|
||||
if (job->spin==0)
|
||||
{
|
||||
if (job->fde==DOUBLE)
|
||||
if (job->flags&SHARP_DP)
|
||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
||||
for (int i=0; i<job->ntrans*job->nalm; ++i)
|
||||
job->almtmp[job->ntrans*job->nalm*l+i]
|
||||
|
@ -460,7 +436,7 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
|||
}
|
||||
else
|
||||
{
|
||||
if (job->fde==DOUBLE)
|
||||
if (job->flags&SHARP_DP)
|
||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
||||
for (int i=0; i<job->ntrans*job->nalm; ++i)
|
||||
job->almtmp[job->ntrans*job->nalm*l+i]
|
||||
|
@ -484,7 +460,7 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
|||
int stride=job->ainfo->stride;
|
||||
if (job->spin==0)
|
||||
{
|
||||
if (job->fde==DOUBLE)
|
||||
if (job->flags&SHARP_DP)
|
||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
||||
for (int i=0;i<job->ntrans*job->nalm;++i)
|
||||
((dcmplx *)job->alm[i])[ofs+l*stride] +=
|
||||
|
@ -497,7 +473,7 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
|||
}
|
||||
else
|
||||
{
|
||||
if (job->fde==DOUBLE)
|
||||
if (job->flags&SHARP_DP)
|
||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
|
||||
for (int i=0;i<job->ntrans*job->nalm;++i)
|
||||
((dcmplx *)job->alm[i])[ofs+l*stride] +=
|
||||
|
@ -525,7 +501,7 @@ static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
|||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
ringhelper_phase2pair(&helper,mmax,&job->phase[dim2+2*i],
|
||||
&job->phase[dim2+2*i+1],pstride,&job->ginfo->pair[ith],job->map[i],
|
||||
job->fde, job->flags);
|
||||
job->flags);
|
||||
}
|
||||
ringhelper_destroy(&helper);
|
||||
} /* end of parallel region */
|
||||
|
@ -546,7 +522,8 @@ static void sharp_execute_job (sharp_job *job)
|
|||
init_output (job);
|
||||
|
||||
int nchunks, chunksize;
|
||||
get_chunk_info(job->ginfo->npairs,job->nv*VLEN,&nchunks,&chunksize);
|
||||
get_chunk_info(job->ginfo->npairs,(job->flags&SHARP_NVMAX)*VLEN,&nchunks,
|
||||
&chunksize);
|
||||
alloc_phase (job,mmax+1,chunksize);
|
||||
|
||||
/* chunk loop */
|
||||
|
@ -615,9 +592,8 @@ static void sharp_execute_job (sharp_job *job)
|
|||
}
|
||||
|
||||
static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
|
||||
int spin, int add_output, void *alm, void *map,
|
||||
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
|
||||
int flags, int nv)
|
||||
int spin, void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags)
|
||||
{
|
||||
UTIL_ASSERT((ntrans>0)&&(ntrans<=SHARP_MAXTRANS),
|
||||
"bad number of simultaneous transforms");
|
||||
|
@ -628,28 +604,27 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
|
|||
job->type = type;
|
||||
job->spin = spin;
|
||||
job->norm_l = NULL;
|
||||
job->add_output = add_output;
|
||||
job->nmaps = (type==SHARP_ALM2MAP_DERIV1) ? 2 : ((spin>0) ? 2 : 1);
|
||||
job->nalm = (type==SHARP_ALM2MAP_DERIV1) ? 1 : ((spin>0) ? 2 : 1);
|
||||
job->ginfo = geom_info;
|
||||
job->ainfo = alm_info;
|
||||
job->nv = (nv==0) ? sharp_nv_oracle (type, spin, ntrans) : nv;
|
||||
job->flags = flags;
|
||||
if ((job->flags&SHARP_NVMAX)==0)
|
||||
job->flags|=sharp_nv_oracle (type, spin, ntrans);
|
||||
job->time = 0.;
|
||||
job->opcnt = 0;
|
||||
job->ntrans = ntrans;
|
||||
job->alm=alm;
|
||||
job->map=map;
|
||||
job->flags = flags;
|
||||
job->fde=(flags & SHARP_DP) ? DOUBLE : FLOAT;
|
||||
}
|
||||
|
||||
void sharp_execute (sharp_jobtype type, int spin, int add_output, void *alm,
|
||||
void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info,
|
||||
int ntrans, int flags, int nv, double *time, unsigned long long *opcnt)
|
||||
void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
|
||||
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
|
||||
int flags, double *time, unsigned long long *opcnt)
|
||||
{
|
||||
sharp_job job;
|
||||
sharp_build_job_common (&job, type, spin, add_output, alm, map, geom_info,
|
||||
alm_info, ntrans, flags, nv);
|
||||
sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
|
||||
ntrans, flags);
|
||||
|
||||
sharp_execute_job (&job);
|
||||
if (time!=NULL) *time = job.time;
|
||||
|
@ -701,8 +676,8 @@ static int sharp_oracle (sharp_jobtype type, int spin, int ntrans)
|
|||
int ntries=0;
|
||||
do
|
||||
{
|
||||
sharp_execute(type,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,1,nv,&jtime,
|
||||
NULL);
|
||||
sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,nv|SHARP_DP,
|
||||
&jtime,NULL);
|
||||
|
||||
if (jtime<time) { time=jtime; nvbest=nv; }
|
||||
time_acc+=jtime;
|
||||
|
|
|
@ -121,8 +121,8 @@ static void check_sign_scale(void)
|
|||
for (int j=0; j<nalms; ++j)
|
||||
alm[i][j]=1.+_Complex_I;
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP,0,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
0,NULL,NULL);
|
||||
sharp_execute(SHARP_ALM2MAP,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[it][0 ], 3.588246976618616912e+00,1e-12),
|
||||
|
@ -132,8 +132,8 @@ static void check_sign_scale(void)
|
|||
UTIL_ASSERT(FAPPROX(map[it][npix-1],-1.234675107554816442e+01,1e-12),
|
||||
"error");
|
||||
}
|
||||
sharp_execute(SHARP_ALM2MAP,1,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
0,NULL,NULL);
|
||||
sharp_execute(SHARP_ALM2MAP,1,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ], 2.750897760535633285e+00,1e-12),
|
||||
|
@ -150,8 +150,8 @@ static void check_sign_scale(void)
|
|||
"error");
|
||||
}
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP,2,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
0,NULL,NULL);
|
||||
sharp_execute(SHARP_ALM2MAP,2,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-1.398186224727334448e+00,1e-12),
|
||||
|
@ -168,8 +168,8 @@ static void check_sign_scale(void)
|
|||
"error");
|
||||
}
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP_DERIV1,1,0,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,0,NULL,NULL);
|
||||
sharp_execute(SHARP_ALM2MAP_DERIV1,1,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-6.859393905369091105e-01,1e-11),
|
||||
|
@ -215,10 +215,10 @@ static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
|
|||
dcmplx **alm2;
|
||||
ALLOC2D(alm2,dcmplx,ncomp,nalms);
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,nv,NULL,NULL);
|
||||
sharp_execute(SHARP_MAP2ALM,spin,0,&alm2[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,nv,NULL,NULL);
|
||||
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP|nv,NULL,NULL);
|
||||
sharp_execute(SHARP_MAP2ALM,spin,&alm2[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP|nv,NULL,NULL);
|
||||
measure_errors(alm,alm2,nalms,ncomp);
|
||||
|
||||
DEALLOC2D(map);
|
||||
|
|
|
@ -73,7 +73,7 @@ static void bench_sht (int spin, int nv, sharp_jobtype type,
|
|||
{
|
||||
double jtime;
|
||||
unsigned long long jopcnt;
|
||||
sharp_execute(type,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,nv,
|
||||
sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP|nv,
|
||||
&jtime,&jopcnt);
|
||||
|
||||
if (jopcnt<*opcnt) *opcnt=jopcnt;
|
||||
|
|
|
@ -188,8 +188,8 @@ int main(int argc, char **argv)
|
|||
{
|
||||
double ltime;
|
||||
unsigned long long lopcnt;
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,jtype,spin,0,&alm[0],&map[0],
|
||||
tinfo,alms,ntrans,SHARP_DP,0,<ime,&lopcnt);
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,jtype,spin,&alm[0],&map[0],
|
||||
tinfo,alms,ntrans,SHARP_DP,<ime,&lopcnt);
|
||||
|
||||
ltime=maxTime(ltime);
|
||||
if (ltime<time) { time=ltime; opcnt=totalops(lopcnt); }
|
||||
|
|
|
@ -77,10 +77,10 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
|||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *idx)
|
||||
{
|
||||
int njobs=job->ntrans;
|
||||
int njobs=job->ntrans, nv=job->flags&SHARP_NVMAX;
|
||||
if (njobs<=MAXJOB_SPECIAL)
|
||||
{
|
||||
switch (njobs*16+job->nv)
|
||||
switch (njobs*16+nv)
|
||||
{
|
||||
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
|
||||
case 0x11:
|
||||
|
@ -207,7 +207,7 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
|||
#if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
|
||||
else
|
||||
{
|
||||
switch (job->nv)
|
||||
switch (nv)
|
||||
{
|
||||
case 1:
|
||||
CONCAT2(inner_loop,1)
|
||||
|
|
|
@ -88,7 +88,7 @@ void sharp_make_weighted_healpix_geom_info (int nside, int stride,
|
|||
weight_[m]=4.*pi/npix*weight[northring-1];
|
||||
}
|
||||
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, weight_,
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, NULL, weight_,
|
||||
geom_info);
|
||||
|
||||
DEALLOC(theta);
|
||||
|
@ -174,7 +174,7 @@ void sharp_make_gauss_geom_info (int nrings, int nphi, int stride_lon,
|
|||
weight[m]*=2*pi/nphi;
|
||||
}
|
||||
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, weight,
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, NULL, weight,
|
||||
geom_info);
|
||||
|
||||
DEALLOC(theta);
|
||||
|
@ -210,7 +210,7 @@ void sharp_make_ecp_geom_info (int nrings, int nphi, double phi0,
|
|||
weight[m]*=2*pi/nphi;
|
||||
}
|
||||
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, weight,
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, NULL, weight,
|
||||
geom_info);
|
||||
|
||||
DEALLOC(theta);
|
||||
|
|
|
@ -38,9 +38,7 @@
|
|||
|
||||
#include "sharp.h"
|
||||
|
||||
#define SHARP_MAXTRANS 10
|
||||
|
||||
typedef enum { FLOAT, DOUBLE } sharp_fde;
|
||||
#define SHARP_MAXTRANS 100
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
@ -49,7 +47,6 @@ typedef struct
|
|||
int add_output;
|
||||
int nmaps, nalm;
|
||||
int flags;
|
||||
sharp_fde fde;
|
||||
void **map;
|
||||
void **alm;
|
||||
complex double *phase;
|
||||
|
@ -57,7 +54,6 @@ typedef struct
|
|||
complex double *almtmp;
|
||||
const sharp_geom_info *ginfo;
|
||||
const sharp_alm_info *ainfo;
|
||||
int nv;
|
||||
double time;
|
||||
int ntrans;
|
||||
unsigned long long opcnt;
|
||||
|
|
|
@ -42,7 +42,7 @@ extern "C" {
|
|||
Helper type containing information about a single ring. */
|
||||
typedef struct
|
||||
{
|
||||
double theta, phi0, weight, cth, sth;
|
||||
double theta, phi0, w_a2m, w_m2a, cth, sth;
|
||||
ptrdiff_t ofs;
|
||||
int nph, stride;
|
||||
} sharp_ringinfo;
|
||||
|
@ -123,14 +123,15 @@ void sharp_destroy_alm_info (sharp_alm_info *info);
|
|||
\param stride the stride between consecutive pixels
|
||||
\param phi0 the azimuth (in radians) of the first pixel in each ring
|
||||
\param theta the colatitude (in radians) of each ring
|
||||
\param weight the pixel weight to be used for the ring. Pass NULL to use
|
||||
1.0 as weight for all rings. By default weights are used for map2alm
|
||||
but not alm2map, but the execution flags can override this.
|
||||
\param wgt_a2m the pixel weight to be used for the ring in alm2map
|
||||
transforms. Pass NULL to use 1.0 as weight for all rings.
|
||||
\param wgt_m2a the pixel weight to be used for the ring in map2alm
|
||||
transforms. Pass NULL to use 1.0 as weight for all rings.
|
||||
\param geom_info will hold a pointer to the newly created data structure
|
||||
*/
|
||||
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
|
||||
const int *stride, const double *phi0, const double *theta,
|
||||
const double *weight, sharp_geom_info **geom_info);
|
||||
const double *wgt_a2m, const double *wgt_m2a, sharp_geom_info **geom_info);
|
||||
|
||||
/*! Deallocates the geometry information in \a info. */
|
||||
void sharp_destroy_geom_info (sharp_geom_info *info);
|
||||
|
@ -147,28 +148,15 @@ typedef enum { SHARP_MAP2ALM, /*!< analysis */
|
|||
} sharp_jobtype;
|
||||
|
||||
/*! Job flags */
|
||||
typedef enum { SHARP_SP = 0, /*!< map and alm is in single precision */
|
||||
SHARP_DP = 1 << 1, /*!< map and alm is in double precision */
|
||||
|
||||
SHARP_ALM2MAP_USE_WEIGHTS = 1 << 2,
|
||||
/*!< apply ring weights for alm2map */
|
||||
SHARP_MAP2ALM_IGNORE_WEIGHTS = 1 << 3,
|
||||
/*!< do not use ring weights for map2alm */
|
||||
|
||||
/* convenience flag combinations
|
||||
(stable API even if the default changes) */
|
||||
SHARP_USE_WEIGHTS = SHARP_ALM2MAP_USE_WEIGHTS,
|
||||
/*!< use ring weights for both map2alm and alm2map */
|
||||
SHARP_IGNORE_WEIGHTS = SHARP_MAP2ALM_IGNORE_WEIGHTS
|
||||
/*!< do not use ring weights for either map2alm or map2alm */
|
||||
typedef enum { SHARP_DP = 1<<4, /*!< map and alm is in double precision */
|
||||
SHARP_ADD= 1<<5, /*!< results are added to the output data */
|
||||
SHARP_NVMAX = (1<<4)-1 /* internal use only */
|
||||
} sharp_jobflags;
|
||||
|
||||
/*! Performs a libsharp SHT job. The interface deliberately does not use
|
||||
the C99 "complex" data type, in order to be callable from C89 and C++.
|
||||
\param type the type of SHT
|
||||
\param spin the spin of the quantities to be transformed
|
||||
\param add_output if 0, the output arrays will be overwritten,
|
||||
else the result will be added to the output arrays.
|
||||
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
|
||||
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
|
||||
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
|
||||
|
@ -185,19 +173,17 @@ typedef enum { SHARP_SP = 0, /*!< map and alm is in single precision */
|
|||
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
|
||||
exactly once.
|
||||
\param ntrans the number of simultaneous SHTs
|
||||
\param flags See sharp_jobflags. In particular, if SHARP_SP is set, then
|
||||
\a alm is expected to have the type "complex float **" and \a map is
|
||||
expected to have the type "float **"; if SHARP_DP is set, the expected
|
||||
types are "complex double **" and "double **", respectively.
|
||||
\param nv Internally used SHT parameter. Set to 0 unless you know what you are
|
||||
doing.
|
||||
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
|
||||
\a alm is expected to have the type "complex double **" and \a map is
|
||||
expected to have the type "double **"; otherwise, the expected
|
||||
types are "complex float **" and "float **", respectively.
|
||||
\param time If not NULL, the wall clock time required for this SHT
|
||||
(in seconds)will be written here.
|
||||
\param opcnt If not NULL, a conservative estimate of the total floating point
|
||||
operation count for this SHT will be written here. */
|
||||
void sharp_execute (sharp_jobtype type, int spin, int add_output, void *alm,
|
||||
void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info,
|
||||
int ntrans, int flags, int nv, double *time, unsigned long long *opcnt);
|
||||
void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
|
||||
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
|
||||
int flags, double *time, unsigned long long *opcnt);
|
||||
|
||||
void sharp_set_chunksize_min(int new_chunksize_min);
|
||||
void sharp_set_nchunks_max(int new_nchunks_max);
|
||||
|
|
|
@ -284,13 +284,13 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
|
|||
}
|
||||
|
||||
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
|
||||
int add_output, void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, int nv, double *time,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
unsigned long long *opcnt)
|
||||
{
|
||||
sharp_job job;
|
||||
sharp_build_job_common (&job, type, spin, add_output, alm, map, geom_info,
|
||||
alm_info, ntrans, flags, nv);
|
||||
sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
|
||||
ntrans, flags);
|
||||
|
||||
sharp_execute_job_mpi (&job, comm);
|
||||
if (time!=NULL) *time = job.time;
|
||||
|
|
|
@ -44,8 +44,6 @@ extern "C" {
|
|||
\param comm the MPI communicator to be used for this SHT
|
||||
\param type the type of SHT
|
||||
\param spin the spin of the quantities to be transformed
|
||||
\param add_output if 0, the output arrays will be overwritten,
|
||||
else the result will be added to the output arrays.
|
||||
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
|
||||
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
|
||||
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
|
||||
|
@ -64,19 +62,17 @@ extern "C" {
|
|||
exactly once in the union of all \a alm_info objects over the participating
|
||||
MPI tasks.
|
||||
\param ntrans the number of simultaneous SHTs
|
||||
\param flags See sharp_jobflags. In particular, if SHARP_SP is set, then
|
||||
\a alm is expected to have the type "complex float **" and \a map is
|
||||
expected to have the type "float **"; if SHARP_DP is set, the expected
|
||||
types are "complex double **" and "double **", respectively.
|
||||
\param nv Internally used SHT parameter. Set to 0 unless you know what you are
|
||||
doing.
|
||||
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
|
||||
\a alm is expected to have the type "complex double **" and \a map is
|
||||
expected to have the type "double **"; otherwise, the expected
|
||||
types are "complex float **" and "float **", respectively.
|
||||
\param time If not NULL, the wall clock time required for this SHT
|
||||
(in seconds)will be written here.
|
||||
\param opcnt If not NULL, a conservative estimate of the total floating point
|
||||
operation count for this SHT will be written here. */
|
||||
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
|
||||
int add_output, void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, int nv, double *time,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
unsigned long long *opcnt);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -119,8 +119,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
|||
|
||||
double time;
|
||||
unsigned long long opcnt;
|
||||
sharp_execute(SHARP_MAP2ALM,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,0,&time,&opcnt);
|
||||
sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,&time,&opcnt);
|
||||
printf("wall time for map2alm: %fs\n",time);
|
||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||
measure_errors(alm_orig,alm,nalms,ncomp);
|
||||
|
@ -130,16 +130,16 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
|||
double **map2;
|
||||
ALLOC2D(map2,double,ncomp,npix);
|
||||
printf ("\niteration %i:\n", iter+1);
|
||||
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map2[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,0,&time,&opcnt);
|
||||
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map2[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,&time,&opcnt);
|
||||
printf("wall time for alm2map: %fs\n",time);
|
||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
for (ptrdiff_t m=0; m<npix; ++m)
|
||||
map2[i][m] = map[i][m]-map2[i][m];
|
||||
|
||||
sharp_execute(SHARP_MAP2ALM,spin,1,&alm[0],&map2[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,0,&time,&opcnt);
|
||||
sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map2[0],tinfo,alms,ntrans,
|
||||
SHARP_DP|SHARP_ADD,&time,&opcnt);
|
||||
printf("wall time for map2alm: %fs\n",time);
|
||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||
DEALLOC2D(map2);
|
||||
|
@ -173,8 +173,8 @@ static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
|
|||
double time;
|
||||
unsigned long long opcnt;
|
||||
printf ("\niteration 0:\n");
|
||||
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,0,&time,&opcnt);
|
||||
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,&time,&opcnt);
|
||||
printf("wall time for alm2map: %fs\n",time);
|
||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||
|
||||
|
|
|
@ -200,8 +200,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
|||
double jtime;
|
||||
unsigned long long jopcnt;
|
||||
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,0,&alm[0],&map[0],
|
||||
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,&alm[0],&map[0],
|
||||
tinfo,alms,ntrans,SHARP_DP,&jtime,&jopcnt);
|
||||
unsigned long long opcnt=totalops(jopcnt);
|
||||
double timer=maxTime(jtime);
|
||||
if (mytask==0) printf("wall time for map2alm: %fs\n",timer);
|
||||
|
@ -213,8 +213,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
|||
double **map2;
|
||||
ALLOC2D(map2,double,ncomp,npix);
|
||||
if (mytask==0) printf ("\niteration %i:\n", iter+1);
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,0,&alm[0],&map2[0],
|
||||
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,&alm[0],&map2[0],
|
||||
tinfo,alms,ntrans,SHARP_DP,&jtime,&jopcnt);
|
||||
opcnt=totalops(jopcnt);
|
||||
timer=maxTime(jtime);
|
||||
if (mytask==0) printf("wall time for alm2map: %fs\n",timer);
|
||||
|
@ -223,8 +223,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
|||
for (ptrdiff_t m=0; m<npix; ++m)
|
||||
map2[i][m] = map[i][m]-map2[i][m];
|
||||
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,1,&alm[0],&map2[0],
|
||||
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,&alm[0],&map2[0],
|
||||
tinfo,alms,ntrans,SHARP_DP|SHARP_ADD,&jtime,&jopcnt);
|
||||
opcnt=totalops(jopcnt);
|
||||
timer=maxTime(jtime);
|
||||
if (mytask==0) printf("wall time for map2alm: %fs\n",wallTime()-timer);
|
||||
|
@ -263,8 +263,8 @@ static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
|
|||
ALLOC2D(alm2,dcmplx,ncomp,nalms);
|
||||
|
||||
if (mytask==0) printf ("\niteration 0:\n");
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,0,&alm[0],&map[0],
|
||||
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,&alm[0],&map[0],
|
||||
tinfo,alms,ntrans,SHARP_DP,&jtime,&jopcnt);
|
||||
unsigned long long opcnt=totalops(jopcnt);
|
||||
double timer=maxTime(jtime);
|
||||
if (mytask==0) printf("wall time for alm2map: %fs\n",timer);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue