rework interface, put mor stuff into flags

This commit is contained in:
Martin Reinecke 2012-11-09 12:53:14 +01:00
parent 0a1a9e5716
commit 9f46084386
12 changed files with 113 additions and 160 deletions

View file

@ -162,7 +162,7 @@ void sharp_destroy_alm_info (sharp_alm_info *info)
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
const int *stride, const double *phi0, const double *theta,
const double *weight, sharp_geom_info **geom_info)
const double *wgt_a2m, const double *wgt_m2a, sharp_geom_info **geom_info)
{
sharp_geom_info *info = RALLOC(sharp_geom_info,1);
sharp_ringinfo *infos = RALLOC(sharp_ringinfo,nrings);
@ -177,7 +177,8 @@ void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
infos[m].theta = theta[m];
infos[m].cth = cos(theta[m]);
infos[m].sth = sin(theta[m]);
infos[m].weight = (weight != NULL) ? weight[m] : 1.;
infos[m].w_a2m = (wgt_a2m != NULL) ? wgt_a2m[m] : 1.;
infos[m].w_m2a = (wgt_m2a != NULL) ? wgt_m2a[m] : 1.;
infos[m].phi0 = phi0[m];
infos[m].ofs = ofs[m];
infos[m].stride = stride[m];
@ -234,7 +235,7 @@ static int sharp_get_mmax (int *mval, int nm)
static void ringhelper_phase2ring (ringhelper *self,
const sharp_ringinfo *info, void *data, int mmax, const dcmplx *phase,
int pstride, sharp_fde fde, int flags)
int pstride, int flags)
{
int nph = info->nph;
int stride = info->stride;
@ -274,30 +275,18 @@ static void ringhelper_phase2ring (ringhelper *self,
}
#endif
real_plan_backward_c (self->plan, (double *)(self->work));
if (flags & SHARP_ALM2MAP_USE_WEIGHTS)
{
if (fde==DOUBLE)
for (int m=0; m<nph; ++m)
((double *)data)[m*stride+info->ofs]+=creal(self->work[m])*info->weight;
else
for (int m=0; m<nph; ++m)
((float *)data)[m*stride+info->ofs] +=
(float)(creal(self->work[m])*info->weight);
}
if (flags&SHARP_DP)
for (int m=0; m<nph; ++m)
((double *)data)[m*stride+info->ofs]+=creal(self->work[m])*info->w_a2m;
else
{
if (fde==DOUBLE)
for (int m=0; m<nph; ++m)
((double *)data)[m*stride+info->ofs] += creal(self->work[m]);
else
for (int m=0; m<nph; ++m)
((float *)data)[m*stride+info->ofs] += (float)creal(self->work[m]);
}
for (int m=0; m<nph; ++m)
((float *)data)[m*stride+info->ofs] +=
(float)(creal(self->work[m])*info->w_a2m);
}
static void ringhelper_ring2phase (ringhelper *self,
const sharp_ringinfo *info, const void *data, int mmax, dcmplx *phase,
int pstride, sharp_fde fde, int flags)
int pstride, int flags)
{
int nph = info->nph;
#if 1
@ -307,24 +296,12 @@ static void ringhelper_ring2phase (ringhelper *self,
#endif
ringhelper_update (self, nph, mmax, -info->phi0);
if (flags & SHARP_MAP2ALM_IGNORE_WEIGHTS)
{
if (fde==DOUBLE)
for (int m=0; m<nph; ++m)
self->work[m] = ((double *)data)[info->ofs+m*info->stride];
else
for (int m=0; m<nph; ++m)
self->work[m] = ((float *)data)[info->ofs+m*info->stride];
}
if (flags&SHARP_DP)
for (int m=0; m<nph; ++m)
self->work[m] = ((double *)data)[info->ofs+m*info->stride]*info->w_m2a;
else
{
if (fde==DOUBLE)
for (int m=0; m<nph; ++m)
self->work[m] = ((double *)data)[info->ofs+m*info->stride]*info->weight;
else
for (int m=0; m<nph; ++m)
self->work[m] = ((float *)data)[info->ofs+m*info->stride]*info->weight;
}
for (int m=0; m<nph; ++m)
self->work[m] = ((float *)data)[info->ofs+m*info->stride]*info->w_m2a;
real_plan_forward_c (self->plan, (double *)self->work);
@ -341,28 +318,28 @@ static void ringhelper_ring2phase (ringhelper *self,
static void ringhelper_pair2phase (ringhelper *self, int mmax,
const sharp_ringpair *pair, const void *data, dcmplx *phase1, dcmplx *phase2,
int pstride, sharp_fde fde, int flags)
int pstride, int flags)
{
ringhelper_ring2phase (self,&(pair->r1),data,mmax,phase1,pstride,fde,flags);
ringhelper_ring2phase (self,&(pair->r1),data,mmax,phase1,pstride,flags);
if (pair->r2.nph>0)
ringhelper_ring2phase (self,&(pair->r2),data,mmax,phase2,pstride,fde,flags);
ringhelper_ring2phase (self,&(pair->r2),data,mmax,phase2,pstride,flags);
}
static void ringhelper_phase2pair (ringhelper *self, int mmax,
const dcmplx *phase1, const dcmplx *phase2, int pstride,
const sharp_ringpair *pair, void *data, sharp_fde fde, int flags)
const sharp_ringpair *pair, void *data, int flags)
{
ringhelper_phase2ring (self,&(pair->r1),data,mmax,phase1,pstride,fde,flags);
ringhelper_phase2ring (self,&(pair->r1),data,mmax,phase1,pstride,flags);
if (pair->r2.nph>0)
ringhelper_phase2ring (self,&(pair->r2),data,mmax,phase2,pstride,fde,flags);
ringhelper_phase2ring (self,&(pair->r2),data,mmax,phase2,pstride,flags);
}
static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
sharp_fde fde)
int flags)
{
for (int j=0;j<ginfo->npairs;++j)
{
if (fde==DOUBLE)
if (flags&SHARP_DP)
{
for (int i=0;i<ginfo->pair[j].r1.nph;++i)
((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=value;
@ -382,9 +359,9 @@ static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
}
static void fill_alm (const sharp_alm_info *ainfo, void *alm, dcmplx value,
sharp_fde fde)
int flags)
{
if (fde==DOUBLE)
if (flags&SHARP_DP)
for (int mi=0;mi<ainfo->nm;++mi)
for (int l=ainfo->mval[mi];l<=ainfo->lmax;++l)
((dcmplx *)alm)[sharp_alm_index(ainfo,l,mi)] = value;
@ -396,13 +373,13 @@ static void fill_alm (const sharp_alm_info *ainfo, void *alm, dcmplx value,
static void init_output (sharp_job *job)
{
if (job->add_output) return;
if (job->flags&SHARP_ADD) return;
if (job->type == SHARP_MAP2ALM)
for (int i=0; i<job->ntrans*job->nalm; ++i)
fill_alm (job->ainfo,job->alm[i],0.,job->fde);
fill_alm (job->ainfo,job->alm[i],0.,job->flags);
else
for (int i=0; i<job->ntrans*job->nmaps; ++i)
fill_map (job->ginfo,job->map[i],0.,job->fde);
fill_map (job->ginfo,job->map[i],0.,job->flags);
}
static void alloc_phase (sharp_job *job, int nm, int ntheta)
@ -426,8 +403,7 @@ static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
int dim2 = pstride*(ith-llim)*(mmax+1);
for (int i=0; i<job->ntrans*job->nmaps; ++i)
ringhelper_pair2phase(&helper,mmax,&job->ginfo->pair[ith], job->map[i],
&job->phase[dim2+2*i], &job->phase[dim2+2*i+1], pstride, job->fde,
job->flags);
&job->phase[dim2+2*i], &job->phase[dim2+2*i+1], pstride, job->flags);
}
ringhelper_destroy(&helper);
} /* end of parallel region */
@ -447,7 +423,7 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi)
int stride=job->ainfo->stride;
if (job->spin==0)
{
if (job->fde==DOUBLE)
if (job->flags&SHARP_DP)
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
for (int i=0; i<job->ntrans*job->nalm; ++i)
job->almtmp[job->ntrans*job->nalm*l+i]
@ -460,7 +436,7 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi)
}
else
{
if (job->fde==DOUBLE)
if (job->flags&SHARP_DP)
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
for (int i=0; i<job->ntrans*job->nalm; ++i)
job->almtmp[job->ntrans*job->nalm*l+i]
@ -484,7 +460,7 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi)
int stride=job->ainfo->stride;
if (job->spin==0)
{
if (job->fde==DOUBLE)
if (job->flags&SHARP_DP)
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
for (int i=0;i<job->ntrans*job->nalm;++i)
((dcmplx *)job->alm[i])[ofs+l*stride] +=
@ -497,7 +473,7 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi)
}
else
{
if (job->fde==DOUBLE)
if (job->flags&SHARP_DP)
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
for (int i=0;i<job->ntrans*job->nalm;++i)
((dcmplx *)job->alm[i])[ofs+l*stride] +=
@ -525,7 +501,7 @@ static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
for (int i=0; i<job->ntrans*job->nmaps; ++i)
ringhelper_phase2pair(&helper,mmax,&job->phase[dim2+2*i],
&job->phase[dim2+2*i+1],pstride,&job->ginfo->pair[ith],job->map[i],
job->fde, job->flags);
job->flags);
}
ringhelper_destroy(&helper);
} /* end of parallel region */
@ -546,7 +522,8 @@ static void sharp_execute_job (sharp_job *job)
init_output (job);
int nchunks, chunksize;
get_chunk_info(job->ginfo->npairs,job->nv*VLEN,&nchunks,&chunksize);
get_chunk_info(job->ginfo->npairs,(job->flags&SHARP_NVMAX)*VLEN,&nchunks,
&chunksize);
alloc_phase (job,mmax+1,chunksize);
/* chunk loop */
@ -615,9 +592,8 @@ static void sharp_execute_job (sharp_job *job)
}
static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
int spin, int add_output, void *alm, void *map,
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
int flags, int nv)
int spin, void *alm, void *map, const sharp_geom_info *geom_info,
const sharp_alm_info *alm_info, int ntrans, int flags)
{
UTIL_ASSERT((ntrans>0)&&(ntrans<=SHARP_MAXTRANS),
"bad number of simultaneous transforms");
@ -628,28 +604,27 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
job->type = type;
job->spin = spin;
job->norm_l = NULL;
job->add_output = add_output;
job->nmaps = (type==SHARP_ALM2MAP_DERIV1) ? 2 : ((spin>0) ? 2 : 1);
job->nalm = (type==SHARP_ALM2MAP_DERIV1) ? 1 : ((spin>0) ? 2 : 1);
job->ginfo = geom_info;
job->ainfo = alm_info;
job->nv = (nv==0) ? sharp_nv_oracle (type, spin, ntrans) : nv;
job->flags = flags;
if ((job->flags&SHARP_NVMAX)==0)
job->flags|=sharp_nv_oracle (type, spin, ntrans);
job->time = 0.;
job->opcnt = 0;
job->ntrans = ntrans;
job->alm=alm;
job->map=map;
job->flags = flags;
job->fde=(flags & SHARP_DP) ? DOUBLE : FLOAT;
}
void sharp_execute (sharp_jobtype type, int spin, int add_output, void *alm,
void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info,
int ntrans, int flags, int nv, double *time, unsigned long long *opcnt)
void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
int flags, double *time, unsigned long long *opcnt)
{
sharp_job job;
sharp_build_job_common (&job, type, spin, add_output, alm, map, geom_info,
alm_info, ntrans, flags, nv);
sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
ntrans, flags);
sharp_execute_job (&job);
if (time!=NULL) *time = job.time;
@ -701,8 +676,8 @@ static int sharp_oracle (sharp_jobtype type, int spin, int ntrans)
int ntries=0;
do
{
sharp_execute(type,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,1,nv,&jtime,
NULL);
sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,nv|SHARP_DP,
&jtime,NULL);
if (jtime<time) { time=jtime; nvbest=nv; }
time_acc+=jtime;

View file

@ -121,8 +121,8 @@ static void check_sign_scale(void)
for (int j=0; j<nalms; ++j)
alm[i][j]=1.+_Complex_I;
sharp_execute(SHARP_ALM2MAP,0,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
0,NULL,NULL);
sharp_execute(SHARP_ALM2MAP,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
NULL,NULL);
for (int it=0; it<ntrans; ++it)
{
UTIL_ASSERT(FAPPROX(map[it][0 ], 3.588246976618616912e+00,1e-12),
@ -132,8 +132,8 @@ static void check_sign_scale(void)
UTIL_ASSERT(FAPPROX(map[it][npix-1],-1.234675107554816442e+01,1e-12),
"error");
}
sharp_execute(SHARP_ALM2MAP,1,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
0,NULL,NULL);
sharp_execute(SHARP_ALM2MAP,1,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
NULL,NULL);
for (int it=0; it<ntrans; ++it)
{
UTIL_ASSERT(FAPPROX(map[2*it ][0 ], 2.750897760535633285e+00,1e-12),
@ -150,8 +150,8 @@ static void check_sign_scale(void)
"error");
}
sharp_execute(SHARP_ALM2MAP,2,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
0,NULL,NULL);
sharp_execute(SHARP_ALM2MAP,2,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
NULL,NULL);
for (int it=0; it<ntrans; ++it)
{
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-1.398186224727334448e+00,1e-12),
@ -168,8 +168,8 @@ static void check_sign_scale(void)
"error");
}
sharp_execute(SHARP_ALM2MAP_DERIV1,1,0,&alm[0],&map[0],tinfo,alms,ntrans,
SHARP_DP,0,NULL,NULL);
sharp_execute(SHARP_ALM2MAP_DERIV1,1,&alm[0],&map[0],tinfo,alms,ntrans,
SHARP_DP,NULL,NULL);
for (int it=0; it<ntrans; ++it)
{
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-6.859393905369091105e-01,1e-11),
@ -215,10 +215,10 @@ static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
dcmplx **alm2;
ALLOC2D(alm2,dcmplx,ncomp,nalms);
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,
SHARP_DP,nv,NULL,NULL);
sharp_execute(SHARP_MAP2ALM,spin,0,&alm2[0],&map[0],tinfo,alms,ntrans,
SHARP_DP,nv,NULL,NULL);
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],tinfo,alms,ntrans,
SHARP_DP|nv,NULL,NULL);
sharp_execute(SHARP_MAP2ALM,spin,&alm2[0],&map[0],tinfo,alms,ntrans,
SHARP_DP|nv,NULL,NULL);
measure_errors(alm,alm2,nalms,ncomp);
DEALLOC2D(map);

View file

@ -73,7 +73,7 @@ static void bench_sht (int spin, int nv, sharp_jobtype type,
{
double jtime;
unsigned long long jopcnt;
sharp_execute(type,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,nv,
sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP|nv,
&jtime,&jopcnt);
if (jopcnt<*opcnt) *opcnt=jopcnt;

View file

@ -188,8 +188,8 @@ int main(int argc, char **argv)
{
double ltime;
unsigned long long lopcnt;
sharp_execute_mpi(MPI_COMM_WORLD,jtype,spin,0,&alm[0],&map[0],
tinfo,alms,ntrans,SHARP_DP,0,&ltime,&lopcnt);
sharp_execute_mpi(MPI_COMM_WORLD,jtype,spin,&alm[0],&map[0],
tinfo,alms,ntrans,SHARP_DP,&ltime,&lopcnt);
ltime=maxTime(ltime);
if (ltime<time) { time=ltime; opcnt=totalops(lopcnt); }

View file

@ -77,10 +77,10 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
const int *idx)
{
int njobs=job->ntrans;
int njobs=job->ntrans, nv=job->flags&SHARP_NVMAX;
if (njobs<=MAXJOB_SPECIAL)
{
switch (njobs*16+job->nv)
switch (njobs*16+nv)
{
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
case 0x11:
@ -207,7 +207,7 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
#if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
else
{
switch (job->nv)
switch (nv)
{
case 1:
CONCAT2(inner_loop,1)

View file

@ -88,7 +88,7 @@ void sharp_make_weighted_healpix_geom_info (int nside, int stride,
weight_[m]=4.*pi/npix*weight[northring-1];
}
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, weight_,
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, NULL, weight_,
geom_info);
DEALLOC(theta);
@ -174,7 +174,7 @@ void sharp_make_gauss_geom_info (int nrings, int nphi, int stride_lon,
weight[m]*=2*pi/nphi;
}
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, weight,
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, NULL, weight,
geom_info);
DEALLOC(theta);
@ -210,7 +210,7 @@ void sharp_make_ecp_geom_info (int nrings, int nphi, double phi0,
weight[m]*=2*pi/nphi;
}
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, weight,
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, NULL, weight,
geom_info);
DEALLOC(theta);

View file

@ -38,9 +38,7 @@
#include "sharp.h"
#define SHARP_MAXTRANS 10
typedef enum { FLOAT, DOUBLE } sharp_fde;
#define SHARP_MAXTRANS 100
typedef struct
{
@ -49,7 +47,6 @@ typedef struct
int add_output;
int nmaps, nalm;
int flags;
sharp_fde fde;
void **map;
void **alm;
complex double *phase;
@ -57,7 +54,6 @@ typedef struct
complex double *almtmp;
const sharp_geom_info *ginfo;
const sharp_alm_info *ainfo;
int nv;
double time;
int ntrans;
unsigned long long opcnt;

View file

@ -42,7 +42,7 @@ extern "C" {
Helper type containing information about a single ring. */
typedef struct
{
double theta, phi0, weight, cth, sth;
double theta, phi0, w_a2m, w_m2a, cth, sth;
ptrdiff_t ofs;
int nph, stride;
} sharp_ringinfo;
@ -123,14 +123,15 @@ void sharp_destroy_alm_info (sharp_alm_info *info);
\param stride the stride between consecutive pixels
\param phi0 the azimuth (in radians) of the first pixel in each ring
\param theta the colatitude (in radians) of each ring
\param weight the pixel weight to be used for the ring. Pass NULL to use
1.0 as weight for all rings. By default weights are used for map2alm
but not alm2map, but the execution flags can override this.
\param wgt_a2m the pixel weight to be used for the ring in alm2map
transforms. Pass NULL to use 1.0 as weight for all rings.
\param wgt_m2a the pixel weight to be used for the ring in map2alm
transforms. Pass NULL to use 1.0 as weight for all rings.
\param geom_info will hold a pointer to the newly created data structure
*/
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
const int *stride, const double *phi0, const double *theta,
const double *weight, sharp_geom_info **geom_info);
const double *wgt_a2m, const double *wgt_m2a, sharp_geom_info **geom_info);
/*! Deallocates the geometry information in \a info. */
void sharp_destroy_geom_info (sharp_geom_info *info);
@ -147,28 +148,15 @@ typedef enum { SHARP_MAP2ALM, /*!< analysis */
} sharp_jobtype;
/*! Job flags */
typedef enum { SHARP_SP = 0, /*!< map and alm is in single precision */
SHARP_DP = 1 << 1, /*!< map and alm is in double precision */
SHARP_ALM2MAP_USE_WEIGHTS = 1 << 2,
/*!< apply ring weights for alm2map */
SHARP_MAP2ALM_IGNORE_WEIGHTS = 1 << 3,
/*!< do not use ring weights for map2alm */
/* convenience flag combinations
(stable API even if the default changes) */
SHARP_USE_WEIGHTS = SHARP_ALM2MAP_USE_WEIGHTS,
/*!< use ring weights for both map2alm and alm2map */
SHARP_IGNORE_WEIGHTS = SHARP_MAP2ALM_IGNORE_WEIGHTS
/*!< do not use ring weights for either map2alm or map2alm */
typedef enum { SHARP_DP = 1<<4, /*!< map and alm is in double precision */
SHARP_ADD= 1<<5, /*!< results are added to the output data */
SHARP_NVMAX = (1<<4)-1 /* internal use only */
} sharp_jobflags;
/*! Performs a libsharp SHT job. The interface deliberately does not use
the C99 "complex" data type, in order to be callable from C89 and C++.
\param type the type of SHT
\param spin the spin of the quantities to be transformed
\param add_output if 0, the output arrays will be overwritten,
else the result will be added to the output arrays.
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
@ -185,19 +173,17 @@ typedef enum { SHARP_SP = 0, /*!< map and alm is in single precision */
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
exactly once.
\param ntrans the number of simultaneous SHTs
\param flags See sharp_jobflags. In particular, if SHARP_SP is set, then
\a alm is expected to have the type "complex float **" and \a map is
expected to have the type "float **"; if SHARP_DP is set, the expected
types are "complex double **" and "double **", respectively.
\param nv Internally used SHT parameter. Set to 0 unless you know what you are
doing.
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
\a alm is expected to have the type "complex double **" and \a map is
expected to have the type "double **"; otherwise, the expected
types are "complex float **" and "float **", respectively.
\param time If not NULL, the wall clock time required for this SHT
(in seconds)will be written here.
\param opcnt If not NULL, a conservative estimate of the total floating point
operation count for this SHT will be written here. */
void sharp_execute (sharp_jobtype type, int spin, int add_output, void *alm,
void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info,
int ntrans, int flags, int nv, double *time, unsigned long long *opcnt);
void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
int flags, double *time, unsigned long long *opcnt);
void sharp_set_chunksize_min(int new_chunksize_min);
void sharp_set_nchunks_max(int new_nchunks_max);

View file

@ -284,13 +284,13 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
}
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
int add_output, void *alm, void *map, const sharp_geom_info *geom_info,
const sharp_alm_info *alm_info, int ntrans, int flags, int nv, double *time,
void *alm, void *map, const sharp_geom_info *geom_info,
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
unsigned long long *opcnt)
{
sharp_job job;
sharp_build_job_common (&job, type, spin, add_output, alm, map, geom_info,
alm_info, ntrans, flags, nv);
sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
ntrans, flags);
sharp_execute_job_mpi (&job, comm);
if (time!=NULL) *time = job.time;

View file

@ -44,8 +44,6 @@ extern "C" {
\param comm the MPI communicator to be used for this SHT
\param type the type of SHT
\param spin the spin of the quantities to be transformed
\param add_output if 0, the output arrays will be overwritten,
else the result will be added to the output arrays.
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
@ -64,19 +62,17 @@ extern "C" {
exactly once in the union of all \a alm_info objects over the participating
MPI tasks.
\param ntrans the number of simultaneous SHTs
\param flags See sharp_jobflags. In particular, if SHARP_SP is set, then
\a alm is expected to have the type "complex float **" and \a map is
expected to have the type "float **"; if SHARP_DP is set, the expected
types are "complex double **" and "double **", respectively.
\param nv Internally used SHT parameter. Set to 0 unless you know what you are
doing.
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
\a alm is expected to have the type "complex double **" and \a map is
expected to have the type "double **"; otherwise, the expected
types are "complex float **" and "float **", respectively.
\param time If not NULL, the wall clock time required for this SHT
(in seconds)will be written here.
\param opcnt If not NULL, a conservative estimate of the total floating point
operation count for this SHT will be written here. */
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
int add_output, void *alm, void *map, const sharp_geom_info *geom_info,
const sharp_alm_info *alm_info, int ntrans, int flags, int nv, double *time,
void *alm, void *map, const sharp_geom_info *geom_info,
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
unsigned long long *opcnt);
#ifdef __cplusplus

View file

@ -119,8 +119,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
double time;
unsigned long long opcnt;
sharp_execute(SHARP_MAP2ALM,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,
SHARP_DP,0,&time,&opcnt);
sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map[0],tinfo,alms,ntrans,
SHARP_DP,&time,&opcnt);
printf("wall time for map2alm: %fs\n",time);
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
measure_errors(alm_orig,alm,nalms,ncomp);
@ -130,16 +130,16 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
double **map2;
ALLOC2D(map2,double,ncomp,npix);
printf ("\niteration %i:\n", iter+1);
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map2[0],tinfo,alms,ntrans,
SHARP_DP,0,&time,&opcnt);
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map2[0],tinfo,alms,ntrans,
SHARP_DP,&time,&opcnt);
printf("wall time for alm2map: %fs\n",time);
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
for (int i=0; i<ncomp; ++i)
for (ptrdiff_t m=0; m<npix; ++m)
map2[i][m] = map[i][m]-map2[i][m];
sharp_execute(SHARP_MAP2ALM,spin,1,&alm[0],&map2[0],tinfo,alms,ntrans,
SHARP_DP,0,&time,&opcnt);
sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map2[0],tinfo,alms,ntrans,
SHARP_DP|SHARP_ADD,&time,&opcnt);
printf("wall time for map2alm: %fs\n",time);
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
DEALLOC2D(map2);
@ -173,8 +173,8 @@ static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
double time;
unsigned long long opcnt;
printf ("\niteration 0:\n");
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,
SHARP_DP,0,&time,&opcnt);
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],tinfo,alms,ntrans,
SHARP_DP,&time,&opcnt);
printf("wall time for alm2map: %fs\n",time);
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);

View file

@ -200,8 +200,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
double jtime;
unsigned long long jopcnt;
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,0,&alm[0],&map[0],
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,&alm[0],&map[0],
tinfo,alms,ntrans,SHARP_DP,&jtime,&jopcnt);
unsigned long long opcnt=totalops(jopcnt);
double timer=maxTime(jtime);
if (mytask==0) printf("wall time for map2alm: %fs\n",timer);
@ -213,8 +213,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
double **map2;
ALLOC2D(map2,double,ncomp,npix);
if (mytask==0) printf ("\niteration %i:\n", iter+1);
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,0,&alm[0],&map2[0],
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,&alm[0],&map2[0],
tinfo,alms,ntrans,SHARP_DP,&jtime,&jopcnt);
opcnt=totalops(jopcnt);
timer=maxTime(jtime);
if (mytask==0) printf("wall time for alm2map: %fs\n",timer);
@ -223,8 +223,8 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map,
for (ptrdiff_t m=0; m<npix; ++m)
map2[i][m] = map[i][m]-map2[i][m];
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,1,&alm[0],&map2[0],
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,&alm[0],&map2[0],
tinfo,alms,ntrans,SHARP_DP|SHARP_ADD,&jtime,&jopcnt);
opcnt=totalops(jopcnt);
timer=maxTime(jtime);
if (mytask==0) printf("wall time for map2alm: %fs\n",wallTime()-timer);
@ -263,8 +263,8 @@ static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
ALLOC2D(alm2,dcmplx,ncomp,nalms);
if (mytask==0) printf ("\niteration 0:\n");
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,0,&alm[0],&map[0],
tinfo,alms,ntrans,SHARP_DP,0,&jtime,&jopcnt);
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,&alm[0],&map[0],
tinfo,alms,ntrans,SHARP_DP,&jtime,&jopcnt);
unsigned long long opcnt=totalops(jopcnt);
double timer=maxTime(jtime);
if (mytask==0) printf("wall time for alm2map: %fs\n",timer);