diff --git a/.gitignore b/.gitignore index 4cde3de..12a6531 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,11 @@ -*.o -*.so -#* -*~ -*.pyc -*.pyo +**.o +**.lo +**.la +**.so +**/#* +**~ +**.pyc +**.pyo /auto /autom4te.cache diff --git a/libsharp/sharp.c b/libsharp/sharp.c index 884a644..d882689 100644 --- a/libsharp/sharp.c +++ b/libsharp/sharp.c @@ -487,10 +487,10 @@ NOINLINE static void init_output (sharp_job *job) { if (job->flags&SHARP_ADD) return; if (job->type == SHARP_MAP2ALM) - for (int i=0; intrans*job->nalm; ++i) + for (int i=0; inalm; ++i) clear_alm (job->ainfo,job->alm[i],job->flags); else - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) clear_map (job->ginfo,job->map[i],job->flags); } @@ -498,24 +498,24 @@ NOINLINE static void alloc_phase (sharp_job *job, int nm, int ntheta) { if (job->type==SHARP_MAP2ALM) { - job->s_m=2*job->ntrans*job->nmaps; + job->s_m=2*job->nmaps; if (((job->s_m*16*nm)&1023)==0) nm+=3; // hack to avoid critical strides job->s_th=job->s_m*nm; } else { - job->s_th=2*job->ntrans*job->nmaps; + job->s_th=2*job->nmaps; if (((job->s_th*16*ntheta)&1023)==0) ntheta+=3; // hack to avoid critical strides job->s_m=job->s_th*ntheta; } - job->phase=RALLOC(dcmplx,2*job->ntrans*job->nmaps*nm*ntheta); + job->phase=RALLOC(dcmplx,2*job->nmaps*nm*ntheta); } static void dealloc_phase (sharp_job *job) { DEALLOC(job->phase); } static void alloc_almtmp (sharp_job *job, int lmax) - { job->almtmp=RALLOC(dcmplx,job->ntrans*job->nalm*(lmax+1)); } + { job->almtmp=RALLOC(dcmplx,job->nalm*(lmax+1)); } static void dealloc_almtmp (sharp_job *job) { DEALLOC(job->almtmp); } @@ -526,13 +526,13 @@ NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi) #define COPY_LOOP(real_t, source_t, expr_of_x) \ { \ for (int l=m; lntrans*job->nalm; ++i) \ - job->almtmp[job->ntrans*job->nalm*l+i] = 0; \ + for (int i=0; inalm; ++i) \ + job->almtmp[job->nalm*l+i] = 0; \ for (int l=lmin; l<=lmax; ++l) \ - for (int i=0; intrans*job->nalm; ++i) \ + for (int i=0; inalm; ++i) \ { \ source_t x = *(source_t *)(((real_t *)job->alm[i])+ofs+l*stride); \ - job->almtmp[job->ntrans*job->nalm*l+i] = expr_of_x; \ + job->almtmp[job->nalm*l+i] = expr_of_x; \ } \ } @@ -586,8 +586,8 @@ NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi) } } else - memset (job->almtmp+job->ntrans*job->nalm*job->ainfo->mval[mi], 0, - job->ntrans*job->nalm*(lmax+1-job->ainfo->mval[mi])*sizeof(dcmplx)); + memset (job->almtmp+job->nalm*job->ainfo->mval[mi], 0, + job->nalm*(lmax+1-job->ainfo->mval[mi])*sizeof(dcmplx)); #undef COPY_LOOP } @@ -597,9 +597,9 @@ NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi) #define COPY_LOOP(real_t, target_t, expr_of_x) \ for (int l=lmin; l<=lmax; ++l) \ - for (int i=0; intrans*job->nalm; ++i) \ + for (int i=0; inalm; ++i) \ { \ - dcmplx x = job->almtmp[job->ntrans*job->nalm*l+i]; \ + dcmplx x = job->almtmp[job->nalm*l+i]; \ *(target_t *)(((real_t *)job->alm[i])+ofs+l*stride) += expr_of_x; \ } @@ -660,7 +660,7 @@ NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri, if (job->flags & SHARP_DP) { double **dmap = (double **)job->map; - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) { double *restrict p1=&dmap[i][ri->ofs]; const double *restrict p2=&ringtmp[i*rstride+1]; @@ -680,7 +680,7 @@ NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri, else { float **fmap = (float **)job->map; - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) for (int m=0; mnph; ++m) fmap[i][ri->ofs+m*ri->stride] += (float)ringtmp[i*rstride+m+1]; } @@ -690,7 +690,7 @@ NOINLINE static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri, double *ringtmp, int rstride) { if (job->flags & SHARP_DP) - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) { double *restrict p1=&ringtmp[i*rstride+1], *restrict p2=&(((double *)(job->map[i]))[ri->ofs]); @@ -701,7 +701,7 @@ NOINLINE static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri, p1[m] = p2[m*ri->stride]; } else - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) for (int m=0; mnph; ++m) ringtmp[i*rstride+m+1] = ((float *)(job->map[i]))[ri->ofs+m*ri->stride]; } @@ -711,7 +711,7 @@ static void ring2phase_direct (sharp_job *job, sharp_ringinfo *ri, int mmax, { if (ri->nph<0) { - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) for (int m=0; m<=mmax; ++m) phase[2*i+job->s_m*m]=0.; } @@ -721,7 +721,7 @@ static void ring2phase_direct (sharp_job *job, sharp_ringinfo *ri, int mmax, double wgt = (job->flags&SHARP_USE_WEIGHTS) ? (ri->nph*ri->weight) : 1.; if (job->flags&SHARP_REAL_HARMONICS) wgt *= sqrt_two; - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) for (int m=0; m<=mmax; ++m) phase[2*i+job->s_m*m]= (job->flags & SHARP_DP) ? ((dcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt : @@ -738,7 +738,7 @@ static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri, int mmax, double wgt = (job->flags&SHARP_USE_WEIGHTS) ? (ri->nph*ri->weight) : 1.; if (job->flags&SHARP_REAL_HARMONICS) wgt *= sqrt_one_half; - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) for (int m=0; m<=mmax; ++m) if (job->flags & SHARP_DP) dmap[i][ri->ofs+m*ri->stride] += wgt*phase[2*i+job->s_m*m]; @@ -769,19 +769,19 @@ NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim) ringhelper helper; ringhelper_init(&helper); int rstride=job->ginfo->nphmax+2; - double *ringtmp=RALLOC(double,job->ntrans*job->nmaps*rstride); + double *ringtmp=RALLOC(double,job->nmaps*rstride); #pragma omp for schedule(dynamic,1) for (int ith=llim; iths_th*(ith-llim); ring2ringtmp(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride); - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r1), &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags); if (job->ginfo->pair[ith].r2.nph>0) { ring2ringtmp(job,&(job->ginfo->pair[ith].r2),ringtmp,rstride); - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r2), &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags); } @@ -814,18 +814,18 @@ NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim) ringhelper helper; ringhelper_init(&helper); int rstride=job->ginfo->nphmax+2; - double *ringtmp=RALLOC(double,job->ntrans*job->nmaps*rstride); + double *ringtmp=RALLOC(double,job->nmaps*rstride); #pragma omp for schedule(dynamic,1) for (int ith=llim; iths_th*(ith-llim); - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r1), &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags); ringtmp2ring(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride); if (job->ginfo->pair[ith].r2.nph>0) { - for (int i=0; intrans*job->nmaps; ++i) + for (int i=0; inmaps; ++i) ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r2), &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags); ringtmp2ring(job,&(job->ginfo->pair[ith].r2),ringtmp,rstride); @@ -918,10 +918,8 @@ NOINLINE static void sharp_execute_job (sharp_job *job) static void sharp_build_job_common (sharp_job *job, sharp_jobtype type, int spin, void *alm, void *map, const sharp_geom_info *geom_info, - const sharp_alm_info *alm_info, int ntrans, int flags) + const sharp_alm_info *alm_info, int flags) { - UTIL_ASSERT((ntrans>0)&&(ntrans<=SHARP_MAXTRANS), - "bad number of simultaneous transforms"); if (type==SHARP_ALM2MAP_DERIV1) spin=1; if (type==SHARP_MAP2ALM) flags|=SHARP_USE_WEIGHTS; if (type==SHARP_Yt) type=SHARP_MAP2ALM; @@ -937,23 +935,22 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type, job->ainfo = alm_info; job->flags = flags; if ((job->flags&SHARP_NVMAX)==0) - job->flags|=sharp_nv_oracle (type, spin, ntrans); + job->flags|=sharp_nv_oracle (type, spin); if (alm_info->flags&SHARP_REAL_HARMONICS) job->flags|=SHARP_REAL_HARMONICS; job->time = 0.; job->opcnt = 0; - job->ntrans = ntrans; job->alm=alm; job->map=map; } void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map, - const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans, + const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int flags, double *time, unsigned long long *opcnt) { sharp_job job; sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info, - ntrans, flags); + flags); sharp_execute_job (&job); if (time!=NULL) *time = job.time; @@ -968,7 +965,7 @@ void sharp_set_nchunks_max(int new_nchunks_max) int sharp_get_nv_max (void) { return 6; } -static int sharp_oracle (sharp_jobtype type, int spin, int ntrans) +static int sharp_oracle (sharp_jobtype type, int spin) { int lmax=511; int mmax=(lmax+1)/2; @@ -982,7 +979,7 @@ static int sharp_oracle (sharp_jobtype type, int spin, int ntrans) sharp_make_gauss_geom_info (nrings, ppring, 0., 1, ppring, &tinfo); ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax); - int ncomp = ntrans*((spin==0) ? 1 : 2); + int ncomp = (spin==0) ? 1 : 2; double **map; ALLOC2D(map,double,ncomp,npix); @@ -1005,7 +1002,7 @@ static int sharp_oracle (sharp_jobtype type, int spin, int ntrans) int ntries=0; do { - sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans, + sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms, nv|SHARP_DP|SHARP_NO_OPENMP,&jtime,NULL); if (jtime0),"bad number of simultaneous transforms"); UTIL_ASSERT(spin>=0, "bad spin"); - ntrans=IMIN(ntrans,maxtr); - if (nv_opt[ntrans-1][spin!=0][type]==0) - nv_opt[ntrans-1][spin!=0][type]=sharp_oracle(type,spin,ntrans); - return nv_opt[ntrans-1][spin!=0][type]; + if (nv_opt[spin!=0][type]==0) + nv_opt[spin!=0][type]=sharp_oracle(type,spin); + return nv_opt[spin!=0][type]; } #ifdef USE_MPI @@ -1050,11 +1039,11 @@ int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans) int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin, void *alm, void *map, const sharp_geom_info *geom_info, - const sharp_alm_info *alm_info, int ntrans, int flags, double *time, + const sharp_alm_info *alm_info, int flags, double *time, unsigned long long *opcnt) { MPI_Comm comm = *(MPI_Comm*)pcomm; - sharp_execute_mpi((MPI_Comm)comm, type, spin, alm, map, geom_info, alm_info, ntrans, + sharp_execute_mpi((MPI_Comm)comm, type, spin, alm, map, geom_info, alm_info, flags, time, opcnt); return 0; } @@ -1063,12 +1052,12 @@ int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin, int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin, void *alm, void *map, const sharp_geom_info *geom_info, - const sharp_alm_info *alm_info, int ntrans, int flags, double *time, + const sharp_alm_info *alm_info, int flags, double *time, unsigned long long *opcnt) { /* Suppress unused warning: */ (void)pcomm; (void)type; (void)spin; (void)alm; (void)map; (void)geom_info; - (void)alm_info; (void)ntrans; (void)flags; (void)time; (void)opcnt; + (void)alm_info; (void)flags; (void)time; (void)opcnt; return SHARP_ERROR_NO_MPI; } diff --git a/libsharp/sharp_core_inc0.c b/libsharp/sharp_core_inc0.c index 7a34e40..d7c3624 100644 --- a/libsharp/sharp_core_inc0.c +++ b/libsharp/sharp_core_inc0.c @@ -78,164 +78,27 @@ void CONCATX(inner_loop,ARCH) (sharp_job *job, const int *ispair,const double *c const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, const int *mlim) { - int njobs=job->ntrans, nv=job->flags&SHARP_NVMAX; - if (njobs<=MAXJOB_SPECIAL) + int nv=job->flags&SHARP_NVMAX; + switch (nv) { - switch (njobs*16+nv) - { -#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1)) - case 0x11: - CONCAT3(inner_loop,1,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x12: - CONCAT3(inner_loop,2,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x13: - CONCAT3(inner_loop,3,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x14: - CONCAT3(inner_loop,4,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x15: - CONCAT3(inner_loop,5,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x16: - CONCAT3(inner_loop,6,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; -#endif -#if ((MAXJOB_SPECIAL>=2)&&(SHARP_MAXTRANS>=2)) - case 0x21: - CONCAT3(inner_loop,1,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x22: - CONCAT3(inner_loop,2,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x23: - CONCAT3(inner_loop,3,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x24: - CONCAT3(inner_loop,4,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x25: - CONCAT3(inner_loop,5,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x26: - CONCAT3(inner_loop,6,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; -#endif -#if ((MAXJOB_SPECIAL>=3)&&(SHARP_MAXTRANS>=3)) - case 0x31: - CONCAT3(inner_loop,1,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x32: - CONCAT3(inner_loop,2,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x33: - CONCAT3(inner_loop,3,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x34: - CONCAT3(inner_loop,4,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x35: - CONCAT3(inner_loop,5,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x36: - CONCAT3(inner_loop,6,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; -#endif -#if ((MAXJOB_SPECIAL>=4)&&(SHARP_MAXTRANS>=4)) - case 0x41: - CONCAT3(inner_loop,1,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x42: - CONCAT3(inner_loop,2,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x43: - CONCAT3(inner_loop,3,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x44: - CONCAT3(inner_loop,4,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x45: - CONCAT3(inner_loop,5,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x46: - CONCAT3(inner_loop,6,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; -#endif -#if ((MAXJOB_SPECIAL>=5)&&(SHARP_MAXTRANS>=5)) - case 0x51: - CONCAT3(inner_loop,1,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x52: - CONCAT3(inner_loop,2,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x53: - CONCAT3(inner_loop,3,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x54: - CONCAT3(inner_loop,4,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x55: - CONCAT3(inner_loop,5,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x56: - CONCAT3(inner_loop,6,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; -#endif -#if ((MAXJOB_SPECIAL>=6)&&(SHARP_MAXTRANS>=6)) - case 0x61: - CONCAT3(inner_loop,1,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x62: - CONCAT3(inner_loop,2,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x63: - CONCAT3(inner_loop,3,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x64: - CONCAT3(inner_loop,4,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x65: - CONCAT3(inner_loop,5,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; - case 0x66: - CONCAT3(inner_loop,6,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); - return; -#endif - } + case 0x1: + CONCAT3(inner_loop,1,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); + return; + case 0x2: + CONCAT3(inner_loop,2,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); + return; + case 0x3: + CONCAT3(inner_loop,3,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); + return; + case 0x4: + CONCAT3(inner_loop,4,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); + return; + case 0x5: + CONCAT3(inner_loop,5,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); + return; + case 0x6: + CONCAT3(inner_loop,6,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); + return; } -#if (SHARP_MAXTRANS>MAXJOB_SPECIAL) - else - { - switch (nv) - { - case 1: - CONCAT2(inner_loop,1) - (job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans); - return; - case 2: - CONCAT2(inner_loop,2) - (job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans); - return; - case 3: - CONCAT2(inner_loop,3) - (job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans); - return; - case 4: - CONCAT2(inner_loop,4) - (job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans); - return; - case 5: - CONCAT2(inner_loop,5) - (job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans); - return; - case 6: - CONCAT2(inner_loop,6) - (job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans); - return; - } - } -#endif UTIL_FAIL("Incorrect vector parameters"); } diff --git a/libsharp/sharp_internal.h b/libsharp/sharp_internal.h index fb56877..11f23cb 100644 --- a/libsharp/sharp_internal.h +++ b/libsharp/sharp_internal.h @@ -55,12 +55,11 @@ typedef struct const sharp_geom_info *ginfo; const sharp_alm_info *ainfo; double time; - int ntrans; unsigned long long opcnt; } sharp_job; int sharp_get_nv_max (void); -int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans); +int sharp_nv_oracle (sharp_jobtype type, int spin); int sharp_get_mlim (int lmax, int spin, double sth, double cth); #endif diff --git a/libsharp/sharp_lowlevel.h b/libsharp/sharp_lowlevel.h index d9aa01b..f36f5a8 100644 --- a/libsharp/sharp_lowlevel.h +++ b/libsharp/sharp_lowlevel.h @@ -223,7 +223,6 @@ typedef enum { SHARP_DP = 1<<4, \param alm_info A \c sharp_alm_info object compatible with the provided \a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present exactly once. - \param ntrans the number of simultaneous SHTs \param flags See sharp_jobflags. In particular, if SHARP_DP is set, then \a alm is expected to have the type "complex double **" and \a map is expected to have the type "double **"; otherwise, the expected @@ -233,7 +232,7 @@ typedef enum { SHARP_DP = 1<<4, \param opcnt If not NULL, a conservative estimate of the total floating point operation count for this SHT will be written here. */ void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map, - const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans, + const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int flags, double *time, unsigned long long *opcnt); void sharp_set_chunksize_min(int new_chunksize_min); @@ -258,7 +257,7 @@ typedef enum { SHARP_ERROR_NO_MPI = 1, */ int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin, void *alm, void *map, const sharp_geom_info *geom_info, - const sharp_alm_info *alm_info, int ntrans, int flags, double *time, + const sharp_alm_info *alm_info, int flags, double *time, unsigned long long *opcnt); diff --git a/libsharp/sharp_mpi.c b/libsharp/sharp_mpi.c index a364ed4..b23409a 100644 --- a/libsharp/sharp_mpi.c +++ b/libsharp/sharp_mpi.c @@ -101,7 +101,7 @@ static void sharp_make_mpi_info (MPI_Comm comm, const sharp_job *job, DEALLOC(theta_tmp); DEALLOC(ispair_tmp); - minfo->nph=2*job->nmaps*job->ntrans; + minfo->nph=2*job->nmaps; minfo->almcount=RALLOC(int,minfo->ntasks); minfo->almdisp=RALLOC(int,minfo->ntasks+1); @@ -184,8 +184,8 @@ static void alloc_phase_mpi (sharp_job *job, int nm, int ntheta, { ptrdiff_t phase_size = (job->type==SHARP_MAP2ALM) ? (ptrdiff_t)(nmfull)*ntheta : (ptrdiff_t)(nm)*nthetafull; - job->phase=RALLOC(dcmplx,2*job->ntrans*job->nmaps*phase_size); - job->s_m=2*job->ntrans*job->nmaps; + job->phase=RALLOC(dcmplx,2*job->nmaps*phase_size); + job->s_m=2*job->nmaps; job->s_th = job->s_m * ((job->type==SHARP_MAP2ALM) ? nmfull : nm); } @@ -315,12 +315,12 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm) void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin, void *alm, void *map, const sharp_geom_info *geom_info, - const sharp_alm_info *alm_info, int ntrans, int flags, double *time, + const sharp_alm_info *alm_info, int flags, double *time, unsigned long long *opcnt) { sharp_job job; sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info, - ntrans, flags); + flags); sharp_execute_job_mpi (&job, comm); if (time!=NULL) *time = job.time; @@ -331,15 +331,15 @@ void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin, without declaring it in C header as it should not be available to C code */ void sharp_execute_mpi_fortran(MPI_Fint comm, sharp_jobtype type, int spin, void *alm, void *map, const sharp_geom_info *geom_info, - const sharp_alm_info *alm_info, int ntrans, int flags, double *time, + const sharp_alm_info *alm_info, int flags, double *time, unsigned long long *opcnt); void sharp_execute_mpi_fortran(MPI_Fint comm, sharp_jobtype type, int spin, void *alm, void *map, const sharp_geom_info *geom_info, - const sharp_alm_info *alm_info, int ntrans, int flags, double *time, + const sharp_alm_info *alm_info, int flags, double *time, unsigned long long *opcnt) { sharp_execute_mpi(MPI_Comm_f2c(comm), type, spin, alm, map, geom_info, - alm_info, ntrans, flags, time, opcnt); + alm_info, flags, time, opcnt); } #endif diff --git a/libsharp/sharp_mpi.h b/libsharp/sharp_mpi.h index 1053a65..df07117 100644 --- a/libsharp/sharp_mpi.h +++ b/libsharp/sharp_mpi.h @@ -62,7 +62,6 @@ extern "C" { \a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present exactly once in the union of all \a alm_info objects over the participating MPI tasks. - \param ntrans the number of simultaneous SHTs \param flags See sharp_jobflags. In particular, if SHARP_DP is set, then \a alm is expected to have the type "complex double **" and \a map is expected to have the type "double **"; otherwise, the expected @@ -73,7 +72,7 @@ extern "C" { operation count for this SHT will be written here. */ void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin, void *alm, void *map, const sharp_geom_info *geom_info, - const sharp_alm_info *alm_info, int ntrans, int flags, double *time, + const sharp_alm_info *alm_info, int flags, double *time, unsigned long long *opcnt); #ifdef __cplusplus diff --git a/libsharp/sharp_testsuite.c b/libsharp/sharp_testsuite.c index f02f9fd..c08fb20 100644 --- a/libsharp/sharp_testsuite.c +++ b/libsharp/sharp_testsuite.c @@ -358,97 +358,83 @@ static void check_sign_scale(void) sharp_make_triangular_alm_info(lmax,mmax,1,&alms); ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax); - for (int ntrans=1; ntrans<10; ++ntrans) - { - double **map; - ALLOC2D(map,double,2*ntrans,npix); + double **map; + ALLOC2D(map,double,2,npix); - dcmplx **alm; - ALLOC2D(alm,dcmplx,2*ntrans,nalms); - for (int i=0; i<2*ntrans; ++i) - for (int j=0; j=9,"usage: grid lmax mmax geom1 geom2 spin ntrans"); + UTIL_ASSERT(argc>=8,"usage: grid lmax mmax geom1 geom2 spin"); int lmax=atoi(argv[3]); int mmax=atoi(argv[4]); int gpar1=atoi(argv[5]); int gpar2=atoi(argv[6]); int spin=atoi(argv[7]); - int ntrans=atoi(argv[8]); if (mytask==0) printf("Testing map analysis accuracy.\n"); - if (mytask==0) printf("spin=%d, ntrans=%d\n", spin, ntrans); + if (mytask==0) printf("spin=%d\n", spin); sharp_geom_info *ginfo; sharp_alm_info *ainfo; get_infos (argv[2], lmax, &mmax, &gpar1, &gpar2, &ginfo, &ainfo); - int ncomp = ntrans*((spin==0) ? 1 : 2); + int ncomp = (spin==0) ? 1 : 2; double t_a2m=1e30, t_m2a=1e30; unsigned long long op_a2m, op_m2a; double *err_abs,*err_rel; @@ -557,7 +541,7 @@ static void sharp_test (int argc, const char **argv) { ++nrpt; double ta2m2, tm2a2; - do_sht (ginfo, ainfo, spin, ntrans, 0, &err_abs, &err_rel, &ta2m2, &tm2a2, + do_sht (ginfo, ainfo, spin, 0, &err_abs, &err_rel, &ta2m2, &tm2a2, &op_a2m, &op_m2a); if (ta2m2=9,"usage: grid lmax mmax geom1 geom2 spin ntrans"); + UTIL_ASSERT(argc>=8,"usage: grid lmax mmax geom1 geom2 spin"); int lmax=atoi(argv[3]); int mmax=atoi(argv[4]); int gpar1=atoi(argv[5]); int gpar2=atoi(argv[6]); int spin=atoi(argv[7]); - int ntrans=atoi(argv[8]); if (mytask==0) printf("Testing map analysis accuracy.\n"); - if (mytask==0) printf("spin=%d, ntrans=%d\n", spin, ntrans); + if (mytask==0) printf("spin=%d\n", spin); sharp_geom_info *ginfo; sharp_alm_info *ainfo; @@ -647,7 +630,7 @@ static void sharp_bench (int argc, const char **argv) double t_a2m, t_m2a; unsigned long long op_a2m, op_m2a; double *err_abs,*err_rel; - do_sht (ginfo, ainfo, spin, ntrans, nv, &err_abs, &err_rel, + do_sht (ginfo, ainfo, spin, nv, &err_abs, &err_rel, &t_a2m, &t_m2a, &op_a2m, &op_m2a); DEALLOC(err_abs);