diff --git a/libsharp/sharp.c b/libsharp/sharp.c index 718e8ac..094deb0 100644 --- a/libsharp/sharp.c +++ b/libsharp/sharp.c @@ -355,23 +355,52 @@ static void ringhelper_ring2phase (ringhelper *self, static void fill_map (const sharp_geom_info *ginfo, void *map, double value, int flags) { - for (int j=0;jnpairs;++j) + if (flags & SHARP_NO_FFT) { - if (flags&SHARP_DP) + for (int j=0;jnpairs;++j) { - for (ptrdiff_t i=0;ipair[j].r1.nph;++i) - ((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=value; - for (ptrdiff_t i=0;ipair[j].r2.nph;++i) - ((double *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=value; + if (flags&SHARP_DP) + { + for (ptrdiff_t i=0;ipair[j].r1.nph;++i) + ((dcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride] + =value; + for (ptrdiff_t i=0;ipair[j].r2.nph;++i) + ((dcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride] + =value; + } + else + { + for (ptrdiff_t i=0;ipair[j].r1.nph;++i) + ((fcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride] + =(float)value; + for (ptrdiff_t i=0;ipair[j].r2.nph;++i) + ((fcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride] + =(float)value; + } } - else + } + else + { + for (int j=0;jnpairs;++j) { - for (ptrdiff_t i=0;ipair[j].r1.nph;++i) - ((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride] - =(float)value; - for (ptrdiff_t i=0;ipair[j].r2.nph;++i) - ((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride] - =(float)value; + if (flags&SHARP_DP) + { + for (ptrdiff_t i=0;ipair[j].r1.nph;++i) + ((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride] + =value; + for (ptrdiff_t i=0;ipair[j].r2.nph;++i) + ((double *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride] + =value; + } + else + { + for (ptrdiff_t i=0;ipair[j].r1.nph;++i) + ((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride] + =(float)value; + for (ptrdiff_t i=0;ipair[j].r2.nph;++i) + ((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride] + =(float)value; + } } } } @@ -599,67 +628,135 @@ static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri, double *ringtmp, ((float *)(job->map[i]))[ri->ofs+m*ri->stride]; } +static void ring2phase_direct (sharp_job *job, sharp_ringinfo *ri, int mmax, + dcmplx *phase) + { + if (ri->nph<0) + { + for (int i=0; intrans*job->nmaps; ++i) + for (int m=0; m<=mmax; ++m) + phase[2*i+job->s_m*m]=0.; + } + else + { + UTIL_ASSERT(ri->nph==mmax+1,"bad ring size"); + double wgt = (job->flags&SHARP_USE_WEIGHTS) ? (ri->nph*ri->weight) : 1.; + if (job->flags&SHARP_REAL_HARMONICS) + wgt *= sqrt_two; + for (int i=0; intrans*job->nmaps; ++i) + for (int m=0; m<=mmax; ++m) + phase[2*i+job->s_m*m]= (job->flags & SHARP_DP) ? + ((dcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt : + ((fcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt; + } + } +static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri, int mmax, + dcmplx *phase) + { + if (ri->nph<0) return; + UTIL_ASSERT(ri->nph==mmax+1,"bad ring size"); + dcmplx **dmap = (dcmplx **)job->map; + fcmplx **fmap = (fcmplx **)job->map; + double wgt = (job->flags&SHARP_USE_WEIGHTS) ? (ri->nph*ri->weight) : 1.; + if (job->flags&SHARP_REAL_HARMONICS) + wgt *= sqrt_one_half; + for (int i=0; intrans*job->nmaps; ++i) + for (int m=0; m<=mmax; ++m) + if (job->flags & SHARP_DP) + dmap[i][ri->ofs+m*ri->stride] += wgt*phase[2*i+job->s_m*m]; + else + fmap[i][ri->ofs+m*ri->stride] += (fcmplx)(wgt*phase[2*i+job->s_m*m]); + } + //FIXME: set phase to zero if not SHARP_MAP2ALM? static void map2phase (sharp_job *job, int mmax, int llim, int ulim) { if (job->type != SHARP_MAP2ALM) return; int pstride = job->s_m; -#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0) -{ - ringhelper helper; - ringhelper_init(&helper); - int rstride=job->ginfo->nphmax+2; - double *ringtmp=RALLOC(double,job->ntrans*job->nmaps*rstride); -#pragma omp for schedule(dynamic,1) - for (int ith=llim; ithflags & SHARP_NO_FFT) { - int dim2 = job->s_th*(ith-llim); - ring2ringtmp(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride); - for (int i=0; intrans*job->nmaps; ++i) - ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r1), - &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags); - if (job->ginfo->pair[ith].r2.nph>0) + for (int ith=llim; ithginfo->pair[ith].r2),ringtmp,rstride); - for (int i=0; intrans*job->nmaps; ++i) - ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r2), - &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags); + int dim2 = job->s_th*(ith-llim); + ring2phase_direct(job,&(job->ginfo->pair[ith].r1),mmax, + &(job->phase[dim2])); + ring2phase_direct(job,&(job->ginfo->pair[ith].r2),mmax, + &(job->phase[dim2+1])); } } - DEALLOC(ringtmp); - ringhelper_destroy(&helper); + else + { +#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0) +{ + ringhelper helper; + ringhelper_init(&helper); + int rstride=job->ginfo->nphmax+2; + double *ringtmp=RALLOC(double,job->ntrans*job->nmaps*rstride); +#pragma omp for schedule(dynamic,1) + for (int ith=llim; iths_th*(ith-llim); + ring2ringtmp(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride); + for (int i=0; intrans*job->nmaps; ++i) + ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r1), + &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags); + if (job->ginfo->pair[ith].r2.nph>0) + { + ring2ringtmp(job,&(job->ginfo->pair[ith].r2),ringtmp,rstride); + for (int i=0; intrans*job->nmaps; ++i) + ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r2), + &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags); + } + } + DEALLOC(ringtmp); + ringhelper_destroy(&helper); } /* end of parallel region */ + } } static void phase2map (sharp_job *job, int mmax, int llim, int ulim) { if (job->type == SHARP_MAP2ALM) return; int pstride = job->s_m; -#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0) -{ - ringhelper helper; - ringhelper_init(&helper); - int rstride=job->ginfo->nphmax+2; - double *ringtmp=RALLOC(double,job->ntrans*job->nmaps*rstride); -#pragma omp for schedule(dynamic,1) - for (int ith=llim; ithflags & SHARP_NO_FFT) { - int dim2 = job->s_th*(ith-llim); - for (int i=0; intrans*job->nmaps; ++i) - ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r1), - &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags); - ringtmp2ring(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride); - if (job->ginfo->pair[ith].r2.nph>0) + for (int ith=llim; ithntrans*job->nmaps; ++i) - ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r2), - &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags); - ringtmp2ring(job,&(job->ginfo->pair[ith].r2),ringtmp,rstride); + int dim2 = job->s_th*(ith-llim); + phase2ring_direct(job,&(job->ginfo->pair[ith].r1),mmax, + &(job->phase[dim2])); + phase2ring_direct(job,&(job->ginfo->pair[ith].r2),mmax, + &(job->phase[dim2+1])); } } - DEALLOC(ringtmp); - ringhelper_destroy(&helper); + else + { +#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0) +{ + ringhelper helper; + ringhelper_init(&helper); + int rstride=job->ginfo->nphmax+2; + double *ringtmp=RALLOC(double,job->ntrans*job->nmaps*rstride); +#pragma omp for schedule(dynamic,1) + for (int ith=llim; iths_th*(ith-llim); + for (int i=0; intrans*job->nmaps; ++i) + ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r1), + &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags); + ringtmp2ring(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride); + if (job->ginfo->pair[ith].r2.nph>0) + { + for (int i=0; intrans*job->nmaps; ++i) + ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r2), + &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags); + ringtmp2ring(job,&(job->ginfo->pair[ith].r2),ringtmp,rstride); + } + } + DEALLOC(ringtmp); + ringhelper_destroy(&helper); } /* end of parallel region */ + } } static void sharp_execute_job (sharp_job *job) diff --git a/libsharp/sharp_lowlevel.h b/libsharp/sharp_lowlevel.h index 0c38b3d..cdf3077 100644 --- a/libsharp/sharp_lowlevel.h +++ b/libsharp/sharp_lowlevel.h @@ -183,6 +183,7 @@ typedef enum { SHARP_DP = 1<<4, corresponding complex coefficient (when accessed as complex). */ + SHARP_NO_FFT = 1<<7, SHARP_USE_WEIGHTS = 1<<20, /* internal use only */ SHARP_NO_OPENMP = 1<<21, /* internal use only */