From 1ec2c44b85e7376ae245efc09c293033e346327c Mon Sep 17 00:00:00 2001 From: Dag Sverre Seljebotn Date: Mon, 7 Jan 2013 11:05:57 +0100 Subject: [PATCH 1/4] gitignore --- .gitignore | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cd0bf21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +*.o +#* +*~ + +/auto +/autom4te.cache +/config.log +/config.status +/config/config.auto +/configure +/sharp_oracle.inc From 80b2f9abffa623ff39f92af9a0f08d592c95dbed Mon Sep 17 00:00:00 2001 From: Dag Sverre Seljebotn Date: Mon, 7 Jan 2013 11:23:15 +0100 Subject: [PATCH 2/4] Use macros to shorten alm2almtmp and almtmp2alm --- libsharp/sharp.c | 62 +++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/libsharp/sharp.c b/libsharp/sharp.c index a7aa208..90b9048 100644 --- a/libsharp/sharp.c +++ b/libsharp/sharp.c @@ -417,6 +417,15 @@ static void dealloc_almtmp (sharp_job *job) static void alm2almtmp (sharp_job *job, int lmax, int mi) { + +#define COPY_LOOP(source_t, expr_of_x) \ + for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \ + for (int i=0; intrans*job->nalm; ++i) \ + { \ + source_t x = ((source_t *)job->alm[i])[ofs+l*stride]; \ + job->almtmp[job->ntrans*job->nalm*l+i] = expr_of_x; \ + } + if (job->type!=SHARP_MAP2ALM) { ptrdiff_t ofs=job->ainfo->mvstart[mi]; @@ -424,66 +433,55 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi) if (job->spin==0) { if (job->flags&SHARP_DP) - for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) - for (int i=0; intrans*job->nalm; ++i) - job->almtmp[job->ntrans*job->nalm*l+i] - = ((dcmplx *)job->alm[i])[ofs+l*stride]; + COPY_LOOP(dcmplx, x) else - for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) - for (int i=0; intrans*job->nalm; ++i) - job->almtmp[job->ntrans*job->nalm*l+i] - = ((fcmplx *)job->alm[i])[ofs+l*stride]; + COPY_LOOP(fcmplx, x) } else { if (job->flags&SHARP_DP) - for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) - for (int i=0; intrans*job->nalm; ++i) - job->almtmp[job->ntrans*job->nalm*l+i] - = ((dcmplx *)job->alm[i])[ofs+l*stride]*job->norm_l[l]; + COPY_LOOP(dcmplx, x*job->norm_l[l]) else - for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) - for (int i=0; intrans*job->nalm; ++i) - job->almtmp[job->ntrans*job->nalm*l+i] - = ((fcmplx *)job->alm[i])[ofs+l*stride]*job->norm_l[l]; + COPY_LOOP(fcmplx, x*job->norm_l[l]) } } else SET_ARRAY(job->almtmp,job->ntrans*job->nalm*job->ainfo->mval[mi], job->ntrans*job->nalm*(lmax+1),0.); + +#undef COPY_LOOP } static void almtmp2alm (sharp_job *job, int lmax, int mi) { + +#define COPY_LOOP(target_t, expr_of_x) \ + for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \ + for (int i=0; intrans*job->nalm; ++i) \ + { \ + dcmplx x = job->almtmp[job->ntrans*job->nalm*l+i]; \ + ((target_t *)job->alm[i])[ofs+l*stride] += expr_of_x; \ + } + if (job->type != SHARP_MAP2ALM) return; ptrdiff_t ofs=job->ainfo->mvstart[mi]; int stride=job->ainfo->stride; if (job->spin==0) { if (job->flags&SHARP_DP) - for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) - for (int i=0;intrans*job->nalm;++i) - ((dcmplx *)job->alm[i])[ofs+l*stride] += - job->almtmp[job->ntrans*job->nalm*l+i]; + COPY_LOOP(dcmplx, x) else - for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) - for (int i=0;intrans*job->nalm;++i) - ((fcmplx *)job->alm[i])[ofs+l*stride] += - (fcmplx)(job->almtmp[job->ntrans*job->nalm*l+i]); + COPY_LOOP(fcmplx, (fcmplx)x) } else { if (job->flags&SHARP_DP) - for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) - for (int i=0;intrans*job->nalm;++i) - ((dcmplx *)job->alm[i])[ofs+l*stride] += - job->almtmp[job->ntrans*job->nalm*l+i]*job->norm_l[l]; + COPY_LOOP(dcmplx, x * job->norm_l[l]) else - for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) - for (int i=0;intrans*job->nalm;++i) - ((fcmplx *)job->alm[i])[ofs+l*stride] += - (fcmplx)(job->almtmp[job->ntrans*job->nalm*l+i]*job->norm_l[l]); + COPY_LOOP(fcmplx, (fcmplx)(x * job->norm_l[l])) } + +#undef COPY_LOOP } static void phase2map (sharp_job *job, int mmax, int llim, int ulim) From d5679f2c43fe64f09b9256bfd5bcfef16fc0beb7 Mon Sep 17 00:00:00 2001 From: Dag Sverre Seljebotn Date: Tue, 8 Jan 2013 13:31:11 +0100 Subject: [PATCH 3/4] SHARP_PACKED alm_info flag --- libsharp/sharp.c | 138 ++++++++++++++++++++++++++++-------- libsharp/sharp_almhelpers.c | 2 + libsharp/sharp_lowlevel.h | 14 +++- 3 files changed, 123 insertions(+), 31 deletions(-) diff --git a/libsharp/sharp.c b/libsharp/sharp.c index 90b9048..0870e64 100644 --- a/libsharp/sharp.c +++ b/libsharp/sharp.c @@ -124,7 +124,7 @@ static int ringpair_compare (const void *xa, const void *xb) } void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval, - const ptrdiff_t *mstart, sharp_alm_info **alm_info) + const ptrdiff_t *mstart, int flags, sharp_alm_info **alm_info) { sharp_alm_info *info = RALLOC(sharp_alm_info,1); info->lmax = lmax; @@ -132,6 +132,7 @@ void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval, info->mval = RALLOC(int,nm); info->mvstart = RALLOC(ptrdiff_t,nm); info->stride = stride; + info->flags = flags; for (int mi=0; mimval[mi] = mval[mi]; @@ -146,12 +147,16 @@ void sharp_make_alm_info (int lmax, int mmax, int stride, int *mval=RALLOC(int,mmax+1); for (int i=0; i<=mmax; ++i) mval[i]=i; - sharp_make_general_alm_info (lmax, mmax+1, stride, mval, mstart, alm_info); + sharp_make_general_alm_info (lmax, mmax+1, stride, mval, mstart, 0, alm_info); DEALLOC(mval); } ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi) - { return self->mvstart[mi]+self->stride*l; } + { + UTIL_ASSERT(!(self->flags & SHARP_PACKED), + "sharp_alm_index not applicable with SHARP_PACKED alms"); + return self->mvstart[mi]+self->stride*l; + } void sharp_destroy_alm_info (sharp_alm_info *info) { @@ -358,17 +363,40 @@ static void fill_map (const sharp_geom_info *ginfo, void *map, double value, } } -static void fill_alm (const sharp_alm_info *ainfo, void *alm, dcmplx value, - int flags) +static void clear_alm (const sharp_alm_info *ainfo, void *alm, int flags) { - if (flags&SHARP_DP) - for (int mi=0;minm;++mi) - for (int l=ainfo->mval[mi];l<=ainfo->lmax;++l) - ((dcmplx *)alm)[sharp_alm_index(ainfo,l,mi)] = value; - else - for (int mi=0;minm;++mi) - for (int l=ainfo->mval[mi];l<=ainfo->lmax;++l) - ((fcmplx *)alm)[sharp_alm_index(ainfo,l,mi)] = (fcmplx)value; +#define CLEARLOOP(real_t,body) \ + { \ + real_t *talm = (real_t *)alm; \ + for (int l=m;l<=ainfo->lmax;++l) \ + body \ + } + + for (int mi=0;minm;++mi) + { + int m=ainfo->mval[mi]; + ptrdiff_t mvstart = ainfo->mvstart[mi]; + ptrdiff_t stride = ainfo->stride; + if (!(ainfo->flags&SHARP_PACKED)) + mvstart*=2; + if ((ainfo->flags&SHARP_PACKED)&&(m==0)) + { + if (flags&SHARP_DP) + CLEARLOOP(double, talm[mvstart+l*stride] = 0.;) + else + CLEARLOOP(float, talm[mvstart+l*stride] = 0.;) + } + else + { + stride*=2; + if (flags&SHARP_DP) + CLEARLOOP(double, talm[mvstart+l*stride] = talm[mvstart+l*stride+1] = 0.;) + else + CLEARLOOP(float, talm[mvstart+l*stride] = talm[mvstart+l*stride+1] = 0.;) + } + +#undef CLEARLOOP + } } static void init_output (sharp_job *job) @@ -376,7 +404,7 @@ static void init_output (sharp_job *job) if (job->flags&SHARP_ADD) return; if (job->type == SHARP_MAP2ALM) for (int i=0; intrans*job->nalm; ++i) - fill_alm (job->ainfo,job->alm[i],0.,job->flags); + clear_alm (job->ainfo,job->alm[i],job->flags); else for (int i=0; intrans*job->nmaps; ++i) fill_map (job->ginfo,job->map[i],0.,job->flags); @@ -418,11 +446,11 @@ static void dealloc_almtmp (sharp_job *job) static void alm2almtmp (sharp_job *job, int lmax, int mi) { -#define COPY_LOOP(source_t, expr_of_x) \ +#define COPY_LOOP(real_t, source_t, expr_of_x) \ for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \ for (int i=0; intrans*job->nalm; ++i) \ { \ - source_t x = ((source_t *)job->alm[i])[ofs+l*stride]; \ + source_t x = *(source_t *)(((real_t *)job->alm[i])+ofs+l*stride); \ job->almtmp[job->ntrans*job->nalm*l+i] = expr_of_x; \ } @@ -430,19 +458,44 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi) { ptrdiff_t ofs=job->ainfo->mvstart[mi]; int stride=job->ainfo->stride; + int packed_m0=(job->ainfo->flags&SHARP_PACKED)&&(job->ainfo->mval[mi]==0); + if (!(job->ainfo->flags&SHARP_PACKED)) + ofs *= 2; + if (!packed_m0) + stride *= 2; if (job->spin==0) { - if (job->flags&SHARP_DP) - COPY_LOOP(dcmplx, x) + if (packed_m0) + { + if (job->flags&SHARP_DP) + COPY_LOOP(double, double, x) + else + COPY_LOOP(float, float, x) + } else - COPY_LOOP(fcmplx, x) + { + if (job->flags&SHARP_DP) + COPY_LOOP(double, dcmplx, x) + else + COPY_LOOP(float, fcmplx, x) + } } else { - if (job->flags&SHARP_DP) - COPY_LOOP(dcmplx, x*job->norm_l[l]) + if (packed_m0) + { + if (job->flags&SHARP_DP) + COPY_LOOP(double, double, x*job->norm_l[l]) + else + COPY_LOOP(float, float, x*job->norm_l[l]) + } else - COPY_LOOP(fcmplx, x*job->norm_l[l]) + { + if (job->flags&SHARP_DP) + COPY_LOOP(double, dcmplx, x*job->norm_l[l]) + else + COPY_LOOP(float, fcmplx, x*job->norm_l[l]) + } } } else @@ -455,30 +508,55 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi) static void almtmp2alm (sharp_job *job, int lmax, int mi) { -#define COPY_LOOP(target_t, expr_of_x) \ +#define COPY_LOOP(real_t, target_t, expr_of_x) \ for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \ for (int i=0; intrans*job->nalm; ++i) \ { \ dcmplx x = job->almtmp[job->ntrans*job->nalm*l+i]; \ - ((target_t *)job->alm[i])[ofs+l*stride] += expr_of_x; \ + *(target_t *)(((real_t *)job->alm[i])+ofs+l*stride) += expr_of_x; \ } if (job->type != SHARP_MAP2ALM) return; ptrdiff_t ofs=job->ainfo->mvstart[mi]; int stride=job->ainfo->stride; + int packed_m0=(job->ainfo->flags&SHARP_PACKED)&&(job->ainfo->mval[mi]==0); + if (!(job->ainfo->flags&SHARP_PACKED)) + ofs *= 2; + if (!packed_m0) + stride *= 2; if (job->spin==0) { - if (job->flags&SHARP_DP) - COPY_LOOP(dcmplx, x) + if (packed_m0) + { + if (job->flags&SHARP_DP) + COPY_LOOP(double, double, creal(x)) + else + COPY_LOOP(float, float, crealf(x)) + } else - COPY_LOOP(fcmplx, (fcmplx)x) + { + if (job->flags&SHARP_DP) + COPY_LOOP(double, dcmplx, x) + else + COPY_LOOP(float, fcmplx, (fcmplx)x) + } } else { - if (job->flags&SHARP_DP) - COPY_LOOP(dcmplx, x * job->norm_l[l]) + if (packed_m0) + { + if (job->flags&SHARP_DP) + COPY_LOOP(double, double, creal(x) * job->norm_l[l]) + else + COPY_LOOP(float, fcmplx, (float)(creal(x) * job->norm_l[l])) + } else - COPY_LOOP(fcmplx, (fcmplx)(x * job->norm_l[l])) + { + if (job->flags&SHARP_DP) + COPY_LOOP(double, dcmplx, x * job->norm_l[l]) + else + COPY_LOOP(float, fcmplx, (fcmplx)(x * job->norm_l[l])) + } } #undef COPY_LOOP diff --git a/libsharp/sharp_almhelpers.c b/libsharp/sharp_almhelpers.c index 039e6f9..f159c03 100644 --- a/libsharp/sharp_almhelpers.c +++ b/libsharp/sharp_almhelpers.c @@ -41,6 +41,7 @@ void sharp_make_triangular_alm_info (int lmax, int mmax, int stride, info->mval = RALLOC(int,mmax+1); info->mvstart = RALLOC(ptrdiff_t,mmax+1); info->stride = stride; + info->flags = 0; int tval = 2*lmax+1; for (ptrdiff_t m=0; m<=mmax; ++m) { @@ -59,6 +60,7 @@ void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride, info->mval = RALLOC(int,mmax+1); info->mvstart = RALLOC(ptrdiff_t,mmax+1); info->stride = stride; + info->flags = 0; for (ptrdiff_t m=0; m<=mmax; ++m) { info->mval[m] = m; diff --git a/libsharp/sharp_lowlevel.h b/libsharp/sharp_lowlevel.h index ac28022..4336d9d 100644 --- a/libsharp/sharp_lowlevel.h +++ b/libsharp/sharp_lowlevel.h @@ -76,6 +76,8 @@ typedef struct int nm; /*! Array with \a nm entries containing the individual m values */ int *mval; + /*! Combination of flags from sharp_almflags */ + int flags; /*! Array with \a nm entries containing the (hypothetical) indices of the coefficients with quantum numbers 0,\a mval[i] */ ptrdiff_t *mvstart; @@ -83,6 +85,15 @@ typedef struct ptrdiff_t stride; } sharp_alm_info; +/*! alm_info flags */ +typedef enum { SHARP_PACKED = 1 + /*< m=0-coefficients are packed so that the (zero) imaginary part is + not present. mvstart is in units of *real* float/double for all + m; stride is in units of reals for m=0 and complex for m!=0 */ + } sharp_almflags; + + + /*! Creates an a_lm data structure from the following parameters: \param lmax maximum \a l quantum number (>=0) \param mmax maximum \a m quantum number (0<= \a mmax <= \a lmax) @@ -102,10 +113,11 @@ void sharp_make_alm_info (int lmax, int mmax, int stride, \param mval array with \a nm entries containing the individual m values \param mvstart array with \a nm entries containing the (hypothetical) indices of the coefficients with the quantum numbers 0,\a mval[i] + \param flags a combination of sharp_almflags (pass 0 unless you know you need this) \param alm_info will hold a pointer to the newly created data structure */ void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval, - const ptrdiff_t *mvstart, sharp_alm_info **alm_info); + const ptrdiff_t *mvstart, int flags, sharp_alm_info **alm_info); /*! Returns the index of the coefficient with quantum numbers \a l, \a mval[mi]. \note for a \a sharp_alm_info generated by sharp_make_alm_info() this is From 35494f39261c3a5fd9597befff2ac9f0360afebe Mon Sep 17 00:00:00 2001 From: Dag Sverre Seljebotn Date: Tue, 8 Jan 2013 14:14:29 +0100 Subject: [PATCH 4/4] SHARP_REAL_HARMONICS flag --- libsharp/sharp.c | 51 +++++++++++++++++++++++++-------------- libsharp/sharp_lowlevel.h | 14 ++++++++++- 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/libsharp/sharp.c b/libsharp/sharp.c index 0870e64..5dee3f7 100644 --- a/libsharp/sharp.c +++ b/libsharp/sharp.c @@ -43,6 +43,9 @@ typedef complex double dcmplx; typedef complex float fcmplx; +static const double sqrt_one_half = 0.707106781186547572737310929369; +static const double sqrt_two = 1.414213562373095145474621858739; + static int chunksize_min=500, nchunks_max=10; static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize) @@ -280,6 +283,8 @@ static void ringhelper_phase2ring (ringhelper *self, #endif real_plan_backward_c (self->plan, (double *)(self->work)); double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1.; + if (flags&SHARP_REAL_HARMONICS) + wgt *= sqrt_one_half; if (flags&SHARP_DP) for (int m=0; mofs]+=creal(self->work[m])*wgt; @@ -301,6 +306,8 @@ static void ringhelper_ring2phase (ringhelper *self, ringhelper_update (self, nph, mmax, -info->phi0); double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1; + if (flags&SHARP_REAL_HARMONICS) + wgt *= sqrt_two; if (flags&SHARP_DP) for (int m=0; mwork[m] = ((double *)data)[info->ofs+m*info->stride]*wgt; @@ -458,19 +465,23 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi) { ptrdiff_t ofs=job->ainfo->mvstart[mi]; int stride=job->ainfo->stride; - int packed_m0=(job->ainfo->flags&SHARP_PACKED)&&(job->ainfo->mval[mi]==0); + int m=job->ainfo->mval[mi]; + /* in the case of SHARP_REAL_HARMONICS, phase2ring scales all the + coefficients by sqrt_one_half; here we must compensate to avoid scaling + m=0 */ + double norm_m0=(job->flags&SHARP_REAL_HARMONICS) ? sqrt_two : 1.; if (!(job->ainfo->flags&SHARP_PACKED)) ofs *= 2; - if (!packed_m0) + if (!((job->ainfo->flags&SHARP_PACKED)&&(m==0))) stride *= 2; if (job->spin==0) { - if (packed_m0) + if (m==0) { if (job->flags&SHARP_DP) - COPY_LOOP(double, double, x) + COPY_LOOP(double, double, x*norm_m0) else - COPY_LOOP(float, float, x) + COPY_LOOP(float, float, x*norm_m0) } else { @@ -482,12 +493,12 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi) } else { - if (packed_m0) + if (m==0) { if (job->flags&SHARP_DP) - COPY_LOOP(double, double, x*job->norm_l[l]) + COPY_LOOP(double, double, x*job->norm_l[l]*norm_m0) else - COPY_LOOP(float, float, x*job->norm_l[l]) + COPY_LOOP(float, float, x*job->norm_l[l]*norm_m0) } else { @@ -519,19 +530,23 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi) if (job->type != SHARP_MAP2ALM) return; ptrdiff_t ofs=job->ainfo->mvstart[mi]; int stride=job->ainfo->stride; - int packed_m0=(job->ainfo->flags&SHARP_PACKED)&&(job->ainfo->mval[mi]==0); + int m=job->ainfo->mval[mi]; + /* in the case of SHARP_REAL_HARMONICS, ring2phase scales all the + coefficients by sqrt_two; here we must compensate to avoid scaling + m=0 */ + double norm_m0=(job->flags&SHARP_REAL_HARMONICS) ? sqrt_one_half : 1.; if (!(job->ainfo->flags&SHARP_PACKED)) ofs *= 2; - if (!packed_m0) + if (!((job->ainfo->flags&SHARP_PACKED)&&(m==0))) stride *= 2; if (job->spin==0) { - if (packed_m0) + if (m==0) { if (job->flags&SHARP_DP) - COPY_LOOP(double, double, creal(x)) + COPY_LOOP(double, double, creal(x)*norm_m0) else - COPY_LOOP(float, float, crealf(x)) + COPY_LOOP(float, float, crealf(x)*norm_m0) } else { @@ -543,19 +558,19 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi) } else { - if (packed_m0) + if (m==0) { if (job->flags&SHARP_DP) - COPY_LOOP(double, double, creal(x) * job->norm_l[l]) + COPY_LOOP(double, double, creal(x)*job->norm_l[l]*norm_m0) else - COPY_LOOP(float, fcmplx, (float)(creal(x) * job->norm_l[l])) + COPY_LOOP(float, fcmplx, (float)(creal(x)*job->norm_l[l]*norm_m0)) } else { if (job->flags&SHARP_DP) - COPY_LOOP(double, dcmplx, x * job->norm_l[l]) + COPY_LOOP(double, dcmplx, x*job->norm_l[l]) else - COPY_LOOP(float, fcmplx, (fcmplx)(x * job->norm_l[l])) + COPY_LOOP(float, fcmplx, (fcmplx)(x*job->norm_l[l])) } } diff --git a/libsharp/sharp_lowlevel.h b/libsharp/sharp_lowlevel.h index 4336d9d..7fdfa29 100644 --- a/libsharp/sharp_lowlevel.h +++ b/libsharp/sharp_lowlevel.h @@ -172,7 +172,19 @@ typedef enum { SHARP_DP = 1<<4, SHARP_ADD = 1<<5, /*!< results are added to the output arrays, instead of overwriting them */ - SHARP_USE_WEIGHTS = 1<<6, /* internal use only */ + SHARP_REAL_HARMONICS = 1<<6, + /*!< Use the real spherical harmonic convention. For + m==0, the alm are treated exactly the same as in + the complex case. For m!=0, alm[i] represent a + pair (+abs(m), -abs(m)) instead of (real, imag), + and the coefficients are scaled by a factor of + sqrt(2) relative to the complex case. In other + words, (sqrt(.5) * alm[i]) recovers the + corresponding complex coefficient (when accessed + as complex). + */ + + SHARP_USE_WEIGHTS = 1<<20, /* internal use only */ SHARP_NVMAX = (1<<4)-1 /* internal use only */ } sharp_jobflags;