Merge branch 'packed' of git://github.com/dagss/libsharp

This commit is contained in:
Martin Reinecke 2013-01-08 15:48:57 +01:00
commit 32ddcae2ec
4 changed files with 180 additions and 52 deletions

11
.gitignore vendored Normal file
View file

@ -0,0 +1,11 @@
*.o
#*
*~
/auto
/autom4te.cache
/config.log
/config.status
/config/config.auto
/configure
/sharp_oracle.inc

View file

@ -43,6 +43,9 @@
typedef complex double dcmplx; typedef complex double dcmplx;
typedef complex float fcmplx; typedef complex float fcmplx;
static const double sqrt_one_half = 0.707106781186547572737310929369;
static const double sqrt_two = 1.414213562373095145474621858739;
static int chunksize_min=500, nchunks_max=10; static int chunksize_min=500, nchunks_max=10;
static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize) static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize)
@ -124,7 +127,7 @@ static int ringpair_compare (const void *xa, const void *xb)
} }
void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval, void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
const ptrdiff_t *mstart, sharp_alm_info **alm_info) const ptrdiff_t *mstart, int flags, sharp_alm_info **alm_info)
{ {
sharp_alm_info *info = RALLOC(sharp_alm_info,1); sharp_alm_info *info = RALLOC(sharp_alm_info,1);
info->lmax = lmax; info->lmax = lmax;
@ -132,6 +135,7 @@ void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
info->mval = RALLOC(int,nm); info->mval = RALLOC(int,nm);
info->mvstart = RALLOC(ptrdiff_t,nm); info->mvstart = RALLOC(ptrdiff_t,nm);
info->stride = stride; info->stride = stride;
info->flags = flags;
for (int mi=0; mi<nm; ++mi) for (int mi=0; mi<nm; ++mi)
{ {
info->mval[mi] = mval[mi]; info->mval[mi] = mval[mi];
@ -146,12 +150,16 @@ void sharp_make_alm_info (int lmax, int mmax, int stride,
int *mval=RALLOC(int,mmax+1); int *mval=RALLOC(int,mmax+1);
for (int i=0; i<=mmax; ++i) for (int i=0; i<=mmax; ++i)
mval[i]=i; mval[i]=i;
sharp_make_general_alm_info (lmax, mmax+1, stride, mval, mstart, alm_info); sharp_make_general_alm_info (lmax, mmax+1, stride, mval, mstart, 0, alm_info);
DEALLOC(mval); DEALLOC(mval);
} }
ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi) ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi)
{ return self->mvstart[mi]+self->stride*l; } {
UTIL_ASSERT(!(self->flags & SHARP_PACKED),
"sharp_alm_index not applicable with SHARP_PACKED alms");
return self->mvstart[mi]+self->stride*l;
}
void sharp_destroy_alm_info (sharp_alm_info *info) void sharp_destroy_alm_info (sharp_alm_info *info)
{ {
@ -275,6 +283,8 @@ static void ringhelper_phase2ring (ringhelper *self,
#endif #endif
real_plan_backward_c (self->plan, (double *)(self->work)); real_plan_backward_c (self->plan, (double *)(self->work));
double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1.; double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1.;
if (flags&SHARP_REAL_HARMONICS)
wgt *= sqrt_one_half;
if (flags&SHARP_DP) if (flags&SHARP_DP)
for (int m=0; m<nph; ++m) for (int m=0; m<nph; ++m)
((double *)data)[m*stride+info->ofs]+=creal(self->work[m])*wgt; ((double *)data)[m*stride+info->ofs]+=creal(self->work[m])*wgt;
@ -296,6 +306,8 @@ static void ringhelper_ring2phase (ringhelper *self,
ringhelper_update (self, nph, mmax, -info->phi0); ringhelper_update (self, nph, mmax, -info->phi0);
double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1; double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1;
if (flags&SHARP_REAL_HARMONICS)
wgt *= sqrt_two;
if (flags&SHARP_DP) if (flags&SHARP_DP)
for (int m=0; m<nph; ++m) for (int m=0; m<nph; ++m)
self->work[m] = ((double *)data)[info->ofs+m*info->stride]*wgt; self->work[m] = ((double *)data)[info->ofs+m*info->stride]*wgt;
@ -358,17 +370,40 @@ static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
} }
} }
static void fill_alm (const sharp_alm_info *ainfo, void *alm, dcmplx value, static void clear_alm (const sharp_alm_info *ainfo, void *alm, int flags)
int flags)
{ {
if (flags&SHARP_DP) #define CLEARLOOP(real_t,body) \
for (int mi=0;mi<ainfo->nm;++mi) { \
for (int l=ainfo->mval[mi];l<=ainfo->lmax;++l) real_t *talm = (real_t *)alm; \
((dcmplx *)alm)[sharp_alm_index(ainfo,l,mi)] = value; for (int l=m;l<=ainfo->lmax;++l) \
else body \
for (int mi=0;mi<ainfo->nm;++mi) }
for (int l=ainfo->mval[mi];l<=ainfo->lmax;++l)
((fcmplx *)alm)[sharp_alm_index(ainfo,l,mi)] = (fcmplx)value; for (int mi=0;mi<ainfo->nm;++mi)
{
int m=ainfo->mval[mi];
ptrdiff_t mvstart = ainfo->mvstart[mi];
ptrdiff_t stride = ainfo->stride;
if (!(ainfo->flags&SHARP_PACKED))
mvstart*=2;
if ((ainfo->flags&SHARP_PACKED)&&(m==0))
{
if (flags&SHARP_DP)
CLEARLOOP(double, talm[mvstart+l*stride] = 0.;)
else
CLEARLOOP(float, talm[mvstart+l*stride] = 0.;)
}
else
{
stride*=2;
if (flags&SHARP_DP)
CLEARLOOP(double, talm[mvstart+l*stride] = talm[mvstart+l*stride+1] = 0.;)
else
CLEARLOOP(float, talm[mvstart+l*stride] = talm[mvstart+l*stride+1] = 0.;)
}
#undef CLEARLOOP
}
} }
static void init_output (sharp_job *job) static void init_output (sharp_job *job)
@ -376,7 +411,7 @@ static void init_output (sharp_job *job)
if (job->flags&SHARP_ADD) return; if (job->flags&SHARP_ADD) return;
if (job->type == SHARP_MAP2ALM) if (job->type == SHARP_MAP2ALM)
for (int i=0; i<job->ntrans*job->nalm; ++i) for (int i=0; i<job->ntrans*job->nalm; ++i)
fill_alm (job->ainfo,job->alm[i],0.,job->flags); clear_alm (job->ainfo,job->alm[i],job->flags);
else else
for (int i=0; i<job->ntrans*job->nmaps; ++i) for (int i=0; i<job->ntrans*job->nmaps; ++i)
fill_map (job->ginfo,job->map[i],0.,job->flags); fill_map (job->ginfo,job->map[i],0.,job->flags);
@ -417,73 +452,129 @@ static void dealloc_almtmp (sharp_job *job)
static void alm2almtmp (sharp_job *job, int lmax, int mi) static void alm2almtmp (sharp_job *job, int lmax, int mi)
{ {
#define COPY_LOOP(real_t, source_t, expr_of_x) \
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \
for (int i=0; i<job->ntrans*job->nalm; ++i) \
{ \
source_t x = *(source_t *)(((real_t *)job->alm[i])+ofs+l*stride); \
job->almtmp[job->ntrans*job->nalm*l+i] = expr_of_x; \
}
if (job->type!=SHARP_MAP2ALM) if (job->type!=SHARP_MAP2ALM)
{ {
ptrdiff_t ofs=job->ainfo->mvstart[mi]; ptrdiff_t ofs=job->ainfo->mvstart[mi];
int stride=job->ainfo->stride; int stride=job->ainfo->stride;
int m=job->ainfo->mval[mi];
/* in the case of SHARP_REAL_HARMONICS, phase2ring scales all the
coefficients by sqrt_one_half; here we must compensate to avoid scaling
m=0 */
double norm_m0=(job->flags&SHARP_REAL_HARMONICS) ? sqrt_two : 1.;
if (!(job->ainfo->flags&SHARP_PACKED))
ofs *= 2;
if (!((job->ainfo->flags&SHARP_PACKED)&&(m==0)))
stride *= 2;
if (job->spin==0) if (job->spin==0)
{ {
if (job->flags&SHARP_DP) if (m==0)
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) {
for (int i=0; i<job->ntrans*job->nalm; ++i) if (job->flags&SHARP_DP)
job->almtmp[job->ntrans*job->nalm*l+i] COPY_LOOP(double, double, x*norm_m0)
= ((dcmplx *)job->alm[i])[ofs+l*stride]; else
COPY_LOOP(float, float, x*norm_m0)
}
else else
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) {
for (int i=0; i<job->ntrans*job->nalm; ++i) if (job->flags&SHARP_DP)
job->almtmp[job->ntrans*job->nalm*l+i] COPY_LOOP(double, dcmplx, x)
= ((fcmplx *)job->alm[i])[ofs+l*stride]; else
COPY_LOOP(float, fcmplx, x)
}
} }
else else
{ {
if (job->flags&SHARP_DP) if (m==0)
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) {
for (int i=0; i<job->ntrans*job->nalm; ++i) if (job->flags&SHARP_DP)
job->almtmp[job->ntrans*job->nalm*l+i] COPY_LOOP(double, double, x*job->norm_l[l]*norm_m0)
= ((dcmplx *)job->alm[i])[ofs+l*stride]*job->norm_l[l]; else
COPY_LOOP(float, float, x*job->norm_l[l]*norm_m0)
}
else else
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) {
for (int i=0; i<job->ntrans*job->nalm; ++i) if (job->flags&SHARP_DP)
job->almtmp[job->ntrans*job->nalm*l+i] COPY_LOOP(double, dcmplx, x*job->norm_l[l])
= ((fcmplx *)job->alm[i])[ofs+l*stride]*job->norm_l[l]; else
COPY_LOOP(float, fcmplx, x*job->norm_l[l])
}
} }
} }
else else
SET_ARRAY(job->almtmp,job->ntrans*job->nalm*job->ainfo->mval[mi], SET_ARRAY(job->almtmp,job->ntrans*job->nalm*job->ainfo->mval[mi],
job->ntrans*job->nalm*(lmax+1),0.); job->ntrans*job->nalm*(lmax+1),0.);
#undef COPY_LOOP
} }
static void almtmp2alm (sharp_job *job, int lmax, int mi) static void almtmp2alm (sharp_job *job, int lmax, int mi)
{ {
#define COPY_LOOP(real_t, target_t, expr_of_x) \
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \
for (int i=0; i<job->ntrans*job->nalm; ++i) \
{ \
dcmplx x = job->almtmp[job->ntrans*job->nalm*l+i]; \
*(target_t *)(((real_t *)job->alm[i])+ofs+l*stride) += expr_of_x; \
}
if (job->type != SHARP_MAP2ALM) return; if (job->type != SHARP_MAP2ALM) return;
ptrdiff_t ofs=job->ainfo->mvstart[mi]; ptrdiff_t ofs=job->ainfo->mvstart[mi];
int stride=job->ainfo->stride; int stride=job->ainfo->stride;
int m=job->ainfo->mval[mi];
/* in the case of SHARP_REAL_HARMONICS, ring2phase scales all the
coefficients by sqrt_two; here we must compensate to avoid scaling
m=0 */
double norm_m0=(job->flags&SHARP_REAL_HARMONICS) ? sqrt_one_half : 1.;
if (!(job->ainfo->flags&SHARP_PACKED))
ofs *= 2;
if (!((job->ainfo->flags&SHARP_PACKED)&&(m==0)))
stride *= 2;
if (job->spin==0) if (job->spin==0)
{ {
if (job->flags&SHARP_DP) if (m==0)
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) {
for (int i=0;i<job->ntrans*job->nalm;++i) if (job->flags&SHARP_DP)
((dcmplx *)job->alm[i])[ofs+l*stride] += COPY_LOOP(double, double, creal(x)*norm_m0)
job->almtmp[job->ntrans*job->nalm*l+i]; else
COPY_LOOP(float, float, crealf(x)*norm_m0)
}
else else
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) {
for (int i=0;i<job->ntrans*job->nalm;++i) if (job->flags&SHARP_DP)
((fcmplx *)job->alm[i])[ofs+l*stride] += COPY_LOOP(double, dcmplx, x)
(fcmplx)(job->almtmp[job->ntrans*job->nalm*l+i]); else
COPY_LOOP(float, fcmplx, (fcmplx)x)
}
} }
else else
{ {
if (job->flags&SHARP_DP) if (m==0)
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) {
for (int i=0;i<job->ntrans*job->nalm;++i) if (job->flags&SHARP_DP)
((dcmplx *)job->alm[i])[ofs+l*stride] += COPY_LOOP(double, double, creal(x)*job->norm_l[l]*norm_m0)
job->almtmp[job->ntrans*job->nalm*l+i]*job->norm_l[l]; else
COPY_LOOP(float, fcmplx, (float)(creal(x)*job->norm_l[l]*norm_m0))
}
else else
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) {
for (int i=0;i<job->ntrans*job->nalm;++i) if (job->flags&SHARP_DP)
((fcmplx *)job->alm[i])[ofs+l*stride] += COPY_LOOP(double, dcmplx, x*job->norm_l[l])
(fcmplx)(job->almtmp[job->ntrans*job->nalm*l+i]*job->norm_l[l]); else
COPY_LOOP(float, fcmplx, (fcmplx)(x*job->norm_l[l]))
}
} }
#undef COPY_LOOP
} }
static void phase2map (sharp_job *job, int mmax, int llim, int ulim) static void phase2map (sharp_job *job, int mmax, int llim, int ulim)

View file

@ -41,6 +41,7 @@ void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
info->mval = RALLOC(int,mmax+1); info->mval = RALLOC(int,mmax+1);
info->mvstart = RALLOC(ptrdiff_t,mmax+1); info->mvstart = RALLOC(ptrdiff_t,mmax+1);
info->stride = stride; info->stride = stride;
info->flags = 0;
int tval = 2*lmax+1; int tval = 2*lmax+1;
for (ptrdiff_t m=0; m<=mmax; ++m) for (ptrdiff_t m=0; m<=mmax; ++m)
{ {
@ -59,6 +60,7 @@ void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
info->mval = RALLOC(int,mmax+1); info->mval = RALLOC(int,mmax+1);
info->mvstart = RALLOC(ptrdiff_t,mmax+1); info->mvstart = RALLOC(ptrdiff_t,mmax+1);
info->stride = stride; info->stride = stride;
info->flags = 0;
for (ptrdiff_t m=0; m<=mmax; ++m) for (ptrdiff_t m=0; m<=mmax; ++m)
{ {
info->mval[m] = m; info->mval[m] = m;

View file

@ -76,6 +76,8 @@ typedef struct
int nm; int nm;
/*! Array with \a nm entries containing the individual m values */ /*! Array with \a nm entries containing the individual m values */
int *mval; int *mval;
/*! Combination of flags from sharp_almflags */
int flags;
/*! Array with \a nm entries containing the (hypothetical) indices of /*! Array with \a nm entries containing the (hypothetical) indices of
the coefficients with quantum numbers 0,\a mval[i] */ the coefficients with quantum numbers 0,\a mval[i] */
ptrdiff_t *mvstart; ptrdiff_t *mvstart;
@ -83,6 +85,15 @@ typedef struct
ptrdiff_t stride; ptrdiff_t stride;
} sharp_alm_info; } sharp_alm_info;
/*! alm_info flags */
typedef enum { SHARP_PACKED = 1
/*< m=0-coefficients are packed so that the (zero) imaginary part is
not present. mvstart is in units of *real* float/double for all
m; stride is in units of reals for m=0 and complex for m!=0 */
} sharp_almflags;
/*! Creates an a_lm data structure from the following parameters: /*! Creates an a_lm data structure from the following parameters:
\param lmax maximum \a l quantum number (>=0) \param lmax maximum \a l quantum number (>=0)
\param mmax maximum \a m quantum number (0<= \a mmax <= \a lmax) \param mmax maximum \a m quantum number (0<= \a mmax <= \a lmax)
@ -102,10 +113,11 @@ void sharp_make_alm_info (int lmax, int mmax, int stride,
\param mval array with \a nm entries containing the individual m values \param mval array with \a nm entries containing the individual m values
\param mvstart array with \a nm entries containing the (hypothetical) \param mvstart array with \a nm entries containing the (hypothetical)
indices of the coefficients with the quantum numbers 0,\a mval[i] indices of the coefficients with the quantum numbers 0,\a mval[i]
\param flags a combination of sharp_almflags (pass 0 unless you know you need this)
\param alm_info will hold a pointer to the newly created data structure \param alm_info will hold a pointer to the newly created data structure
*/ */
void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval, void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
const ptrdiff_t *mvstart, sharp_alm_info **alm_info); const ptrdiff_t *mvstart, int flags, sharp_alm_info **alm_info);
/*! Returns the index of the coefficient with quantum numbers \a l, /*! Returns the index of the coefficient with quantum numbers \a l,
\a mval[mi]. \a mval[mi].
\note for a \a sharp_alm_info generated by sharp_make_alm_info() this is \note for a \a sharp_alm_info generated by sharp_make_alm_info() this is
@ -160,7 +172,19 @@ typedef enum { SHARP_DP = 1<<4,
SHARP_ADD = 1<<5, SHARP_ADD = 1<<5,
/*!< results are added to the output arrays, instead of /*!< results are added to the output arrays, instead of
overwriting them */ overwriting them */
SHARP_USE_WEIGHTS = 1<<6, /* internal use only */ SHARP_REAL_HARMONICS = 1<<6,
/*!< Use the real spherical harmonic convention. For
m==0, the alm are treated exactly the same as in
the complex case. For m!=0, alm[i] represent a
pair (+abs(m), -abs(m)) instead of (real, imag),
and the coefficients are scaled by a factor of
sqrt(2) relative to the complex case. In other
words, (sqrt(.5) * alm[i]) recovers the
corresponding complex coefficient (when accessed
as complex).
*/
SHARP_USE_WEIGHTS = 1<<20, /* internal use only */
SHARP_NVMAX = (1<<4)-1 /* internal use only */ SHARP_NVMAX = (1<<4)-1 /* internal use only */
} sharp_jobflags; } sharp_jobflags;