updates
This commit is contained in:
parent
dce3c2b430
commit
18c82762c3
15 changed files with 424 additions and 219 deletions
195
libsharp/sharp.c
195
libsharp/sharp.c
|
@ -25,11 +25,12 @@
|
|||
/*! \file sharp.c
|
||||
* Spherical transform library
|
||||
*
|
||||
* Copyright (C) 2006-2013 Max-Planck-Society
|
||||
* Copyright (C) 2006-2016 Max-Planck-Society
|
||||
* \author Martin Reinecke \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "pocketfft/pocketfft.h"
|
||||
#include "sharp_ylmgen_c.h"
|
||||
#include "sharp_internal.h"
|
||||
|
@ -63,7 +64,7 @@ static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize)
|
|||
*nchunks = (ndata+(*chunksize)-1)/(*chunksize);
|
||||
}
|
||||
|
||||
int sharp_get_mlim (int lmax, int spin, double sth, double cth)
|
||||
NOINLINE int sharp_get_mlim (int lmax, int spin, double sth, double cth)
|
||||
{
|
||||
double ofs=lmax*0.01;
|
||||
if (ofs<100.) ofs=100.;
|
||||
|
@ -83,12 +84,13 @@ typedef struct
|
|||
dcmplx *shiftarr;
|
||||
int s_shift;
|
||||
rfft_plan plan;
|
||||
int length;
|
||||
int norot;
|
||||
} ringhelper;
|
||||
|
||||
static void ringhelper_init (ringhelper *self)
|
||||
{
|
||||
static ringhelper rh_null = { 0, NULL, 0, NULL, 0 };
|
||||
static ringhelper rh_null = { 0, NULL, 0, NULL, 0, 0 };
|
||||
*self = rh_null;
|
||||
}
|
||||
|
||||
|
@ -99,7 +101,7 @@ static void ringhelper_destroy (ringhelper *self)
|
|||
ringhelper_init(self);
|
||||
}
|
||||
|
||||
static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
|
||||
NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
|
||||
{
|
||||
self->norot = (fabs(phi0)<1e-14);
|
||||
if (!(self->norot))
|
||||
|
@ -110,12 +112,15 @@ static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
|
|||
self->phi0_ = phi0;
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
self->shiftarr[m] = cos(m*phi0) + _Complex_I*sin(m*phi0);
|
||||
// double *tmp=(double *) self->shiftarr;
|
||||
// sincos_multi (mmax+1, phi0, &tmp[1], &tmp[0], 2);
|
||||
}
|
||||
if (!self->plan) self->plan=make_rfft_plan(nph);
|
||||
if (nph!=(int)rfft_length(self->plan))
|
||||
if (nph!=(int)self->length)
|
||||
{
|
||||
destroy_rfft_plan(self->plan);
|
||||
self->plan=make_rfft_plan(nph);
|
||||
self->length=nph;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -127,6 +132,7 @@ static int ringinfo_compare (const void *xa, const void *xb)
|
|||
static int ringpair_compare (const void *xa, const void *xb)
|
||||
{
|
||||
const sharp_ringpair *a=xa, *b=xb;
|
||||
// return (a->r1.sth < b->r1.sth) ? -1 : (a->r1.sth > b->r1.sth) ? 1 : 0;
|
||||
if (a->r1.nph==b->r1.nph)
|
||||
return (a->r1.phi0 < b->r1.phi0) ? -1 :
|
||||
((a->r1.phi0 > b->r1.phi0) ? 1 :
|
||||
|
@ -261,6 +267,7 @@ void sharp_destroy_geom_info (sharp_geom_info *geom_info)
|
|||
distribution are permissible. */
|
||||
static int sharp_get_mmax (int *mval, int nm)
|
||||
{
|
||||
//FIXME: if gaps are allowed, we have to search the maximum m in the array
|
||||
int *mcheck=RALLOC(int,nm);
|
||||
SET_ARRAY(mcheck,0,nm,0);
|
||||
for (int i=0; i<nm; ++i)
|
||||
|
@ -274,7 +281,7 @@ static int sharp_get_mmax (int *mval, int nm)
|
|||
return nm-1;
|
||||
}
|
||||
|
||||
static void ringhelper_phase2ring (ringhelper *self,
|
||||
NOINLINE static void ringhelper_phase2ring (ringhelper *self,
|
||||
const sharp_ringinfo *info, double *data, int mmax, const dcmplx *phase,
|
||||
int pstride, int flags)
|
||||
{
|
||||
|
@ -288,13 +295,19 @@ static void ringhelper_phase2ring (ringhelper *self,
|
|||
|
||||
if (nph>=2*mmax+1)
|
||||
{
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
{
|
||||
dcmplx tmp = phase[m*pstride]*wgt;
|
||||
if(!self->norot) tmp*=self->shiftarr[m];
|
||||
data[2*m]=creal(tmp);
|
||||
data[2*m+1]=cimag(tmp);
|
||||
}
|
||||
if (self->norot)
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
{
|
||||
data[2*m]=creal(phase[m*pstride])*wgt;
|
||||
data[2*m+1]=cimag(phase[m*pstride])*wgt;
|
||||
}
|
||||
else
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
{
|
||||
dcmplx tmp = phase[m*pstride]*self->shiftarr[m];
|
||||
data[2*m]=creal(tmp)*wgt;
|
||||
data[2*m+1]=cimag(tmp)*wgt;
|
||||
}
|
||||
for (int m=2*(mmax+1); m<nph+2; ++m)
|
||||
data[m]=0.;
|
||||
}
|
||||
|
@ -326,7 +339,7 @@ static void ringhelper_phase2ring (ringhelper *self,
|
|||
rfft_backward (self->plan, &(data[1]), 1.);
|
||||
}
|
||||
|
||||
static void ringhelper_ring2phase (ringhelper *self,
|
||||
NOINLINE static void ringhelper_ring2phase (ringhelper *self,
|
||||
const sharp_ringinfo *info, double *data, int mmax, dcmplx *phase,
|
||||
int pstride, int flags)
|
||||
{
|
||||
|
@ -376,7 +389,7 @@ static void ringhelper_ring2phase (ringhelper *self,
|
|||
phase[m*pstride]=0.;
|
||||
}
|
||||
|
||||
static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
|
||||
NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
|
||||
int flags)
|
||||
{
|
||||
if (flags & SHARP_NO_FFT)
|
||||
|
@ -386,50 +399,55 @@ static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
|
|||
if (flags&SHARP_DP)
|
||||
{
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||
((dcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
|
||||
=value;
|
||||
((dcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||
((dcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
|
||||
=value;
|
||||
((dcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||
((fcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
|
||||
=(float)value;
|
||||
((fcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||
((fcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
|
||||
=(float)value;
|
||||
((fcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int j=0;j<ginfo->npairs;++j)
|
||||
if (flags&SHARP_DP)
|
||||
{
|
||||
if (flags&SHARP_DP)
|
||||
for (int j=0;j<ginfo->npairs;++j)
|
||||
{
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||
((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
|
||||
=value;
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||
((double *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
|
||||
=value;
|
||||
double *dmap=(double *)map;
|
||||
if (ginfo->pair[j].r1.stride==1)
|
||||
memset(&dmap[ginfo->pair[j].r1.ofs],0,
|
||||
ginfo->pair[j].r1.nph*sizeof(double));
|
||||
else
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||
dmap[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
|
||||
if ((ginfo->pair[j].r2.nph>0)&&(ginfo->pair[j].r2.stride==1))
|
||||
memset(&dmap[ginfo->pair[j].r2.ofs],0,
|
||||
ginfo->pair[j].r2.nph*sizeof(double));
|
||||
else
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||
dmap[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
|
||||
}
|
||||
else
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int j=0;j<ginfo->npairs;++j)
|
||||
{
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||
((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
|
||||
=(float)value;
|
||||
((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||
((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
|
||||
=(float)value;
|
||||
((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void clear_alm (const sharp_alm_info *ainfo, void *alm, int flags)
|
||||
NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
|
||||
int flags)
|
||||
{
|
||||
#define CLEARLOOP(real_t,body) \
|
||||
{ \
|
||||
|
@ -465,7 +483,7 @@ static void clear_alm (const sharp_alm_info *ainfo, void *alm, int flags)
|
|||
}
|
||||
}
|
||||
|
||||
static void init_output (sharp_job *job)
|
||||
NOINLINE static void init_output (sharp_job *job)
|
||||
{
|
||||
if (job->flags&SHARP_ADD) return;
|
||||
if (job->type == SHARP_MAP2ALM)
|
||||
|
@ -473,21 +491,21 @@ static void init_output (sharp_job *job)
|
|||
clear_alm (job->ainfo,job->alm[i],job->flags);
|
||||
else
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
fill_map (job->ginfo,job->map[i],0.,job->flags);
|
||||
clear_map (job->ginfo,job->map[i],job->flags);
|
||||
}
|
||||
|
||||
static void alloc_phase (sharp_job *job, int nm, int ntheta)
|
||||
NOINLINE static void alloc_phase (sharp_job *job, int nm, int ntheta)
|
||||
{
|
||||
if (job->type==SHARP_MAP2ALM)
|
||||
{
|
||||
if ((nm&1023)==0) nm+=3; // hack to avoid critical strides
|
||||
job->s_m=2*job->ntrans*job->nmaps;
|
||||
if (((job->s_m*16*nm)&1023)==0) nm+=3; // hack to avoid critical strides
|
||||
job->s_th=job->s_m*nm;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((ntheta&1023)==0) ntheta+=3; // hack to avoid critical strides
|
||||
job->s_th=2*job->ntrans*job->nmaps;
|
||||
if (((job->s_th*16*ntheta)&1023)==0) ntheta+=3; // hack to avoid critical strides
|
||||
job->s_m=job->s_th*ntheta;
|
||||
}
|
||||
job->phase=RALLOC(dcmplx,2*job->ntrans*job->nmaps*nm*ntheta);
|
||||
|
@ -502,22 +520,28 @@ static void alloc_almtmp (sharp_job *job, int lmax)
|
|||
static void dealloc_almtmp (sharp_job *job)
|
||||
{ DEALLOC(job->almtmp); }
|
||||
|
||||
static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
||||
NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
||||
{
|
||||
|
||||
#define COPY_LOOP(real_t, source_t, expr_of_x) \
|
||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \
|
||||
#define COPY_LOOP(real_t, source_t, expr_of_x) \
|
||||
{ \
|
||||
for (int l=m; l<lmin; ++l) \
|
||||
for (int i=0; i<job->ntrans*job->nalm; ++i) \
|
||||
job->almtmp[job->ntrans*job->nalm*l+i] = 0; \
|
||||
for (int l=lmin; l<=lmax; ++l) \
|
||||
for (int i=0; i<job->ntrans*job->nalm; ++i) \
|
||||
{ \
|
||||
source_t x = *(source_t *)(((real_t *)job->alm[i])+ofs+l*stride); \
|
||||
job->almtmp[job->ntrans*job->nalm*l+i] = expr_of_x; \
|
||||
}
|
||||
source_t x = *(source_t *)(((real_t *)job->alm[i])+ofs+l*stride); \
|
||||
job->almtmp[job->ntrans*job->nalm*l+i] = expr_of_x; \
|
||||
} \
|
||||
}
|
||||
|
||||
if (job->type!=SHARP_MAP2ALM)
|
||||
{
|
||||
ptrdiff_t ofs=job->ainfo->mvstart[mi];
|
||||
int stride=job->ainfo->stride;
|
||||
int m=job->ainfo->mval[mi];
|
||||
int lmin=(m<job->spin) ? job->spin : m;
|
||||
/* in the case of SHARP_REAL_HARMONICS, phase2ring scales all the
|
||||
coefficients by sqrt_one_half; here we must compensate to avoid scaling
|
||||
m=0 */
|
||||
|
@ -562,17 +586,17 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
|||
}
|
||||
}
|
||||
else
|
||||
SET_ARRAY(job->almtmp,job->ntrans*job->nalm*job->ainfo->mval[mi],
|
||||
job->ntrans*job->nalm*(lmax+1),0.);
|
||||
memset (job->almtmp+job->ntrans*job->nalm*job->ainfo->mval[mi], 0,
|
||||
job->ntrans*job->nalm*(lmax+1-job->ainfo->mval[mi])*sizeof(dcmplx));
|
||||
|
||||
#undef COPY_LOOP
|
||||
}
|
||||
|
||||
static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
||||
NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
||||
{
|
||||
|
||||
#define COPY_LOOP(real_t, target_t, expr_of_x) \
|
||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \
|
||||
for (int l=lmin; l<=lmax; ++l) \
|
||||
for (int i=0; i<job->ntrans*job->nalm; ++i) \
|
||||
{ \
|
||||
dcmplx x = job->almtmp[job->ntrans*job->nalm*l+i]; \
|
||||
|
@ -583,6 +607,7 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
|||
ptrdiff_t ofs=job->ainfo->mvstart[mi];
|
||||
int stride=job->ainfo->stride;
|
||||
int m=job->ainfo->mval[mi];
|
||||
int lmin=(m<job->spin) ? job->spin : m;
|
||||
/* in the case of SHARP_REAL_HARMONICS, ring2phase scales all the
|
||||
coefficients by sqrt_two; here we must compensate to avoid scaling
|
||||
m=0 */
|
||||
|
@ -629,27 +654,56 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
|||
#undef COPY_LOOP
|
||||
}
|
||||
|
||||
static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri, double *ringtmp,
|
||||
int rstride)
|
||||
NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
|
||||
const double *ringtmp, int rstride)
|
||||
{
|
||||
double **dmap = (double **)job->map;
|
||||
float **fmap = (float **)job->map;
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
if (job->flags & SHARP_DP)
|
||||
dmap[i][ri->ofs+m*ri->stride] += ringtmp[i*rstride+m+1];
|
||||
if (job->flags & SHARP_DP)
|
||||
{
|
||||
double **dmap = (double **)job->map;
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
{
|
||||
double *restrict p1=&dmap[i][ri->ofs];
|
||||
const double *restrict p2=&ringtmp[i*rstride+1];
|
||||
if (ri->stride==1)
|
||||
{
|
||||
if (job->flags&SHARP_ADD)
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
p1[m] += p2[m];
|
||||
else
|
||||
memcpy(p1,p2,ri->nph*sizeof(double));
|
||||
}
|
||||
else
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
p1[m*ri->stride] += p2[m];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
float **fmap = (float **)job->map;
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
fmap[i][ri->ofs+m*ri->stride] += (float)ringtmp[i*rstride+m+1];
|
||||
}
|
||||
}
|
||||
|
||||
static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri, double *ringtmp,
|
||||
int rstride)
|
||||
NOINLINE static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri,
|
||||
double *ringtmp, int rstride)
|
||||
{
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
ringtmp[i*rstride+m+1] = (job->flags & SHARP_DP) ?
|
||||
((double *)(job->map[i]))[ri->ofs+m*ri->stride] :
|
||||
((float *)(job->map[i]))[ri->ofs+m*ri->stride];
|
||||
if (job->flags & SHARP_DP)
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
{
|
||||
double *restrict p1=&ringtmp[i*rstride+1],
|
||||
*restrict p2=&(((double *)(job->map[i]))[ri->ofs]);
|
||||
if (ri->stride==1)
|
||||
memcpy(p1,p2,ri->nph*sizeof(double));
|
||||
else
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
p1[m] = p2[m*ri->stride];
|
||||
}
|
||||
else
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
ringtmp[i*rstride+m+1] = ((float *)(job->map[i]))[ri->ofs+m*ri->stride];
|
||||
}
|
||||
|
||||
static void ring2phase_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
|
||||
|
@ -693,7 +747,7 @@ static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
|
|||
}
|
||||
|
||||
//FIXME: set phase to zero if not SHARP_MAP2ALM?
|
||||
static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
||||
NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
||||
{
|
||||
if (job->type != SHARP_MAP2ALM) return;
|
||||
int pstride = job->s_m;
|
||||
|
@ -738,7 +792,7 @@ static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
|||
}
|
||||
}
|
||||
|
||||
static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
||||
NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
||||
{
|
||||
if (job->type == SHARP_MAP2ALM) return;
|
||||
int pstride = job->s_m;
|
||||
|
@ -783,7 +837,7 @@ static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
|||
}
|
||||
}
|
||||
|
||||
static void sharp_execute_job (sharp_job *job)
|
||||
NOINLINE static void sharp_execute_job (sharp_job *job)
|
||||
{
|
||||
double timer=wallTime();
|
||||
job->opcnt=0;
|
||||
|
@ -800,6 +854,7 @@ static void sharp_execute_job (sharp_job *job)
|
|||
int nchunks, chunksize;
|
||||
get_chunk_info(job->ginfo->npairs,(job->flags&SHARP_NVMAX)*VLEN,&nchunks,
|
||||
&chunksize);
|
||||
//FIXME: needs to be changed to "nm"
|
||||
alloc_phase (job,mmax+1,chunksize);
|
||||
|
||||
/* chunk loop */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue