diff --git a/Makefile b/Makefile
index e6c51bc..3d6f20e 100644
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,7 @@ compile_all: $(all_cbin) hdrcopy
autotune: sharp_bench
$(BINDIR)/sharp_bench
- mv oracle.inc $(SRCROOT)/libsharp
+ mv sharp_oracle.inc $(SRCROOT)/libsharp
$(MAKE)
hdrclean:
diff --git a/libsharp/libsharp.dox b/libsharp/libsharp.dox
index 2a5067c..810abd0 100644
--- a/libsharp/libsharp.dox
+++ b/libsharp/libsharp.dox
@@ -61,11 +61,11 @@
- scalar a_lm to map
- scalar map to a_lm
-
+
- spin a_lm to map
- spin map to a_lm
-
+ - scalar a_lm to maps of first derivatives
SHARP supports shared-memory parallelisation via OpenMP; this feature will
diff --git a/libsharp/oracle.inc b/libsharp/oracle.inc
deleted file mode 100644
index 74638c6..0000000
--- a/libsharp/oracle.inc
+++ /dev/null
@@ -1,9 +0,0 @@
-static const int maxtr = 6;
-static const int nv_opt[6][2][3] = {
-{{4,2,-1},{2,1,1}},
-{{4,2,-1},{2,1,1}},
-{{5,2,-1},{5,2,1}},
-{{5,2,-1},{5,2,1}},
-{{5,2,-1},{5,2,1}},
-{{5,2,-1},{5,2,1}}
-};
diff --git a/libsharp/planck.make b/libsharp/planck.make
index c8621cd..6c72945 100644
--- a/libsharp/planck.make
+++ b/libsharp/planck.make
@@ -15,7 +15,7 @@ ALLOBJ:=$(ALLOBJ:%=$(OD)/%)
ODEP:=$(HDR_$(PKG)) $(HDR_libfftpack) $(HDR_c_utils)
$(OD)/sharp_core.o: $(SD)/sharp_inchelper1.inc.c $(SD)/sharp_core_inc.c $(SD)/sharp_core_inc2.c $(SD)/sharp_core_inc3.c
-$(OD)/sharp.o: $(SD)/sharp_mpi.c $(SD)/oracle.inc
+$(OD)/sharp.o: $(SD)/sharp_mpi.c $(SD)/sharp_oracle.inc
BDEP:=$(LIB_$(PKG)) $(LIB_libfftpack) $(LIB_c_utils)
$(LIB_$(PKG)): $(LIBOBJ)
diff --git a/libsharp/sharp.c b/libsharp/sharp.c
index 4cc659c..60c416e 100644
--- a/libsharp/sharp.c
+++ b/libsharp/sharp.c
@@ -464,7 +464,7 @@ void sharp_execute_job (sharp_job *job)
int lmax = job->ainfo->lmax,
mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm);
- job->norm_l = Ylmgen_get_norm (lmax, job->spin);
+ job->norm_l = sharp_Ylmgen_get_norm (lmax, job->spin);
/* clear output arrays if requested */
init_output (job);
@@ -501,8 +501,8 @@ void sharp_execute_job (sharp_job *job)
{
sharp_job ljob = *job;
ljob.opcnt=0;
- Ylmgen_C generator;
- Ylmgen_init (&generator,lmax,mmax,ljob.spin);
+ sharp_Ylmgen_C generator;
+ sharp_Ylmgen_init (&generator,lmax,mmax,ljob.spin);
alloc_almtmp(&ljob,lmax);
#pragma omp for schedule(dynamic,1)
@@ -517,7 +517,7 @@ void sharp_execute_job (sharp_job *job)
almtmp2alm (&ljob, lmax, mi);
}
- Ylmgen_destroy(&generator);
+ sharp_Ylmgen_destroy(&generator);
dealloc_almtmp(&ljob);
#pragma omp critical
@@ -589,7 +589,7 @@ int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans)
if (type==ALM2MAP_DERIV1) spin=1;
UTIL_ASSERT((ntrans>0),"bad number of simultaneous transforms");
UTIL_ASSERT((spin>=0)&&(spin<=30), "bad spin");
-#include "oracle.inc"
+#include "sharp_oracle.inc"
return nv_opt[IMIN(ntrans,maxtr)-1][spin!=0][type];
}
diff --git a/libsharp/sharp_bench.c b/libsharp/sharp_bench.c
index 09d44d6..74e5af6 100644
--- a/libsharp/sharp_bench.c
+++ b/libsharp/sharp_bench.c
@@ -98,7 +98,7 @@ int main(void)
sharp_module_startup("sharp_bench",1,1,"",1);
printf("Benchmarking SHTs.\n\n");
- FILE *fp=fopen("oracle.inc","w");
+ FILE *fp=fopen("sharp_oracle.inc","w");
UTIL_ASSERT(fp, "failed to open oracle file for writing");
fprintf(fp,"static const int maxtr = 6;\n");
fprintf(fp,"static const int nv_opt[6][2][3] = {\n");
diff --git a/libsharp/sharp_core.c b/libsharp/sharp_core.c
index ec02975..1d48a6f 100644
--- a/libsharp/sharp_core.c
+++ b/libsharp/sharp_core.c
@@ -73,7 +73,8 @@ typedef complex double dcmplx;
#undef nvec
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
- const double *sth, int llim, int ulim, Ylmgen_C *gen, int mi, const int *idx)
+ const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
+ const int *idx)
{
int njobs=job->ntrans;
if (njobs<=MAXJOB_SPECIAL)
diff --git a/libsharp/sharp_core.h b/libsharp/sharp_core.h
index 38ec41d..6d68210 100644
--- a/libsharp/sharp_core.h
+++ b/libsharp/sharp_core.h
@@ -40,7 +40,8 @@ extern "C" {
#endif
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
- const double *sth, int llim, int ulim, Ylmgen_C *gen, int mi, const int *idx);
+ const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
+ const int *idx);
#ifdef __cplusplus
}
diff --git a/libsharp/sharp_core_inc.c b/libsharp/sharp_core_inc.c
index b892f53..2c86463 100644
--- a/libsharp/sharp_core_inc.c
+++ b/libsharp/sharp_core_inc.c
@@ -83,8 +83,8 @@ static inline void Y(mypow) (Tb val, int npow, Tb * restrict resd,
{
vmuleq(res.v[i],val.v[i]);
vaddeq(scale.v[i],scaleint.v[i]);
- Tv mask=vlt(vabs(res.v[i]),vload(fsmall));
- vmuleq(res.v[i],vblend(mask,vload(fbig),vone));
+ Tv mask=vlt(vabs(res.v[i]),vload(sharp_fsmall));
+ vmuleq(res.v[i],vblend(mask,vload(sharp_fbig),vone));
vsubeq(scale.v[i],vblend(mask,vone,vzero));
}
}
@@ -92,8 +92,8 @@ static inline void Y(mypow) (Tb val, int npow, Tb * restrict resd,
{
vmuleq(val.v[i],val.v[i]);
vaddeq(scaleint.v[i],scaleint.v[i]);
- Tv mask = vlt(vabs(val.v[i]),vload(fsmall));
- vmuleq(val.v[i],vblend(mask,vload(fbig),vone));
+ Tv mask = vlt(vabs(val.v[i]),vload(sharp_fsmall));
+ vmuleq(val.v[i],vblend(mask,vload(sharp_fbig),vone));
vsubeq(scaleint.v[i],vblend(mask,vone,vzero));
}
}
@@ -113,7 +113,7 @@ static inline int Y(rescale) (Tb * restrict lam1, Tb * restrict lam2,
if (vanyTrue(mask))
{
did_scale=1;
- Tv fact = vblend(mask,vload(fsmall),vone);
+ Tv fact = vblend(mask,vload(sharp_fsmall),vone);
vmuleq(lam1->v[i],fact); vmuleq(lam2->v[i],fact);
vaddeq(scale->v[i],vblend(mask,vone,vzero));
}
@@ -123,7 +123,7 @@ static inline int Y(rescale) (Tb * restrict lam1, Tb * restrict lam2,
static inline void Y(normalize) (Tb * restrict val, Tb * restrict scale)
{
- const Tv vfsmall=vload(fsmall), vfbig=vload(fbig);
+ const Tv vfsmall=vload(sharp_fsmall), vfbig=vload(sharp_fbig);
for (int i=0;iv[i]),vone);
@@ -168,13 +168,14 @@ static inline void Y(getCorfac)(Tb scale, Tb * restrict corfac,
Y(Tbu) sc, corf;
sc.b=scale;
for (int i=0; im;
Tb lam_1=Y(Tbconst)(0.), lam_2, scale;
@@ -182,7 +183,7 @@ static void Y(iter_to_ieee) (const Tb sth, Tb cth, int *l_,
Y(Tbmuleq1) (&lam_2,(gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m]);
Y(normalize)(&lam_2,&scale);
- int below_limit = Y(TballLt)(scale,limscale);
+ int below_limit = Y(TballLt)(scale,sharp_limscale);
while (below_limit)
{
if (l+2>gen->lmax) {*l_=gen->lmax+1;return;}
@@ -193,14 +194,15 @@ static void Y(iter_to_ieee) (const Tb sth, Tb cth, int *l_,
for (int i=0; ifx;
+ const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb cth2, sth2;
for (int i=0; imhi;
- int below_limit = Y(TballLt)(scalep,limscale) && Y(TballLt)(scalem,limscale);
+ int below_limit = Y(TballLt)(scalep,sharp_limscale)
+ && Y(TballLt)(scalem,sharp_limscale);
while (below_limit)
{
if (l+2>gen->lmax) {*l_=gen->lmax+1;return;}
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l+1]);
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l+2]);
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
- below_limit = Y(TballLt)(scalep,limscale) && Y(TballLt)(scalem,limscale);
+ below_limit = Y(TballLt)(scalep,sharp_limscale)
+ && Y(TballLt)(scalem,sharp_limscale);
l+=2;
}
diff --git a/libsharp/sharp_core_inc2.c b/libsharp/sharp_core_inc2.c
index dd3fb5d..aa40bcc 100644
--- a/libsharp/sharp_core_inc2.c
+++ b/libsharp/sharp_core_inc2.c
@@ -40,7 +40,7 @@ typedef union
static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1,
Z(Tbrij) * restrict p2, Tb lam_1, Tb lam_2,
- const ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
+ const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
int l, int lmax)
{
#if (njobs>1)
@@ -126,7 +126,7 @@ static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1,
static void Z(map2alm_kernel) (const Tb cth, const Z(Tbrij) * restrict p1,
const Z(Tbrij) * restrict p2, Tb lam_1, Tb lam_2,
- const ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax)
+ const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax)
{
while (llmax;
Tb lam_1,lam_2,scale;
@@ -180,9 +181,9 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
Tb corfac;
Y(getCorfac)(scale,&corfac,gen->cf);
- const ylmgen_dbl2 * restrict rf = gen->rf;
+ const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
const dcmplx * restrict alm=job->almtmp;
- int full_ieee = Y(TballGt)(scale,minscale);
+ int full_ieee = Y(TballGt)(scale,sharp_minscale);
while (!full_ieee)
{
for (int j=0; jcf);
- full_ieee = Y(TballGt)(scale,minscale);
+ full_ieee = Y(TballGt)(scale,sharp_minscale);
}
}
if (l>lmax) { *done=1; return; }
@@ -226,7 +227,7 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
}
static void Z(calc_map2alm) (const Tb cth, const Tb sth,
- const Ylmgen_C *gen, sharp_job *job, const Z(Tbrij) * restrict p1,
+ const sharp_Ylmgen_C *gen, sharp_job *job, const Z(Tbrij) * restrict p1,
const Z(Tbrij) * restrict p2, int *done)
{
int lmax=gen->lmax;
@@ -237,11 +238,11 @@ static void Z(calc_map2alm) (const Tb cth, const Tb sth,
if (l>lmax) { *done=1; return; }
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
- const ylmgen_dbl2 * restrict rf = gen->rf;
+ const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
Tb corfac;
Y(getCorfac)(scale,&corfac,gen->cf);
dcmplx * restrict alm=job->almtmp;
- int full_ieee = Y(TballGt)(scale,minscale);
+ int full_ieee = Y(TballGt)(scale,sharp_minscale);
while (!full_ieee)
{
for (int j=0; jcf);
- full_ieee = Y(TballGt)(scale,minscale);
+ full_ieee = Y(TballGt)(scale,sharp_minscale);
}
}
@@ -371,7 +372,8 @@ static inline void Z(saddstep2) (const Z(Tbquj) * restrict px,
static void Z(alm2map_spin_kernel) (Tb cth, Z(Tbquj) * restrict p1,
Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
- const ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, int lmax)
+ const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
+ int lmax)
{
while (llmax;
@@ -449,12 +451,13 @@ static void Z(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
{ *done=1; return; }
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
- const ylmgen_dbl3 * restrict fx = gen->fx;
+ const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp;
- int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
while (!full_ieee)
{
Z(saddstep)(p1, p2,
@@ -469,7 +472,8 @@ static void Z(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
- full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
}
}
@@ -482,7 +486,7 @@ static void Z(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
rec1p, rec1m, rec2p, rec2m, fx, alm, l, lmax);
}
-static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
+static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
sharp_job *job, const Z(Tbquj) * restrict p1, const Z(Tbquj) * restrict p2,
int *done)
{
@@ -493,12 +497,13 @@ static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
if (l>lmax) { *done=1; return; }
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
- const ylmgen_dbl3 * restrict fx = gen->fx;
+ const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
dcmplx * restrict alm=job->almtmp;
- int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
while (!full_ieee)
{
Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
@@ -513,7 +518,8 @@ static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
- full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
}
}
@@ -522,8 +528,9 @@ static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
Z(map2alm_spin_kernel) (cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax);
}
-static inline void Z(saddstep_d) (Z(Tbquj) * restrict px, Z(Tbquj) * restrict py,
- const Tb rxp, const Tb rxm, const dcmplx * restrict alm)
+static inline void Z(saddstep_d) (Z(Tbquj) * restrict px,
+ Z(Tbquj) * restrict py, const Tb rxp, const Tb rxm,
+ const dcmplx * restrict alm)
{
for (int j=0; jlmax;
@@ -587,12 +594,13 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
{ *done=1; return; }
job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
- const ylmgen_dbl3 * restrict fx = gen->fx;
+ const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp;
- int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
while (!full_ieee)
{
Z(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
@@ -607,7 +615,8 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
- full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
}
}
@@ -623,12 +632,12 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
static void Z(inner_loop) (sharp_job *job, const int *ispair,
- const double *cth_, const double *sth_, int llim, int ulim, Ylmgen_C *gen,
- int mi, const int *idx)
+ const double *cth_, const double *sth_, int llim, int ulim,
+ sharp_Ylmgen_C *gen, int mi, const int *idx)
{
const int nval=nvec*VLEN;
const int m = job->ainfo->mval[mi];
- Ylmgen_prepare (gen, m);
+ sharp_Ylmgen_prepare (gen, m);
switch (job->type)
{
diff --git a/libsharp/sharp_core_inc3.c b/libsharp/sharp_core_inc3.c
index 950c84c..816f062 100644
--- a/libsharp/sharp_core_inc3.c
+++ b/libsharp/sharp_core_inc3.c
@@ -31,7 +31,7 @@
static void Y(alm2map_kernel) (const Tb cth, Y(Tbri) * restrict p1,
Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
- const ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
+ const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
int l, int lmax, int njobs)
{
while (llmax;
Tb lam_1,lam_2,scale;
@@ -171,9 +171,9 @@ static void Y(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
Tb corfac;
Y(getCorfac)(scale,&corfac,gen->cf);
- const ylmgen_dbl2 * restrict rf = gen->rf;
+ const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
const dcmplx * restrict alm=job->almtmp;
- int full_ieee = Y(TballGt)(scale,minscale);
+ int full_ieee = Y(TballGt)(scale,sharp_minscale);
while (!full_ieee)
{
for (int j=0; jcf);
- full_ieee = Y(TballGt)(scale,minscale);
+ full_ieee = Y(TballGt)(scale,sharp_minscale);
}
}
if (l>lmax) { *done=1; return; }
@@ -217,7 +217,7 @@ static void Y(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
}
static void Y(calc_map2alm) (const Tb cth, const Tb sth,
- const Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1,
+ const sharp_Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1,
const Y(Tbri) * restrict p2, int njobs, int *done)
{
int lmax=gen->lmax;
@@ -228,11 +228,11 @@ static void Y(calc_map2alm) (const Tb cth, const Tb sth,
if (l>lmax) { *done=1; return; }
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
- const ylmgen_dbl2 * restrict rf = gen->rf;
+ const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
Tb corfac;
Y(getCorfac)(scale,&corfac,gen->cf);
dcmplx * restrict alm=job->almtmp;
- int full_ieee = Y(TballGt)(scale,minscale);
+ int full_ieee = Y(TballGt)(scale,sharp_minscale);
while (!full_ieee)
{
for (int j=0; jcf);
- full_ieee = Y(TballGt)(scale,minscale);
+ full_ieee = Y(TballGt)(scale,sharp_minscale);
}
}
@@ -362,7 +362,7 @@ static inline void Y(saddstep2) (const Y(Tbqu) * restrict px,
static void Y(alm2map_spin_kernel) (Tb cth, Y(Tbqu) * restrict p1,
Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
- const ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
+ const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
int lmax, int njobs)
{
while (lopcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
- const ylmgen_dbl3 * restrict fx = gen->fx;
+ const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp;
- int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
while (!full_ieee)
{
Y(saddstep)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
@@ -458,7 +459,8 @@ static void Y(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
- full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
}
}
@@ -471,7 +473,7 @@ static void Y(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
lmax, njobs);
}
-static void Y(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
+static void Y(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
sharp_job *job, const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2,
int njobs, int *done)
{
@@ -482,12 +484,13 @@ static void Y(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
if (l>lmax) { *done=1; return; }
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
- const ylmgen_dbl3 * restrict fx = gen->fx;
+ const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
dcmplx * restrict alm=job->almtmp;
- int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
while (!full_ieee)
{
Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
@@ -502,7 +505,8 @@ static void Y(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
- full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
}
}
@@ -534,7 +538,7 @@ static inline void Y(saddstep_d) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
static void Y(alm2map_deriv1_kernel) (Tb cth, Y(Tbqu) * restrict p1,
Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
- const ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
+ const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
int lmax, int njobs)
{
while (lopcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
- const ylmgen_dbl3 * restrict fx = gen->fx;
+ const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp;
- int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
while (!full_ieee)
{
Y(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
@@ -597,7 +602,8 @@ static void Y(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
- full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+ full_ieee = Y(TballGt)(scalep,sharp_minscale)
+ && Y(TballGt)(scalem,sharp_minscale);
}
}
@@ -614,12 +620,12 @@ static void Y(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
static void Y(inner_loop) (sharp_job *job, const int *ispair,
- const double *cth_, const double *sth_, int llim, int ulim, Ylmgen_C *gen,
- int mi, const int *idx, int njobs)
+ const double *cth_, const double *sth_, int llim, int ulim,
+ sharp_Ylmgen_C *gen, int mi, const int *idx, int njobs)
{
const int nval=nvec*VLEN;
const int m = job->ainfo->mval[mi];
- Ylmgen_prepare (gen, m);
+ sharp_Ylmgen_prepare (gen, m);
switch (job->type)
{
diff --git a/libsharp/sharp_oracle.inc b/libsharp/sharp_oracle.inc
new file mode 100644
index 0000000..40c9ed0
--- /dev/null
+++ b/libsharp/sharp_oracle.inc
@@ -0,0 +1,9 @@
+static const int maxtr = 6;
+static const int nv_opt[6][2][3] = {
+{{4,2,-1},{2,1,1}},
+{{5,2,-1},{2,1,1}},
+{{5,2,-1},{5,2,2}},
+{{5,2,-1},{5,2,2}},
+{{5,2,-1},{5,2,2}},
+{{5,2,-1},{5,2,2}}
+};
diff --git a/libsharp/sharp_ylmgen_c.c b/libsharp/sharp_ylmgen_c.c
index 48a1f7b..7a7ec8a 100644
--- a/libsharp/sharp_ylmgen_c.c
+++ b/libsharp/sharp_ylmgen_c.c
@@ -34,7 +34,7 @@
#include "sharp_ylmgen_c.h"
#include "c_utils.h"
-void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin)
+void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
{
const double inv_sqrt4pi = 0.2820947917738781434740397257803862929220;
@@ -42,18 +42,19 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin)
gen->mmax = m_max;
UTIL_ASSERT(spin>=0,"incorrect spin");
gen->s = spin;
- UTIL_ASSERT((minscale<=0)&&(maxscale>0),"bad value for min/maxscale");
- gen->cf=RALLOC(double,maxscale-minscale+1);
- gen->cf[-minscale]=1.;
- for (int m=-minscale-1; m>=0; --m)
- gen->cf[m]=gen->cf[m+1]*fsmall;
- for (int m=-minscale+1; m<(maxscale-minscale+1); ++m)
- gen->cf[m]=gen->cf[m-1]*fbig;
+ UTIL_ASSERT((sharp_minscale<=0)&&(sharp_maxscale>0),
+ "bad value for min/maxscale");
+ gen->cf=RALLOC(double,sharp_maxscale-sharp_minscale+1);
+ gen->cf[-sharp_minscale]=1.;
+ for (int m=-sharp_minscale-1; m>=0; --m)
+ gen->cf[m]=gen->cf[m+1]*sharp_fsmall;
+ for (int m=-sharp_minscale+1; m<(sharp_maxscale-sharp_minscale+1); ++m)
+ gen->cf[m]=gen->cf[m-1]*sharp_fbig;
gen->m = -1;
if (spin==0)
{
- gen->rf = RALLOC(ylmgen_dbl2,gen->lmax+1);
+ gen->rf = RALLOC(sharp_ylmgen_dbl2,gen->lmax+1);
gen->mfac = RALLOC(double,gen->mmax+1);
gen->mfac[0] = inv_sqrt4pi;
for (int m=1; m<=gen->mmax; ++m)
@@ -69,7 +70,7 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin)
else
{
gen->m=gen->mlo=gen->mhi=-1234567890;
- ALLOC(gen->fx,ylmgen_dbl3,gen->lmax+2);
+ ALLOC(gen->fx,sharp_ylmgen_dbl3,gen->lmax+2);
for (int m=0; mlmax+2; ++m)
gen->fx[m].f[0]=gen->fx[m].f[1]=gen->fx[m].f[2]=0.;
ALLOC(gen->inv,double,gen->lmax+1);
@@ -91,7 +92,7 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin)
{
fac[m]=fac[m-1]*sqrt(m);
facscale[m]=facscale[m-1];
- if (fac[m]>1.) { fac[m]*=fsmall; ++facscale[m]; }
+ if (fac[m]>1.) { fac[m]*=sharp_fsmall; ++facscale[m]; }
}
for (int m=0; m<=gen->mmax; ++m)
{
@@ -105,7 +106,7 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin)
}
}
-void Ylmgen_destroy (Ylmgen_C *gen)
+void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen)
{
DEALLOC(gen->cf);
if (gen->s==0)
@@ -126,7 +127,7 @@ void Ylmgen_destroy (Ylmgen_C *gen)
}
}
-void Ylmgen_prepare (Ylmgen_C *gen, int m)
+void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m)
{
if (m==gen->m) return;
UTIL_ASSERT(m>=0,"incorrect m");
@@ -181,7 +182,7 @@ void Ylmgen_prepare (Ylmgen_C *gen, int m)
}
}
-double *Ylmgen_get_norm (int lmax, int spin)
+double *sharp_Ylmgen_get_norm (int lmax, int spin)
{
const double pi = 3.141592653589793238462643383279502884197;
double *res=RALLOC(double,lmax+1);
diff --git a/libsharp/sharp_ylmgen_c.h b/libsharp/sharp_ylmgen_c.h
index cbfd42c..18d5a9d 100644
--- a/libsharp/sharp_ylmgen_c.h
+++ b/libsharp/sharp_ylmgen_c.h
@@ -36,11 +36,11 @@
extern "C" {
#endif
-enum { minscale=-8, limscale=-3, maxscale=5 };
-static const double fbig=0x1p+90,fsmall=0x1p-90;
+enum { sharp_minscale=-8, sharp_limscale=-3, sharp_maxscale=5 };
+static const double sharp_fbig=0x1p+90,sharp_fsmall=0x1p-90;
-typedef struct { double f[2]; } ylmgen_dbl2;
-typedef struct { double f[3]; } ylmgen_dbl3;
+typedef struct { double f[2]; } sharp_ylmgen_dbl2;
+typedef struct { double f[3]; } sharp_ylmgen_dbl3;
typedef struct
{
@@ -53,13 +53,13 @@ typedef struct
/* used if s==0 */
double *mfac;
- ylmgen_dbl2 *rf;
+ sharp_ylmgen_dbl2 *rf;
/* used if s!=0 */
int sinPow, cosPow, preMinus_p, preMinus_m;
double *prefac;
int *fscale;
- ylmgen_dbl3 *fx;
+ sharp_ylmgen_dbl3 *fx;
/* internal usage only */
/* used if s==0 */
@@ -68,22 +68,22 @@ typedef struct
/* used if s!=0 */
double *flm1, *flm2, *inv;
int mlo, mhi;
- } Ylmgen_C;
+ } sharp_Ylmgen_C;
/*! Creates a generator which will calculate helper data for Y_lm calculation
up to \a l=l_max and \a m=m_max. */
-void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin);
+void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin);
/*! Deallocates a generator previously initialised by Ylmgen_init(). */
-void Ylmgen_destroy (Ylmgen_C *gen);
+void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen);
/*! Prepares the object for the calculation at \a m. */
-void Ylmgen_prepare (Ylmgen_C *gen, int m);
+void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m);
/*! Returns a pointer to an array with \a lmax+1 entries containing
normalisation factors that must be applied to Y_lm values computed for
\a spin. The array must be deallocated (using free()) by the user. */
-double *Ylmgen_get_norm (int lmax, int spin);
+double *sharp_Ylmgen_get_norm (int lmax, int spin);
#ifdef __cplusplus
}