From 82dc2a541fd3ada683e283d0e5be368b6b582872 Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Thu, 5 Jul 2012 16:14:28 +0200 Subject: [PATCH] sharpification --- Makefile | 2 +- libsharp/libsharp.dox | 6 ++-- libsharp/oracle.inc | 9 ----- libsharp/planck.make | 2 +- libsharp/sharp.c | 10 +++--- libsharp/sharp_bench.c | 2 +- libsharp/sharp_core.c | 3 +- libsharp/sharp_core.h | 3 +- libsharp/sharp_core_inc.c | 34 ++++++++++-------- libsharp/sharp_core_inc2.c | 71 +++++++++++++++++++++----------------- libsharp/sharp_core_inc3.c | 66 +++++++++++++++++++---------------- libsharp/sharp_oracle.inc | 9 +++++ libsharp/sharp_ylmgen_c.c | 29 ++++++++-------- libsharp/sharp_ylmgen_c.h | 22 ++++++------ 14 files changed, 145 insertions(+), 123 deletions(-) delete mode 100644 libsharp/oracle.inc create mode 100644 libsharp/sharp_oracle.inc diff --git a/Makefile b/Makefile index e6c51bc..3d6f20e 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ compile_all: $(all_cbin) hdrcopy autotune: sharp_bench $(BINDIR)/sharp_bench - mv oracle.inc $(SRCROOT)/libsharp + mv sharp_oracle.inc $(SRCROOT)/libsharp $(MAKE) hdrclean: diff --git a/libsharp/libsharp.dox b/libsharp/libsharp.dox index 2a5067c..810abd0 100644 --- a/libsharp/libsharp.dox +++ b/libsharp/libsharp.dox @@ -61,11 +61,11 @@ SHARP supports shared-memory parallelisation via OpenMP; this feature will diff --git a/libsharp/oracle.inc b/libsharp/oracle.inc deleted file mode 100644 index 74638c6..0000000 --- a/libsharp/oracle.inc +++ /dev/null @@ -1,9 +0,0 @@ -static const int maxtr = 6; -static const int nv_opt[6][2][3] = { -{{4,2,-1},{2,1,1}}, -{{4,2,-1},{2,1,1}}, -{{5,2,-1},{5,2,1}}, -{{5,2,-1},{5,2,1}}, -{{5,2,-1},{5,2,1}}, -{{5,2,-1},{5,2,1}} -}; diff --git a/libsharp/planck.make b/libsharp/planck.make index c8621cd..6c72945 100644 --- a/libsharp/planck.make +++ b/libsharp/planck.make @@ -15,7 +15,7 @@ ALLOBJ:=$(ALLOBJ:%=$(OD)/%) ODEP:=$(HDR_$(PKG)) $(HDR_libfftpack) $(HDR_c_utils) $(OD)/sharp_core.o: $(SD)/sharp_inchelper1.inc.c $(SD)/sharp_core_inc.c $(SD)/sharp_core_inc2.c $(SD)/sharp_core_inc3.c -$(OD)/sharp.o: $(SD)/sharp_mpi.c $(SD)/oracle.inc +$(OD)/sharp.o: $(SD)/sharp_mpi.c $(SD)/sharp_oracle.inc BDEP:=$(LIB_$(PKG)) $(LIB_libfftpack) $(LIB_c_utils) $(LIB_$(PKG)): $(LIBOBJ) diff --git a/libsharp/sharp.c b/libsharp/sharp.c index 4cc659c..60c416e 100644 --- a/libsharp/sharp.c +++ b/libsharp/sharp.c @@ -464,7 +464,7 @@ void sharp_execute_job (sharp_job *job) int lmax = job->ainfo->lmax, mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm); - job->norm_l = Ylmgen_get_norm (lmax, job->spin); + job->norm_l = sharp_Ylmgen_get_norm (lmax, job->spin); /* clear output arrays if requested */ init_output (job); @@ -501,8 +501,8 @@ void sharp_execute_job (sharp_job *job) { sharp_job ljob = *job; ljob.opcnt=0; - Ylmgen_C generator; - Ylmgen_init (&generator,lmax,mmax,ljob.spin); + sharp_Ylmgen_C generator; + sharp_Ylmgen_init (&generator,lmax,mmax,ljob.spin); alloc_almtmp(&ljob,lmax); #pragma omp for schedule(dynamic,1) @@ -517,7 +517,7 @@ void sharp_execute_job (sharp_job *job) almtmp2alm (&ljob, lmax, mi); } - Ylmgen_destroy(&generator); + sharp_Ylmgen_destroy(&generator); dealloc_almtmp(&ljob); #pragma omp critical @@ -589,7 +589,7 @@ int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans) if (type==ALM2MAP_DERIV1) spin=1; UTIL_ASSERT((ntrans>0),"bad number of simultaneous transforms"); UTIL_ASSERT((spin>=0)&&(spin<=30), "bad spin"); -#include "oracle.inc" +#include "sharp_oracle.inc" return nv_opt[IMIN(ntrans,maxtr)-1][spin!=0][type]; } diff --git a/libsharp/sharp_bench.c b/libsharp/sharp_bench.c index 09d44d6..74e5af6 100644 --- a/libsharp/sharp_bench.c +++ b/libsharp/sharp_bench.c @@ -98,7 +98,7 @@ int main(void) sharp_module_startup("sharp_bench",1,1,"",1); printf("Benchmarking SHTs.\n\n"); - FILE *fp=fopen("oracle.inc","w"); + FILE *fp=fopen("sharp_oracle.inc","w"); UTIL_ASSERT(fp, "failed to open oracle file for writing"); fprintf(fp,"static const int maxtr = 6;\n"); fprintf(fp,"static const int nv_opt[6][2][3] = {\n"); diff --git a/libsharp/sharp_core.c b/libsharp/sharp_core.c index ec02975..1d48a6f 100644 --- a/libsharp/sharp_core.c +++ b/libsharp/sharp_core.c @@ -73,7 +73,8 @@ typedef complex double dcmplx; #undef nvec void inner_loop (sharp_job *job, const int *ispair,const double *cth, - const double *sth, int llim, int ulim, Ylmgen_C *gen, int mi, const int *idx) + const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, + const int *idx) { int njobs=job->ntrans; if (njobs<=MAXJOB_SPECIAL) diff --git a/libsharp/sharp_core.h b/libsharp/sharp_core.h index 38ec41d..6d68210 100644 --- a/libsharp/sharp_core.h +++ b/libsharp/sharp_core.h @@ -40,7 +40,8 @@ extern "C" { #endif void inner_loop (sharp_job *job, const int *ispair,const double *cth, - const double *sth, int llim, int ulim, Ylmgen_C *gen, int mi, const int *idx); + const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, + const int *idx); #ifdef __cplusplus } diff --git a/libsharp/sharp_core_inc.c b/libsharp/sharp_core_inc.c index b892f53..2c86463 100644 --- a/libsharp/sharp_core_inc.c +++ b/libsharp/sharp_core_inc.c @@ -83,8 +83,8 @@ static inline void Y(mypow) (Tb val, int npow, Tb * restrict resd, { vmuleq(res.v[i],val.v[i]); vaddeq(scale.v[i],scaleint.v[i]); - Tv mask=vlt(vabs(res.v[i]),vload(fsmall)); - vmuleq(res.v[i],vblend(mask,vload(fbig),vone)); + Tv mask=vlt(vabs(res.v[i]),vload(sharp_fsmall)); + vmuleq(res.v[i],vblend(mask,vload(sharp_fbig),vone)); vsubeq(scale.v[i],vblend(mask,vone,vzero)); } } @@ -92,8 +92,8 @@ static inline void Y(mypow) (Tb val, int npow, Tb * restrict resd, { vmuleq(val.v[i],val.v[i]); vaddeq(scaleint.v[i],scaleint.v[i]); - Tv mask = vlt(vabs(val.v[i]),vload(fsmall)); - vmuleq(val.v[i],vblend(mask,vload(fbig),vone)); + Tv mask = vlt(vabs(val.v[i]),vload(sharp_fsmall)); + vmuleq(val.v[i],vblend(mask,vload(sharp_fbig),vone)); vsubeq(scaleint.v[i],vblend(mask,vone,vzero)); } } @@ -113,7 +113,7 @@ static inline int Y(rescale) (Tb * restrict lam1, Tb * restrict lam2, if (vanyTrue(mask)) { did_scale=1; - Tv fact = vblend(mask,vload(fsmall),vone); + Tv fact = vblend(mask,vload(sharp_fsmall),vone); vmuleq(lam1->v[i],fact); vmuleq(lam2->v[i],fact); vaddeq(scale->v[i],vblend(mask,vone,vzero)); } @@ -123,7 +123,7 @@ static inline int Y(rescale) (Tb * restrict lam1, Tb * restrict lam2, static inline void Y(normalize) (Tb * restrict val, Tb * restrict scale) { - const Tv vfsmall=vload(fsmall), vfbig=vload(fbig); + const Tv vfsmall=vload(sharp_fsmall), vfbig=vload(sharp_fbig); for (int i=0;iv[i]),vone); @@ -168,13 +168,14 @@ static inline void Y(getCorfac)(Tb scale, Tb * restrict corfac, Y(Tbu) sc, corf; sc.b=scale; for (int i=0; im; Tb lam_1=Y(Tbconst)(0.), lam_2, scale; @@ -182,7 +183,7 @@ static void Y(iter_to_ieee) (const Tb sth, Tb cth, int *l_, Y(Tbmuleq1) (&lam_2,(gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m]); Y(normalize)(&lam_2,&scale); - int below_limit = Y(TballLt)(scale,limscale); + int below_limit = Y(TballLt)(scale,sharp_limscale); while (below_limit) { if (l+2>gen->lmax) {*l_=gen->lmax+1;return;} @@ -193,14 +194,15 @@ static void Y(iter_to_ieee) (const Tb sth, Tb cth, int *l_, for (int i=0; ifx; + const sharp_ylmgen_dbl3 * restrict fx = gen->fx; Tb cth2, sth2; for (int i=0; imhi; - int below_limit = Y(TballLt)(scalep,limscale) && Y(TballLt)(scalem,limscale); + int below_limit = Y(TballLt)(scalep,sharp_limscale) + && Y(TballLt)(scalem,sharp_limscale); while (below_limit) { if (l+2>gen->lmax) {*l_=gen->lmax+1;return;} Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l+1]); Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l+2]); if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem)) - below_limit = Y(TballLt)(scalep,limscale) && Y(TballLt)(scalem,limscale); + below_limit = Y(TballLt)(scalep,sharp_limscale) + && Y(TballLt)(scalem,sharp_limscale); l+=2; } diff --git a/libsharp/sharp_core_inc2.c b/libsharp/sharp_core_inc2.c index dd3fb5d..aa40bcc 100644 --- a/libsharp/sharp_core_inc2.c +++ b/libsharp/sharp_core_inc2.c @@ -40,7 +40,7 @@ typedef union static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1, Z(Tbrij) * restrict p2, Tb lam_1, Tb lam_2, - const ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm, + const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm, int l, int lmax) { #if (njobs>1) @@ -126,7 +126,7 @@ static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1, static void Z(map2alm_kernel) (const Tb cth, const Z(Tbrij) * restrict p1, const Z(Tbrij) * restrict p2, Tb lam_1, Tb lam_2, - const ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax) + const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax) { while (llmax; Tb lam_1,lam_2,scale; @@ -180,9 +181,9 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen, Tb corfac; Y(getCorfac)(scale,&corfac,gen->cf); - const ylmgen_dbl2 * restrict rf = gen->rf; + const sharp_ylmgen_dbl2 * restrict rf = gen->rf; const dcmplx * restrict alm=job->almtmp; - int full_ieee = Y(TballGt)(scale,minscale); + int full_ieee = Y(TballGt)(scale,sharp_minscale); while (!full_ieee) { for (int j=0; jcf); - full_ieee = Y(TballGt)(scale,minscale); + full_ieee = Y(TballGt)(scale,sharp_minscale); } } if (l>lmax) { *done=1; return; } @@ -226,7 +227,7 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen, } static void Z(calc_map2alm) (const Tb cth, const Tb sth, - const Ylmgen_C *gen, sharp_job *job, const Z(Tbrij) * restrict p1, + const sharp_Ylmgen_C *gen, sharp_job *job, const Z(Tbrij) * restrict p1, const Z(Tbrij) * restrict p2, int *done) { int lmax=gen->lmax; @@ -237,11 +238,11 @@ static void Z(calc_map2alm) (const Tb cth, const Tb sth, if (l>lmax) { *done=1; return; } job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec; - const ylmgen_dbl2 * restrict rf = gen->rf; + const sharp_ylmgen_dbl2 * restrict rf = gen->rf; Tb corfac; Y(getCorfac)(scale,&corfac,gen->cf); dcmplx * restrict alm=job->almtmp; - int full_ieee = Y(TballGt)(scale,minscale); + int full_ieee = Y(TballGt)(scale,sharp_minscale); while (!full_ieee) { for (int j=0; jcf); - full_ieee = Y(TballGt)(scale,minscale); + full_ieee = Y(TballGt)(scale,sharp_minscale); } } @@ -371,7 +372,8 @@ static inline void Z(saddstep2) (const Z(Tbquj) * restrict px, static void Z(alm2map_spin_kernel) (Tb cth, Z(Tbquj) * restrict p1, Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m, - const ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, int lmax) + const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, + int lmax) { while (llmax; @@ -449,12 +451,13 @@ static void Z(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen, { *done=1; return; } job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec; - const ylmgen_dbl3 * restrict fx = gen->fx; + const sharp_ylmgen_dbl3 * restrict fx = gen->fx; Tb corfacp,corfacm; Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); const dcmplx * restrict alm=job->almtmp; - int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + int full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); while (!full_ieee) { Z(saddstep)(p1, p2, @@ -469,7 +472,8 @@ static void Z(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen, { Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); - full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); } } @@ -482,7 +486,7 @@ static void Z(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen, rec1p, rec1m, rec2p, rec2m, fx, alm, l, lmax); } -static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen, +static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen, sharp_job *job, const Z(Tbquj) * restrict p1, const Z(Tbquj) * restrict p2, int *done) { @@ -493,12 +497,13 @@ static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen, if (l>lmax) { *done=1; return; } job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec; - const ylmgen_dbl3 * restrict fx = gen->fx; + const sharp_ylmgen_dbl3 * restrict fx = gen->fx; Tb corfacp,corfacm; Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); dcmplx * restrict alm=job->almtmp; - int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + int full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); while (!full_ieee) { Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm); @@ -513,7 +518,8 @@ static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen, { Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); - full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); } } @@ -522,8 +528,9 @@ static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen, Z(map2alm_spin_kernel) (cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax); } -static inline void Z(saddstep_d) (Z(Tbquj) * restrict px, Z(Tbquj) * restrict py, - const Tb rxp, const Tb rxm, const dcmplx * restrict alm) +static inline void Z(saddstep_d) (Z(Tbquj) * restrict px, + Z(Tbquj) * restrict py, const Tb rxp, const Tb rxm, + const dcmplx * restrict alm) { for (int j=0; jlmax; @@ -587,12 +594,13 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen, { *done=1; return; } job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec; - const ylmgen_dbl3 * restrict fx = gen->fx; + const sharp_ylmgen_dbl3 * restrict fx = gen->fx; Tb corfacp,corfacm; Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); const dcmplx * restrict alm=job->almtmp; - int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + int full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); while (!full_ieee) { Z(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm), @@ -607,7 +615,8 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen, { Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); - full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); } } @@ -623,12 +632,12 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen, #define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0) static void Z(inner_loop) (sharp_job *job, const int *ispair, - const double *cth_, const double *sth_, int llim, int ulim, Ylmgen_C *gen, - int mi, const int *idx) + const double *cth_, const double *sth_, int llim, int ulim, + sharp_Ylmgen_C *gen, int mi, const int *idx) { const int nval=nvec*VLEN; const int m = job->ainfo->mval[mi]; - Ylmgen_prepare (gen, m); + sharp_Ylmgen_prepare (gen, m); switch (job->type) { diff --git a/libsharp/sharp_core_inc3.c b/libsharp/sharp_core_inc3.c index 950c84c..816f062 100644 --- a/libsharp/sharp_core_inc3.c +++ b/libsharp/sharp_core_inc3.c @@ -31,7 +31,7 @@ static void Y(alm2map_kernel) (const Tb cth, Y(Tbri) * restrict p1, Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2, - const ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm, + const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm, int l, int lmax, int njobs) { while (llmax; Tb lam_1,lam_2,scale; @@ -171,9 +171,9 @@ static void Y(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen, Tb corfac; Y(getCorfac)(scale,&corfac,gen->cf); - const ylmgen_dbl2 * restrict rf = gen->rf; + const sharp_ylmgen_dbl2 * restrict rf = gen->rf; const dcmplx * restrict alm=job->almtmp; - int full_ieee = Y(TballGt)(scale,minscale); + int full_ieee = Y(TballGt)(scale,sharp_minscale); while (!full_ieee) { for (int j=0; jcf); - full_ieee = Y(TballGt)(scale,minscale); + full_ieee = Y(TballGt)(scale,sharp_minscale); } } if (l>lmax) { *done=1; return; } @@ -217,7 +217,7 @@ static void Y(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen, } static void Y(calc_map2alm) (const Tb cth, const Tb sth, - const Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1, + const sharp_Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1, const Y(Tbri) * restrict p2, int njobs, int *done) { int lmax=gen->lmax; @@ -228,11 +228,11 @@ static void Y(calc_map2alm) (const Tb cth, const Tb sth, if (l>lmax) { *done=1; return; } job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec; - const ylmgen_dbl2 * restrict rf = gen->rf; + const sharp_ylmgen_dbl2 * restrict rf = gen->rf; Tb corfac; Y(getCorfac)(scale,&corfac,gen->cf); dcmplx * restrict alm=job->almtmp; - int full_ieee = Y(TballGt)(scale,minscale); + int full_ieee = Y(TballGt)(scale,sharp_minscale); while (!full_ieee) { for (int j=0; jcf); - full_ieee = Y(TballGt)(scale,minscale); + full_ieee = Y(TballGt)(scale,sharp_minscale); } } @@ -362,7 +362,7 @@ static inline void Y(saddstep2) (const Y(Tbqu) * restrict px, static void Y(alm2map_spin_kernel) (Tb cth, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m, - const ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, + const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, int lmax, int njobs) { while (lopcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec; - const ylmgen_dbl3 * restrict fx = gen->fx; + const sharp_ylmgen_dbl3 * restrict fx = gen->fx; Tb corfacp,corfacm; Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); const dcmplx * restrict alm=job->almtmp; - int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + int full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); while (!full_ieee) { Y(saddstep)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm), @@ -458,7 +459,8 @@ static void Y(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen, { Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); - full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); } } @@ -471,7 +473,7 @@ static void Y(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen, lmax, njobs); } -static void Y(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen, +static void Y(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen, sharp_job *job, const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2, int njobs, int *done) { @@ -482,12 +484,13 @@ static void Y(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen, if (l>lmax) { *done=1; return; } job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec; - const ylmgen_dbl3 * restrict fx = gen->fx; + const sharp_ylmgen_dbl3 * restrict fx = gen->fx; Tb corfacp,corfacm; Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); dcmplx * restrict alm=job->almtmp; - int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + int full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); while (!full_ieee) { Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm); @@ -502,7 +505,8 @@ static void Y(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen, { Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); - full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); } } @@ -534,7 +538,7 @@ static inline void Y(saddstep_d) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py, static void Y(alm2map_deriv1_kernel) (Tb cth, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m, - const ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, + const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, int lmax, int njobs) { while (lopcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec; - const ylmgen_dbl3 * restrict fx = gen->fx; + const sharp_ylmgen_dbl3 * restrict fx = gen->fx; Tb corfacp,corfacm; Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); const dcmplx * restrict alm=job->almtmp; - int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + int full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); while (!full_ieee) { Y(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm), @@ -597,7 +602,8 @@ static void Y(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen, { Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf); - full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale); + full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); } } @@ -614,12 +620,12 @@ static void Y(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen, #define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0) static void Y(inner_loop) (sharp_job *job, const int *ispair, - const double *cth_, const double *sth_, int llim, int ulim, Ylmgen_C *gen, - int mi, const int *idx, int njobs) + const double *cth_, const double *sth_, int llim, int ulim, + sharp_Ylmgen_C *gen, int mi, const int *idx, int njobs) { const int nval=nvec*VLEN; const int m = job->ainfo->mval[mi]; - Ylmgen_prepare (gen, m); + sharp_Ylmgen_prepare (gen, m); switch (job->type) { diff --git a/libsharp/sharp_oracle.inc b/libsharp/sharp_oracle.inc new file mode 100644 index 0000000..40c9ed0 --- /dev/null +++ b/libsharp/sharp_oracle.inc @@ -0,0 +1,9 @@ +static const int maxtr = 6; +static const int nv_opt[6][2][3] = { +{{4,2,-1},{2,1,1}}, +{{5,2,-1},{2,1,1}}, +{{5,2,-1},{5,2,2}}, +{{5,2,-1},{5,2,2}}, +{{5,2,-1},{5,2,2}}, +{{5,2,-1},{5,2,2}} +}; diff --git a/libsharp/sharp_ylmgen_c.c b/libsharp/sharp_ylmgen_c.c index 48a1f7b..7a7ec8a 100644 --- a/libsharp/sharp_ylmgen_c.c +++ b/libsharp/sharp_ylmgen_c.c @@ -34,7 +34,7 @@ #include "sharp_ylmgen_c.h" #include "c_utils.h" -void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin) +void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin) { const double inv_sqrt4pi = 0.2820947917738781434740397257803862929220; @@ -42,18 +42,19 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin) gen->mmax = m_max; UTIL_ASSERT(spin>=0,"incorrect spin"); gen->s = spin; - UTIL_ASSERT((minscale<=0)&&(maxscale>0),"bad value for min/maxscale"); - gen->cf=RALLOC(double,maxscale-minscale+1); - gen->cf[-minscale]=1.; - for (int m=-minscale-1; m>=0; --m) - gen->cf[m]=gen->cf[m+1]*fsmall; - for (int m=-minscale+1; m<(maxscale-minscale+1); ++m) - gen->cf[m]=gen->cf[m-1]*fbig; + UTIL_ASSERT((sharp_minscale<=0)&&(sharp_maxscale>0), + "bad value for min/maxscale"); + gen->cf=RALLOC(double,sharp_maxscale-sharp_minscale+1); + gen->cf[-sharp_minscale]=1.; + for (int m=-sharp_minscale-1; m>=0; --m) + gen->cf[m]=gen->cf[m+1]*sharp_fsmall; + for (int m=-sharp_minscale+1; m<(sharp_maxscale-sharp_minscale+1); ++m) + gen->cf[m]=gen->cf[m-1]*sharp_fbig; gen->m = -1; if (spin==0) { - gen->rf = RALLOC(ylmgen_dbl2,gen->lmax+1); + gen->rf = RALLOC(sharp_ylmgen_dbl2,gen->lmax+1); gen->mfac = RALLOC(double,gen->mmax+1); gen->mfac[0] = inv_sqrt4pi; for (int m=1; m<=gen->mmax; ++m) @@ -69,7 +70,7 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin) else { gen->m=gen->mlo=gen->mhi=-1234567890; - ALLOC(gen->fx,ylmgen_dbl3,gen->lmax+2); + ALLOC(gen->fx,sharp_ylmgen_dbl3,gen->lmax+2); for (int m=0; mlmax+2; ++m) gen->fx[m].f[0]=gen->fx[m].f[1]=gen->fx[m].f[2]=0.; ALLOC(gen->inv,double,gen->lmax+1); @@ -91,7 +92,7 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin) { fac[m]=fac[m-1]*sqrt(m); facscale[m]=facscale[m-1]; - if (fac[m]>1.) { fac[m]*=fsmall; ++facscale[m]; } + if (fac[m]>1.) { fac[m]*=sharp_fsmall; ++facscale[m]; } } for (int m=0; m<=gen->mmax; ++m) { @@ -105,7 +106,7 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin) } } -void Ylmgen_destroy (Ylmgen_C *gen) +void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen) { DEALLOC(gen->cf); if (gen->s==0) @@ -126,7 +127,7 @@ void Ylmgen_destroy (Ylmgen_C *gen) } } -void Ylmgen_prepare (Ylmgen_C *gen, int m) +void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m) { if (m==gen->m) return; UTIL_ASSERT(m>=0,"incorrect m"); @@ -181,7 +182,7 @@ void Ylmgen_prepare (Ylmgen_C *gen, int m) } } -double *Ylmgen_get_norm (int lmax, int spin) +double *sharp_Ylmgen_get_norm (int lmax, int spin) { const double pi = 3.141592653589793238462643383279502884197; double *res=RALLOC(double,lmax+1); diff --git a/libsharp/sharp_ylmgen_c.h b/libsharp/sharp_ylmgen_c.h index cbfd42c..18d5a9d 100644 --- a/libsharp/sharp_ylmgen_c.h +++ b/libsharp/sharp_ylmgen_c.h @@ -36,11 +36,11 @@ extern "C" { #endif -enum { minscale=-8, limscale=-3, maxscale=5 }; -static const double fbig=0x1p+90,fsmall=0x1p-90; +enum { sharp_minscale=-8, sharp_limscale=-3, sharp_maxscale=5 }; +static const double sharp_fbig=0x1p+90,sharp_fsmall=0x1p-90; -typedef struct { double f[2]; } ylmgen_dbl2; -typedef struct { double f[3]; } ylmgen_dbl3; +typedef struct { double f[2]; } sharp_ylmgen_dbl2; +typedef struct { double f[3]; } sharp_ylmgen_dbl3; typedef struct { @@ -53,13 +53,13 @@ typedef struct /* used if s==0 */ double *mfac; - ylmgen_dbl2 *rf; + sharp_ylmgen_dbl2 *rf; /* used if s!=0 */ int sinPow, cosPow, preMinus_p, preMinus_m; double *prefac; int *fscale; - ylmgen_dbl3 *fx; + sharp_ylmgen_dbl3 *fx; /* internal usage only */ /* used if s==0 */ @@ -68,22 +68,22 @@ typedef struct /* used if s!=0 */ double *flm1, *flm2, *inv; int mlo, mhi; - } Ylmgen_C; + } sharp_Ylmgen_C; /*! Creates a generator which will calculate helper data for Y_lm calculation up to \a l=l_max and \a m=m_max. */ -void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin); +void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin); /*! Deallocates a generator previously initialised by Ylmgen_init(). */ -void Ylmgen_destroy (Ylmgen_C *gen); +void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen); /*! Prepares the object for the calculation at \a m. */ -void Ylmgen_prepare (Ylmgen_C *gen, int m); +void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m); /*! Returns a pointer to an array with \a lmax+1 entries containing normalisation factors that must be applied to Y_lm values computed for \a spin. The array must be deallocated (using free()) by the user. */ -double *Ylmgen_get_norm (int lmax, int spin); +double *sharp_Ylmgen_get_norm (int lmax, int spin); #ifdef __cplusplus }