From 82dc2a541fd3ada683e283d0e5be368b6b582872 Mon Sep 17 00:00:00 2001
From: Martin Reinecke <martin@mpa-garching.mpg.de>
Date: Thu, 5 Jul 2012 16:14:28 +0200
Subject: [PATCH] sharpification

---
 Makefile                   |  2 +-
 libsharp/libsharp.dox      |  6 ++--
 libsharp/oracle.inc        |  9 -----
 libsharp/planck.make       |  2 +-
 libsharp/sharp.c           | 10 +++---
 libsharp/sharp_bench.c     |  2 +-
 libsharp/sharp_core.c      |  3 +-
 libsharp/sharp_core.h      |  3 +-
 libsharp/sharp_core_inc.c  | 34 ++++++++++--------
 libsharp/sharp_core_inc2.c | 71 +++++++++++++++++++++-----------------
 libsharp/sharp_core_inc3.c | 66 +++++++++++++++++++----------------
 libsharp/sharp_oracle.inc  |  9 +++++
 libsharp/sharp_ylmgen_c.c  | 29 ++++++++--------
 libsharp/sharp_ylmgen_c.h  | 22 ++++++------
 14 files changed, 145 insertions(+), 123 deletions(-)
 delete mode 100644 libsharp/oracle.inc
 create mode 100644 libsharp/sharp_oracle.inc
diff --git a/Makefile b/Makefile
index e6c51bc..3d6f20e 100644
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,7 @@ compile_all: $(all_cbin) hdrcopy
 
 autotune: sharp_bench
 	$(BINDIR)/sharp_bench
-	mv oracle.inc $(SRCROOT)/libsharp
+	mv sharp_oracle.inc $(SRCROOT)/libsharp
 	$(MAKE)
 
 hdrclean:
diff --git a/libsharp/libsharp.dox b/libsharp/libsharp.dox
index 2a5067c..810abd0 100644
--- a/libsharp/libsharp.dox
+++ b/libsharp/libsharp.dox
@@ -61,11 +61,11 @@
   <ul>
   <li>scalar a_lm to map</li>
   <li>scalar map to a_lm</li>
-<!--   <li>polarised a_lm to map</li>
-  <li>polarised map to a_lm</li> -->
+<!--  <li>polarised a_lm to map</li>
+  <li>polarised map to a_lm</li> !-->
   <li>spin a_lm to map</li>
   <li>spin map to a_lm</li>
-<!--  <li>scalar a_lm to maps of first derivatives</li> -->
+  <li>scalar a_lm to maps of first derivatives</li>
   </ul>
 
   SHARP supports shared-memory parallelisation via OpenMP; this feature will
diff --git a/libsharp/oracle.inc b/libsharp/oracle.inc
deleted file mode 100644
index 74638c6..0000000
--- a/libsharp/oracle.inc
+++ /dev/null
@@ -1,9 +0,0 @@
-static const int maxtr = 6;
-static const int nv_opt[6][2][3] = {
-{{4,2,-1},{2,1,1}},
-{{4,2,-1},{2,1,1}},
-{{5,2,-1},{5,2,1}},
-{{5,2,-1},{5,2,1}},
-{{5,2,-1},{5,2,1}},
-{{5,2,-1},{5,2,1}}
-};
diff --git a/libsharp/planck.make b/libsharp/planck.make
index c8621cd..6c72945 100644
--- a/libsharp/planck.make
+++ b/libsharp/planck.make
@@ -15,7 +15,7 @@ ALLOBJ:=$(ALLOBJ:%=$(OD)/%)
 
 ODEP:=$(HDR_$(PKG)) $(HDR_libfftpack) $(HDR_c_utils)
 $(OD)/sharp_core.o: $(SD)/sharp_inchelper1.inc.c $(SD)/sharp_core_inc.c $(SD)/sharp_core_inc2.c $(SD)/sharp_core_inc3.c
-$(OD)/sharp.o: $(SD)/sharp_mpi.c $(SD)/oracle.inc
+$(OD)/sharp.o: $(SD)/sharp_mpi.c $(SD)/sharp_oracle.inc
 BDEP:=$(LIB_$(PKG)) $(LIB_libfftpack) $(LIB_c_utils)
 
 $(LIB_$(PKG)): $(LIBOBJ)
diff --git a/libsharp/sharp.c b/libsharp/sharp.c
index 4cc659c..60c416e 100644
--- a/libsharp/sharp.c
+++ b/libsharp/sharp.c
@@ -464,7 +464,7 @@ void sharp_execute_job (sharp_job *job)
   int lmax = job->ainfo->lmax,
       mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm);
 
-  job->norm_l = Ylmgen_get_norm (lmax, job->spin);
+  job->norm_l = sharp_Ylmgen_get_norm (lmax, job->spin);
 
 /* clear output arrays if requested */
   init_output (job);
@@ -501,8 +501,8 @@ void sharp_execute_job (sharp_job *job)
 {
     sharp_job ljob = *job;
     ljob.opcnt=0;
-    Ylmgen_C generator;
-    Ylmgen_init (&generator,lmax,mmax,ljob.spin);
+    sharp_Ylmgen_C generator;
+    sharp_Ylmgen_init (&generator,lmax,mmax,ljob.spin);
     alloc_almtmp(&ljob,lmax);
 
 #pragma omp for schedule(dynamic,1)
@@ -517,7 +517,7 @@ void sharp_execute_job (sharp_job *job)
       almtmp2alm (&ljob, lmax, mi);
       }
 
-    Ylmgen_destroy(&generator);
+    sharp_Ylmgen_destroy(&generator);
     dealloc_almtmp(&ljob);
 
 #pragma omp critical
@@ -589,7 +589,7 @@ int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans)
   if (type==ALM2MAP_DERIV1) spin=1;
   UTIL_ASSERT((ntrans>0),"bad number of simultaneous transforms");
   UTIL_ASSERT((spin>=0)&&(spin<=30), "bad spin");
-#include "oracle.inc"
+#include "sharp_oracle.inc"
 
   return nv_opt[IMIN(ntrans,maxtr)-1][spin!=0][type];
   }
diff --git a/libsharp/sharp_bench.c b/libsharp/sharp_bench.c
index 09d44d6..74e5af6 100644
--- a/libsharp/sharp_bench.c
+++ b/libsharp/sharp_bench.c
@@ -98,7 +98,7 @@ int main(void)
   sharp_module_startup("sharp_bench",1,1,"",1);
 
   printf("Benchmarking SHTs.\n\n");
-  FILE *fp=fopen("oracle.inc","w");
+  FILE *fp=fopen("sharp_oracle.inc","w");
   UTIL_ASSERT(fp, "failed to open oracle file for writing");
   fprintf(fp,"static const int maxtr = 6;\n");
   fprintf(fp,"static const int nv_opt[6][2][3] = {\n");
diff --git a/libsharp/sharp_core.c b/libsharp/sharp_core.c
index ec02975..1d48a6f 100644
--- a/libsharp/sharp_core.c
+++ b/libsharp/sharp_core.c
@@ -73,7 +73,8 @@ typedef complex double dcmplx;
 #undef nvec
 
 void inner_loop (sharp_job *job, const int *ispair,const double *cth,
-  const double *sth, int llim, int ulim, Ylmgen_C *gen, int mi, const int *idx)
+  const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
+  const int *idx)
   {
   int njobs=job->ntrans;
   if (njobs<=MAXJOB_SPECIAL)
diff --git a/libsharp/sharp_core.h b/libsharp/sharp_core.h
index 38ec41d..6d68210 100644
--- a/libsharp/sharp_core.h
+++ b/libsharp/sharp_core.h
@@ -40,7 +40,8 @@ extern "C" {
 #endif
 
 void inner_loop (sharp_job *job, const int *ispair,const double *cth,
-  const double *sth, int llim, int ulim, Ylmgen_C *gen, int mi, const int *idx);
+  const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
+  const int *idx);
 
 #ifdef __cplusplus
 }
diff --git a/libsharp/sharp_core_inc.c b/libsharp/sharp_core_inc.c
index b892f53..2c86463 100644
--- a/libsharp/sharp_core_inc.c
+++ b/libsharp/sharp_core_inc.c
@@ -83,8 +83,8 @@ static inline void Y(mypow) (Tb val, int npow, Tb * restrict resd,
         {
         vmuleq(res.v[i],val.v[i]);
         vaddeq(scale.v[i],scaleint.v[i]);
-        Tv mask=vlt(vabs(res.v[i]),vload(fsmall));
-        vmuleq(res.v[i],vblend(mask,vload(fbig),vone));
+        Tv mask=vlt(vabs(res.v[i]),vload(sharp_fsmall));
+        vmuleq(res.v[i],vblend(mask,vload(sharp_fbig),vone));
         vsubeq(scale.v[i],vblend(mask,vone,vzero));
         }
       }
@@ -92,8 +92,8 @@ static inline void Y(mypow) (Tb val, int npow, Tb * restrict resd,
       {
       vmuleq(val.v[i],val.v[i]);
       vaddeq(scaleint.v[i],scaleint.v[i]);
-      Tv mask = vlt(vabs(val.v[i]),vload(fsmall));
-      vmuleq(val.v[i],vblend(mask,vload(fbig),vone));
+      Tv mask = vlt(vabs(val.v[i]),vload(sharp_fsmall));
+      vmuleq(val.v[i],vblend(mask,vload(sharp_fbig),vone));
       vsubeq(scaleint.v[i],vblend(mask,vone,vzero));
       }
     }
@@ -113,7 +113,7 @@ static inline int Y(rescale) (Tb * restrict lam1, Tb * restrict lam2,
     if (vanyTrue(mask))
       {
       did_scale=1;
-      Tv fact = vblend(mask,vload(fsmall),vone);
+      Tv fact = vblend(mask,vload(sharp_fsmall),vone);
       vmuleq(lam1->v[i],fact); vmuleq(lam2->v[i],fact);
       vaddeq(scale->v[i],vblend(mask,vone,vzero));
       }
@@ -123,7 +123,7 @@ static inline int Y(rescale) (Tb * restrict lam1, Tb * restrict lam2,
 
 static inline void Y(normalize) (Tb * restrict val, Tb * restrict scale)
   {
-  const Tv vfsmall=vload(fsmall), vfbig=vload(fbig);
+  const Tv vfsmall=vload(sharp_fsmall), vfbig=vload(sharp_fbig);
   for (int i=0;i<nvec; ++i)
     {
     Tv mask = vgt(vabs(val->v[i]),vone);
@@ -168,13 +168,14 @@ static inline void Y(getCorfac)(Tb scale, Tb * restrict corfac,
   Y(Tbu) sc, corf;
   sc.b=scale;
   for (int i=0; i<VLEN*nvec; ++i)
-    corf.s[i] = (sc.s[i]<minscale) ? 0. : cf[(int)(sc.s[i])-minscale];
+    corf.s[i] = (sc.s[i]<sharp_minscale) ?
+      0. : cf[(int)(sc.s[i])-sharp_minscale];
   *corfac=corf.b;
   }
 
 static void Y(iter_to_ieee) (const Tb sth, Tb cth, int *l_,
   Tb * restrict lam_1_, Tb * restrict lam_2_, Tb * restrict scale_,
-  const Ylmgen_C * restrict gen)
+  const sharp_Ylmgen_C * restrict gen)
   {
   int l=gen->m;
   Tb lam_1=Y(Tbconst)(0.), lam_2, scale;
@@ -182,7 +183,7 @@ static void Y(iter_to_ieee) (const Tb sth, Tb cth, int *l_,
   Y(Tbmuleq1) (&lam_2,(gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m]);
   Y(normalize)(&lam_2,&scale);
 
-  int below_limit = Y(TballLt)(scale,limscale);
+  int below_limit = Y(TballLt)(scale,sharp_limscale);
   while (below_limit)
     {
     if (l+2>gen->lmax) {*l_=gen->lmax+1;return;}
@@ -193,14 +194,15 @@ static void Y(iter_to_ieee) (const Tb sth, Tb cth, int *l_,
     for (int i=0; i<nvec; ++i)
       lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
     if (Y(rescale)(&lam_1,&lam_2,&scale))
-      below_limit = Y(TballLt)(scale,limscale);
+      below_limit = Y(TballLt)(scale,sharp_limscale);
     l+=2;
     }
   *l_=l; *lam_1_=lam_1; *lam_2_=lam_2; *scale_=scale;
   }
 
 static inline void Y(rec_step) (Tb * restrict rxp, Tb * restrict rxm,
-  Tb * restrict ryp, Tb * restrict rym, const Tb cth, const ylmgen_dbl3 fx)
+  Tb * restrict ryp, Tb * restrict rym, const Tb cth,
+  const sharp_ylmgen_dbl3 fx)
   {
   Tv fx0=vload(fx.f[0]),fx1=vload(fx.f[1]),fx2=vload(fx.f[2]);
   for (int i=0; i<nvec; ++i)
@@ -214,9 +216,9 @@ static inline void Y(rec_step) (Tb * restrict rxp, Tb * restrict rxm,
 
 static void Y(iter_to_ieee_spin) (const Tb cth, int *l_,
   Tb * rec1p_, Tb * rec1m_, Tb * rec2p_, Tb * rec2m_,
-  Tb * scalep_, Tb * scalem_, const Ylmgen_C * restrict gen)
+  Tb * scalep_, Tb * scalem_, const sharp_Ylmgen_C * restrict gen)
   {
-  const ylmgen_dbl3 * restrict fx = gen->fx;
+  const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
   Tb cth2, sth2;
   for (int i=0; i<nvec; ++i)
     {
@@ -251,14 +253,16 @@ static void Y(iter_to_ieee_spin) (const Tb cth, int *l_,
 
   int l=gen->mhi;
 
-  int below_limit = Y(TballLt)(scalep,limscale) && Y(TballLt)(scalem,limscale);
+  int below_limit = Y(TballLt)(scalep,sharp_limscale)
+                 && Y(TballLt)(scalem,sharp_limscale);
   while (below_limit)
     {
     if (l+2>gen->lmax) {*l_=gen->lmax+1;return;}
     Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l+1]);
     Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l+2]);
     if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
-      below_limit = Y(TballLt)(scalep,limscale) && Y(TballLt)(scalem,limscale);
+      below_limit = Y(TballLt)(scalep,sharp_limscale)
+                 && Y(TballLt)(scalem,sharp_limscale);
     l+=2;
     }
 
diff --git a/libsharp/sharp_core_inc2.c b/libsharp/sharp_core_inc2.c
index dd3fb5d..aa40bcc 100644
--- a/libsharp/sharp_core_inc2.c
+++ b/libsharp/sharp_core_inc2.c
@@ -40,7 +40,7 @@ typedef union
 
 static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1,
   Z(Tbrij) * restrict p2, Tb lam_1, Tb lam_2,
-  const ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
+  const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
   int l, int lmax)
   {
 #if (njobs>1)
@@ -126,7 +126,7 @@ static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1,
 
 static void Z(map2alm_kernel) (const Tb cth, const Z(Tbrij) * restrict p1,
   const Z(Tbrij) * restrict p2, Tb lam_1, Tb lam_2,
-  const ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax)
+  const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax)
   {
   while (l<lmax)
     {
@@ -168,8 +168,9 @@ static void Z(map2alm_kernel) (const Tb cth, const Z(Tbrij) * restrict p1,
     }
   }
 
-static void Z(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
-  sharp_job *job, Z(Tbrij) * restrict p1, Z(Tbrij) * restrict p2, int *done)
+static void Z(calc_alm2map) (const Tb cth, const Tb sth,
+  const sharp_Ylmgen_C *gen, sharp_job *job, Z(Tbrij) * restrict p1,
+  Z(Tbrij) * restrict p2, int *done)
   {
   int l,lmax=gen->lmax;
   Tb lam_1,lam_2,scale;
@@ -180,9 +181,9 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
 
   Tb corfac;
   Y(getCorfac)(scale,&corfac,gen->cf);
-  const ylmgen_dbl2 * restrict rf = gen->rf;
+  const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
   const dcmplx * restrict alm=job->almtmp;
-  int full_ieee = Y(TballGt)(scale,minscale);
+  int full_ieee = Y(TballGt)(scale,sharp_minscale);
   while (!full_ieee)
     {
     for (int j=0; j<njobs; ++j)
@@ -216,7 +217,7 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
     if (Y(rescale)(&lam_1,&lam_2,&scale))
       {
       Y(getCorfac)(scale,&corfac,gen->cf);
-      full_ieee = Y(TballGt)(scale,minscale);
+      full_ieee = Y(TballGt)(scale,sharp_minscale);
       }
     }
   if (l>lmax) { *done=1; return; }
@@ -226,7 +227,7 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
   }
 
 static void Z(calc_map2alm) (const Tb cth, const Tb sth,
-  const Ylmgen_C *gen, sharp_job *job, const Z(Tbrij) * restrict p1,
+  const sharp_Ylmgen_C *gen, sharp_job *job, const Z(Tbrij) * restrict p1,
   const Z(Tbrij) * restrict p2, int *done)
   {
   int lmax=gen->lmax;
@@ -237,11 +238,11 @@ static void Z(calc_map2alm) (const Tb cth, const Tb sth,
   if (l>lmax) { *done=1; return; }
   job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
 
-  const ylmgen_dbl2 * restrict rf = gen->rf;
+  const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
   Tb corfac;
   Y(getCorfac)(scale,&corfac,gen->cf);
   dcmplx * restrict alm=job->almtmp;
-  int full_ieee = Y(TballGt)(scale,minscale);
+  int full_ieee = Y(TballGt)(scale,sharp_minscale);
   while (!full_ieee)
     {
     for (int j=0; j<njobs; ++j)
@@ -277,7 +278,7 @@ static void Z(calc_map2alm) (const Tb cth, const Tb sth,
     if (Y(rescale)(&lam_1,&lam_2,&scale))
       {
       Y(getCorfac)(scale,&corfac,gen->cf);
-      full_ieee = Y(TballGt)(scale,minscale);
+      full_ieee = Y(TballGt)(scale,sharp_minscale);
       }
     }
 
@@ -371,7 +372,8 @@ static inline void Z(saddstep2) (const Z(Tbquj) * restrict px,
 
 static void Z(alm2map_spin_kernel) (Tb cth, Z(Tbquj) * restrict p1,
   Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
-  const ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, int lmax)
+  const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
+  int lmax)
   {
   while (l<lmax)
     {
@@ -408,7 +410,7 @@ static void Z(alm2map_spin_kernel) (Tb cth, Z(Tbquj) * restrict p1,
 
 static void Z(map2alm_spin_kernel) (Tb cth, const Z(Tbquj) * restrict p1,
   const Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
-  const ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax)
+  const sharp_ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax)
   {
   while (l<lmax)
     {
@@ -438,7 +440,7 @@ static void Z(map2alm_spin_kernel) (Tb cth, const Z(Tbquj) * restrict p1,
     Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l]);
   }
 
-static void Z(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
+static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
   sharp_job *job, Z(Tbquj) * restrict p1, Z(Tbquj) * restrict p2, int *done)
   {
   int l, lmax=gen->lmax;
@@ -449,12 +451,13 @@ static void Z(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
    { *done=1; return; }
   job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
 
-  const ylmgen_dbl3 * restrict fx = gen->fx;
+  const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
   Tb corfacp,corfacm;
   Y(getCorfac)(scalep,&corfacp,gen->cf);
   Y(getCorfac)(scalem,&corfacm,gen->cf);
   const dcmplx * restrict alm=job->almtmp;
-  int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+  int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
   while (!full_ieee)
     {
     Z(saddstep)(p1, p2,
@@ -469,7 +472,8 @@ static void Z(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
       {
       Y(getCorfac)(scalep,&corfacp,gen->cf);
       Y(getCorfac)(scalem,&corfacm,gen->cf);
-      full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+      full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
       }
     }
 
@@ -482,7 +486,7 @@ static void Z(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
     rec1p, rec1m, rec2p, rec2m, fx, alm, l, lmax);
   }
 
-static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
+static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
   sharp_job *job, const Z(Tbquj) * restrict p1, const Z(Tbquj) * restrict p2,
   int *done)
   {
@@ -493,12 +497,13 @@ static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
   if (l>lmax) { *done=1; return; }
   job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
 
-  const ylmgen_dbl3 * restrict fx = gen->fx;
+  const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
   Tb corfacp,corfacm;
   Y(getCorfac)(scalep,&corfacp,gen->cf);
   Y(getCorfac)(scalem,&corfacm,gen->cf);
   dcmplx * restrict alm=job->almtmp;
-  int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+  int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
   while (!full_ieee)
     {
     Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
@@ -513,7 +518,8 @@ static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
       {
       Y(getCorfac)(scalep,&corfacp,gen->cf);
       Y(getCorfac)(scalem,&corfacm,gen->cf);
-      full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+      full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
       }
     }
 
@@ -522,8 +528,9 @@ static void Z(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
   Z(map2alm_spin_kernel) (cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax);
   }
 
-static inline void Z(saddstep_d) (Z(Tbquj) * restrict px, Z(Tbquj) * restrict py,
-  const Tb rxp, const Tb rxm, const dcmplx * restrict alm)
+static inline void Z(saddstep_d) (Z(Tbquj) * restrict px,
+  Z(Tbquj) * restrict py, const Tb rxp, const Tb rxm,
+  const dcmplx * restrict alm)
   {
   for (int j=0; j<njobs; ++j)
     {
@@ -545,7 +552,7 @@ static inline void Z(saddstep_d) (Z(Tbquj) * restrict px, Z(Tbquj) * restrict py
 
 static void Z(alm2map_deriv1_kernel) (Tb cth, Z(Tbquj) * restrict p1,
   Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
-  const ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
+  const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
   int lmax)
   {
   while (l<lmax)
@@ -576,7 +583,7 @@ static void Z(alm2map_deriv1_kernel) (Tb cth, Z(Tbquj) * restrict p1,
     Z(saddstep_d)(p1, p2, rec2p, rec2m, &alm[njobs*l]);
   }
 
-static void Z(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
+static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
   sharp_job *job, Z(Tbquj) * restrict p1, Z(Tbquj) * restrict p2, int *done)
   {
   int l, lmax=gen->lmax;
@@ -587,12 +594,13 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
    { *done=1; return; }
   job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
 
-  const ylmgen_dbl3 * restrict fx = gen->fx;
+  const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
   Tb corfacp,corfacm;
   Y(getCorfac)(scalep,&corfacp,gen->cf);
   Y(getCorfac)(scalem,&corfacm,gen->cf);
   const dcmplx * restrict alm=job->almtmp;
-  int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+  int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
   while (!full_ieee)
     {
     Z(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
@@ -607,7 +615,8 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
       {
       Y(getCorfac)(scalep,&corfacp,gen->cf);
       Y(getCorfac)(scalem,&corfacm,gen->cf);
-      full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+      full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
       }
     }
 
@@ -623,12 +632,12 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
 #define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
 
 static void Z(inner_loop) (sharp_job *job, const int *ispair,
-  const double *cth_, const double *sth_, int llim, int ulim, Ylmgen_C *gen,
-  int mi, const int *idx)
+  const double *cth_, const double *sth_, int llim, int ulim,
+  sharp_Ylmgen_C *gen, int mi, const int *idx)
   {
   const int nval=nvec*VLEN;
   const int m = job->ainfo->mval[mi];
-  Ylmgen_prepare (gen, m);
+  sharp_Ylmgen_prepare (gen, m);
 
   switch (job->type)
     {
diff --git a/libsharp/sharp_core_inc3.c b/libsharp/sharp_core_inc3.c
index 950c84c..816f062 100644
--- a/libsharp/sharp_core_inc3.c
+++ b/libsharp/sharp_core_inc3.c
@@ -31,7 +31,7 @@
 
 static void Y(alm2map_kernel) (const Tb cth, Y(Tbri) * restrict p1,
   Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
-  const ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
+  const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
   int l, int lmax, int njobs)
   {
   while (l<lmax-2)
@@ -115,7 +115,7 @@ static void Y(alm2map_kernel) (const Tb cth, Y(Tbri) * restrict p1,
 
 static void Y(map2alm_kernel) (const Tb cth, const Y(Tbri) * restrict p1,
   const Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
-  const ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax,
+  const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax,
   int njobs)
   {
   while (l<lmax)
@@ -158,9 +158,9 @@ static void Y(map2alm_kernel) (const Tb cth, const Y(Tbri) * restrict p1,
     }
   }
 
-static void Y(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
-  sharp_job *job, Y(Tbri) * restrict p1, Y(Tbri) * restrict p2, int njobs,
-  int *done)
+static void Y(calc_alm2map) (const Tb cth, const Tb sth,
+  const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbri) * restrict p1,
+  Y(Tbri) * restrict p2, int njobs, int *done)
   {
   int l,lmax=gen->lmax;
   Tb lam_1,lam_2,scale;
@@ -171,9 +171,9 @@ static void Y(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
 
   Tb corfac;
   Y(getCorfac)(scale,&corfac,gen->cf);
-  const ylmgen_dbl2 * restrict rf = gen->rf;
+  const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
   const dcmplx * restrict alm=job->almtmp;
-  int full_ieee = Y(TballGt)(scale,minscale);
+  int full_ieee = Y(TballGt)(scale,sharp_minscale);
   while (!full_ieee)
     {
     for (int j=0; j<njobs; ++j)
@@ -207,7 +207,7 @@ static void Y(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
     if (Y(rescale)(&lam_1,&lam_2,&scale))
       {
       Y(getCorfac)(scale,&corfac,gen->cf);
-      full_ieee = Y(TballGt)(scale,minscale);
+      full_ieee = Y(TballGt)(scale,sharp_minscale);
       }
     }
   if (l>lmax) { *done=1; return; }
@@ -217,7 +217,7 @@ static void Y(calc_alm2map) (const Tb cth, const Tb sth, const Ylmgen_C *gen,
   }
 
 static void Y(calc_map2alm) (const Tb cth, const Tb sth,
-  const Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1,
+  const sharp_Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1,
   const Y(Tbri) * restrict p2, int njobs, int *done)
   {
   int lmax=gen->lmax;
@@ -228,11 +228,11 @@ static void Y(calc_map2alm) (const Tb cth, const Tb sth,
   if (l>lmax) { *done=1; return; }
   job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
 
-  const ylmgen_dbl2 * restrict rf = gen->rf;
+  const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
   Tb corfac;
   Y(getCorfac)(scale,&corfac,gen->cf);
   dcmplx * restrict alm=job->almtmp;
-  int full_ieee = Y(TballGt)(scale,minscale);
+  int full_ieee = Y(TballGt)(scale,sharp_minscale);
   while (!full_ieee)
     {
     for (int j=0; j<njobs; ++j)
@@ -268,7 +268,7 @@ static void Y(calc_map2alm) (const Tb cth, const Tb sth,
     if (Y(rescale)(&lam_1,&lam_2,&scale))
       {
       Y(getCorfac)(scale,&corfac,gen->cf);
-      full_ieee = Y(TballGt)(scale,minscale);
+      full_ieee = Y(TballGt)(scale,sharp_minscale);
       }
     }
 
@@ -362,7 +362,7 @@ static inline void Y(saddstep2) (const Y(Tbqu) * restrict px,
 
 static void Y(alm2map_spin_kernel) (Tb cth, Y(Tbqu) * restrict p1,
   Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
-  const ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
+  const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
   int lmax, int njobs)
   {
   while (l<lmax)
@@ -395,7 +395,7 @@ static void Y(alm2map_spin_kernel) (Tb cth, Y(Tbqu) * restrict p1,
 
 static void Y(map2alm_spin_kernel) (Tb cth, const Y(Tbqu) * restrict p1,
   const Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
-  const ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax,
+  const sharp_ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax,
   int njobs)
   {
   while (l<lmax)
@@ -426,7 +426,7 @@ static void Y(map2alm_spin_kernel) (Tb cth, const Y(Tbqu) * restrict p1,
     Y(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l], njobs);
   }
 
-static void Y(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
+static void Y(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
   sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2, int njobs,
   int *done)
   {
@@ -438,12 +438,13 @@ static void Y(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
    { *done=1; return; }
   job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
 
-  const ylmgen_dbl3 * restrict fx = gen->fx;
+  const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
   Tb corfacp,corfacm;
   Y(getCorfac)(scalep,&corfacp,gen->cf);
   Y(getCorfac)(scalem,&corfacm,gen->cf);
   const dcmplx * restrict alm=job->almtmp;
-  int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+  int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
   while (!full_ieee)
     {
     Y(saddstep)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
@@ -458,7 +459,8 @@ static void Y(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
       {
       Y(getCorfac)(scalep,&corfacp,gen->cf);
       Y(getCorfac)(scalem,&corfacm,gen->cf);
-      full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+      full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
       }
     }
 
@@ -471,7 +473,7 @@ static void Y(calc_alm2map_spin) (const Tb cth, const Ylmgen_C *gen,
     lmax, njobs);
   }
 
-static void Y(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
+static void Y(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
   sharp_job *job, const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2,
   int njobs, int *done)
   {
@@ -482,12 +484,13 @@ static void Y(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
   if (l>lmax) { *done=1; return; }
   job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
 
-  const ylmgen_dbl3 * restrict fx = gen->fx;
+  const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
   Tb corfacp,corfacm;
   Y(getCorfac)(scalep,&corfacp,gen->cf);
   Y(getCorfac)(scalem,&corfacm,gen->cf);
   dcmplx * restrict alm=job->almtmp;
-  int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+  int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
   while (!full_ieee)
     {
     Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
@@ -502,7 +505,8 @@ static void Y(calc_map2alm_spin) (Tb cth, const Ylmgen_C * restrict gen,
       {
       Y(getCorfac)(scalep,&corfacp,gen->cf);
       Y(getCorfac)(scalem,&corfacm,gen->cf);
-      full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+      full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
       }
     }
 
@@ -534,7 +538,7 @@ static inline void Y(saddstep_d) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
 
 static void Y(alm2map_deriv1_kernel) (Tb cth, Y(Tbqu) * restrict p1,
   Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
-  const ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
+  const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
   int lmax, int njobs)
   {
   while (l<lmax)
@@ -565,7 +569,7 @@ static void Y(alm2map_deriv1_kernel) (Tb cth, Y(Tbqu) * restrict p1,
     Y(saddstep_d)(p1, p2, rec2p, rec2m, &alm[njobs*l], njobs);
   }
 
-static void Y(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
+static void Y(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
   sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2, int njobs,
   int *done)
   {
@@ -577,12 +581,13 @@ static void Y(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
    { *done=1; return; }
   job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
 
-  const ylmgen_dbl3 * restrict fx = gen->fx;
+  const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
   Tb corfacp,corfacm;
   Y(getCorfac)(scalep,&corfacp,gen->cf);
   Y(getCorfac)(scalem,&corfacm,gen->cf);
   const dcmplx * restrict alm=job->almtmp;
-  int full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+  int full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
   while (!full_ieee)
     {
     Y(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
@@ -597,7 +602,8 @@ static void Y(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
       {
       Y(getCorfac)(scalep,&corfacp,gen->cf);
       Y(getCorfac)(scalem,&corfacm,gen->cf);
-      full_ieee = Y(TballGt)(scalep,minscale) && Y(TballGt)(scalem,minscale);
+      full_ieee = Y(TballGt)(scalep,sharp_minscale)
+               && Y(TballGt)(scalem,sharp_minscale);
       }
     }
 
@@ -614,12 +620,12 @@ static void Y(calc_alm2map_deriv1) (const Tb cth, const Ylmgen_C *gen,
 #define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
 
 static void Y(inner_loop) (sharp_job *job, const int *ispair,
-  const double *cth_, const double *sth_, int llim, int ulim, Ylmgen_C *gen,
-  int mi, const int *idx, int njobs)
+  const double *cth_, const double *sth_, int llim, int ulim,
+  sharp_Ylmgen_C *gen, int mi, const int *idx, int njobs)
   {
   const int nval=nvec*VLEN;
   const int m = job->ainfo->mval[mi];
-  Ylmgen_prepare (gen, m);
+  sharp_Ylmgen_prepare (gen, m);
 
   switch (job->type)
     {
diff --git a/libsharp/sharp_oracle.inc b/libsharp/sharp_oracle.inc
new file mode 100644
index 0000000..40c9ed0
--- /dev/null
+++ b/libsharp/sharp_oracle.inc
@@ -0,0 +1,9 @@
+static const int maxtr = 6;
+static const int nv_opt[6][2][3] = {
+{{4,2,-1},{2,1,1}},
+{{5,2,-1},{2,1,1}},
+{{5,2,-1},{5,2,2}},
+{{5,2,-1},{5,2,2}},
+{{5,2,-1},{5,2,2}},
+{{5,2,-1},{5,2,2}}
+};
diff --git a/libsharp/sharp_ylmgen_c.c b/libsharp/sharp_ylmgen_c.c
index 48a1f7b..7a7ec8a 100644
--- a/libsharp/sharp_ylmgen_c.c
+++ b/libsharp/sharp_ylmgen_c.c
@@ -34,7 +34,7 @@
 #include "sharp_ylmgen_c.h"
 #include "c_utils.h"
 
-void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin)
+void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
   {
   const double inv_sqrt4pi = 0.2820947917738781434740397257803862929220;
 
@@ -42,18 +42,19 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin)
   gen->mmax = m_max;
   UTIL_ASSERT(spin>=0,"incorrect spin");
   gen->s = spin;
-  UTIL_ASSERT((minscale<=0)&&(maxscale>0),"bad value for min/maxscale");
-  gen->cf=RALLOC(double,maxscale-minscale+1);
-  gen->cf[-minscale]=1.;
-  for (int m=-minscale-1; m>=0; --m)
-    gen->cf[m]=gen->cf[m+1]*fsmall;
-  for (int m=-minscale+1; m<(maxscale-minscale+1); ++m)
-    gen->cf[m]=gen->cf[m-1]*fbig;
+  UTIL_ASSERT((sharp_minscale<=0)&&(sharp_maxscale>0),
+    "bad value for min/maxscale");
+  gen->cf=RALLOC(double,sharp_maxscale-sharp_minscale+1);
+  gen->cf[-sharp_minscale]=1.;
+  for (int m=-sharp_minscale-1; m>=0; --m)
+    gen->cf[m]=gen->cf[m+1]*sharp_fsmall;
+  for (int m=-sharp_minscale+1; m<(sharp_maxscale-sharp_minscale+1); ++m)
+    gen->cf[m]=gen->cf[m-1]*sharp_fbig;
 
   gen->m = -1;
   if (spin==0)
     {
-    gen->rf = RALLOC(ylmgen_dbl2,gen->lmax+1);
+    gen->rf = RALLOC(sharp_ylmgen_dbl2,gen->lmax+1);
     gen->mfac = RALLOC(double,gen->mmax+1);
     gen->mfac[0] = inv_sqrt4pi;
     for (int m=1; m<=gen->mmax; ++m)
@@ -69,7 +70,7 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin)
   else
     {
     gen->m=gen->mlo=gen->mhi=-1234567890;
-    ALLOC(gen->fx,ylmgen_dbl3,gen->lmax+2);
+    ALLOC(gen->fx,sharp_ylmgen_dbl3,gen->lmax+2);
     for (int m=0; m<gen->lmax+2; ++m)
       gen->fx[m].f[0]=gen->fx[m].f[1]=gen->fx[m].f[2]=0.;
     ALLOC(gen->inv,double,gen->lmax+1);
@@ -91,7 +92,7 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin)
       {
       fac[m]=fac[m-1]*sqrt(m);
       facscale[m]=facscale[m-1];
-      if (fac[m]>1.) { fac[m]*=fsmall; ++facscale[m]; }
+      if (fac[m]>1.) { fac[m]*=sharp_fsmall; ++facscale[m]; }
       }
     for (int m=0; m<=gen->mmax; ++m)
       {
@@ -105,7 +106,7 @@ void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin)
     }
   }
 
-void Ylmgen_destroy (Ylmgen_C *gen)
+void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen)
   {
   DEALLOC(gen->cf);
   if (gen->s==0)
@@ -126,7 +127,7 @@ void Ylmgen_destroy (Ylmgen_C *gen)
     }
   }
 
-void Ylmgen_prepare (Ylmgen_C *gen, int m)
+void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m)
   {
   if (m==gen->m) return;
   UTIL_ASSERT(m>=0,"incorrect m");
@@ -181,7 +182,7 @@ void Ylmgen_prepare (Ylmgen_C *gen, int m)
     }
   }
 
-double *Ylmgen_get_norm (int lmax, int spin)
+double *sharp_Ylmgen_get_norm (int lmax, int spin)
   {
   const double pi = 3.141592653589793238462643383279502884197;
   double *res=RALLOC(double,lmax+1);
diff --git a/libsharp/sharp_ylmgen_c.h b/libsharp/sharp_ylmgen_c.h
index cbfd42c..18d5a9d 100644
--- a/libsharp/sharp_ylmgen_c.h
+++ b/libsharp/sharp_ylmgen_c.h
@@ -36,11 +36,11 @@
 extern "C" {
 #endif
 
-enum { minscale=-8, limscale=-3, maxscale=5 };
-static const double fbig=0x1p+90,fsmall=0x1p-90;
+enum { sharp_minscale=-8, sharp_limscale=-3, sharp_maxscale=5 };
+static const double sharp_fbig=0x1p+90,sharp_fsmall=0x1p-90;
 
-typedef struct { double f[2]; } ylmgen_dbl2;
-typedef struct { double f[3]; } ylmgen_dbl3;
+typedef struct { double f[2]; } sharp_ylmgen_dbl2;
+typedef struct { double f[3]; } sharp_ylmgen_dbl3;
 
 typedef struct
   {
@@ -53,13 +53,13 @@ typedef struct
 
 /* used if s==0 */
   double *mfac;
-  ylmgen_dbl2 *rf;
+  sharp_ylmgen_dbl2 *rf;
 
 /* used if s!=0 */
   int sinPow, cosPow, preMinus_p, preMinus_m;
   double *prefac;
   int *fscale;
-  ylmgen_dbl3 *fx;
+  sharp_ylmgen_dbl3 *fx;
 
 /* internal usage only */
 /* used if s==0 */
@@ -68,22 +68,22 @@ typedef struct
 /* used if s!=0 */
   double *flm1, *flm2, *inv;
   int mlo, mhi;
-  } Ylmgen_C;
+  } sharp_Ylmgen_C;
 
 /*! Creates a generator which will calculate helper data for Y_lm calculation
     up to \a l=l_max and \a m=m_max. */
-void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int spin);
+void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin);
 
 /*! Deallocates a generator previously initialised by Ylmgen_init(). */
-void Ylmgen_destroy (Ylmgen_C *gen);
+void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen);
 
 /*! Prepares the object for the calculation at \a m. */
-void Ylmgen_prepare (Ylmgen_C *gen, int m);
+void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m);
 
 /*! Returns a pointer to an array with \a lmax+1 entries containing
     normalisation factors that must be applied to Y_lm values computed for
     \a spin. The array must be deallocated (using free()) by the user. */
-double *Ylmgen_get_norm (int lmax, int spin);
+double *sharp_Ylmgen_get_norm (int lmax, int spin);
 
 #ifdef __cplusplus
 }