reorganization

2019-02-28 10:29:56 +01:00 · 2019-02-28 10:29:56 +01:00 · e4b490b34f
commit e4b490b34f
parent ef4a9512e2
22 changed files with 120 additions and 210 deletions
--- a/libsharp/sharp.c
+++ b/libsharp/sharp.c
@ -30,8 +30,7 @@
 #include "pocketfft/pocketfft.h"
 #include "libsharp/sharp_ylmgen_c.h"
 #include "libsharp/sharp_internal.h"
-#include "c_utils/c_utils.h"
-#include "c_utils/walltime_c.h"
+#include "libsharp/sharp_utils.h"
 #include "libsharp/sharp_almhelpers.h"
 #include "libsharp/sharp_geomhelpers.h"

@ -836,7 +835,7 @@ NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)

 NOINLINE static void sharp_execute_job (sharp_job *job)
  {
-  double timer=wallTime();
+  double timer=sharp_wallTime();
  job->opcnt=0;
  int lmax = job->ainfo->lmax,
      mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm);
@ -910,7 +909,7 @@ NOINLINE static void sharp_execute_job (sharp_job *job)

  DEALLOC(job->norm_l);
  dealloc_phase (job);
-  job->time=wallTime()-timer;
+  job->time=sharp_wallTime()-timer;
  }

 static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
--- a/libsharp/sharp.h
+++ b/libsharp/sharp.h
@ -25,8 +25,8 @@
 *  \author Martin Reinecke \author Dag Sverre Seljebotn
 */

-#ifndef PLANCK_SHARP_H
-#define PLANCK_SHARP_H
+#ifndef SHARP_SHARP_H
+#define SHARP_SHARP_H

 #include <stddef.h>

@ -253,6 +253,10 @@ int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,

 /*! \} */

+int sharp_get_mlim (int lmax, int spin, double sth, double cth);
+int sharp_veclen(void);
+const char *sharp_architecture(void);
+
 #ifdef __cplusplus
 }
 #endif
--- a/libsharp/sharp_almhelpers.c
+++ b/libsharp/sharp_almhelpers.c
@ -26,7 +26,7 @@
 */

 #include "libsharp/sharp_almhelpers.h"
-#include "c_utils/c_utils.h"
+#include "libsharp/sharp_utils.h"

 void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
  sharp_alm_info **alm_info)
--- a/libsharp/sharp_almhelpers.h
+++ b/libsharp/sharp_almhelpers.h
@ -25,8 +25,8 @@
 *  \author Martin Reinecke
 */

-#ifndef PLANCK_SHARP_ALMHELPERS_H
-#define PLANCK_SHARP_ALMHELPERS_H
+#ifndef SHARP_ALMHELPERS_H
+#define SHARP_ALMHELPERS_H

 #include "libsharp/sharp.h"

--- a/libsharp/sharp_core.c
+++ b/libsharp/sharp_core.c
@ -105,6 +105,7 @@ DECL2(avx)
  architecture_ = sharp_architecture_default;
  }

+#pragma GCC visibility push(hidden)

 void inner_loop (sharp_job *job, const int *ispair,const double *cth,
  const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
@ -113,16 +114,20 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
  if (!inner_loop_) assign_funcs();
  inner_loop_(job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
  }
-int sharp_veclen(void)
-  {
-  if (!veclen_) assign_funcs();
-  return veclen_();
-  }
+
 int sharp_max_nvec(int spin)
  {
  if (!max_nvec_) assign_funcs();
  return max_nvec_(spin);
  }
+
+#pragma GCC visibility pop
+
+int sharp_veclen(void)
+  {
+  if (!veclen_) assign_funcs();
+  return veclen_();
+  }
 const char *sharp_architecture(void)
  {
  if (!architecture_) assign_funcs();
--- a/libsharp/sharp_core_inc.c
+++ b/libsharp/sharp_core_inc.c
@ -40,7 +40,9 @@
 #include "libsharp/sharp_vecsupport.h"
 #include "libsharp/sharp.h"
 #include "libsharp/sharp_internal.h"
-#include "c_utils/c_utils.h"
+#include "libsharp/sharp_utils.h"
+
+#pragma GCC visibility push(hidden)

 // In the following, we explicitly allow the compiler to contract floating
 // point operations, like multiply-and-add.
@ -1208,6 +1210,8 @@ const char *XARCH(sharp_architecture)(void)
  return xstr(ARCH);
  }

+#pragma GCC visibility pop
+
 #endif

 #endif
--- a/libsharp/sharp_cxx.h
+++ b/libsharp/sharp_cxx.h
@ -25,8 +25,8 @@
 *  \author Martin Reinecke
 */

-#ifndef PLANCK_SHARP_CXX_H
-#define PLANCK_SHARP_CXX_H
+#ifndef SHARP_CXX_H
+#define SHARP_CXX_H

 #include <complex>
 #include "libsharp/sharp.h"
--- a/libsharp/sharp_geomhelpers.c
+++ b/libsharp/sharp_geomhelpers.c
@ -28,7 +28,7 @@
 #include <math.h>
 #include "libsharp/sharp_geomhelpers.h"
 #include "libsharp/sharp_legendre_roots.h"
-#include "c_utils/c_utils.h"
+#include "libsharp/sharp_utils.h"
 #include "pocketfft/pocketfft.h"

 void sharp_make_subset_healpix_geom_info (int nside, int stride, int nrings,
--- a/libsharp/sharp_geomhelpers.h
+++ b/libsharp/sharp_geomhelpers.h
@ -25,8 +25,8 @@
 *  \author Martin Reinecke
 */

-#ifndef PLANCK_SHARP_GEOMHELPERS_H
-#define PLANCK_SHARP_GEOMHELPERS_H
+#ifndef SHARP_GEOMHELPERS_H
+#define SHARP_GEOMHELPERS_H

 #include "libsharp/sharp.h"

--- a/libsharp/sharp_internal.h
+++ b/libsharp/sharp_internal.h
@ -25,8 +25,8 @@
 *  \author Martin Reinecke \author Dag Sverre Seljebotn
 */

-#ifndef PLANCK_SHARP_INTERNAL_H
-#define PLANCK_SHARP_INTERNAL_H
+#ifndef SHARP_INTERNAL_H
+#define SHARP_INTERNAL_H

 #ifdef __cplusplus
 #error This header file cannot be included from C++, only from C
@ -54,14 +54,10 @@ typedef struct
  unsigned long long opcnt;
  } sharp_job;

-int sharp_get_mlim (int lmax, int spin, double sth, double cth);
-
 void inner_loop (sharp_job *job, const int *ispair,const double *cth,
  const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
  const int *mlim);

-int sharp_veclen(void);
 int sharp_max_nvec(int spin);
-const char *sharp_architecture(void);

 #endif
--- a/libsharp/sharp_legendre_roots.c
+++ b/libsharp/sharp_legendre_roots.c
@ -8,7 +8,7 @@

 #include <math.h>
 #include "libsharp/sharp_legendre_roots.h"
-#include "c_utils/c_utils.h"
+#include "libsharp/sharp_utils.h"

 static inline double one_minus_x2 (double x)
  { return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; }
--- a/libsharp/sharp_mpi.c
+++ b/libsharp/sharp_mpi.c
@ -211,7 +211,7 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
    { sharp_execute_job (job); return; }

  MPI_Barrier(comm);
-  double timer=wallTime();
+  double timer=sharp_wallTime();
  job->opcnt=0;
  sharp_mpi_info minfo;
  sharp_make_mpi_info(comm, job, &minfo);
@ -306,7 +306,7 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
    dealloc_phase (job);
    }
  sharp_destroy_mpi_info(&minfo);
-  job->time=wallTime()-timer;
+  job->time=sharp_wallTime()-timer;
  }

 void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
--- a/libsharp/sharp_mpi.h
+++ b/libsharp/sharp_mpi.h
@ -25,8 +25,8 @@
 *  \author Martin Reinecke \author Dag Sverre Seljebotn
 */

-#ifndef PLANCK_SHARP_MPI_H
-#define PLANCK_SHARP_MPI_H
+#ifndef SHARP_MPI_H
+#define SHARP_MPI_H

 #include <mpi.h>
 #include "libsharp/sharp.h"
--- a/libsharp/sharp_testsuite.c
+++ b/libsharp/sharp_testsuite.c
@ -1,680 +0,0 @@
-/*
- *  This file is part of libsharp.
- *
- *  libsharp is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libsharp is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libsharp; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
-
-/*  \file sharp_testsuite.c
- *
- *  Copyright (C) 2012-2019 Max-Planck-Society
- *  \author Martin Reinecke
- */
-
-#include <stdio.h>
-#include <string.h>
-#ifdef USE_MPI
-#include "mpi.h"
-#include "libsharp/sharp_mpi.h"
-#endif
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-#include "libsharp/sharp.h"
-#include "libsharp/sharp_internal.h"
-#include "libsharp/sharp_geomhelpers.h"
-#include "libsharp/sharp_almhelpers.h"
-#include "c_utils/c_utils.h"
-#include "c_utils/memusage.h"
-
-static void OpenMP_status(void)
-  {
-#ifndef _OPENMP
-  printf("OpenMP: not supported by this binary\n");
-#else
-  int threads = omp_get_max_threads();
-  if (threads>1)
-    printf("OpenMP active: max. %d threads.\n",threads);
-  else
-    printf("OpenMP active, but running with 1 thread only.\n");
-#endif
-  }
-
-static void MPI_status(void)
-  {
-#ifndef USE_MPI
-  printf("MPI: not supported by this binary\n");
-#else
-  int tasks;
-  MPI_Comm_size(MPI_COMM_WORLD,&tasks);
-  if (tasks>1)
-    printf("MPI active with %d tasks.\n",tasks);
-  else
-    printf("MPI active, but running with 1 task only.\n");
-#endif
-  }
-
-static void sharp_announce (const char *name)
-  {
-  size_t m, nlen=strlen(name);
-  printf("\n+-");
-  for (m=0; m<nlen; ++m) printf("-");
-  printf("-+\n");
-  printf("| %s |\n", name);
-  printf("+-");
-  for (m=0; m<nlen; ++m) printf("-");
-  printf("-+\n\n");
-  printf("Detected hardware architecture: %s\n", sharp_architecture());
-  printf("Supported vector length: %d\n", sharp_veclen());
-  OpenMP_status();
-  MPI_status();
-  printf("\n");
-  }
-
-static void sharp_module_startup (const char *name, int argc, int argc_expected,
-  const char *argv_expected, int verbose)
-  {
-  if (verbose) sharp_announce (name);
-  if (argc==argc_expected) return;
-  if (verbose) fprintf(stderr, "Usage: %s %s\n", name, argv_expected);
-  exit(1);
-  }
-
-typedef complex double dcmplx;
-
-int ntasks, mytask;
-
-static double drand (double min, double max, unsigned *state)
-  {
-  *state = (((*state) * 1103515245u) + 12345u) & 0x7fffffffu;
-  return min + (max-min)*(*state)/(0x7fffffff+1.0);
-  }
-
-static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin, int cnt)
-  {
-#pragma omp parallel
-{
-  int mi;
-#pragma omp for schedule (dynamic,100)
-  for (mi=0;mi<helper->nm; ++mi)
-    {
-    int m=helper->mval[mi];
-    unsigned state=1234567u*(unsigned)cnt+8912u*(unsigned)m; // random seed
-    for (int l=m;l<=helper->lmax; ++l)
-      {
-      if ((l<spin)&&(m<spin))
-        alm[sharp_alm_index(helper,l,mi)] = 0.;
-      else
-        {
-        double rv = drand(-1,1,&state);
-        double iv = (m==0) ? 0 : drand(-1,1,&state);
-        alm[sharp_alm_index(helper,l,mi)] = rv+_Complex_I*iv;
-        }
-      }
-    }
-} // end of parallel region
-  }
-
-static unsigned long long totalops (unsigned long long val)
-  {
-#ifdef USE_MPI
-  unsigned long long tmp;
-  MPI_Allreduce (&val, &tmp,1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
-  return tmp;
-#else
-  return val;
-#endif
-  }
-
-static double maxTime (double val)
-  {
-#ifdef USE_MPI
-  double tmp;
-  MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
-  return tmp;
-#else
-  return val;
-#endif
-  }
-
-static double allreduceSumDouble (double val)
-  {
-#ifdef USE_MPI
-  double tmp;
-  MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-  return tmp;
-#else
-  return val;
-#endif
-  }
-
-static double totalMem()
-  {
-#ifdef USE_MPI
-  double tmp, val=VmHWM();
-  MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-  return tmp;
-#else
-  return VmHWM();
-#endif
-  }
-
-#ifdef USE_MPI
-static void reduce_alm_info(sharp_alm_info *ainfo)
-  {
-  int nmnew=0;
-  ptrdiff_t ofs = 0;
-  for (int i=mytask; i<ainfo->nm; i+=ntasks,++nmnew)
-    {
-    ainfo->mval[nmnew]=ainfo->mval[i];
-    ainfo->mvstart[nmnew]=ofs-ainfo->mval[nmnew];
-    ofs+=ainfo->lmax-ainfo->mval[nmnew]+1;
-    }
-  ainfo->nm=nmnew;
-  }
-
-static void reduce_geom_info(sharp_geom_info *ginfo)
-  {
-  int npairsnew=0;
-  ptrdiff_t ofs = 0;
-  for (int i=mytask; i<ginfo->npairs; i+=ntasks,++npairsnew)
-    {
-    ginfo->pair[npairsnew]=ginfo->pair[i];
-    ginfo->pair[npairsnew].r1.ofs=ofs;
-    ofs+=ginfo->pair[npairsnew].r1.nph;
-    ginfo->pair[npairsnew].r2.ofs=ofs;
-    if (ginfo->pair[npairsnew].r2.nph>0) ofs+=ginfo->pair[npairsnew].r2.nph;
-    }
-  ginfo->npairs=npairsnew;
-  }
-#endif
-
-static ptrdiff_t get_nalms(const sharp_alm_info *ainfo)
-  {
-  ptrdiff_t res=0;
-  for (int i=0; i<ainfo->nm; ++i)
-    res += ainfo->lmax-ainfo->mval[i]+1;
-  return res;
-  }
-
-static ptrdiff_t get_npix(const sharp_geom_info *ginfo)
-  {
-  ptrdiff_t res=0;
-  for (int i=0; i<ginfo->npairs; ++i)
-    {
-    res += ginfo->pair[i].r1.nph;
-    if (ginfo->pair[i].r2.nph>0) res += ginfo->pair[i].r2.nph;
-    }
-  return res;
-  }
-
-static double *get_sqsum_and_invert (dcmplx **alm, ptrdiff_t nalms, int ncomp)
-  {
-  double *sqsum=RALLOC(double,ncomp);
-  for (int i=0; i<ncomp; ++i)
-    {
-    sqsum[i]=0;
-    for (ptrdiff_t j=0; j<nalms; ++j)
-      {
-      sqsum[i]+=creal(alm[i][j])*creal(alm[i][j])
-               +cimag(alm[i][j])*cimag(alm[i][j]);
-      alm[i][j]=-alm[i][j];
-      }
-    }
-  return sqsum;
-  }
-
-static void get_errors (dcmplx **alm, ptrdiff_t nalms, int ncomp, double *sqsum,
-  double **err_abs, double **err_rel)
-  {
-  long nalms_tot=nalms;
-#ifdef USE_MPI
-  MPI_Allreduce(&nalms,&nalms_tot,1,MPI_LONG,MPI_SUM,MPI_COMM_WORLD);
-#endif
-
-  *err_abs=RALLOC(double,ncomp);
-  *err_rel=RALLOC(double,ncomp);
-  for (int i=0; i<ncomp; ++i)
-    {
-    double sum=0, maxdiff=0, sumtot, sqsumtot, maxdifftot;
-    for (ptrdiff_t j=0; j<nalms; ++j)
-      {
-      double sqr=creal(alm[i][j])*creal(alm[i][j])
-                +cimag(alm[i][j])*cimag(alm[i][j]);
-      sum+=sqr;
-      if (sqr>maxdiff) maxdiff=sqr;
-      }
-   maxdiff=sqrt(maxdiff);
-
-#ifdef USE_MPI
-    MPI_Allreduce(&sum,&sumtot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
-    MPI_Allreduce(&sqsum[i],&sqsumtot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
-    MPI_Allreduce(&maxdiff,&maxdifftot,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
-#else
-    sumtot=sum;
-    sqsumtot=sqsum[i];
-    maxdifftot=maxdiff;
-#endif
-    sumtot=sqrt(sumtot/nalms_tot);
-    sqsumtot=sqrt(sqsumtot/nalms_tot);
-    (*err_abs)[i]=maxdifftot;
-    (*err_rel)[i]=sumtot/sqsumtot;
-    }
-  }
-
-static int good_fft_size(int n)
-  {
-  if (n<=6) return n;
-  int bestfac=2*n;
-
-  for (int f2=1; f2<bestfac; f2*=2)
-    for (int f23=f2; f23<bestfac; f23*=3)
-      for (int f235=f23; f235<bestfac; f235*=5)
-        if (f235>=n) bestfac=f235;
-
-  return bestfac;
-  }
-
-static void get_infos (const char *gname, int lmax, int *mmax, int *gpar1,
-  int *gpar2, sharp_geom_info **ginfo, sharp_alm_info **ainfo, int verbose)
-  {
-  UTIL_ASSERT(lmax>=0,"lmax must not be negative");
-  if (*mmax<0) *mmax=lmax;
-  UTIL_ASSERT(*mmax<=lmax,"mmax larger than lmax");
-
-  verbose &= (mytask==0);
-  if (verbose) printf ("lmax: %d, mmax: %d\n",lmax,*mmax);
-
-  sharp_make_triangular_alm_info(lmax,*mmax,1,ainfo);
-#ifdef USE_MPI
-  reduce_alm_info(*ainfo);
-#endif
-
-  if (strcmp(gname,"healpix")==0)
-    {
-    if (*gpar1<1) *gpar1=lmax/2;
-    if (*gpar1==0) ++(*gpar1);
-    sharp_make_healpix_geom_info (*gpar1, 1, ginfo);
-    if (verbose) printf ("HEALPix grid, nside=%d\n",*gpar1);
-    }
-  else if (strcmp(gname,"gauss")==0)
-    {
-    if (*gpar1<1) *gpar1=lmax+1;
-    if (*gpar2<1) *gpar2=2*(*mmax)+1;
-    sharp_make_gauss_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
-    if (verbose)
-      printf ("Gauss-Legendre grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
-    }
-  else if (strcmp(gname,"fejer1")==0)
-    {
-    if (*gpar1<1) *gpar1=2*lmax+1;
-    if (*gpar2<1) *gpar2=2*(*mmax)+1;
-    sharp_make_fejer1_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
-    if (verbose) printf ("Fejer1 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
-    }
-  else if (strcmp(gname,"fejer2")==0)
-    {
-    if (*gpar1<1) *gpar1=2*lmax+1;
-    if (*gpar2<1) *gpar2=2*(*mmax)+1;
-    sharp_make_fejer2_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
-    if (verbose) printf ("Fejer2 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
-    }
-  else if (strcmp(gname,"cc")==0)
-    {
-    if (*gpar1<1) *gpar1=2*lmax+1;
-    if (*gpar2<1) *gpar2=2*(*mmax)+1;
-    sharp_make_cc_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
-    if (verbose)
-      printf("Clenshaw-Curtis grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
-    }
-  else if (strcmp(gname,"smallgauss")==0)
-    {
-    int nlat=*gpar1, nlon=*gpar2;
-    if (nlat<1) nlat=lmax+1;
-    if (nlon<1) nlon=2*(*mmax)+1;
-    *gpar1=nlat; *gpar2=nlon;
-    sharp_make_gauss_geom_info (nlat, nlon, 0., 1, nlon, ginfo);
-    ptrdiff_t npix_o=get_npix(*ginfo);
-    size_t ofs=0;
-    for (int i=0; i<(*ginfo)->npairs; ++i)
-      {
-      sharp_ringpair *pair=&((*ginfo)->pair[i]);
-      int pring=1+2*sharp_get_mlim(lmax,0,pair->r1.sth,pair->r1.cth);
-      if (pring>nlon) pring=nlon;
-      pring=good_fft_size(pring);
-      pair->r1.nph=pring;
-      pair->r1.weight*=nlon*1./pring;
-      pair->r1.ofs=ofs;
-      ofs+=pring;
-      if (pair->r2.nph>0)
-        {
-        pair->r2.nph=pring;
-        pair->r2.weight*=nlon*1./pring;
-        pair->r2.ofs=ofs;
-        ofs+=pring;
-        }
-      }
-    if (verbose)
-      {
-      ptrdiff_t npix=get_npix(*ginfo);
-      printf("Small Gauss grid, nlat=%d, npix=%ld, savings=%.2f%%\n",
-        nlat,(long)npix,(npix_o-npix)*100./npix_o);
-      }
-    }
-  else
-    UTIL_FAIL("unknown grid geometry");
-
-#ifdef USE_MPI
-  reduce_geom_info(*ginfo);
-#endif
-  }
-
-static void check_sign_scale(void)
-  {
-  int lmax=50;
-  int mmax=lmax;
-  sharp_geom_info *tinfo;
-  int nrings=lmax+1;
-  int ppring=2*lmax+2;
-  ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
-  sharp_make_gauss_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
-
-  /* flip theta to emulate the "old" Gaussian grid geometry */
-  for (int i=0; i<tinfo->npairs; ++i)
-    {
-    const double pi=3.141592653589793238462643383279502884197;
-    tinfo->pair[i].r1.cth=-tinfo->pair[i].r1.cth;
-    tinfo->pair[i].r2.cth=-tinfo->pair[i].r2.cth;
-    tinfo->pair[i].r1.theta=pi-tinfo->pair[i].r1.theta;
-    tinfo->pair[i].r2.theta=pi-tinfo->pair[i].r2.theta;
-    }
-
-  sharp_alm_info *alms;
-  sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
-  ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
-
-  double **map;
-  ALLOC2D(map,double,2,npix);
-
-  dcmplx **alm;
-  ALLOC2D(alm,dcmplx,2,nalms);
-  for (int i=0; i<2; ++i)
-    for (int j=0; j<nalms; ++j)
-      alm[i][j]=1.+_Complex_I;
-
-  sharp_execute(SHARP_ALM2MAP,0,&alm[0],&map[0],tinfo,alms,SHARP_DP,
-    NULL,NULL);
-  UTIL_ASSERT(FAPPROX(map[0][0     ], 3.588246976618616912e+00,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[0][npix/2], 4.042209792157496651e+01,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[0][npix-1],-1.234675107554816442e+01,1e-12),
-    "error");
-
-  sharp_execute(SHARP_ALM2MAP,1,&alm[0],&map[0],tinfo,alms,SHARP_DP,
-    NULL,NULL);
-  UTIL_ASSERT(FAPPROX(map[0][0     ], 2.750897760535633285e+00,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[0][npix/2], 3.137704477368562905e+01,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[0][npix-1],-8.405730859837063917e+01,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[1][0     ],-2.398026536095463346e+00,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[1][npix/2],-4.961140548331700728e+01,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[1][npix-1],-1.412765834230440021e+01,1e-12),
-    "error");
-
-  sharp_execute(SHARP_ALM2MAP,2,&alm[0],&map[0],tinfo,alms,SHARP_DP,
-    NULL,NULL);
-  UTIL_ASSERT(FAPPROX(map[0][0     ],-1.398186224727334448e+00,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[0][npix/2],-2.456676000884031197e+01,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[0][npix-1],-1.516249174408820863e+02,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[1][0     ],-3.173406200299964119e+00,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[1][npix/2],-5.831327404513146462e+01,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[1][npix-1],-1.863257892248353897e+01,1e-12),
-    "error");
-
-  sharp_execute(SHARP_ALM2MAP_DERIV1,1,&alm[0],&map[0],tinfo,alms,
-    SHARP_DP,NULL,NULL);
-  UTIL_ASSERT(FAPPROX(map[0][0     ],-6.859393905369091105e-01,1e-11),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[0][npix/2],-2.103947835973212364e+02,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[0][npix-1],-1.092463246472086439e+03,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[1][0     ],-1.411433220713928165e+02,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[1][npix/2],-1.146122859381925082e+03,1e-12),
-    "error");
-  UTIL_ASSERT(FAPPROX(map[1][npix-1], 7.821618677689795049e+02,1e-12),
-    "error");
-
-  DEALLOC2D(map);
-  DEALLOC2D(alm);
-
-  sharp_destroy_alm_info(alms);
-  sharp_destroy_geom_info(tinfo);
-  }
-
-static void do_sht (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
-  int spin, double **err_abs, double **err_rel,
-  double *t_a2m, double *t_m2a, unsigned long long *op_a2m,
-  unsigned long long *op_m2a)
-  {
-  ptrdiff_t nalms = get_nalms(ainfo);
-  int ncomp = (spin==0) ? 1 : 2;
-
-  size_t npix = get_npix(ginfo);
-  double **map;
-  ALLOC2D(map,double,ncomp,npix);
-  for (int i=0; i<ncomp; ++i)
-    SET_ARRAY(map[i],0,(int)npix,0);
-
-  dcmplx **alm;
-  ALLOC2D(alm,dcmplx,ncomp,nalms);
-  for (int i=0; i<ncomp; ++i)
-    random_alm(alm[i],ainfo,spin,i+1);
-
-#ifdef USE_MPI
-  sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,&alm[0],&map[0],ginfo,
-    ainfo, SHARP_DP|SHARP_ADD,t_a2m,op_a2m);
-#else
-  sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],ginfo,ainfo,
-    SHARP_DP,t_a2m,op_a2m);
-#endif
-  if (t_a2m!=NULL) *t_a2m=maxTime(*t_a2m);
-  if (op_a2m!=NULL) *op_a2m=totalops(*op_a2m);
-  double *sqsum=get_sqsum_and_invert(alm,nalms,ncomp);
-#ifdef USE_MPI
-  sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,&alm[0],&map[0],ginfo,
-    ainfo,SHARP_DP|SHARP_ADD,t_m2a,op_m2a);
-#else
-  sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map[0],ginfo,ainfo,
-    SHARP_DP|SHARP_ADD,t_m2a,op_m2a);
-#endif
-  if (t_m2a!=NULL) *t_m2a=maxTime(*t_m2a);
-  if (op_m2a!=NULL) *op_m2a=totalops(*op_m2a);
-  get_errors(alm, nalms, ncomp, sqsum, err_abs, err_rel);
-
-  DEALLOC(sqsum);
-  DEALLOC2D(map);
-  DEALLOC2D(alm);
-  }
-
-static void check_accuracy (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
-  int spin)
-  {
-  int ncomp = (spin==0) ? 1 : 2;
-  double *err_abs, *err_rel;
-  do_sht (ginfo, ainfo, spin, &err_abs, &err_rel, NULL, NULL,
-    NULL, NULL);
-  for (int i=0; i<ncomp; ++i)
-    UTIL_ASSERT((err_rel[i]<1e-10) && (err_abs[i]<1e-10),"error");
-  DEALLOC(err_rel);
-  DEALLOC(err_abs);
-  }
-
-static void run(int lmax, int mmax, int nlat, int nlon, int spin)
-  {
-  sharp_geom_info *ginfo;
-  sharp_alm_info *ainfo;
-  get_infos ("gauss", lmax, &mmax, &nlat, &nlon, &ginfo, &ainfo, 0);
-  check_accuracy(ginfo,ainfo,spin);
-  sharp_destroy_alm_info(ainfo);
-  sharp_destroy_geom_info(ginfo);
-  }
-
-static void sharp_acctest(void)
-  {
-  if (mytask==0) sharp_module_startup("sharp_acctest",1,1,"",1);
-
-  if (mytask==0) printf("Checking signs and scales.\n");
-  check_sign_scale();
-  if (mytask==0) printf("Passed.\n\n");
-
-  if (mytask==0) printf("Testing map analysis accuracy.\n");
-
-  run(127, 127, 128, 256, 0);
-  run(127, 127, 128, 256, 1);
-  run(127, 127, 128, 256, 2);
-  run(127, 127, 128, 256, 3);
-  run(127, 127, 128, 256, 30);
-  run(5, 0, 6, 1, 0);
-  run(5, 0, 7, 2, 0);
-  run(8, 8, 9, 17, 0);
-  run(8, 8, 9, 17, 2);
-  if (mytask==0) printf("Passed.\n\n");
-  }
-
-static void sharp_test (int argc, const char **argv)
-  {
-  if (mytask==0) sharp_announce("sharp_test");
-  UTIL_ASSERT(argc>=8,"usage: grid lmax mmax geom1 geom2 spin");
-  int lmax=atoi(argv[3]);
-  int mmax=atoi(argv[4]);
-  int gpar1=atoi(argv[5]);
-  int gpar2=atoi(argv[6]);
-  int spin=atoi(argv[7]);
-
-  if (mytask==0) printf("Testing map analysis accuracy.\n");
-  if (mytask==0) printf("spin=%d\n", spin);
-
-  sharp_geom_info *ginfo;
-  sharp_alm_info *ainfo;
-  get_infos (argv[2], lmax, &mmax, &gpar1, &gpar2, &ginfo, &ainfo, 1);
-
-  int ncomp = (spin==0) ? 1 : 2;
-  double t_a2m=1e30, t_m2a=1e30;
-  unsigned long long op_a2m, op_m2a;
-  double *err_abs,*err_rel;
-
-  double t_acc=0;
-  int nrpt=0;
-  while(1)
-    {
-    ++nrpt;
-    double ta2m2, tm2a2;
-    do_sht (ginfo, ainfo, spin, &err_abs, &err_rel, &ta2m2, &tm2a2,
-      &op_a2m, &op_m2a);
-    if (ta2m2<t_a2m) t_a2m=ta2m2;
-    if (tm2a2<t_m2a) t_m2a=tm2a2;
-    t_acc+=t_a2m+t_m2a;
-    if (t_acc>2.)
-      {
-      if (mytask==0) printf("Best of %d runs\n",nrpt);
-      break;
-      }
-    DEALLOC(err_abs);
-    DEALLOC(err_rel);
-    }
-
-  if (mytask==0) printf("wall time for alm2map: %fs\n",t_a2m);
-  if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*op_a2m/t_a2m);
-  if (mytask==0) printf("wall time for map2alm: %fs\n",t_m2a);
-  if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*op_m2a/t_m2a);
-
-  if (mytask==0)
-    for (int i=0; i<ncomp; ++i)
-      printf("component %i: rms %e, maxerr %e\n",i,err_rel[i], err_abs[i]);
-
-  double iosize = ncomp*(16.*get_nalms(ainfo) + 8.*get_npix(ginfo));
-  iosize = allreduceSumDouble(iosize);
-
-  sharp_destroy_alm_info(ainfo);
-  sharp_destroy_geom_info(ginfo);
-
-  double tmem=totalMem();
-  if (mytask==0)
-    printf("\nMemory high water mark: %.2f MB\n",tmem/(1<<20));
-  if (mytask==0)
-    printf("Memory overhead: %.2f MB (%.2f%% of working set)\n",
-      (tmem-iosize)/(1<<20),100.*(1.-iosize/tmem));
-
-#ifdef _OPENMP
-  int nomp=omp_get_max_threads();
-#else
-  int nomp=1;
-#endif
-
-  double maxerel=0., maxeabs=0.;
-  for (int i=0; i<ncomp; ++i)
-    {
-    if (maxerel<err_rel[i]) maxerel=err_rel[i];
-    if (maxeabs<err_abs[i]) maxeabs=err_abs[i];
-    }
-
-  if (mytask==0)
-    printf("%-12s %-10s %2d %d %2d %3d %6d %6d %6d %6d %.2e %7.2f %.2e %7.2f"
-           " %9.2f %6.2f %.2e %.2e\n",
-      getenv("HOST"),argv[2],spin,sharp_veclen(),nomp,ntasks,lmax,mmax,gpar1,gpar2,
-      t_a2m,1e-9*op_a2m/t_a2m,t_m2a,1e-9*op_m2a/t_m2a,tmem/(1<<20),
-      100.*(1.-iosize/tmem),maxerel,maxeabs);
-
-  DEALLOC(err_abs);
-  DEALLOC(err_rel);
-  }
-
-int main(int argc, const char **argv)
-  {
-#ifdef USE_MPI
-  MPI_Init(NULL,NULL);
-  MPI_Comm_size(MPI_COMM_WORLD,&ntasks);
-  MPI_Comm_rank(MPI_COMM_WORLD,&mytask);
-#else
-  mytask=0; ntasks=1;
-#endif
-
-  UTIL_ASSERT(argc>=2,"need at least one command line argument");
-
-  if (strcmp(argv[1],"acctest")==0)
-    sharp_acctest();
-  else if (strcmp(argv[1],"test")==0)
-    sharp_test(argc,argv);
-  else
-    UTIL_FAIL("unknown command");
-
-#ifdef USE_MPI
-  MPI_Finalize();
-#endif
-  return 0;
-  }
--- a/libsharp/sharp_utils.c
+++ b/libsharp/sharp_utils.c
@ -0,0 +1,108 @@
+/*
+ *  This file is part of libsharp.
+ *
+ *  libsharp is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  libsharp is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with libsharp; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
+
+/*
+ *  Convenience functions
+ *
+ *  Copyright (C) 2008-2019 Max-Planck-Society
+ *  Author: Martin Reinecke
+ */
+
+#include <stdio.h>
+#include "libsharp/sharp_utils.h"
+
+void sharp_fail_ (const char *file, int line, const char *func, const char *msg)
+  {
+  fprintf(stderr,"%s, %i (%s):\n%s\n",file,line,func,msg);
+  exit(1);
+  }
+
+/* This function tries to avoid allocations with a total size close to a high
+   power of two (called the "critical stride" here), by adding a few more bytes
+   if necessary. This lowers the probability that two arrays differ by a multiple
+   of the critical stride in their starting address, which in turn lowers the
+   risk of cache line contention. */
+static size_t manipsize(size_t sz)
+  {
+  const size_t critical_stride=4096, cacheline=64, overhead=32;
+  if (sz < (critical_stride/2)) return sz;
+  if (((sz+overhead)%critical_stride)>(2*cacheline)) return sz;
+  return sz+2*cacheline;
+  }
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+void *sharp_malloc_ (size_t sz)
+  {
+  void *res;
+  if (sz==0) return NULL;
+  res = _mm_malloc(manipsize(sz),32);
+  UTIL_ASSERT(res,"_mm_malloc() failed");
+  return res;
+  }
+void sharp_free_ (void *ptr)
+  { if ((ptr)!=NULL) _mm_free(ptr); }
+#else
+void *sharp_malloc_ (size_t sz)
+  {
+  void *res;
+  if (sz==0) return NULL;
+  res = malloc(manipsize(sz));
+  UTIL_ASSERT(res,"malloc() failed");
+  return res;
+  }
+void sharp_free_ (void *ptr)
+  { if ((ptr)!=NULL) free(ptr); }
+#endif
+
+#if defined (_OPENMP)
+#include <omp.h>
+#elif defined (USE_MPI)
+#include "mpi.h"
+#elif defined (_WIN32)
+#include <Windows.h>
+#else
+#include <sys/time.h>
+#include <stdlib.h>
+#endif
+
+double sharp_wallTime(void)
+  {
+#if defined (_OPENMP)
+  return omp_get_wtime();
+#elif defined (USE_MPI)
+  return MPI_Wtime();
+#elif defined (_WIN32)
+  static double inv_freq = -1.;
+  if (inv_freq<0)
+    {
+    LARGE_INTEGER freq;
+    QueryPerformanceFrequency(&freq);
+    inv_freq = 1. / double(freq.QuadPart);
+    }
+  LARGE_INTEGER count;
+  QueryPerformanceCounter(&count);
+  return count.QuadPart*inv_freq;
+#else
+  struct timeval t;
+  gettimeofday(&t, NULL);
+  return t.tv_sec + 1e-6*t.tv_usec;
+#endif
+  }
--- a/libsharp/sharp_utils.h
+++ b/libsharp/sharp_utils.h
@ -0,0 +1,131 @@
+/*
+ *  This file is part of libc_utils.
+ *
+ *  libc_utils is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  libc_utils is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with libc_utils; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik */
+
+/*! \file c_utils.h
+ *  Convenience functions
+ *
+ *  Copyright (C) 2008-2019 Max-Planck-Society
+ *  \author Martin Reinecke
+ *  \note This file should only be included from .c files, NOT from .h files.
+ */
+
+#ifndef SHARP_UTILS_H
+#define SHARP_UTILS_H
+
+#include <math.h>
+#include <stdlib.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void sharp_fail_ (const char *file, int line, const char *func, const char *msg);
+void *sharp_malloc_ (size_t sz);
+void sharp_free_ (void *ptr);
+
+#if defined (__GNUC__)
+#define UTIL_FUNC_NAME__ __func__
+#else
+#define UTIL_FUNC_NAME__ "unknown"
+#endif
+
+/*! \def UTIL_ASSERT(cond,msg)
+    If \a cond is false, print an error message containing function name,
+    source file name and line number of the call, as well as \a msg;
+    then exit the program with an error status. */
+#define UTIL_ASSERT(cond,msg) \
+  if(!(cond)) sharp_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg)
+/*! \def UTIL_WARN(cond,msg)
+    If \a cond is false, print an warning containing function name,
+    source file name and line number of the call, as well as \a msg. */
+#define UTIL_FAIL(msg) \
+  sharp_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg)
+
+/*! \def ALLOC(ptr,type,num)
+    Allocate space for \a num objects of type \a type. Make sure that the
+    allocation succeeded, else stop the program with an error. Return the
+    resulting pointer in \a ptr. */
+#define ALLOC(ptr,type,num) \
+  do { (ptr)=(type *)sharp_malloc_((num)*sizeof(type)); } while (0)
+/*! \def RALLOC(type,num)
+    Allocate space for \a num objects of type \a type. Make sure that the
+    allocation succeeded, else stop the program with an error. Cast the
+    resulting pointer to \a (type*). */
+#define RALLOC(type,num) \
+  ((type *)sharp_malloc_((num)*sizeof(type)))
+/*! \def DEALLOC(ptr)
+    Deallocate \a ptr. It must have been allocated using \a ALLOC or
+    \a RALLOC. */
+#define DEALLOC(ptr) \
+  do { sharp_free_(ptr); (ptr)=NULL; } while(0)
+#define RESIZE(ptr,type,num) \
+  do { sharp_free_(ptr); ALLOC(ptr,type,num); } while(0)
+/*! \def SET_ARRAY(ptr,i1,i2,val)
+    Set the entries \a ptr[i1] ... \a ptr[i2-1] to \a val. */
+#define SET_ARRAY(ptr,i1,i2,val) \
+  do { \
+    ptrdiff_t cnt_; \
+    for (cnt_=(i1);cnt_<(i2);++cnt_) (ptr)[cnt_]=(val); \
+    } while(0)
+
+#define ALLOC2D(ptr,type,num1,num2) \
+  do { \
+    size_t cnt_, num1_=(num1), num2_=(num2); \
+    ALLOC((ptr),type *,num1_); \
+    ALLOC((ptr)[0],type,num1_*num2_); \
+    for (cnt_=1; cnt_<num1_; ++cnt_) \
+      (ptr)[cnt_]=(ptr)[cnt_-1]+num2_; \
+    } while(0)
+#define DEALLOC2D(ptr) \
+  do { if(ptr) DEALLOC((ptr)[0]); DEALLOC(ptr); } while(0)
+
+#define FAPPROX(a,b,eps) \
+  (fabs((a)-(b))<((eps)*fabs(b)))
+#define IMAX(a,b) \
+  (((a)>(b)) ? (a) : (b))
+#define IMIN(a,b) \
+  (((a)<(b)) ? (a) : (b))
+
+#define SWAP(a,b,type) \
+  do { type tmp_=(a); (a)=(b); (b)=tmp_; } while(0)
+
+/*! Returns an approximation of the current wall time (in seconds).
+    The first available of the following timers will be used:
+    <ul>
+    <li> \a omp_get_wtime(), if OpenMP is available
+    <li> \a MPI_Wtime(), if MPI is available
+    <li> \a gettimeofday() otherwise
+    </ul>
+    \note Only useful for measuring time differences.
+    \note This function has an execution time between 10 and 100 nanoseconds. */
+double sharp_wallTime(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef __GNUC__
+#define NOINLINE __attribute__((noinline))
+#else
+#define NOINLINE
+#endif
+
+#endif
--- a/libsharp/sharp_ylmgen_c.c
+++ b/libsharp/sharp_ylmgen_c.c
@ -28,7 +28,9 @@
 #include <math.h>
 #include <stdlib.h>
 #include "libsharp/sharp_ylmgen_c.h"
-#include "c_utils/c_utils.h"
+#include "libsharp/sharp_utils.h"
+
+#pragma GCC visibility push(hidden)

 static inline void normalize (double *val, int *scale, double xfmax)
  {
@ -252,3 +254,5 @@ double *sharp_Ylmgen_get_d1norm (int lmax)
    res[l] = (l<1) ? 0. : 0.5*sqrt(l*(l+1.)*(2*l+1.)/(4*pi));
  return res;
  }
+
+#pragma GCC visibility pop