From 31cbd2acc5ab6a35cd88ff7db773d6a0fdc36bb5 Mon Sep 17 00:00:00 2001
From: Martin Reinecke <martin@mpa-garching.mpg.de>
Date: Mon, 15 Oct 2018 17:26:23 +0200
Subject: [PATCH] step 1

---
 Makefile                                      |   80 -
 Makefile.am                                   |   48 +
 README.md                                     |   43 -
 c_utils/planck.make                           |   18 -
 config/config.auto.in                         |   12 -
 config/rules.common                           |   33 -
 configure.ac                                  |  173 +-
 docsrc/c_utils.dox                            |  290 ----
 docsrc/footer.html                            |    5 -
 docsrc/index_code.html                        |   15 -
 docsrc/libfftpack.dox                         |  290 ----
 docsrc/libsharp.dox                           |  291 ----
 docsrc/planck.make                            |   20 -
 fortran/sharp.f90                             |  286 ----
 fortran/test_sharp.f90                        |   84 -
 libfftpack/README                             |   34 -
 libfftpack/bluestein.c                        |  173 --
 libfftpack/bluestein.h                        |   48 -
 libfftpack/fftpack.c                          |  833 ----------
 libfftpack/fftpack.h                          |   64 -
 libfftpack/fftpack_inc.c                      |  306 ----
 libfftpack/libfftpack.dox                     |    5 -
 libfftpack/ls_fft.c                           |  291 ----
 libfftpack/ls_fft.h                           |  161 --
 libfftpack/planck.make                        |   21 -
 libsharp/planck.make                          |   29 -
 libsharp/sharp.c                              |   18 +-
 libsharp/sharp.h                              |    3 -
 libsharp/sharp_geomhelpers.c                  |   20 +-
 libsharp/sharp_legendre.c                     | 1319 ---------------
 libsharp/sharp_legendre.c.in                  |  176 --
 libsharp/sharp_legendre.h                     |   62 -
 libsharp/sharp_legendre_table.c               | 1467 -----------------
 libsharp/sharp_legendre_table.h               |   97 --
 m4/m4_ax_create_pkgconfig_info.m4             |  351 ++++
 python/fake_pyrex/Pyrex/Distutils/__init__.py |    1 -
 .../fake_pyrex/Pyrex/Distutils/build_ext.py   |    1 -
 python/fake_pyrex/Pyrex/__init__.py           |    1 -
 python/fake_pyrex/README                      |    2 -
 python/libsharp/__init__.py                   |    1 -
 python/libsharp/libsharp.pxd                  |   92 --
 python/libsharp/libsharp.pyx                  |  324 ----
 python/libsharp/libsharp_mpi.pyx              |   17 -
 python/libsharp/tests/__init__.py             |    1 -
 python/libsharp/tests/test_legendre.py        |   58 -
 python/libsharp/tests/test_legendre_table.py  |   36 -
 python/libsharp/tests/test_sht.py             |   32 -
 .../tests/test_smoothing_noise_pol_mpi.py     |  137 --
 python/setup.py                               |   83 -
 runjinja.py                                   |   19 -
 50 files changed, 488 insertions(+), 7483 deletions(-)
 delete mode 100644 Makefile
 create mode 100644 Makefile.am
 delete mode 100644 README.md
 delete mode 100644 c_utils/planck.make
 delete mode 100644 config/config.auto.in
 delete mode 100644 config/rules.common
 delete mode 100644 docsrc/c_utils.dox
 delete mode 100644 docsrc/footer.html
 delete mode 100644 docsrc/index_code.html
 delete mode 100644 docsrc/libfftpack.dox
 delete mode 100644 docsrc/libsharp.dox
 delete mode 100644 docsrc/planck.make
 delete mode 100644 fortran/sharp.f90
 delete mode 100644 fortran/test_sharp.f90
 delete mode 100644 libfftpack/README
 delete mode 100644 libfftpack/bluestein.c
 delete mode 100644 libfftpack/bluestein.h
 delete mode 100644 libfftpack/fftpack.c
 delete mode 100644 libfftpack/fftpack.h
 delete mode 100644 libfftpack/fftpack_inc.c
 delete mode 100644 libfftpack/libfftpack.dox
 delete mode 100644 libfftpack/ls_fft.c
 delete mode 100644 libfftpack/ls_fft.h
 delete mode 100644 libfftpack/planck.make
 delete mode 100644 libsharp/planck.make
 delete mode 100644 libsharp/sharp_legendre.c
 delete mode 100644 libsharp/sharp_legendre.c.in
 delete mode 100644 libsharp/sharp_legendre.h
 delete mode 100644 libsharp/sharp_legendre_table.c
 delete mode 100644 libsharp/sharp_legendre_table.h
 create mode 100644 m4/m4_ax_create_pkgconfig_info.m4
 delete mode 100644 python/fake_pyrex/Pyrex/Distutils/__init__.py
 delete mode 100644 python/fake_pyrex/Pyrex/Distutils/build_ext.py
 delete mode 100644 python/fake_pyrex/Pyrex/__init__.py
 delete mode 100644 python/fake_pyrex/README
 delete mode 100644 python/libsharp/__init__.py
 delete mode 100644 python/libsharp/libsharp.pxd
 delete mode 100644 python/libsharp/libsharp.pyx
 delete mode 100644 python/libsharp/libsharp_mpi.pyx
 delete mode 100644 python/libsharp/tests/__init__.py
 delete mode 100644 python/libsharp/tests/test_legendre.py
 delete mode 100644 python/libsharp/tests/test_legendre_table.py
 delete mode 100644 python/libsharp/tests/test_sht.py
 delete mode 100644 python/libsharp/tests/test_smoothing_noise_pol_mpi.py
 delete mode 100644 python/setup.py
 delete mode 100755 runjinja.py

diff --git a/Makefile b/Makefile
deleted file mode 100644
index 5a3184c..0000000
--- a/Makefile
+++ /dev/null
@@ -1,80 +0,0 @@
-SHARP_TARGET?=auto
-ifndef SHARP_TARGET
-  SHARP_TARGET:=$(error SHARP_TARGET undefined. Please see README.compilation for help)UNDEFINED
-endif
-
-default: compile_all
-SRCROOT:=$(shell pwd)
-include $(SRCROOT)/config/config.$(SHARP_TARGET)
-include $(SRCROOT)/config/rules.common
-
-all_hdr:=
-all_lib:=
-all_cbin:=
-
-FULL_INCLUDE:=
-
-include c_utils/planck.make
-include libfftpack/planck.make
-include libsharp/planck.make
-include docsrc/planck.make
-
-CYTHON_MODULES=python/libsharp/libsharp.so $(if $(MPI_CFLAGS), python/libsharp/libsharp_mpi.so)
-
-$(all_lib): %: | $(LIBDIR)_mkdir
-	@echo "#  creating library $*"
-	$(ARCREATE) $@ $^
-
-$(all_cbin): %: | $(BINDIR)_mkdir
-	@echo "#  linking C binary $*"
-	$(CL) -o $@ $^ $(CLFLAGS)
-
-compile_all: $(all_cbin) hdrcopy
-
-hdrclean:
-	@if [ -d $(INCDIR) ]; then rm -rf $(INCDIR)/* ; fi
-
-hdrcopy: | $(INCDIR)_mkdir
-	@if [ "$(all_hdr)" ]; then cp -p $(all_hdr) $(INCDIR); fi
-
-$(notdir $(all_cbin)) : % : $(BINDIR)/%
-
-test: compile_all
-	$(BINDIR)/sharp_testsuite acctest && \
-	$(BINDIR)/sharp_testsuite test healpix 2048 -1 1024 -1 0 1 && \
-	$(BINDIR)/sharp_testsuite test fejer1 2047 -1 -1 4096 2 1 && \
-	$(BINDIR)/sharp_testsuite test gauss 2047 -1 -1 4096 0 2
-
-perftest: compile_all
-	$(BINDIR)/sharp_testsuite test healpix 2048 -1 1024 -1 0 1 && \
-	$(BINDIR)/sharp_testsuite test gauss 63 -1 -1 128 0 1 && \
-	$(BINDIR)/sharp_testsuite test gauss 127 -1 -1 256 0 1 && \
-	$(BINDIR)/sharp_testsuite test gauss 255 -1 -1 512 0 1 && \
-	$(BINDIR)/sharp_testsuite test gauss 511 -1 -1 1024 0 1 && \
-	$(BINDIR)/sharp_testsuite test gauss 1023 -1 -1 2048 0 1 && \
-	$(BINDIR)/sharp_testsuite test gauss 2047 -1 -1 4096 0 1 && \
-	$(BINDIR)/sharp_testsuite test gauss 4095 -1 -1 8192 0 1 && \
-	$(BINDIR)/sharp_testsuite test gauss 8191 -1 -1 16384 0 1
-
-%.c: %.c.in
-# Only do this if the md5sum changed, in order to avoid Python and Jinja
-# dependency when not modifying the c.in file
-	grep `md5sum $< | cut -d ' ' -f 1` $@ || ./runjinja.py < $< > $@
-
-genclean:
-	rm libsharp/sharp_legendre.c || exit 0
-
-$(CYTHON_MODULES): %.so: %.pyx
-ifndef PIC_CFLAGS
-	$(error Python extension must be built using the --enable-pic configure option.)
-endif
-	cython $<
-	$(CC) $(DEBUG_CFLAGS) $(OPENMP_CFLAGS) $(PIC_CFLAGS) `python-config --cflags` -I$(INCDIR) -o $(<:.pyx=.o) -c $(<:.pyx=.c)
-	$(CL) -shared $(<:.pyx=.o) $(OPENMP_CFLAGS) $(CYTHON_OBJ) -L$(LIBDIR) -lsharp -lfftpack -lc_utils -L`python-config --prefix`/lib `python-config --ldflags` -o $@
-
-python: $(all_lib) hdrcopy $(CYTHON_MODULES)
-
-# the following test files are automatic; the sht wrapper test
-# must be run manually and requires MPI at the moment..
-pytest: python
-	cd python && nosetests --nocapture libsharp/tests/test_legendre_table.py libsharp/tests/test_legendre.py
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..0d40b92
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,48 @@
+ACLOCAL_AMFLAGS = -I m4
+
+lib_LTLIBRARIES = libsharp.la
+
+src_sharp = \
+  libsharp/sharp.c \
+  libsharp/sharp_almhelpers.c \
+  libsharp/sharp_announce.c \
+  libsharp/sharp_core.c \
+  libsharp/sharp_geomhelpers.c \
+  libsharp/sharp_legendre_roots.c \
+  libsharp/sharp_ylmgen_c.c \
+  libsharp/sharp_announce.h \
+  libsharp/sharp_complex_hacks.h \
+  libsharp/sharp_core.h \
+  libsharp/sharp_internal.h \
+  libsharp/sharp_legendre_roots.h \
+  libsharp/sharp_lowlevel.h \
+  libsharp/sharp_vecsupport.h \
+  libsharp/sharp_vecutil.h \
+  libsharp/sharp_ylmgen_c.h
+
+include_HEADERS = \
+  libsharp/sharp.h \
+  libsharp/sharp_lowlevel.h \
+  libsharp/sharp_geomhelpers.h \
+  libsharp/sharp_almhelpers.h \
+  libsharp/sharp_cxx.h
+
+EXTRA_DIST = \
+  libsharp/sharp_core_inc.c \
+  libsharp/sharp_core_inc2.c \
+  libsharp/sharp_core_inchelper.c
+
+libsharp_la_SOURCES = $(src_sharp)
+
+#check_PROGRAMS = ffttest
+#ffttest_SOURCES = ffttest.c
+#ffttest_LDADD = libpocketfft.la -lm
+
+#TESTS = ffttest
+
+AM_CFLAGS = -I$(top_srcdir)
+
+pkgconfigdir = $(libdir)/pkgconfig
+nodist_pkgconfig_DATA = @PACKAGE_NAME@.pc
+
+DISTCLEANFILES=@PACKAGE_NAME@.pc @PACKAGE_NAME@.pc.in  @PACKAGE_NAME@-uninstalled.pc  @PACKAGE_NAME@-uninstalled.sh
diff --git a/README.md b/README.md
deleted file mode 100644
index 24652b2..0000000
--- a/README.md
+++ /dev/null
@@ -1,43 +0,0 @@
-# Libsharp
-
-*IMPORTANT NOTE*: It appears that the default branch upon cloning from
-github.com/dagss/libsharp was an outdated 'dagss' branch instead of
-the 'master' branch. To get the latest copy,
-please do `git checkout master; git pull`. New clones are no longer affected.
-
-## Paper
-
-https://arxiv.org/abs/1303.4945
-
-## Compilation
-
-GNU make is required for compilation.
-
-Libsharp compilation has been successfully tested with GNU and Intel compilers.
-When using gcc, version 4.x is required [1].
-Since libsharp was written in standard C99, other compilers should work fine,
-but SSE2/AVX support will most likely be deactivated.
-
-If you obtained libsharp directly from the git repository, you will also
-need a copy of the GNU autotools. In this case, run "autoconf" in libsharp's
-main directory before any other steps.
-For libsharp releases distributed as a .tar.gz file, this step is not necessary.
-
-Afterwards, simply run "./configure"; if this fails, please refer to the output
-of "./configure --help" for additional hints and, if necessary, provide
-additional flags to the configure script.
-Once the script finishes successfully, run "make"
-(or "gmake"). This should install the compilation products in the
-subdirectory "auto/".
-
-Documentation can be created by the command "(g)make doc".
-However this requires the doxygen application to be installed
-on your system.
-The documentation will be created in the subdirectory doc/.
-
-
-[1] Some versions of the gcc 4.4.x release series contain a bug which causes
-the compiler to crash during libsharp compilation. This appears to be fixed
-in the gcc 4.4.7 release. It is possible to work around this problem by adding
-the compiler flag "-fno-tree-fre" after the other optimization flags - the
-configure script should do this automatically.
diff --git a/c_utils/planck.make b/c_utils/planck.make
deleted file mode 100644
index 4f0ccb1..0000000
--- a/c_utils/planck.make
+++ /dev/null
@@ -1,18 +0,0 @@
-PKG:=c_utils
-
-SD:=$(SRCROOT)/$(PKG)
-OD:=$(BLDROOT)/$(PKG)
-
-FULL_INCLUDE+= -I$(SD)
-
-HDR_$(PKG):=$(SD)/*.h
-LIB_$(PKG):=$(LIBDIR)/libc_utils.a
-
-OBJ:=c_utils.o walltime_c.o memusage.o
-OBJ:=$(OBJ:%=$(OD)/%)
-
-$(OBJ): $(HDR_$(PKG)) | $(OD)_mkdir
-$(LIB_$(PKG)): $(OBJ)
-
-all_hdr+=$(HDR_$(PKG))
-all_lib+=$(LIB_$(PKG))
diff --git a/config/config.auto.in b/config/config.auto.in
deleted file mode 100644
index 841cec0..0000000
--- a/config/config.auto.in
+++ /dev/null
@@ -1,12 +0,0 @@
-@SILENT_RULE@
-
-CC=@CC@
-CL=@CC@
-CCFLAGS_NO_C=@CCFLAGS_NO_C@
-CCFLAGS=$(CCFLAGS_NO_C) -c
-CLFLAGS=-L. -L$(LIBDIR) @LDCCFLAGS@ -lm
-DEBUG_CFLAGS=@DEBUG_CFLAGS@
-MPI_CFLAGS=@MPI_CFLAGS@
-OPENMP_CFLAGS=@OPENMP_CFLAGS@
-PIC_CFLAGS=@PIC_CFLAGS@
-ARCREATE=@ARCREATE@
diff --git a/config/rules.common b/config/rules.common
deleted file mode 100644
index bac2a2c..0000000
--- a/config/rules.common
+++ /dev/null
@@ -1,33 +0,0 @@
-BLDROOT   = $(SRCROOT)/build.$(SHARP_TARGET)
-PREFIX    = $(SRCROOT)/$(SHARP_TARGET)
-BINDIR    = $(PREFIX)/bin
-INCDIR    = $(PREFIX)/include
-LIBDIR    = $(PREFIX)/lib
-DOCDIR    = $(SRCROOT)/doc
-PYTHONDIR = $(SRCROOT)/python/libsharp
-
-# do not use any suffix rules
-.SUFFIXES:
-# do not use any default rules
-.DEFAULT:
-
-echo_config:
-	@echo using configuration \'$(SHARP_TARGET)\'
-
-$(BLDROOT)/%.o : $(SRCROOT)/%.c | echo_config
-	@echo "#  compiling $*.c"
-	cd $(@D) && $(CC) $(FULL_INCLUDE) -I$(BLDROOT) $(CCFLAGS) $<
-
-$(BLDROOT)/%.o : $(SRCROOT)/%.cc | echo_config
-	@echo "#  compiling $*.cc"
-	cd $(@D) && $(CXX) $(FULL_INCLUDE) -I$(BLDROOT) $(CXXCFLAGS) $<
-
-%_mkdir:
-	@if [ ! -d $* ]; then mkdir -p $* ; fi
-
-clean:
-	rm -rf $(BLDROOT) $(PREFIX) $(DOCDIR) autom4te.cache/ config.log config.status
-	rm -rf $(PYTHONDIR)/*.c $(PYTHONDIR)/*.o $(PYTHONDIR)/*.so
-
-distclean: clean
-	rm -f config/config.auto
diff --git a/configure.ac b/configure.ac
index 79b435e..9d8e203 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,113 +1,80 @@
-AC_INIT(config/config.auto.in)
+AC_INIT([libsharp], [1.0.0])
+AM_INIT_AUTOMAKE([foreign subdir-objects -Wall -Werror])
+AM_MAINTAINER_MODE([enable])
 
-AC_CHECK_PROG([uname_found],[uname],[1],[0])
-if test $uname_found -eq 0 ; then
-    echo "No uname found; setting system type to unknown."
-    system="unknown"
-else
-    system=`uname -s`-`uname -r`
-fi
-AC_LANG([C])
+dnl
+dnl Needed for linking on Windows.
+dnl Protect with m4_ifdef because AM_PROG_AR is required in
+dnl autoconf >= 1.12 when using -Wall, but the macro is
+dnl absent in old versions of autoconf.
+dnl
+m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
 
-AC_TRY_COMPILE([], [@%:@ifndef __INTEL_COMPILER
-choke me
-@%:@endif], [ICC=[yes]], [ICC=[no]])
+LT_INIT
+AC_CONFIG_MACRO_DIR([m4])
 
-if test $ICC = yes; then GCC=no; fi
-CCTYPE=unknown
-if test $GCC = yes; then CCTYPE=gcc; fi
-if test $ICC = yes; then CCTYPE=icc; fi
-AC_OPENMP
+dnl
+dnl By default, install the headers into a subdirectory of
+dnl ${prefix}/include to avoid possible header filename collisions.
+dnl
+includedir="${includedir}/${PACKAGE_NAME}"
 
-SILENT_RULE=".SILENT:"
-AC_ARG_ENABLE(noisy-make,
-  [  --enable-noisy-make     enable detailed make output],
-  [if test "$enableval" = yes; then
-     SILENT_RULE=""
-   fi])
+dnl
+dnl Enable silent build rules if this version of Automake supports them
+dnl
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 
-ENABLE_MPI=no
-AC_ARG_ENABLE(mpi,
-  [  --enable-mpi            enable generation of MPI-parallel code],
-  [if test "$enableval" = yes; then
-     ENABLE_MPI=yes
-   fi])
+AC_DEFUN([AX_CHECK_COMPILE_FLAG],
+[AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
+AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
+  ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
+  _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
+    [AS_VAR_SET(CACHEVAR,[yes])],
+    [AS_VAR_SET(CACHEVAR,[no])])
+  _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
+AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
+  [m4_default([$2], :)],
+  [m4_default([$3], :)])
+AS_VAR_POPDEF([CACHEVAR])dnl
+])dnl AX_CHECK_COMPILE_FLAGS
 
-ENABLE_DEBUG=no
-AC_ARG_ENABLE(debug,
-  [  --enable-debug          enable generation of debugging symbols],
-  [if test "$enableval" = yes; then
-     ENABLE_DEBUG=yes
-   fi])
+dnl
+dnl Introduce --enable-native-optimizations command line argument to turn on
+dnl -march=native compiler flag, disabled by default.
+dnl
+AC_ARG_ENABLE(
+    [native-optimizations],
+    [AS_HELP_STRING([--enable-native-optimizations], [Enable non-portable optimizations for your own CPU by compiling with -march=native @<:@default=no@:>@])]
+)
 
-ENABLE_PIC=no
-AC_ARG_ENABLE(pic,
-  [  --enable-pic            enable generation of position independent code],
-  [if test "$enableval" = yes; then
-     ENABLE_PIC=yes
-   fi])
+AC_PROG_CC_C99
+AS_IF(
+    [test "x$enable_native_optimizations" = "xyes"],
+    [AX_CHECK_COMPILE_FLAG([-march=native],[CC="$CC -march=native"])],
+    dnl
+    dnl FIXME: On GCC 4.4, we hit an internal compiler error unless either
+    dnl -march=native or -fno-tree-fre is specified.
+    dnl
+    [
+        AS_IF(
+            [test "x$GCC" = "xyes" -a "x`$CC -dumpversion | cut -d. -f1,2`" = "x4.4"],
+            [AX_CHECK_COMPILE_FLAG([-fno-tree-fre], [CFLAGS="$CFLAGS -fno-tree-fre"])]
+        )
+    ]
+)
+AX_CHECK_COMPILE_FLAG([-fno-math-errno],[CFLAGS="$CFLAGS -fno-math-errno"])
+AX_CHECK_COMPILE_FLAG([-fno-trapping-math],[CFLAGS="$CFLAGS -fno-trapping-math"])
+AX_CHECK_COMPILE_FLAG([-fno-rounding-math],[CFLAGS="$CFLAGS -fno-rounding-math"])
+AX_CHECK_COMPILE_FLAG([-fno-signaling-nans],[CFLAGS="$CFLAGS -fno-signaling-nans"])
+AX_CHECK_COMPILE_FLAG([-fcx-limited-range],[CFLAGS="$CFLAGS -fcx-limited-range"])
 
-case $CCTYPE in
-  gcc)
-    CCFLAGS="-O3 -fno-tree-vectorize -ffast-math -fomit-frame-pointer -std=c99 -pedantic -Wextra -Wall -Wno-unknown-pragmas -Wshadow -Wmissing-prototypes -Wfatal-errors"
-    GCCVERSION="`$CC -dumpversion 2>&1`"
-    echo "Using gcc version $GCCVERSION"
-    AC_SUBST(GCCVERSION)
-    changequote(,)
-    gcc43=`echo $GCCVERSION | grep -c '^4\.[3456789]'`
-    gcc44=`echo $GCCVERSION | grep -c '^4\.4'`
-    changequote([,])
-    if test $gcc43 -gt 0; then
-      CCFLAGS="$CCFLAGS -march=native"
-    fi
-    if test $gcc44 -gt 0; then
-      CCFLAGS="$CCFLAGS -fno-tree-fre"
-    fi
-    ;;
-  icc)
-    CCFLAGS="-O3 -xHOST -std=c99 -ip -Wbrief -Wall -vec-report0 -openmp-report0 -wd383,981,1419,1572"
-    ;;
-  *)
-    CCFLAGS="-O2"
-    # Don't do anything now
-    ;;
-esac
+AC_PROG_LIBTOOL
 
-case $system in
-  Darwin-*)
-    ARCREATE="libtool -static -o"
-    ;;
-  *)
-    ARCREATE="ar cr"
-    ;;
-esac
+dnl
+dnl Create pkgconfig .pc file.
+dnl
+AX_CREATE_PKGCONFIG_INFO(,,,,[])
 
-if test $ENABLE_DEBUG = yes; then
-  DEBUG_CFLAGS="-g"
-fi
-
-if test $ENABLE_PIC = yes; then
-  PIC_CFLAGS="-fPIC"
-fi
-
-if test $ENABLE_MPI = yes; then
-  MPI_CFLAGS="-DUSE_MPI"
-fi
-
-CCFLAGS="$CCFLAGS $DEBUG_CFLAGS $OPENMP_CFLAGS $PIC_CFLAGS $MPI_CFLAGS"
-
-CCFLAGS_NO_C="$CCFLAGS $CPPFLAGS"
-
-LDCCFLAGS="$LDFLAGS $CCFLAGS"
-
-AC_SUBST(SILENT_RULE)
-AC_SUBST(CC)
-AC_SUBST(CCFLAGS_NO_C)
-AC_SUBST(LDCCFLAGS)
-AC_SUBST(DEBUG_CFLAGS)
-AC_SUBST(MPI_CFLAGS)
-AC_SUBST(OPENMP_CFLAGS)
-AC_SUBST(PIC_CFLAGS)
-AC_SUBST(ARCREATE)
-
-AC_OUTPUT(config/config.auto)
+AC_CONFIG_FILES([Makefile])
+AC_OUTPUT
diff --git a/docsrc/c_utils.dox b/docsrc/c_utils.dox
deleted file mode 100644
index daf432f..0000000
--- a/docsrc/c_utils.dox
+++ /dev/null
@@ -1,290 +0,0 @@
-# Doxyfile 1.8.1
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-DOXYFILE_ENCODING      = UTF-8
-PROJECT_NAME           = "LevelS C support library"
-PROJECT_NUMBER         = 0.1
-PROJECT_BRIEF          =
-PROJECT_LOGO           =
-OUTPUT_DIRECTORY       = .
-CREATE_SUBDIRS         = NO
-OUTPUT_LANGUAGE        = English
-BRIEF_MEMBER_DESC      = NO
-REPEAT_BRIEF           = YES
-ABBREVIATE_BRIEF       =
-ALWAYS_DETAILED_SEC    = NO
-INLINE_INHERITED_MEMB  = NO
-FULL_PATH_NAMES        = NO
-STRIP_FROM_PATH        =
-STRIP_FROM_INC_PATH    =
-SHORT_NAMES            = NO
-JAVADOC_AUTOBRIEF      = NO
-QT_AUTOBRIEF           = NO
-MULTILINE_CPP_IS_BRIEF = NO
-INHERIT_DOCS           = YES
-SEPARATE_MEMBER_PAGES  = NO
-TAB_SIZE               = 8
-ALIASES                =
-TCL_SUBST              =
-OPTIMIZE_OUTPUT_FOR_C  = YES
-OPTIMIZE_OUTPUT_JAVA   = NO
-OPTIMIZE_FOR_FORTRAN   = NO
-OPTIMIZE_OUTPUT_VHDL   = NO
-EXTENSION_MAPPING      =
-MARKDOWN_SUPPORT       = YES
-BUILTIN_STL_SUPPORT    = NO
-CPP_CLI_SUPPORT        = NO
-SIP_SUPPORT            = NO
-IDL_PROPERTY_SUPPORT   = YES
-DISTRIBUTE_GROUP_DOC   = NO
-SUBGROUPING            = YES
-INLINE_GROUPED_CLASSES = NO
-INLINE_SIMPLE_STRUCTS  = NO
-TYPEDEF_HIDES_STRUCT   = NO
-SYMBOL_CACHE_SIZE      = 0
-LOOKUP_CACHE_SIZE      = 0
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-EXTRACT_ALL            = NO
-EXTRACT_PRIVATE        = NO
-EXTRACT_PACKAGE        = NO
-EXTRACT_STATIC         = NO
-EXTRACT_LOCAL_CLASSES  = YES
-EXTRACT_LOCAL_METHODS  = NO
-EXTRACT_ANON_NSPACES   = NO
-HIDE_UNDOC_MEMBERS     = YES
-HIDE_UNDOC_CLASSES     = YES
-HIDE_FRIEND_COMPOUNDS  = YES
-HIDE_IN_BODY_DOCS      = NO
-INTERNAL_DOCS          = NO
-CASE_SENSE_NAMES       = YES
-HIDE_SCOPE_NAMES       = NO
-SHOW_INCLUDE_FILES     = YES
-FORCE_LOCAL_INCLUDES   = NO
-INLINE_INFO            = YES
-SORT_MEMBER_DOCS       = NO
-SORT_BRIEF_DOCS        = NO
-SORT_MEMBERS_CTORS_1ST = NO
-SORT_GROUP_NAMES       = NO
-SORT_BY_SCOPE_NAME     = NO
-STRICT_PROTO_MATCHING  = NO
-GENERATE_TODOLIST      = YES
-GENERATE_TESTLIST      = YES
-GENERATE_BUGLIST       = YES
-GENERATE_DEPRECATEDLIST= YES
-ENABLED_SECTIONS       =
-MAX_INITIALIZER_LINES  = 30
-SHOW_USED_FILES        = YES
-SHOW_FILES             = YES
-SHOW_NAMESPACES        = YES
-FILE_VERSION_FILTER    =
-LAYOUT_FILE            =
-CITE_BIB_FILES         =
-#---------------------------------------------------------------------------
-# configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-QUIET                  = YES
-WARNINGS               = YES
-WARN_IF_UNDOCUMENTED   = YES
-WARN_IF_DOC_ERROR      = YES
-WARN_NO_PARAMDOC       = NO
-WARN_FORMAT            = "$file:$line: $text"
-WARN_LOGFILE           =
-#---------------------------------------------------------------------------
-# configuration options related to the input files
-#---------------------------------------------------------------------------
-INPUT                  = ../c_utils
-INPUT_ENCODING         = UTF-8
-FILE_PATTERNS          = *.h \
-                         *.c \
-                         *.dox
-RECURSIVE              = YES
-EXCLUDE                =
-EXCLUDE_SYMLINKS       = NO
-EXCLUDE_PATTERNS       =
-EXCLUDE_SYMBOLS        =
-EXAMPLE_PATH           =
-EXAMPLE_PATTERNS       =
-EXAMPLE_RECURSIVE      = NO
-IMAGE_PATH             =
-INPUT_FILTER           =
-FILTER_PATTERNS        =
-FILTER_SOURCE_FILES    = NO
-FILTER_SOURCE_PATTERNS =
-#---------------------------------------------------------------------------
-# configuration options related to source browsing
-#---------------------------------------------------------------------------
-SOURCE_BROWSER         = YES
-INLINE_SOURCES         = NO
-STRIP_CODE_COMMENTS    = NO
-REFERENCED_BY_RELATION = NO
-REFERENCES_RELATION    = NO
-REFERENCES_LINK_SOURCE = YES
-USE_HTAGS              = NO
-VERBATIM_HEADERS       = YES
-#---------------------------------------------------------------------------
-# configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-ALPHABETICAL_INDEX     = YES
-COLS_IN_ALPHA_INDEX    = 5
-IGNORE_PREFIX          =
-#---------------------------------------------------------------------------
-# configuration options related to the HTML output
-#---------------------------------------------------------------------------
-GENERATE_HTML          = YES
-HTML_OUTPUT            = htmldoc
-HTML_FILE_EXTENSION    = .html
-HTML_HEADER            =
-HTML_FOOTER            = footer.html
-HTML_STYLESHEET        =
-HTML_EXTRA_FILES       =
-HTML_COLORSTYLE_HUE    = 220
-HTML_COLORSTYLE_SAT    = 100
-HTML_COLORSTYLE_GAMMA  = 80
-HTML_TIMESTAMP         = YES
-HTML_DYNAMIC_SECTIONS  = NO
-HTML_INDEX_NUM_ENTRIES = 100
-GENERATE_DOCSET        = NO
-DOCSET_FEEDNAME        = "Doxygen generated docs"
-DOCSET_BUNDLE_ID       = org.doxygen.Project
-DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
-DOCSET_PUBLISHER_NAME  = Publisher
-GENERATE_HTMLHELP      = NO
-CHM_FILE               =
-HHC_LOCATION           =
-GENERATE_CHI           = NO
-CHM_INDEX_ENCODING     =
-BINARY_TOC             = NO
-TOC_EXPAND             = NO
-GENERATE_QHP           = NO
-QCH_FILE               =
-QHP_NAMESPACE          = org.doxygen.Project
-QHP_VIRTUAL_FOLDER     = doc
-QHP_CUST_FILTER_NAME   =
-QHP_CUST_FILTER_ATTRS  =
-QHP_SECT_FILTER_ATTRS  =
-QHG_LOCATION           =
-GENERATE_ECLIPSEHELP   = NO
-ECLIPSE_DOC_ID         = org.doxygen.Project
-DISABLE_INDEX          = NO
-GENERATE_TREEVIEW      = NO
-ENUM_VALUES_PER_LINE   = 4
-TREEVIEW_WIDTH         = 250
-EXT_LINKS_IN_WINDOW    = NO
-FORMULA_FONTSIZE       = 10
-FORMULA_TRANSPARENT    = YES
-USE_MATHJAX            = NO
-MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
-MATHJAX_EXTENSIONS     =
-SEARCHENGINE           = NO
-SERVER_BASED_SEARCH    = NO
-#---------------------------------------------------------------------------
-# configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-GENERATE_LATEX         = NO
-LATEX_OUTPUT           = latex
-LATEX_CMD_NAME         = latex
-MAKEINDEX_CMD_NAME     = makeindex
-COMPACT_LATEX          = YES
-PAPER_TYPE             = a4wide
-EXTRA_PACKAGES         =
-LATEX_HEADER           =
-LATEX_FOOTER           =
-PDF_HYPERLINKS         = YES
-USE_PDFLATEX           = YES
-LATEX_BATCHMODE        = NO
-LATEX_HIDE_INDICES     = NO
-LATEX_SOURCE_CODE      = NO
-LATEX_BIB_STYLE        = plain
-#---------------------------------------------------------------------------
-# configuration options related to the RTF output
-#---------------------------------------------------------------------------
-GENERATE_RTF           = NO
-RTF_OUTPUT             = rtf
-COMPACT_RTF            = NO
-RTF_HYPERLINKS         = NO
-RTF_STYLESHEET_FILE    =
-RTF_EXTENSIONS_FILE    =
-#---------------------------------------------------------------------------
-# configuration options related to the man page output
-#---------------------------------------------------------------------------
-GENERATE_MAN           = NO
-MAN_OUTPUT             = man
-MAN_EXTENSION          = .3
-MAN_LINKS              = NO
-#---------------------------------------------------------------------------
-# configuration options related to the XML output
-#---------------------------------------------------------------------------
-GENERATE_XML           = NO
-XML_OUTPUT             = xml
-XML_SCHEMA             =
-XML_DTD                =
-XML_PROGRAMLISTING     = YES
-#---------------------------------------------------------------------------
-# configuration options for the AutoGen Definitions output
-#---------------------------------------------------------------------------
-GENERATE_AUTOGEN_DEF   = NO
-#---------------------------------------------------------------------------
-# configuration options related to the Perl module output
-#---------------------------------------------------------------------------
-GENERATE_PERLMOD       = NO
-PERLMOD_LATEX          = NO
-PERLMOD_PRETTY         = YES
-PERLMOD_MAKEVAR_PREFIX =
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-ENABLE_PREPROCESSING   = YES
-MACRO_EXPANSION        = NO
-EXPAND_ONLY_PREDEF     = NO
-SEARCH_INCLUDES        = YES
-INCLUDE_PATH           =
-INCLUDE_FILE_PATTERNS  =
-PREDEFINED             =
-EXPAND_AS_DEFINED      =
-SKIP_FUNCTION_MACROS   = YES
-#---------------------------------------------------------------------------
-# Configuration::additions related to external references
-#---------------------------------------------------------------------------
-TAGFILES               =
-GENERATE_TAGFILE       = c_utils.tag
-ALLEXTERNALS           = NO
-EXTERNAL_GROUPS        = YES
-PERL_PATH              = /usr/bin/perl
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-CLASS_DIAGRAMS         = YES
-MSCGEN_PATH            =
-HIDE_UNDOC_RELATIONS   = YES
-HAVE_DOT               = NO
-DOT_NUM_THREADS        = 0
-DOT_FONTNAME           = FreeSans
-DOT_FONTSIZE           = 10
-DOT_FONTPATH           =
-CLASS_GRAPH            = YES
-COLLABORATION_GRAPH    = YES
-GROUP_GRAPHS           = YES
-UML_LOOK               = NO
-UML_LIMIT_NUM_FIELDS   = 10
-TEMPLATE_RELATIONS     = YES
-INCLUDE_GRAPH          = NO
-INCLUDED_BY_GRAPH      = NO
-CALL_GRAPH             = NO
-CALLER_GRAPH           = NO
-GRAPHICAL_HIERARCHY    = NO
-DIRECTORY_GRAPH        = YES
-DOT_IMAGE_FORMAT       = png
-INTERACTIVE_SVG        = NO
-DOT_PATH               =
-DOTFILE_DIRS           =
-MSCFILE_DIRS           =
-DOT_GRAPH_MAX_NODES    = 50
-MAX_DOT_GRAPH_DEPTH    = 0
-DOT_TRANSPARENT        = NO
-DOT_MULTI_TARGETS      = NO
-GENERATE_LEGEND        = YES
-DOT_CLEANUP            = YES
diff --git a/docsrc/footer.html b/docsrc/footer.html
deleted file mode 100644
index 6f5dbf0..0000000
--- a/docsrc/footer.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<hr><address style="align: right;"><small>
-Generated on $datetime for $projectname
-</a> </small></address>
-</body>
-</html>
diff --git a/docsrc/index_code.html b/docsrc/index_code.html
deleted file mode 100644
index d8a001d..0000000
--- a/docsrc/index_code.html
+++ /dev/null
@@ -1,15 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
-<html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
-<title>Libsharp source code documentation</title>
-</head><body>
-<H1>Libsharp source code documentation</H1>
-
-<H2>C interfaces</H2>
-
-<ul>
-<li><a href="c_utils/index.html">C support library</a>
-<li><a href="libfftpack/index.html">FFT interface</a>
-<li><a href="libsharp/index.html">Library for spherical harmonic transforms</a>
-</ul>
-</body>
-</html>
diff --git a/docsrc/libfftpack.dox b/docsrc/libfftpack.dox
deleted file mode 100644
index 7ff2c23..0000000
--- a/docsrc/libfftpack.dox
+++ /dev/null
@@ -1,290 +0,0 @@
-# Doxyfile 1.8.1
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-DOXYFILE_ENCODING      = UTF-8
-PROJECT_NAME           = "LevelS FFT library"
-PROJECT_NUMBER         = 0.1
-PROJECT_BRIEF          =
-PROJECT_LOGO           =
-OUTPUT_DIRECTORY       = .
-CREATE_SUBDIRS         = NO
-OUTPUT_LANGUAGE        = English
-BRIEF_MEMBER_DESC      = NO
-REPEAT_BRIEF           = YES
-ABBREVIATE_BRIEF       =
-ALWAYS_DETAILED_SEC    = NO
-INLINE_INHERITED_MEMB  = NO
-FULL_PATH_NAMES        = NO
-STRIP_FROM_PATH        =
-STRIP_FROM_INC_PATH    =
-SHORT_NAMES            = NO
-JAVADOC_AUTOBRIEF      = NO
-QT_AUTOBRIEF           = NO
-MULTILINE_CPP_IS_BRIEF = NO
-INHERIT_DOCS           = YES
-SEPARATE_MEMBER_PAGES  = NO
-TAB_SIZE               = 8
-ALIASES                =
-TCL_SUBST              =
-OPTIMIZE_OUTPUT_FOR_C  = YES
-OPTIMIZE_OUTPUT_JAVA   = NO
-OPTIMIZE_FOR_FORTRAN   = NO
-OPTIMIZE_OUTPUT_VHDL   = NO
-EXTENSION_MAPPING      =
-MARKDOWN_SUPPORT       = YES
-BUILTIN_STL_SUPPORT    = NO
-CPP_CLI_SUPPORT        = NO
-SIP_SUPPORT            = NO
-IDL_PROPERTY_SUPPORT   = YES
-DISTRIBUTE_GROUP_DOC   = NO
-SUBGROUPING            = YES
-INLINE_GROUPED_CLASSES = NO
-INLINE_SIMPLE_STRUCTS  = NO
-TYPEDEF_HIDES_STRUCT   = NO
-SYMBOL_CACHE_SIZE      = 0
-LOOKUP_CACHE_SIZE      = 0
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-EXTRACT_ALL            = NO
-EXTRACT_PRIVATE        = NO
-EXTRACT_PACKAGE        = NO
-EXTRACT_STATIC         = NO
-EXTRACT_LOCAL_CLASSES  = YES
-EXTRACT_LOCAL_METHODS  = NO
-EXTRACT_ANON_NSPACES   = NO
-HIDE_UNDOC_MEMBERS     = YES
-HIDE_UNDOC_CLASSES     = YES
-HIDE_FRIEND_COMPOUNDS  = YES
-HIDE_IN_BODY_DOCS      = NO
-INTERNAL_DOCS          = NO
-CASE_SENSE_NAMES       = YES
-HIDE_SCOPE_NAMES       = NO
-SHOW_INCLUDE_FILES     = YES
-FORCE_LOCAL_INCLUDES   = NO
-INLINE_INFO            = YES
-SORT_MEMBER_DOCS       = NO
-SORT_BRIEF_DOCS        = NO
-SORT_MEMBERS_CTORS_1ST = NO
-SORT_GROUP_NAMES       = NO
-SORT_BY_SCOPE_NAME     = NO
-STRICT_PROTO_MATCHING  = NO
-GENERATE_TODOLIST      = YES
-GENERATE_TESTLIST      = YES
-GENERATE_BUGLIST       = YES
-GENERATE_DEPRECATEDLIST= YES
-ENABLED_SECTIONS       =
-MAX_INITIALIZER_LINES  = 30
-SHOW_USED_FILES        = YES
-SHOW_FILES             = YES
-SHOW_NAMESPACES        = YES
-FILE_VERSION_FILTER    =
-LAYOUT_FILE            =
-CITE_BIB_FILES         =
-#---------------------------------------------------------------------------
-# configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-QUIET                  = YES
-WARNINGS               = YES
-WARN_IF_UNDOCUMENTED   = YES
-WARN_IF_DOC_ERROR      = YES
-WARN_NO_PARAMDOC       = NO
-WARN_FORMAT            = "$file:$line: $text"
-WARN_LOGFILE           =
-#---------------------------------------------------------------------------
-# configuration options related to the input files
-#---------------------------------------------------------------------------
-INPUT                  = ../libfftpack
-INPUT_ENCODING         = UTF-8
-FILE_PATTERNS          = *.h \
-                         *.c \
-                         *.dox
-RECURSIVE              = YES
-EXCLUDE                =
-EXCLUDE_SYMLINKS       = NO
-EXCLUDE_PATTERNS       =
-EXCLUDE_SYMBOLS        =
-EXAMPLE_PATH           =
-EXAMPLE_PATTERNS       =
-EXAMPLE_RECURSIVE      = NO
-IMAGE_PATH             =
-INPUT_FILTER           =
-FILTER_PATTERNS        =
-FILTER_SOURCE_FILES    = NO
-FILTER_SOURCE_PATTERNS =
-#---------------------------------------------------------------------------
-# configuration options related to source browsing
-#---------------------------------------------------------------------------
-SOURCE_BROWSER         = YES
-INLINE_SOURCES         = NO
-STRIP_CODE_COMMENTS    = NO
-REFERENCED_BY_RELATION = NO
-REFERENCES_RELATION    = NO
-REFERENCES_LINK_SOURCE = YES
-USE_HTAGS              = NO
-VERBATIM_HEADERS       = YES
-#---------------------------------------------------------------------------
-# configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-ALPHABETICAL_INDEX     = YES
-COLS_IN_ALPHA_INDEX    = 5
-IGNORE_PREFIX          =
-#---------------------------------------------------------------------------
-# configuration options related to the HTML output
-#---------------------------------------------------------------------------
-GENERATE_HTML          = YES
-HTML_OUTPUT            = htmldoc
-HTML_FILE_EXTENSION    = .html
-HTML_HEADER            =
-HTML_FOOTER            = footer.html
-HTML_STYLESHEET        =
-HTML_EXTRA_FILES       =
-HTML_COLORSTYLE_HUE    = 220
-HTML_COLORSTYLE_SAT    = 100
-HTML_COLORSTYLE_GAMMA  = 80
-HTML_TIMESTAMP         = YES
-HTML_DYNAMIC_SECTIONS  = NO
-HTML_INDEX_NUM_ENTRIES = 100
-GENERATE_DOCSET        = NO
-DOCSET_FEEDNAME        = "Doxygen generated docs"
-DOCSET_BUNDLE_ID       = org.doxygen.Project
-DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
-DOCSET_PUBLISHER_NAME  = Publisher
-GENERATE_HTMLHELP      = NO
-CHM_FILE               =
-HHC_LOCATION           =
-GENERATE_CHI           = NO
-CHM_INDEX_ENCODING     =
-BINARY_TOC             = NO
-TOC_EXPAND             = NO
-GENERATE_QHP           = NO
-QCH_FILE               =
-QHP_NAMESPACE          = org.doxygen.Project
-QHP_VIRTUAL_FOLDER     = doc
-QHP_CUST_FILTER_NAME   =
-QHP_CUST_FILTER_ATTRS  =
-QHP_SECT_FILTER_ATTRS  =
-QHG_LOCATION           =
-GENERATE_ECLIPSEHELP   = NO
-ECLIPSE_DOC_ID         = org.doxygen.Project
-DISABLE_INDEX          = NO
-GENERATE_TREEVIEW      = NO
-ENUM_VALUES_PER_LINE   = 4
-TREEVIEW_WIDTH         = 250
-EXT_LINKS_IN_WINDOW    = NO
-FORMULA_FONTSIZE       = 10
-FORMULA_TRANSPARENT    = YES
-USE_MATHJAX            = NO
-MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
-MATHJAX_EXTENSIONS     =
-SEARCHENGINE           = NO
-SERVER_BASED_SEARCH    = NO
-#---------------------------------------------------------------------------
-# configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-GENERATE_LATEX         = NO
-LATEX_OUTPUT           = latex
-LATEX_CMD_NAME         = latex
-MAKEINDEX_CMD_NAME     = makeindex
-COMPACT_LATEX          = YES
-PAPER_TYPE             = a4wide
-EXTRA_PACKAGES         =
-LATEX_HEADER           =
-LATEX_FOOTER           =
-PDF_HYPERLINKS         = YES
-USE_PDFLATEX           = YES
-LATEX_BATCHMODE        = NO
-LATEX_HIDE_INDICES     = NO
-LATEX_SOURCE_CODE      = NO
-LATEX_BIB_STYLE        = plain
-#---------------------------------------------------------------------------
-# configuration options related to the RTF output
-#---------------------------------------------------------------------------
-GENERATE_RTF           = NO
-RTF_OUTPUT             = rtf
-COMPACT_RTF            = NO
-RTF_HYPERLINKS         = NO
-RTF_STYLESHEET_FILE    =
-RTF_EXTENSIONS_FILE    =
-#---------------------------------------------------------------------------
-# configuration options related to the man page output
-#---------------------------------------------------------------------------
-GENERATE_MAN           = NO
-MAN_OUTPUT             = man
-MAN_EXTENSION          = .3
-MAN_LINKS              = NO
-#---------------------------------------------------------------------------
-# configuration options related to the XML output
-#---------------------------------------------------------------------------
-GENERATE_XML           = NO
-XML_OUTPUT             = xml
-XML_SCHEMA             =
-XML_DTD                =
-XML_PROGRAMLISTING     = YES
-#---------------------------------------------------------------------------
-# configuration options for the AutoGen Definitions output
-#---------------------------------------------------------------------------
-GENERATE_AUTOGEN_DEF   = NO
-#---------------------------------------------------------------------------
-# configuration options related to the Perl module output
-#---------------------------------------------------------------------------
-GENERATE_PERLMOD       = NO
-PERLMOD_LATEX          = NO
-PERLMOD_PRETTY         = YES
-PERLMOD_MAKEVAR_PREFIX =
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-ENABLE_PREPROCESSING   = YES
-MACRO_EXPANSION        = NO
-EXPAND_ONLY_PREDEF     = NO
-SEARCH_INCLUDES        = YES
-INCLUDE_PATH           =
-INCLUDE_FILE_PATTERNS  =
-PREDEFINED             =
-EXPAND_AS_DEFINED      =
-SKIP_FUNCTION_MACROS   = YES
-#---------------------------------------------------------------------------
-# Configuration::additions related to external references
-#---------------------------------------------------------------------------
-TAGFILES               = c_utils.tag=../c_utils
-GENERATE_TAGFILE       = libfftpack.tag
-ALLEXTERNALS           = NO
-EXTERNAL_GROUPS        = YES
-PERL_PATH              = /usr/bin/perl
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-CLASS_DIAGRAMS         = YES
-MSCGEN_PATH            =
-HIDE_UNDOC_RELATIONS   = YES
-HAVE_DOT               = NO
-DOT_NUM_THREADS        = 0
-DOT_FONTNAME           = FreeSans
-DOT_FONTSIZE           = 10
-DOT_FONTPATH           =
-CLASS_GRAPH            = YES
-COLLABORATION_GRAPH    = YES
-GROUP_GRAPHS           = YES
-UML_LOOK               = NO
-UML_LIMIT_NUM_FIELDS   = 10
-TEMPLATE_RELATIONS     = YES
-INCLUDE_GRAPH          = NO
-INCLUDED_BY_GRAPH      = NO
-CALL_GRAPH             = NO
-CALLER_GRAPH           = NO
-GRAPHICAL_HIERARCHY    = NO
-DIRECTORY_GRAPH        = YES
-DOT_IMAGE_FORMAT       = png
-INTERACTIVE_SVG        = NO
-DOT_PATH               =
-DOTFILE_DIRS           =
-MSCFILE_DIRS           =
-DOT_GRAPH_MAX_NODES    = 50
-MAX_DOT_GRAPH_DEPTH    = 0
-DOT_TRANSPARENT        = NO
-DOT_MULTI_TARGETS      = NO
-GENERATE_LEGEND        = YES
-DOT_CLEANUP            = YES
diff --git a/docsrc/libsharp.dox b/docsrc/libsharp.dox
deleted file mode 100644
index b476ab4..0000000
--- a/docsrc/libsharp.dox
+++ /dev/null
@@ -1,291 +0,0 @@
-# Doxyfile 1.8.1
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-DOXYFILE_ENCODING      = UTF-8
-PROJECT_NAME           = "LevelS SHT library"
-PROJECT_NUMBER         = 0.1
-PROJECT_BRIEF          =
-PROJECT_LOGO           =
-OUTPUT_DIRECTORY       = .
-CREATE_SUBDIRS         = NO
-OUTPUT_LANGUAGE        = English
-BRIEF_MEMBER_DESC      = NO
-REPEAT_BRIEF           = YES
-ABBREVIATE_BRIEF       =
-ALWAYS_DETAILED_SEC    = NO
-INLINE_INHERITED_MEMB  = NO
-FULL_PATH_NAMES        = NO
-STRIP_FROM_PATH        =
-STRIP_FROM_INC_PATH    =
-SHORT_NAMES            = NO
-JAVADOC_AUTOBRIEF      = NO
-QT_AUTOBRIEF           = NO
-MULTILINE_CPP_IS_BRIEF = NO
-INHERIT_DOCS           = YES
-SEPARATE_MEMBER_PAGES  = NO
-TAB_SIZE               = 8
-ALIASES                =
-TCL_SUBST              =
-OPTIMIZE_OUTPUT_FOR_C  = YES
-OPTIMIZE_OUTPUT_JAVA   = NO
-OPTIMIZE_FOR_FORTRAN   = NO
-OPTIMIZE_OUTPUT_VHDL   = NO
-EXTENSION_MAPPING      =
-MARKDOWN_SUPPORT       = YES
-BUILTIN_STL_SUPPORT    = NO
-CPP_CLI_SUPPORT        = NO
-SIP_SUPPORT            = NO
-IDL_PROPERTY_SUPPORT   = YES
-DISTRIBUTE_GROUP_DOC   = NO
-SUBGROUPING            = YES
-INLINE_GROUPED_CLASSES = NO
-INLINE_SIMPLE_STRUCTS  = NO
-TYPEDEF_HIDES_STRUCT   = NO
-SYMBOL_CACHE_SIZE      = 0
-LOOKUP_CACHE_SIZE      = 0
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-EXTRACT_ALL            = NO
-EXTRACT_PRIVATE        = NO
-EXTRACT_PACKAGE        = NO
-EXTRACT_STATIC         = NO
-EXTRACT_LOCAL_CLASSES  = YES
-EXTRACT_LOCAL_METHODS  = NO
-EXTRACT_ANON_NSPACES   = NO
-HIDE_UNDOC_MEMBERS     = YES
-HIDE_UNDOC_CLASSES     = YES
-HIDE_FRIEND_COMPOUNDS  = YES
-HIDE_IN_BODY_DOCS      = NO
-INTERNAL_DOCS          = NO
-CASE_SENSE_NAMES       = YES
-HIDE_SCOPE_NAMES       = NO
-SHOW_INCLUDE_FILES     = YES
-FORCE_LOCAL_INCLUDES   = NO
-INLINE_INFO            = YES
-SORT_MEMBER_DOCS       = NO
-SORT_BRIEF_DOCS        = NO
-SORT_MEMBERS_CTORS_1ST = NO
-SORT_GROUP_NAMES       = NO
-SORT_BY_SCOPE_NAME     = NO
-STRICT_PROTO_MATCHING  = NO
-GENERATE_TODOLIST      = YES
-GENERATE_TESTLIST      = YES
-GENERATE_BUGLIST       = YES
-GENERATE_DEPRECATEDLIST= YES
-ENABLED_SECTIONS       =
-MAX_INITIALIZER_LINES  = 30
-SHOW_USED_FILES        = YES
-SHOW_FILES             = YES
-SHOW_NAMESPACES        = YES
-FILE_VERSION_FILTER    =
-LAYOUT_FILE            =
-CITE_BIB_FILES         =
-#---------------------------------------------------------------------------
-# configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-QUIET                  = YES
-WARNINGS               = YES
-WARN_IF_UNDOCUMENTED   = YES
-WARN_IF_DOC_ERROR      = YES
-WARN_NO_PARAMDOC       = NO
-WARN_FORMAT            = "$file:$line: $text"
-WARN_LOGFILE           =
-#---------------------------------------------------------------------------
-# configuration options related to the input files
-#---------------------------------------------------------------------------
-INPUT                  = ../libsharp
-INPUT_ENCODING         = UTF-8
-FILE_PATTERNS          = *.h \
-                         *.c \
-                         *.dox
-RECURSIVE              = YES
-EXCLUDE                =
-EXCLUDE_SYMLINKS       = NO
-EXCLUDE_PATTERNS       =
-EXCLUDE_SYMBOLS        =
-EXAMPLE_PATH           =
-EXAMPLE_PATTERNS       =
-EXAMPLE_RECURSIVE      = NO
-IMAGE_PATH             =
-INPUT_FILTER           =
-FILTER_PATTERNS        =
-FILTER_SOURCE_FILES    = NO
-FILTER_SOURCE_PATTERNS =
-#---------------------------------------------------------------------------
-# configuration options related to source browsing
-#---------------------------------------------------------------------------
-SOURCE_BROWSER         = YES
-INLINE_SOURCES         = NO
-STRIP_CODE_COMMENTS    = NO
-REFERENCED_BY_RELATION = NO
-REFERENCES_RELATION    = NO
-REFERENCES_LINK_SOURCE = YES
-USE_HTAGS              = NO
-VERBATIM_HEADERS       = YES
-#---------------------------------------------------------------------------
-# configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-ALPHABETICAL_INDEX     = YES
-COLS_IN_ALPHA_INDEX    = 5
-IGNORE_PREFIX          =
-#---------------------------------------------------------------------------
-# configuration options related to the HTML output
-#---------------------------------------------------------------------------
-GENERATE_HTML          = YES
-HTML_OUTPUT            = htmldoc
-HTML_FILE_EXTENSION    = .html
-HTML_HEADER            =
-HTML_FOOTER            = footer.html
-HTML_STYLESHEET        =
-HTML_EXTRA_FILES       =
-HTML_COLORSTYLE_HUE    = 220
-HTML_COLORSTYLE_SAT    = 100
-HTML_COLORSTYLE_GAMMA  = 80
-HTML_TIMESTAMP         = YES
-HTML_DYNAMIC_SECTIONS  = NO
-HTML_INDEX_NUM_ENTRIES = 100
-GENERATE_DOCSET        = NO
-DOCSET_FEEDNAME        = "Doxygen generated docs"
-DOCSET_BUNDLE_ID       = org.doxygen.Project
-DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
-DOCSET_PUBLISHER_NAME  = Publisher
-GENERATE_HTMLHELP      = NO
-CHM_FILE               =
-HHC_LOCATION           =
-GENERATE_CHI           = NO
-CHM_INDEX_ENCODING     =
-BINARY_TOC             = NO
-TOC_EXPAND             = NO
-GENERATE_QHP           = NO
-QCH_FILE               =
-QHP_NAMESPACE          = org.doxygen.Project
-QHP_VIRTUAL_FOLDER     = doc
-QHP_CUST_FILTER_NAME   =
-QHP_CUST_FILTER_ATTRS  =
-QHP_SECT_FILTER_ATTRS  =
-QHG_LOCATION           =
-GENERATE_ECLIPSEHELP   = NO
-ECLIPSE_DOC_ID         = org.doxygen.Project
-DISABLE_INDEX          = NO
-GENERATE_TREEVIEW      = NO
-ENUM_VALUES_PER_LINE   = 4
-TREEVIEW_WIDTH         = 250
-EXT_LINKS_IN_WINDOW    = NO
-FORMULA_FONTSIZE       = 10
-FORMULA_TRANSPARENT    = YES
-USE_MATHJAX            = NO
-MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
-MATHJAX_EXTENSIONS     =
-SEARCHENGINE           = NO
-SERVER_BASED_SEARCH    = NO
-#---------------------------------------------------------------------------
-# configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-GENERATE_LATEX         = NO
-LATEX_OUTPUT           = latex
-LATEX_CMD_NAME         = latex
-MAKEINDEX_CMD_NAME     = makeindex
-COMPACT_LATEX          = YES
-PAPER_TYPE             = a4wide
-EXTRA_PACKAGES         =
-LATEX_HEADER           =
-LATEX_FOOTER           =
-PDF_HYPERLINKS         = YES
-USE_PDFLATEX           = YES
-LATEX_BATCHMODE        = NO
-LATEX_HIDE_INDICES     = NO
-LATEX_SOURCE_CODE      = NO
-LATEX_BIB_STYLE        = plain
-#---------------------------------------------------------------------------
-# configuration options related to the RTF output
-#---------------------------------------------------------------------------
-GENERATE_RTF           = NO
-RTF_OUTPUT             = rtf
-COMPACT_RTF            = NO
-RTF_HYPERLINKS         = NO
-RTF_STYLESHEET_FILE    =
-RTF_EXTENSIONS_FILE    =
-#---------------------------------------------------------------------------
-# configuration options related to the man page output
-#---------------------------------------------------------------------------
-GENERATE_MAN           = NO
-MAN_OUTPUT             = man
-MAN_EXTENSION          = .3
-MAN_LINKS              = NO
-#---------------------------------------------------------------------------
-# configuration options related to the XML output
-#---------------------------------------------------------------------------
-GENERATE_XML           = NO
-XML_OUTPUT             = xml
-XML_SCHEMA             =
-XML_DTD                =
-XML_PROGRAMLISTING     = YES
-#---------------------------------------------------------------------------
-# configuration options for the AutoGen Definitions output
-#---------------------------------------------------------------------------
-GENERATE_AUTOGEN_DEF   = NO
-#---------------------------------------------------------------------------
-# configuration options related to the Perl module output
-#---------------------------------------------------------------------------
-GENERATE_PERLMOD       = NO
-PERLMOD_LATEX          = NO
-PERLMOD_PRETTY         = YES
-PERLMOD_MAKEVAR_PREFIX =
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-ENABLE_PREPROCESSING   = YES
-MACRO_EXPANSION        = NO
-EXPAND_ONLY_PREDEF     = NO
-SEARCH_INCLUDES        = YES
-INCLUDE_PATH           =
-INCLUDE_FILE_PATTERNS  =
-PREDEFINED             =
-EXPAND_AS_DEFINED      =
-SKIP_FUNCTION_MACROS   = YES
-#---------------------------------------------------------------------------
-# Configuration::additions related to external references
-#---------------------------------------------------------------------------
-TAGFILES               = libfftpack.tag=../libfftpack \
-                         c_utils.tag=../c_utils
-GENERATE_TAGFILE       = libsharp.tag
-ALLEXTERNALS           = NO
-EXTERNAL_GROUPS        = YES
-PERL_PATH              = /usr/bin/perl
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-CLASS_DIAGRAMS         = YES
-MSCGEN_PATH            =
-HIDE_UNDOC_RELATIONS   = YES
-HAVE_DOT               = NO
-DOT_NUM_THREADS        = 0
-DOT_FONTNAME           = FreeSans
-DOT_FONTSIZE           = 10
-DOT_FONTPATH           =
-CLASS_GRAPH            = YES
-COLLABORATION_GRAPH    = YES
-GROUP_GRAPHS           = YES
-UML_LOOK               = NO
-UML_LIMIT_NUM_FIELDS   = 10
-TEMPLATE_RELATIONS     = YES
-INCLUDE_GRAPH          = NO
-INCLUDED_BY_GRAPH      = NO
-CALL_GRAPH             = NO
-CALLER_GRAPH           = NO
-GRAPHICAL_HIERARCHY    = NO
-DIRECTORY_GRAPH        = YES
-DOT_IMAGE_FORMAT       = png
-INTERACTIVE_SVG        = NO
-DOT_PATH               =
-DOTFILE_DIRS           =
-MSCFILE_DIRS           =
-DOT_GRAPH_MAX_NODES    = 50
-MAX_DOT_GRAPH_DEPTH    = 0
-DOT_TRANSPARENT        = NO
-DOT_MULTI_TARGETS      = NO
-GENERATE_LEGEND        = YES
-DOT_CLEANUP            = YES
diff --git a/docsrc/planck.make b/docsrc/planck.make
deleted file mode 100644
index 0d0a462..0000000
--- a/docsrc/planck.make
+++ /dev/null
@@ -1,20 +0,0 @@
-PKG:=docsrc
-
-docsrc_idx: $(DOCDIR)_mkdir
-	cp $(SRCROOT)/docsrc/index_code.html $(DOCDIR)/index.html
-
-docsrc_code_doc: $(DOCDIR)_mkdir docsrc_idx
-	cd $(SRCROOT)/docsrc; \
-	for i in c_utils libfftpack libsharp; do \
-	  doxygen $${i}.dox; \
-	  rm -rf $(DOCDIR)/$${i}; mv htmldoc $(DOCDIR)/$${i}; \
-	done; \
-	rm *.tag;
-
-docsrc_clean:
-	cd $(SRCROOT)/docsrc; \
-	rm -f *.tag
-	cd $(SRCROOT)/docsrc; \
-	rm -rf htmldoc
-
-doc: docsrc_code_doc
diff --git a/fortran/sharp.f90 b/fortran/sharp.f90
deleted file mode 100644
index 36a1d11..0000000
--- a/fortran/sharp.f90
+++ /dev/null
@@ -1,286 +0,0 @@
-module sharp
-  use iso_c_binding
-  implicit none
-  ! alm_info flags
-  integer, parameter :: SHARP_PACKED = 1
-
-  ! sharp job types
-  enum, bind(c)
-      enumerator :: SHARP_YtW = 0
-      enumerator :: SHARP_Y = 1
-      enumerator :: SHARP_Yt = 2
-      enumerator :: SHARP_WY = 3
-      enumerator :: SHARP_ALM2MAP_DERIV1 = 4
-   end enum
-
-  ! sharp job flags
-  integer, parameter :: SHARP_DP             = ISHFT(1, 4)
-  integer, parameter :: SHARP_ADD            = ISHFT(1, 5)
-  integer, parameter :: SHARP_REAL_HARMONICS = ISHFT(1, 6)
-  integer, parameter :: SHARP_NO_FFT         = ISHFT(1, 7)
-
-  type sharp_geom_info
-     type(c_ptr) :: handle
-     integer(c_intptr_t) :: n_local
-  end type sharp_geom_info
-
-  type sharp_alm_info
-     type(c_ptr) :: handle
-     integer(c_intptr_t) :: n_local
-  end type sharp_alm_info
-
-  interface
-
-     ! alm_info
-     subroutine sharp_make_general_alm_info( &
-         lmax, nm, stride, mval, mvstart, flags, alm_info) bind(c)
-       use iso_c_binding
-       integer(c_int), value, intent(in)    :: lmax, nm, stride, flags
-       integer(c_int), intent(in)           :: mval(nm)
-       integer(c_intptr_t), intent(in)     :: mvstart(nm)
-       type(c_ptr), intent(out)             :: alm_info
-     end subroutine sharp_make_general_alm_info
-
-     subroutine c_sharp_make_mmajor_real_packed_alm_info( &
-         lmax, stride, nm, ms, alm_info) bind(c, name='sharp_make_mmajor_real_packed_alm_info')
-       use iso_c_binding
-       integer(c_int), value, intent(in)    :: lmax, nm, stride
-       integer(c_int), intent(in), optional :: ms(nm)
-       type(c_ptr), intent(out)             :: alm_info
-     end subroutine c_sharp_make_mmajor_real_packed_alm_info
-
-     function c_sharp_alm_count(alm_info) bind(c, name='sharp_alm_count')
-       use iso_c_binding
-       integer(c_intptr_t)           :: c_sharp_alm_count
-       type(c_ptr), value, intent(in) :: alm_info
-     end function c_sharp_alm_count
-
-     subroutine c_sharp_destroy_alm_info(alm_info) bind(c, name='sharp_destroy_alm_info')
-       use iso_c_binding
-       type(c_ptr), value                   :: alm_info
-     end subroutine c_sharp_destroy_alm_info
-
-     ! geom_info
-     subroutine sharp_make_subset_healpix_geom_info ( &
-          nside, stride, nrings, rings, weight, geom_info) bind(c)
-       use iso_c_binding
-       integer(c_int), value, intent(in)    :: nside, stride, nrings
-       integer(c_int), intent(in), optional :: rings(nrings)
-       real(c_double), intent(in), optional :: weight(2 * nside)
-       type(c_ptr), intent(out)             :: geom_info
-     end subroutine sharp_make_subset_healpix_geom_info
-
-     subroutine c_sharp_destroy_geom_info(geom_info) bind(c, name='sharp_destroy_geom_info')
-       use iso_c_binding
-       type(c_ptr), value                   :: geom_info
-     end subroutine c_sharp_destroy_geom_info
-
-     function c_sharp_map_size(info) bind(c, name='sharp_map_size')
-       use iso_c_binding
-       integer(c_intptr_t) :: c_sharp_map_size
-       type(c_ptr), value   :: info
-     end function c_sharp_map_size
-
-
-     ! execute
-     subroutine c_sharp_execute(type, spin, alm, map, geom_info, alm_info, ntrans, &
-                                flags, time, opcnt) bind(c, name='sharp_execute')
-       use iso_c_binding
-       integer(c_int), value                        :: type, spin, ntrans, flags
-       type(c_ptr), value                           :: alm_info, geom_info
-       real(c_double), intent(out), optional        :: time
-       integer(c_long_long), intent(out), optional  :: opcnt
-       type(c_ptr), intent(in)                      :: alm(*), map(*)
-     end subroutine c_sharp_execute
-
-     subroutine c_sharp_execute_mpi(comm, type, spin, alm, map, geom_info, alm_info, ntrans, &
-                                    flags, time, opcnt) bind(c, name='sharp_execute_mpi_fortran')
-       use iso_c_binding
-       integer(c_int), value                        :: comm, type, spin, ntrans, flags
-       type(c_ptr), value                           :: alm_info, geom_info
-       real(c_double), intent(out), optional        :: time
-       integer(c_long_long), intent(out), optional  :: opcnt
-       type(c_ptr), intent(in)                      :: alm(*), map(*)
-     end subroutine c_sharp_execute_mpi
-
-     ! Legendre transforms
-     subroutine c_sharp_legendre_transform(bl, recfac, lmax, x, out, nx) &
-          bind(c, name='sharp_legendre_transform')
-       use iso_c_binding
-       integer(c_intptr_t), value :: lmax, nx
-       real(c_double) :: bl(lmax + 1), x(nx), out(nx)
-       real(c_double), optional :: recfac(lmax + 1)
-     end subroutine c_sharp_legendre_transform
-
-     subroutine c_sharp_legendre_transform_s(bl, recfac, lmax, x, out, nx) &
-          bind(c, name='sharp_legendre_transform_s')
-       use iso_c_binding
-       integer(c_intptr_t), value :: lmax, nx
-       real(c_float) :: bl(lmax + 1), x(nx), out(nx)
-       real(c_float), optional :: recfac(lmax + 1)
-     end subroutine c_sharp_legendre_transform_s
-  end interface
-
-  interface sharp_execute
-     module procedure sharp_execute_d
-  end interface
-
-  interface sharp_legendre_transform
-     module procedure sharp_legendre_transform_d, sharp_legendre_transform_s
-  end interface sharp_legendre_transform
-
-contains
-  ! alm info
-
-  ! if ms is not passed, we default to using m=0..lmax.
-  subroutine sharp_make_mmajor_real_packed_alm_info(lmax, ms, alm_info)
-    use iso_c_binding
-    integer(c_int), value, intent(in)    :: lmax
-    integer(c_int), intent(in), optional :: ms(:)
-    type(sharp_alm_info), intent(out)    :: alm_info
-    !--
-    integer(c_int), allocatable          :: ms_copy(:)
-    integer(c_int)                       :: nm
-
-    if (present(ms)) then
-       nm = size(ms)
-       allocate(ms_copy(nm))
-       ms_copy = ms
-       call c_sharp_make_mmajor_real_packed_alm_info(lmax, 1, nm, ms_copy, alm_info=alm_info%handle)
-       deallocate(ms_copy)
-    else
-       call c_sharp_make_mmajor_real_packed_alm_info(lmax, 1, lmax + 1, alm_info=alm_info%handle)
-    end if
-    alm_info%n_local = c_sharp_alm_count(alm_info%handle)
-  end subroutine sharp_make_mmajor_real_packed_alm_info
-
-  subroutine sharp_destroy_alm_info(alm_info)
-    use iso_c_binding
-    type(sharp_alm_info), intent(inout) :: alm_info
-    call c_sharp_destroy_alm_info(alm_info%handle)
-    alm_info%handle = c_null_ptr
-  end subroutine sharp_destroy_alm_info
-
-
-  ! geom info
-  subroutine sharp_make_healpix_geom_info(nside, rings, weight, geom_info)
-    integer(c_int), value                :: nside
-    integer(c_int), optional             :: rings(:)
-    real(c_double), intent(in), optional :: weight(2 * nside)
-    type(sharp_geom_info), intent(out)   :: geom_info
-    !--
-    integer(c_int) :: nrings
-    integer(c_int), allocatable :: rings_copy(:)
-
-    if (present(rings)) then
-       nrings = size(rings)
-       allocate(rings_copy(nrings))
-       rings_copy = rings
-       call sharp_make_subset_healpix_geom_info(nside, 1, nrings, rings_copy, &
-                                                weight, geom_info%handle)
-       deallocate(rings_copy)
-    else
-       call sharp_make_subset_healpix_geom_info(nside, 1, nrings=4 * nside - 1, &
-                                                weight=weight, geom_info=geom_info%handle)
-    end if
-    geom_info%n_local = c_sharp_map_size(geom_info%handle)
-  end subroutine sharp_make_healpix_geom_info
-
-  subroutine sharp_destroy_geom_info(geom_info)
-    use iso_c_binding
-    type(sharp_geom_info), intent(inout) :: geom_info
-    call c_sharp_destroy_geom_info(geom_info%handle)
-    geom_info%handle = c_null_ptr
-  end subroutine sharp_destroy_geom_info
-
-
-  ! Currently the only mode supported is stacked (not interleaved) maps.
-  !
-  ! Note that passing the exact dimension of alm/map is necesarry, it
-  ! prevents the caller from doing too crazy slicing prior to pass array
-  ! in...
-  !
-  ! Usage:
-  !
-  ! The alm array must have shape exactly alm(alm_info%n_local, nmaps)
-  ! The maps array must have shape exactly map(map_info%n_local, nmaps).
-  subroutine sharp_execute_d(type, spin, nmaps, alm, alm_info, map, geom_info, &
-                             add, time, opcnt, comm)
-    use iso_c_binding
-    use mpi
-    implicit none
-    integer(c_int), value                        :: type, spin, nmaps
-    integer(c_int), optional                     :: comm
-    logical, value, optional                     :: add  ! should add instead of replace out
-
-    type(sharp_alm_info)                         :: alm_info
-    type(sharp_geom_info)                        :: geom_info
-    real(c_double), intent(out), optional        :: time
-    integer(c_long_long), intent(out), optional  :: opcnt
-    real(c_double), target, intent(inout)        :: alm(0:alm_info%n_local - 1, 1:nmaps)
-    real(c_double), target, intent(inout)        :: map(0:geom_info%n_local - 1, 1:nmaps)
-    !--
-    integer(c_int)         :: mod_flags, ntrans, k
-    type(c_ptr), target    :: alm_ptr(nmaps)
-    type(c_ptr), target    :: map_ptr(nmaps)
-
-    mod_flags = SHARP_DP
-    if (present(add) .and. add) then
-       mod_flags = or(mod_flags, SHARP_ADD)
-    end if
-
-    if (spin == 0) then
-       ntrans = nmaps
-    else
-       ntrans = nmaps / 2
-    end if
-
-    ! Set up pointer table to access maps
-    alm_ptr(:) = c_null_ptr
-    map_ptr(:) = c_null_ptr
-    do k = 1, nmaps
-       if (alm_info%n_local > 0) alm_ptr(k) = c_loc(alm(0, k))
-       if (geom_info%n_local > 0) map_ptr(k) = c_loc(map(0, k))
-    end do
-
-    if (present(comm)) then
-      call c_sharp_execute_mpi(comm, type, spin, alm_ptr, map_ptr, &
-          geom_info=geom_info%handle, &
-          alm_info=alm_info%handle, &
-          ntrans=ntrans, &
-          flags=mod_flags, &
-          time=time, &
-          opcnt=opcnt)
-    else
-      call c_sharp_execute(type, spin, alm_ptr, map_ptr, &
-          geom_info=geom_info%handle, &
-          alm_info=alm_info%handle, &
-          ntrans=ntrans, &
-          flags=mod_flags, &
-          time=time, &
-          opcnt=opcnt)
-   end if
-  end subroutine sharp_execute_d
-
-  subroutine sharp_legendre_transform_d(bl, x, out)
-    use iso_c_binding
-    real(c_double) :: bl(:)
-    real(c_double) :: x(:), out(size(x))
-    !--
-    integer(c_intptr_t) :: lmax, nx
-    call c_sharp_legendre_transform(bl, lmax=int(size(bl) - 1, c_intptr_t), &
-                                    x=x, out=out, nx=int(size(x), c_intptr_t))
-  end subroutine sharp_legendre_transform_d
-
-  subroutine sharp_legendre_transform_s(bl, x, out)
-    use iso_c_binding
-    real(c_float) :: bl(:)
-    real(c_float) :: x(:), out(size(x))
-    !--
-    integer(c_intptr_t) :: lmax, nx
-    call c_sharp_legendre_transform_s(bl, lmax=int(size(bl) - 1, c_intptr_t), &
-                                      x=x, out=out, nx=int(size(x), c_intptr_t))
-  end subroutine sharp_legendre_transform_s
-
-
-end module
diff --git a/fortran/test_sharp.f90 b/fortran/test_sharp.f90
deleted file mode 100644
index 0b7cce2..0000000
--- a/fortran/test_sharp.f90
+++ /dev/null
@@ -1,84 +0,0 @@
-program test_sharp
-  use mpi
-  use sharp
-  use iso_c_binding, only : c_ptr, c_double
-  implicit none
-
-  integer, parameter :: lmax = 2, nside = 2
-  type(sharp_alm_info) :: alm_info
-  type(sharp_geom_info) :: geom_info
-
-  real(c_double), dimension(0:(lmax + 1)**2 - 1, 1:1) :: alm
-  real(c_double), dimension(0:12*nside**2 - 1, 1:1) :: map
-
-  integer(c_int), dimension(1:lmax + 1) :: ms
-  integer(c_int), dimension(1:4 * nside - 1) :: rings
-  integer(c_int) :: nm, m, nrings, iring
-  integer :: nodecount, rank, ierr
-
-  call MPI_Init(ierr)
-  call MPI_Comm_size(MPI_COMM_WORLD, nodecount, ierr)
-  call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
-
-  nm = 0
-  do m = rank, lmax, nodecount
-     nm = nm + 1
-     ms(nm) = m
-  end do
-
-  nrings = 0
-  do iring = rank + 1, 4 * nside - 1, nodecount
-     nrings = nrings + 1
-     rings(nrings) = iring
-  end do
-
-  alm = 0
-  map = 0
-  if (rank == 0) then
-    alm(0, 1) = 1
-  end if
-
-  print *, ms(1:nm)
-  call sharp_make_mmajor_real_packed_alm_info(lmax, ms=ms(1:nm), alm_info=alm_info)
-  print *, 'alm_info%n_local', alm_info%n_local
-  call sharp_make_healpix_geom_info(nside, rings=rings(1:nrings), geom_info=geom_info)
-  print *, 'geom_info%n_local', geom_info%n_local
-  print *, 'execute'
-  call sharp_execute(SHARP_Y, 0, 1, alm, alm_info, map, geom_info, comm=MPI_COMM_WORLD)
-
-  print *, alm
-  print *, map
-
-  call sharp_destroy_alm_info(alm_info)
-  call sharp_destroy_geom_info(geom_info)
-  print *, 'DONE'
-  call MPI_Finalize(ierr)
-
-  print *, 'LEGENDRE TRANSFORMS'
-
-  call test_legendre_transforms()
-
-contains
-  subroutine test_legendre_transforms()
-    integer, parameter :: lmax = 20, nx=10
-    real(c_double) :: bl(0:lmax)
-    real(c_double) :: x(nx), out(nx)
-    real(c_float) :: out_s(nx)
-    !--
-    integer :: l, i
-
-    do l = 0, lmax
-       bl(l) = 1.0 / real(l + 1, c_double)
-    end do
-    do i = 1, nx
-       x(i) = 1 / real(i, c_double)
-    end do
-    out = 0
-    call sharp_legendre_transform(bl, x, out)
-    print *, out
-    call sharp_legendre_transform(real(bl, c_float), real(x, c_float), out_s)
-    print *, out_s
-  end subroutine test_legendre_transforms
-
-
-end program test_sharp
diff --git a/libfftpack/README b/libfftpack/README
deleted file mode 100644
index 2c7e7cb..0000000
--- a/libfftpack/README
+++ /dev/null
@@ -1,34 +0,0 @@
-ls_fft description:
-
-This package is intended to calculate one-dimensional real or complex FFTs
-with high accuracy and good efficiency even for lengths containing large
-prime factors.
-The code is written in C, but a Fortran wrapper exists as well.
-
-Before any FFT is executed, a plan must be generated for it. Plan creation
-is designed to be fast, so that there is no significant overhead if the
-plan is only used once or a few times.
-
-The main component of the code is based on Paul N. Swarztrauber's FFTPACK in the
-double precision incarnation by Hugh C. Pumphrey
-(http://www.netlib.org/fftpack/dp.tgz).
-
-I replaced the iterative sine and cosine calculations in radfg() and radbg()
-by an exact calculation, which slightly improves the transform accuracy for
-real FFTs with lengths containing large prime factors.
-
-Since FFTPACK becomes quite slow for FFT lengths with large prime factors
-(in the worst case of prime lengths it reaches O(n*n) complexity), I
-implemented Bluestein's algorithm, which computes a FFT of length n by
-several FFTs of length n2>=2*n-1 and a convolution. Since n2 can be chosen
-to be highly composite, this algorithm is more efficient if n has large
-prime factors. The longer FFTs themselves are then computed using the FFTPACK
-routines.
-Bluestein's algorithm was implemented according to the description at
-http://en.wikipedia.org/wiki/Bluestein's_FFT_algorithm.
-
-Thread-safety:
-All routines can be called concurrently; all information needed by ls_fft
-is stored in the plan variable. However, using the same plan variable on
-multiple threads simultaneously is not supported and will lead to data
-corruption.
diff --git a/libfftpack/bluestein.c b/libfftpack/bluestein.c
deleted file mode 100644
index 2e2005c..0000000
--- a/libfftpack/bluestein.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- *  This file is part of libfftpack.
- *
- *  libfftpack is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libfftpack is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libfftpack; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*
- *  Copyright (C) 2005, 2006, 2007, 2008 Max-Planck-Society
- *  \author Martin Reinecke
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include "fftpack.h"
-#include "bluestein.h"
-
-/* returns the sum of all prime factors of n */
-size_t prime_factor_sum (size_t n)
-  {
-  size_t result=0,x,limit,tmp;
-  while (((tmp=(n>>1))<<1)==n)
-    { result+=2; n=tmp; }
-
-  limit=(size_t)sqrt(n+0.01);
-  for (x=3; x<=limit; x+=2)
-  while ((tmp=(n/x))*x==n)
-    {
-    result+=x;
-    n=tmp;
-    limit=(size_t)sqrt(n+0.01);
-    }
-  if (n>1) result+=n;
-
-  return result;
-  }
-
-/* returns the smallest composite of 2, 3 and 5 which is >= n */
-static size_t good_size(size_t n)
-  {
-  size_t f2, f23, f235, bestfac=2*n;
-  if (n<=6) return n;
-
-  for (f2=1; f2<bestfac; f2*=2)
-    for (f23=f2; f23<bestfac; f23*=3)
-      for (f235=f23; f235<bestfac; f235*=5)
-        if (f235>=n) bestfac=f235;
-  return bestfac;
-  }
-
-void bluestein_i (size_t n, double **tstorage, size_t *worksize)
-  {
-  static const double pi=3.14159265358979323846;
-  size_t n2=good_size(n*2-1);
-  size_t m, coeff;
-  double angle, xn2;
-  double *bk, *bkf, *work;
-  double pibyn=pi/n;
-  *worksize=2+2*n+8*n2+16;
-  *tstorage = RALLOC(double,2+2*n+8*n2+16);
-  ((size_t *)(*tstorage))[0]=n2;
-  bk  = *tstorage+2;
-  bkf = *tstorage+2+2*n;
-  work= *tstorage+2+2*(n+n2);
-
-/* initialize b_k */
-  bk[0] = 1;
-  bk[1] = 0;
-
-  coeff=0;
-  for (m=1; m<n; ++m)
-    {
-    coeff+=2*m-1;
-    if (coeff>=2*n) coeff-=2*n;
-    angle = pibyn*coeff;
-    bk[2*m] = cos(angle);
-    bk[2*m+1] = sin(angle);
-    }
-
-/* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */
-  xn2 = 1./n2;
-  bkf[0] = bk[0]*xn2;
-  bkf[1] = bk[1]*xn2;
-  for (m=2; m<2*n; m+=2)
-    {
-    bkf[m]   = bkf[2*n2-m]   = bk[m]   *xn2;
-    bkf[m+1] = bkf[2*n2-m+1] = bk[m+1] *xn2;
-    }
-  for (m=2*n;m<=(2*n2-2*n+1);++m)
-    bkf[m]=0.;
-  cffti (n2,work);
-  cfftf (n2,bkf,work);
-  }
-
-void bluestein (size_t n, double *data, double *tstorage, int isign)
-  {
-  size_t n2=*((size_t *)tstorage);
-  size_t m;
-  double *bk, *bkf, *akf, *work;
-  bk  = tstorage+2;
-  bkf = tstorage+2+2*n;
-  work= tstorage+2+2*(n+n2);
-  akf = tstorage+2+2*n+6*n2+16;
-
-/* initialize a_k and FFT it */
-  if (isign>0)
-    for (m=0; m<2*n; m+=2)
-      {
-      akf[m]   = data[m]*bk[m]   - data[m+1]*bk[m+1];
-      akf[m+1] = data[m]*bk[m+1] + data[m+1]*bk[m];
-      }
-  else
-    for (m=0; m<2*n; m+=2)
-      {
-      akf[m]   = data[m]*bk[m]   + data[m+1]*bk[m+1];
-      akf[m+1] =-data[m]*bk[m+1] + data[m+1]*bk[m];
-      }
-  for (m=2*n; m<2*n2; ++m)
-    akf[m]=0;
-
-  cfftf (n2,akf,work);
-
-/* do the convolution */
-  if (isign>0)
-    for (m=0; m<2*n2; m+=2)
-      {
-      double im = -akf[m]*bkf[m+1] + akf[m+1]*bkf[m];
-      akf[m  ]  =  akf[m]*bkf[m]   + akf[m+1]*bkf[m+1];
-      akf[m+1]  = im;
-      }
-  else
-    for (m=0; m<2*n2; m+=2)
-      {
-      double im = akf[m]*bkf[m+1] + akf[m+1]*bkf[m];
-      akf[m  ]  = akf[m]*bkf[m]   - akf[m+1]*bkf[m+1];
-      akf[m+1]  = im;
-      }
-
-
-/* inverse FFT */
-  cfftb (n2,akf,work);
-
-/* multiply by b_k* */
-  if (isign>0)
-    for (m=0; m<2*n; m+=2)
-      {
-      data[m]   = bk[m]  *akf[m] - bk[m+1]*akf[m+1];
-      data[m+1] = bk[m+1]*akf[m] + bk[m]  *akf[m+1];
-      }
-  else
-    for (m=0; m<2*n; m+=2)
-      {
-      data[m]   = bk[m]  *akf[m] + bk[m+1]*akf[m+1];
-      data[m+1] =-bk[m+1]*akf[m] + bk[m]  *akf[m+1];
-      }
-  }
diff --git a/libfftpack/bluestein.h b/libfftpack/bluestein.h
deleted file mode 100644
index 91e5b28..0000000
--- a/libfftpack/bluestein.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- *  This file is part of libfftpack.
- *
- *  libfftpack is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libfftpack is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libfftpack; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*
- *  Copyright (C) 2005 Max-Planck-Society
- *  \author Martin Reinecke
- */
-
-#ifndef PLANCK_BLUESTEIN_H
-#define PLANCK_BLUESTEIN_H
-
-#include "c_utils.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-size_t prime_factor_sum (size_t n);
-
-void bluestein_i (size_t n, double **tstorage, size_t *worksize);
-void bluestein (size_t n, double *data, double *tstorage, int isign);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/libfftpack/fftpack.c b/libfftpack/fftpack.c
deleted file mode 100644
index 6d09d06..0000000
--- a/libfftpack/fftpack.c
+++ /dev/null
@@ -1,833 +0,0 @@
-/*
- *  This file is part of libfftpack.
- *
- *  libfftpack is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libfftpack is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libfftpack; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*
-  fftpack.c : A set of FFT routines in C.
-  Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
-  (Version 4, 1985).
-
-  C port by Martin Reinecke (2010)
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-#include "fftpack.h"
-
-#define WA(x,i) wa[(i)+(x)*ido]
-#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
-#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
-#define PM(a,b,c,d) { a=c+d; b=c-d; }
-#define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
-#define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; }
-#define SCALEC(a,b) { a.r*=b; a.i*=b; }
-#define CONJFLIPC(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; }
-/* (a+ib) = conj(c+id) * (e+if) */
-#define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; }
-
-typedef struct {
-  double r,i;
-} cmplx;
-
-#define CONCAT(a,b) a ## b
-
-#define X(arg) CONCAT(passb,arg)
-#define BACKWARD
-#include "fftpack_inc.c"
-#undef BACKWARD
-#undef X
-
-#define X(arg) CONCAT(passf,arg)
-#include "fftpack_inc.c"
-#undef X
-
-#undef CC
-#undef CH
-#define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))]
-#define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))]
-
-static void radf2 (size_t ido, size_t l1, const double *cc, double *ch,
-  const double *wa)
-  {
-  const size_t cdim=2;
-  size_t i, k, ic;
-  double ti2, tr2;
-
-  for (k=0; k<l1; k++)
-    PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1))
-  if ((ido&1)==0)
-    for (k=0; k<l1; k++)
-      {
-      CH(    0,1,k) = -CC(ido-1,k,1);
-      CH(ido-1,0,k) =  CC(ido-1,k,0);
-      }
-  if (ido<=2) return;
-  for (k=0; k<l1; k++)
-    for (i=2; i<ido; i+=2)
-      {
-      ic=ido-i;
-      MULPM (tr2,ti2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
-      PM (CH(i-1,0,k),CH(ic-1,1,k),CC(i-1,k,0),tr2)
-      PM (CH(i  ,0,k),CH(ic  ,1,k),ti2,CC(i  ,k,0))
-      }
-  }
-
-static void radf3(size_t ido, size_t l1, const double *cc, double *ch,
-  const double *wa)
-  {
-  const size_t cdim=3;
-  static const double taur=-0.5, taui=0.86602540378443864676;
-  size_t i, k, ic;
-  double ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3;
-
-  for (k=0; k<l1; k++)
-    {
-    cr2=CC(0,k,1)+CC(0,k,2);
-    CH(0,0,k) = CC(0,k,0)+cr2;
-    CH(0,2,k) = taui*(CC(0,k,2)-CC(0,k,1));
-    CH(ido-1,1,k) = CC(0,k,0)+taur*cr2;
-    }
-  if (ido==1) return;
-  for (k=0; k<l1; k++)
-    for (i=2; i<ido; i+=2)
-      {
-      ic=ido-i;
-      MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
-      MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
-      cr2=dr2+dr3;
-      ci2=di2+di3;
-      CH(i-1,0,k) = CC(i-1,k,0)+cr2;
-      CH(i  ,0,k) = CC(i  ,k,0)+ci2;
-      tr2 = CC(i-1,k,0)+taur*cr2;
-      ti2 = CC(i  ,k,0)+taur*ci2;
-      tr3 = taui*(di2-di3);
-      ti3 = taui*(dr3-dr2);
-      PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr3)
-      PM(CH(i  ,2,k),CH(ic  ,1,k),ti3,ti2)
-      }
-  }
-
-static void radf4(size_t ido, size_t l1, const double *cc, double *ch,
-  const double *wa)
-  {
-  const size_t cdim=4;
-  static const double hsqt2=0.70710678118654752440;
-  size_t i, k, ic;
-  double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
-
-  for (k=0; k<l1; k++)
-    {
-    PM (tr1,CH(0,2,k),CC(0,k,3),CC(0,k,1))
-    PM (tr2,CH(ido-1,1,k),CC(0,k,0),CC(0,k,2))
-    PM (CH(0,0,k),CH(ido-1,3,k),tr2,tr1)
-    }
-  if ((ido&1)==0)
-    for (k=0; k<l1; k++)
-      {
-      ti1=-hsqt2*(CC(ido-1,k,1)+CC(ido-1,k,3));
-      tr1= hsqt2*(CC(ido-1,k,1)-CC(ido-1,k,3));
-      PM (CH(ido-1,0,k),CH(ido-1,2,k),CC(ido-1,k,0),tr1)
-      PM (CH(    0,3,k),CH(    0,1,k),ti1,CC(ido-1,k,2))
-      }
-  if (ido<=2) return;
-  for (k=0; k<l1; k++)
-    for (i=2; i<ido; i+=2)
-      {
-      ic=ido-i;
-      MULPM(cr2,ci2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
-      MULPM(cr3,ci3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
-      MULPM(cr4,ci4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
-      PM(tr1,tr4,cr4,cr2)
-      PM(ti1,ti4,ci2,ci4)
-      PM(tr2,tr3,CC(i-1,k,0),cr3)
-      PM(ti2,ti3,CC(i  ,k,0),ci3)
-      PM(CH(i-1,0,k),CH(ic-1,3,k),tr2,tr1)
-      PM(CH(i  ,0,k),CH(ic  ,3,k),ti1,ti2)
-      PM(CH(i-1,2,k),CH(ic-1,1,k),tr3,ti4)
-      PM(CH(i  ,2,k),CH(ic  ,1,k),tr4,ti3)
-      }
-  }
-
-static void radf5(size_t ido, size_t l1, const double *cc, double *ch,
-  const double *wa)
-  {
-  const size_t cdim=5;
-  static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
-                      tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
-  size_t i, k, ic;
-  double ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3,
-         dr4, dr5, cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
-
-  for (k=0; k<l1; k++)
-    {
-    PM (cr2,ci5,CC(0,k,4),CC(0,k,1))
-    PM (cr3,ci4,CC(0,k,3),CC(0,k,2))
-    CH(0,0,k)=CC(0,k,0)+cr2+cr3;
-    CH(ido-1,1,k)=CC(0,k,0)+tr11*cr2+tr12*cr3;
-    CH(0,2,k)=ti11*ci5+ti12*ci4;
-    CH(ido-1,3,k)=CC(0,k,0)+tr12*cr2+tr11*cr3;
-    CH(0,4,k)=ti12*ci5-ti11*ci4;
-    }
-  if (ido==1) return;
-  for (k=0; k<l1;++k)
-    for (i=2; i<ido; i+=2)
-      {
-      ic=ido-i;
-      MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
-      MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
-      MULPM (dr4,di4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
-      MULPM (dr5,di5,WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4))
-      PM(cr2,ci5,dr5,dr2)
-      PM(ci2,cr5,di2,di5)
-      PM(cr3,ci4,dr4,dr3)
-      PM(ci3,cr4,di3,di4)
-      CH(i-1,0,k)=CC(i-1,k,0)+cr2+cr3;
-      CH(i  ,0,k)=CC(i  ,k,0)+ci2+ci3;
-      tr2=CC(i-1,k,0)+tr11*cr2+tr12*cr3;
-      ti2=CC(i  ,k,0)+tr11*ci2+tr12*ci3;
-      tr3=CC(i-1,k,0)+tr12*cr2+tr11*cr3;
-      ti3=CC(i  ,k,0)+tr12*ci2+tr11*ci3;
-      MULPM(tr5,tr4,cr5,cr4,ti11,ti12)
-      MULPM(ti5,ti4,ci5,ci4,ti11,ti12)
-      PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr5)
-      PM(CH(i  ,2,k),CH(ic  ,1,k),ti5,ti2)
-      PM(CH(i-1,4,k),CH(ic-1,3,k),tr3,tr4)
-      PM(CH(i  ,4,k),CH(ic  ,3,k),ti4,ti3)
-      }
-  }
-
-#undef CH
-#undef CC
-#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
-#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
-#define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))]
-#define C2(a,b) cc[(a)+idl1*(b)]
-#define CH2(a,b) ch[(a)+idl1*(b)]
-static void radfg(size_t ido, size_t ip, size_t l1, size_t idl1,
-  double *cc, double *ch, const double *wa)
-  {
-  const size_t cdim=ip;
-  static const double twopi=6.28318530717958647692;
-  size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
-  double ai1, ai2, ar1, ar2, arg;
-  double *csarr;
-  size_t aidx;
-
-  ipph=(ip+1)/ 2;
-  if(ido!=1)
-    {
-    memcpy(ch,cc,idl1*sizeof(double));
-
-    for(j=1; j<ip; j++)
-      for(k=0; k<l1; k++)
-        {
-        CH(0,k,j)=C1(0,k,j);
-        idij=(j-1)*ido+1;
-        for(i=2; i<ido; i+=2,idij+=2)
-          MULPM(CH(i-1,k,j),CH(i,k,j),wa[idij-1],wa[idij],C1(i-1,k,j),C1(i,k,j))
-        }
-
-    for(j=1,jc=ip-1; j<ipph; j++,jc--)
-      for(k=0; k<l1; k++)
-        for(i=2; i<ido; i+=2)
-          {
-          PM(C1(i-1,k,j),C1(i  ,k,jc),CH(i-1,k,jc),CH(i-1,k,j ))
-          PM(C1(i  ,k,j),C1(i-1,k,jc),CH(i  ,k,j ),CH(i  ,k,jc))
-          }
-    }
-  else
-    memcpy(cc,ch,idl1*sizeof(double));
-
-  for(j=1,jc=ip-1; j<ipph; j++,jc--)
-    for(k=0; k<l1; k++)
-      PM(C1(0,k,j),C1(0,k,jc),CH(0,k,jc),CH(0,k,j))
-
-  csarr=RALLOC(double,2*ip);
-  arg=twopi / ip;
-  csarr[0]=1.;
-  csarr[1]=0.;
-  csarr[2]=csarr[2*ip-2]=cos(arg);
-  csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
-  for (i=2; i<=ip/2; ++i)
-    {
-    csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
-    csarr[2*i+1]=sin(i*arg);
-    csarr[2*ip-2*i+1]=-csarr[2*i+1];
-    }
-  for(l=1,lc=ip-1; l<ipph; l++,lc--)
-    {
-    ar1=csarr[2*l];
-    ai1=csarr[2*l+1];
-    for(ik=0; ik<idl1; ik++)
-      {
-      CH2(ik,l)=C2(ik,0)+ar1*C2(ik,1);
-      CH2(ik,lc)=ai1*C2(ik,ip-1);
-      }
-    aidx=2*l;
-    for(j=2,jc=ip-2; j<ipph; j++,jc--)
-      {
-      aidx+=2*l;
-      if (aidx>=2*ip) aidx-=2*ip;
-      ar2=csarr[aidx];
-      ai2=csarr[aidx+1];
-      for(ik=0; ik<idl1; ik++)
-        {
-        CH2(ik,l )+=ar2*C2(ik,j );
-        CH2(ik,lc)+=ai2*C2(ik,jc);
-        }
-      }
-    }
-  DEALLOC(csarr);
-
-  for(j=1; j<ipph; j++)
-    for(ik=0; ik<idl1; ik++)
-      CH2(ik,0)+=C2(ik,j);
-
-  for(k=0; k<l1; k++)
-    memcpy(&CC(0,0,k),&CH(0,k,0),ido*sizeof(double));
-  for(j=1; j<ipph; j++)
-    {
-    jc=ip-j;
-    j2=2*j;
-    for(k=0; k<l1; k++)
-      {
-      CC(ido-1,j2-1,k) = CH(0,k,j );
-      CC(0    ,j2  ,k) = CH(0,k,jc);
-      }
-    }
-  if(ido==1) return;
-
-  for(j=1; j<ipph; j++)
-    {
-    jc=ip-j;
-    j2=2*j;
-    for(k=0; k<l1; k++)
-      for(i=2; i<ido; i+=2)
-        {
-        ic=ido-i;
-        PM (CC(i-1,j2,k),CC(ic-1,j2-1,k),CH(i-1,k,j ),CH(i-1,k,jc))
-        PM (CC(i  ,j2,k),CC(ic  ,j2-1,k),CH(i  ,k,jc),CH(i  ,k,j ))
-        }
-    }
-  }
-
-#undef CC
-#undef CH
-#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
-#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
-
-static void radb2(size_t ido, size_t l1, const double *cc, double *ch,
-  const double *wa)
-  {
-  const size_t cdim=2;
-  size_t i, k, ic;
-  double ti2, tr2;
-
-  for (k=0; k<l1; k++)
-    PM (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(ido-1,1,k))
-  if ((ido&1)==0)
-    for (k=0; k<l1; k++)
-      {
-      CH(ido-1,k,0) =  2*CC(ido-1,0,k);
-      CH(ido-1,k,1) = -2*CC(0    ,1,k);
-      }
-  if (ido<=2) return;
-  for (k=0; k<l1;++k)
-    for (i=2; i<ido; i+=2)
-      {
-      ic=ido-i;
-      PM (CH(i-1,k,0),tr2,CC(i-1,0,k),CC(ic-1,1,k))
-      PM (ti2,CH(i  ,k,0),CC(i  ,0,k),CC(ic  ,1,k))
-      MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ti2,tr2)
-      }
-  }
-
-static void radb3(size_t ido, size_t l1, const double *cc, double *ch,
-  const double *wa)
-  {
-  const size_t cdim=3;
-  static const double taur=-0.5, taui=0.86602540378443864676;
-  size_t i, k, ic;
-  double ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
-
-  for (k=0; k<l1; k++)
-    {
-    tr2=2*CC(ido-1,1,k);
-    cr2=CC(0,0,k)+taur*tr2;
-    CH(0,k,0)=CC(0,0,k)+tr2;
-    ci3=2*taui*CC(0,2,k);
-    PM (CH(0,k,2),CH(0,k,1),cr2,ci3);
-    }
-  if (ido==1) return;
-  for (k=0; k<l1; k++)
-    for (i=2; i<ido; i+=2)
-      {
-      ic=ido-i;
-      tr2=CC(i-1,2,k)+CC(ic-1,1,k);
-      ti2=CC(i  ,2,k)-CC(ic  ,1,k);
-      cr2=CC(i-1,0,k)+taur*tr2;
-      ci2=CC(i  ,0,k)+taur*ti2;
-      CH(i-1,k,0)=CC(i-1,0,k)+tr2;
-      CH(i  ,k,0)=CC(i  ,0,k)+ti2;
-      cr3=taui*(CC(i-1,2,k)-CC(ic-1,1,k));
-      ci3=taui*(CC(i  ,2,k)+CC(ic  ,1,k));
-      PM(dr3,dr2,cr2,ci3)
-      PM(di2,di3,ci2,cr3)
-      MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
-      MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
-      }
-  }
-
-static void radb4(size_t ido, size_t l1, const double *cc, double *ch,
-  const double *wa)
-  {
-  const size_t cdim=4;
-  static const double sqrt2=1.41421356237309504880;
-  size_t i, k, ic;
-  double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
-
-  for (k=0; k<l1; k++)
-    {
-    PM (tr2,tr1,CC(0,0,k),CC(ido-1,3,k))
-    tr3=2*CC(ido-1,1,k);
-    tr4=2*CC(0,2,k);
-    PM (CH(0,k,0),CH(0,k,2),tr2,tr3)
-    PM (CH(0,k,3),CH(0,k,1),tr1,tr4)
-    }
-  if ((ido&1)==0)
-    for (k=0; k<l1; k++)
-      {
-      PM (ti1,ti2,CC(0    ,3,k),CC(0    ,1,k))
-      PM (tr2,tr1,CC(ido-1,0,k),CC(ido-1,2,k))
-      CH(ido-1,k,0)=tr2+tr2;
-      CH(ido-1,k,1)=sqrt2*(tr1-ti1);
-      CH(ido-1,k,2)=ti2+ti2;
-      CH(ido-1,k,3)=-sqrt2*(tr1+ti1);
-      }
-  if (ido<=2) return;
-  for (k=0; k<l1;++k)
-    for (i=2; i<ido; i+=2)
-      {
-      ic=ido-i;
-      PM (tr2,tr1,CC(i-1,0,k),CC(ic-1,3,k))
-      PM (ti1,ti2,CC(i  ,0,k),CC(ic  ,3,k))
-      PM (tr4,ti3,CC(i  ,2,k),CC(ic  ,1,k))
-      PM (tr3,ti4,CC(i-1,2,k),CC(ic-1,1,k))
-      PM (CH(i-1,k,0),cr3,tr2,tr3)
-      PM (CH(i  ,k,0),ci3,ti2,ti3)
-      PM (cr4,cr2,tr1,tr4)
-      PM (ci2,ci4,ti1,ti4)
-      MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ci2,cr2)
-      MULPM (CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),ci3,cr3)
-      MULPM (CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),ci4,cr4)
-      }
-  }
-
-static void radb5(size_t ido, size_t l1, const double *cc, double *ch,
-  const double *wa)
-  {
-  const size_t cdim=5;
-  static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
-                      tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
-  size_t i, k, ic;
-  double ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4,
-         ti2, ti3, ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
-
-  for (k=0; k<l1; k++)
-    {
-    ti5=2*CC(0,2,k);
-    ti4=2*CC(0,4,k);
-    tr2=2*CC(ido-1,1,k);
-    tr3=2*CC(ido-1,3,k);
-    CH(0,k,0)=CC(0,0,k)+tr2+tr3;
-    cr2=CC(0,0,k)+tr11*tr2+tr12*tr3;
-    cr3=CC(0,0,k)+tr12*tr2+tr11*tr3;
-    MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
-    PM(CH(0,k,4),CH(0,k,1),cr2,ci5)
-    PM(CH(0,k,3),CH(0,k,2),cr3,ci4)
-    }
-  if (ido==1) return;
-  for (k=0; k<l1;++k)
-    for (i=2; i<ido; i+=2)
-      {
-      ic=ido-i;
-      PM(tr2,tr5,CC(i-1,2,k),CC(ic-1,1,k))
-      PM(ti5,ti2,CC(i  ,2,k),CC(ic  ,1,k))
-      PM(tr3,tr4,CC(i-1,4,k),CC(ic-1,3,k))
-      PM(ti4,ti3,CC(i  ,4,k),CC(ic  ,3,k))
-      CH(i-1,k,0)=CC(i-1,0,k)+tr2+tr3;
-      CH(i  ,k,0)=CC(i  ,0,k)+ti2+ti3;
-      cr2=CC(i-1,0,k)+tr11*tr2+tr12*tr3;
-      ci2=CC(i  ,0,k)+tr11*ti2+tr12*ti3;
-      cr3=CC(i-1,0,k)+tr12*tr2+tr11*tr3;
-      ci3=CC(i  ,0,k)+tr12*ti2+tr11*ti3;
-      MULPM(cr5,cr4,tr5,tr4,ti11,ti12)
-      MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
-      PM(dr4,dr3,cr3,ci4)
-      PM(di3,di4,ci3,cr4)
-      PM(dr5,dr2,cr2,ci5)
-      PM(di2,di5,ci2,cr5)
-      MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
-      MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
-      MULPM(CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),di4,dr4)
-      MULPM(CH(i,k,4),CH(i-1,k,4),WA(3,i-2),WA(3,i-1),di5,dr5)
-      }
-  }
-
-static void radbg(size_t ido, size_t ip, size_t l1, size_t idl1,
-  double *cc, double *ch, const double *wa)
-  {
-  const size_t cdim=ip;
-  static const double twopi=6.28318530717958647692;
-  size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
-  double ai1, ai2, ar1, ar2, arg;
-  double *csarr;
-  size_t aidx;
-
-  ipph=(ip+1)/ 2;
-  for(k=0; k<l1; k++)
-    memcpy(&CH(0,k,0),&CC(0,0,k),ido*sizeof(double));
-  for(j=1; j<ipph; j++)
-    {
-    jc=ip-j;
-    j2=2*j;
-    for(k=0; k<l1; k++)
-      {
-      CH(0,k,j )=2*CC(ido-1,j2-1,k);
-      CH(0,k,jc)=2*CC(0    ,j2  ,k);
-      }
-    }
-
-  if(ido!=1)
-    for(j=1,jc=ip-1; j<ipph; j++,jc--)
-      for(k=0; k<l1; k++)
-        for(i=2; i<ido; i+=2)
-          {
-          ic=ido-i;
-          PM (CH(i-1,k,j ),CH(i-1,k,jc),CC(i-1,2*j,k),CC(ic-1,2*j-1,k))
-          PM (CH(i  ,k,jc),CH(i  ,k,j ),CC(i  ,2*j,k),CC(ic  ,2*j-1,k))
-          }
-
-  csarr=RALLOC(double,2*ip);
-  arg=twopi/ip;
-  csarr[0]=1.;
-  csarr[1]=0.;
-  csarr[2]=csarr[2*ip-2]=cos(arg);
-  csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
-  for (i=2; i<=ip/2; ++i)
-    {
-    csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
-    csarr[2*i+1]=sin(i*arg);
-    csarr[2*ip-2*i+1]=-csarr[2*i+1];
-    }
-  for(l=1; l<ipph; l++)
-    {
-    lc=ip-l;
-    ar1=csarr[2*l];
-    ai1=csarr[2*l+1];
-    for(ik=0; ik<idl1; ik++)
-      {
-      C2(ik,l)=CH2(ik,0)+ar1*CH2(ik,1);
-      C2(ik,lc)=ai1*CH2(ik,ip-1);
-      }
-    aidx=2*l;
-    for(j=2; j<ipph; j++)
-      {
-      jc=ip-j;
-      aidx+=2*l;
-      if (aidx>=2*ip) aidx-=2*ip;
-      ar2=csarr[aidx];
-      ai2=csarr[aidx+1];
-      for(ik=0; ik<idl1; ik++)
-        {
-        C2(ik,l )+=ar2*CH2(ik,j );
-        C2(ik,lc)+=ai2*CH2(ik,jc);
-        }
-      }
-    }
-  DEALLOC(csarr);
-
-  for(j=1; j<ipph; j++)
-    for(ik=0; ik<idl1; ik++)
-      CH2(ik,0)+=CH2(ik,j);
-
-  for(j=1,jc=ip-1; j<ipph; j++,jc--)
-    for(k=0; k<l1; k++)
-      PM (CH(0,k,jc),CH(0,k,j),C1(0,k,j),C1(0,k,jc))
-
-  if(ido==1)
-    return;
-  for(j=1,jc=ip-1; j<ipph; j++,jc--)
-    for(k=0; k<l1; k++)
-      for(i=2; i<ido; i+=2)
-        {
-        PM (CH(i-1,k,jc),CH(i-1,k,j ),C1(i-1,k,j),C1(i  ,k,jc))
-        PM (CH(i  ,k,j ),CH(i  ,k,jc),C1(i  ,k,j),C1(i-1,k,jc))
-        }
-  memcpy(cc,ch,idl1*sizeof(double));
-
-  for(j=1; j<ip; j++)
-    for(k=0; k<l1; k++)
-      {
-      C1(0,k,j)=CH(0,k,j);
-      idij=(j-1)*ido+1;
-      for(i=2; i<ido; i+=2,idij+=2)
-        MULPM (C1(i,k,j),C1(i-1,k,j),wa[idij-1],wa[idij],CH(i,k,j),CH(i-1,k,j))
-      }
-  }
-
-#undef CC
-#undef CH
-#undef PM
-#undef MULPM
-
-
-/*----------------------------------------------------------------------
-   cfftf1, cfftb1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
-  ----------------------------------------------------------------------*/
-
-static void cfft1(size_t n, cmplx c[], cmplx ch[], const cmplx wa[],
-  const size_t ifac[], int isign)
-  {
-  size_t k1, l1=1, nf=ifac[1], iw=0;
-  cmplx *p1=c, *p2=ch;
-
-  for(k1=0; k1<nf; k1++)
-    {
-    size_t ip=ifac[k1+2];
-    size_t l2=ip*l1;
-    size_t ido = n/l2;
-    if(ip==4)
-      (isign>0) ? passb4(ido, l1, p1, p2, wa+iw)
-                : passf4(ido, l1, p1, p2, wa+iw);
-    else if(ip==2)
-      (isign>0) ? passb2(ido, l1, p1, p2, wa+iw)
-                : passf2(ido, l1, p1, p2, wa+iw);
-    else if(ip==3)
-      (isign>0) ? passb3(ido, l1, p1, p2, wa+iw)
-                : passf3(ido, l1, p1, p2, wa+iw);
-    else if(ip==5)
-      (isign>0) ? passb5(ido, l1, p1, p2, wa+iw)
-                : passf5(ido, l1, p1, p2, wa+iw);
-    else if(ip==6)
-      (isign>0) ? passb6(ido, l1, p1, p2, wa+iw)
-                : passf6(ido, l1, p1, p2, wa+iw);
-    else
-      (isign>0) ? passbg(ido, ip, l1, p1, p2, wa+iw)
-                : passfg(ido, ip, l1, p1, p2, wa+iw);
-    SWAP(p1,p2,cmplx *);
-    l1=l2;
-    iw+=(ip-1)*ido;
-    }
-  if (p1!=c)
-    memcpy (c,p1,n*sizeof(cmplx));
-  }
-
-void cfftf(size_t n, double c[], double wsave[])
-  {
-  if (n!=1)
-    cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
-          (size_t*)(wsave+4*n),-1);
-  }
-
-void cfftb(size_t n, double c[], double wsave[])
-  {
-  if (n!=1)
-    cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
-          (size_t*)(wsave+4*n),+1);
-  }
-
-static void factorize (size_t n, const size_t *pf, size_t npf, size_t *ifac)
-  {
-  size_t nl=n, nf=0, ntry=0, j=0, i;
-
-startloop:
-  j++;
-  ntry = (j<=npf) ? pf[j-1] : ntry+2;
-  do
-    {
-    size_t nq=nl / ntry;
-    size_t nr=nl-ntry*nq;
-    if (nr!=0)
-      goto startloop;
-    nf++;
-    ifac[nf+1]=ntry;
-    nl=nq;
-    if ((ntry==2) && (nf!=1))
-      {
-      for (i=nf+1; i>2; --i)
-        ifac[i]=ifac[i-1];
-      ifac[2]=2;
-      }
-    }
-  while(nl!=1);
-  ifac[0]=n;
-  ifac[1]=nf;
-  }
-
-static void cffti1(size_t n, double wa[], size_t ifac[])
-  {
-  static const size_t ntryh[5]={4,6,3,2,5};
-  static const double twopi=6.28318530717958647692;
-  size_t j, k, fi;
-
-  double argh=twopi/n;
-  size_t i=0, l1=1;
-  factorize (n,ntryh,5,ifac);
-  for(k=1; k<=ifac[1]; k++)
-    {
-    size_t ip=ifac[k+1];
-    size_t ido=n/(l1*ip);
-    for(j=1; j<ip; j++)
-      {
-      size_t is = i;
-      double argld=j*l1*argh;
-      wa[i  ]=1;
-      wa[i+1]=0;
-      for(fi=1; fi<=ido; fi++)
-        {
-        double arg=fi*argld;
-        i+=2;
-        wa[i  ]=cos(arg);
-        wa[i+1]=sin(arg);
-        }
-      if(ip>6)
-        {
-        wa[is  ]=wa[i  ];
-        wa[is+1]=wa[i+1];
-        }
-      }
-    l1*=ip;
-    }
-  }
-
-void cffti(size_t n, double wsave[])
-  { if (n!=1) cffti1(n, wsave+2*n,(size_t*)(wsave+4*n)); }
-
-
-/*----------------------------------------------------------------------
-   rfftf1, rfftb1, rfftf, rfftb, rffti1, rffti. Real FFTs.
-  ----------------------------------------------------------------------*/
-
-static void rfftf1(size_t n, double c[], double ch[], const double wa[],
-  const size_t ifac[])
-  {
-  size_t k1, l1=n, nf=ifac[1], iw=n-1;
-  double *p1=ch, *p2=c;
-
-  for(k1=1; k1<=nf;++k1)
-    {
-    size_t ip=ifac[nf-k1+2];
-    size_t ido=n / l1;
-    l1 /= ip;
-    iw-=(ip-1)*ido;
-    SWAP (p1,p2,double *);
-    if(ip==4)
-      radf4(ido, l1, p1, p2, wa+iw);
-    else if(ip==2)
-      radf2(ido, l1, p1, p2, wa+iw);
-    else if(ip==3)
-      radf3(ido, l1, p1, p2, wa+iw);
-    else if(ip==5)
-      radf5(ido, l1, p1, p2, wa+iw);
-    else
-      {
-      if (ido==1)
-        SWAP (p1,p2,double *);
-      radfg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
-      SWAP (p1,p2,double *);
-      }
-    }
-  if (p1==c)
-    memcpy (c,ch,n*sizeof(double));
-  }
-
-static void rfftb1(size_t n, double c[], double ch[], const double wa[],
-  const size_t ifac[])
-  {
-  size_t k1, l1=1, nf=ifac[1], iw=0;
-  double *p1=c, *p2=ch;
-
-  for(k1=1; k1<=nf; k1++)
-    {
-    size_t ip = ifac[k1+1],
-           ido= n/(ip*l1);
-    if(ip==4)
-      radb4(ido, l1, p1, p2, wa+iw);
-    else if(ip==2)
-      radb2(ido, l1, p1, p2, wa+iw);
-    else if(ip==3)
-      radb3(ido, l1, p1, p2, wa+iw);
-    else if(ip==5)
-      radb5(ido, l1, p1, p2, wa+iw);
-    else
-      {
-      radbg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
-      if (ido!=1)
-        SWAP (p1,p2,double *);
-      }
-    SWAP (p1,p2,double *);
-    l1*=ip;
-    iw+=(ip-1)*ido;
-    }
-  if (p1!=c)
-    memcpy (c,ch,n*sizeof(double));
-  }
-
-void rfftf(size_t n, double r[], double wsave[])
-  { if(n!=1) rfftf1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
-
-void rfftb(size_t n, double r[], double wsave[])
-  { if(n!=1) rfftb1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
-
-static void rffti1(size_t n, double wa[], size_t ifac[])
-  {
-  static const size_t ntryh[4]={4,2,3,5};
-  static const double twopi=6.28318530717958647692;
-  size_t i, j, k, fi;
-
-  double argh=twopi/n;
-  size_t is=0, l1=1;
-  factorize (n,ntryh,4,ifac);
-  for (k=1; k<ifac[1]; k++)
-    {
-    size_t ip=ifac[k+1],
-           ido=n/(l1*ip);
-    for (j=1; j<ip; ++j)
-      {
-      double argld=j*l1*argh;
-      for(i=is,fi=1; i<=ido+is-3; i+=2,++fi)
-        {
-        double arg=fi*argld;
-        wa[i  ]=cos(arg);
-        wa[i+1]=sin(arg);
-        }
-      is+=ido;
-      }
-    l1*=ip;
-    }
-  }
-
-void rffti(size_t n, double wsave[])
-  { if (n!=1) rffti1(n, wsave+n,(size_t*)(wsave+2*n)); }
diff --git a/libfftpack/fftpack.h b/libfftpack/fftpack.h
deleted file mode 100644
index 6a2e96e..0000000
--- a/libfftpack/fftpack.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- *  This file is part of libfftpack.
- *
- *  libfftpack is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libfftpack is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libfftpack; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*
-  fftpack.h : function declarations for fftpack.c
-  Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
-  (Version 4, 1985).
-
-  Pekka Janhunen 23.2.1995
-
-  (reformatted by joerg arndt)
-
-  reformatted and slightly enhanced by Martin Reinecke (2004)
- */
-
-#ifndef PLANCK_FFTPACK_H
-#define PLANCK_FFTPACK_H
-
-#include "c_utils.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*! forward complex transform */
-void cfftf(size_t N, double complex_data[], double wrk[]);
-/*! backward complex transform */
-void cfftb(size_t N, double complex_data[], double wrk[]);
-/*! initializer for complex transforms */
-void cffti(size_t N, double wrk[]);
-
-/*! forward real transform */
-void rfftf(size_t N, double data[], double wrk[]);
-/*! backward real transform */
-void rfftb(size_t N, double data[], double wrk[]);
-/*! initializer for real transforms */
-void rffti(size_t N, double wrk[]);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/libfftpack/fftpack_inc.c b/libfftpack/fftpack_inc.c
deleted file mode 100644
index 55d0ac5..0000000
--- a/libfftpack/fftpack_inc.c
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- *  This file is part of libfftpack.
- *
- *  libfftpack is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libfftpack is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libfftpack; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*
-  fftpack.c : A set of FFT routines in C.
-  Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
-  (Version 4, 1985).
-
-  C port by Martin Reinecke (2010)
- */
-
-#ifdef BACKWARD
-#define PSIGN +
-#define PMSIGNC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
-/* a = b*c */
-#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; }
-#else
-#define PSIGN -
-#define PMSIGNC(a,b,c,d) { a.r=c.r-d.r; a.i=c.i-d.i; b.r=c.r+d.r; b.i=c.i+d.i; }
-/* a = conj(b)*c */
-#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; }
-#endif
-
-static void X(2) (size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
-  const cmplx *wa)
-  {
-  const size_t cdim=2;
-  size_t k,i;
-  cmplx t;
-  if (ido==1)
-    for (k=0;k<l1;++k)
-      PMC (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(0,1,k))
-  else
-    for (k=0;k<l1;++k)
-      for (i=0;i<ido;++i)
-        {
-        PMC (CH(i,k,0),t,CC(i,0,k),CC(i,1,k))
-        MULPMSIGNC (CH(i,k,1),WA(0,i),t)
-        }
-  }
-
-static void X(3)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
-  const cmplx *wa)
-  {
-  const size_t cdim=3;
-  static const double taur=-0.5, taui= PSIGN 0.86602540378443864676;
-  size_t i, k;
-  cmplx c2, c3, d2, d3, t2;
-
-  if (ido==1)
-    for (k=0; k<l1; ++k)
-      {
-      PMC (t2,c3,CC(0,1,k),CC(0,2,k))
-      ADDC (CH(0,k,0),t2,CC(0,0,k))
-      SCALEC(t2,taur)
-      ADDC(c2,CC(0,0,k),t2)
-      SCALEC(c3,taui)
-      CONJFLIPC(c3)
-      PMC(CH(0,k,1),CH(0,k,2),c2,c3)
-      }
-  else
-    for (k=0; k<l1; ++k)
-      for (i=0; i<ido; ++i)
-        {
-        PMC (t2,c3,CC(i,1,k),CC(i,2,k))
-        ADDC (CH(i,k,0),t2,CC(i,0,k))
-        SCALEC(t2,taur)
-        ADDC(c2,CC(i,0,k),t2)
-        SCALEC(c3,taui)
-        CONJFLIPC(c3)
-        PMC(d2,d3,c2,c3)
-        MULPMSIGNC(CH(i,k,1),WA(0,i),d2)
-        MULPMSIGNC(CH(i,k,2),WA(1,i),d3)
-        }
-  }
-
-static void X(4)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
-  const cmplx *wa)
-  {
-  const size_t cdim=4;
-  size_t i, k;
-  cmplx c2, c3, c4, t1, t2, t3, t4;
-
-  if (ido==1)
-    for (k=0; k<l1; ++k)
-      {
-      PMC(t2,t1,CC(0,0,k),CC(0,2,k))
-      PMC(t3,t4,CC(0,1,k),CC(0,3,k))
-      CONJFLIPC(t4)
-      PMC(CH(0,k,0),CH(0,k,2),t2,t3)
-      PMSIGNC (CH(0,k,1),CH(0,k,3),t1,t4)
-      }
-  else
-    for (k=0; k<l1; ++k)
-      for (i=0; i<ido; ++i)
-        {
-        PMC(t2,t1,CC(i,0,k),CC(i,2,k))
-        PMC(t3,t4,CC(i,1,k),CC(i,3,k))
-        CONJFLIPC(t4)
-        PMC(CH(i,k,0),c3,t2,t3)
-        PMSIGNC (c2,c4,t1,t4)
-        MULPMSIGNC (CH(i,k,1),WA(0,i),c2)
-        MULPMSIGNC (CH(i,k,2),WA(1,i),c3)
-        MULPMSIGNC (CH(i,k,3),WA(2,i),c4)
-        }
-  }
-
-static void X(5)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
-  const cmplx *wa)
-  {
-  const size_t cdim=5;
-  static const double tr11= 0.3090169943749474241,
-                      ti11= PSIGN 0.95105651629515357212,
-                      tr12=-0.8090169943749474241,
-                      ti12= PSIGN 0.58778525229247312917;
-  size_t i, k;
-  cmplx c2, c3, c4, c5, d2, d3, d4, d5, t2, t3, t4, t5;
-
-  if (ido==1)
-    for (k=0; k<l1; ++k)
-      {
-      PMC (t2,t5,CC(0,1,k),CC(0,4,k))
-      PMC (t3,t4,CC(0,2,k),CC(0,3,k))
-      CH(0,k,0).r=CC(0,0,k).r+t2.r+t3.r;
-      CH(0,k,0).i=CC(0,0,k).i+t2.i+t3.i;
-      c2.r=CC(0,0,k).r+tr11*t2.r+tr12*t3.r;
-      c2.i=CC(0,0,k).i+tr11*t2.i+tr12*t3.i;
-      c3.r=CC(0,0,k).r+tr12*t2.r+tr11*t3.r;
-      c3.i=CC(0,0,k).i+tr12*t2.i+tr11*t3.i;
-      c5.r=ti11*t5.r+ti12*t4.r;
-      c5.i=ti11*t5.i+ti12*t4.i;
-      c4.r=ti12*t5.r-ti11*t4.r;
-      c4.i=ti12*t5.i-ti11*t4.i;
-      CONJFLIPC(c5)
-      PMC(CH(0,k,1),CH(0,k,4),c2,c5)
-      CONJFLIPC(c4)
-      PMC(CH(0,k,2),CH(0,k,3),c3,c4)
-      }
-  else
-    for (k=0; k<l1; ++k)
-      for (i=0; i<ido; ++i)
-        {
-        PMC (t2,t5,CC(i,1,k),CC(i,4,k))
-        PMC (t3,t4,CC(i,2,k),CC(i,3,k))
-        CH(i,k,0).r=CC(i,0,k).r+t2.r+t3.r;
-        CH(i,k,0).i=CC(i,0,k).i+t2.i+t3.i;
-        c2.r=CC(i,0,k).r+tr11*t2.r+tr12*t3.r;
-        c2.i=CC(i,0,k).i+tr11*t2.i+tr12*t3.i;
-        c3.r=CC(i,0,k).r+tr12*t2.r+tr11*t3.r;
-        c3.i=CC(i,0,k).i+tr12*t2.i+tr11*t3.i;
-        c5.r=ti11*t5.r+ti12*t4.r;
-        c5.i=ti11*t5.i+ti12*t4.i;
-        c4.r=ti12*t5.r-ti11*t4.r;
-        c4.i=ti12*t5.i-ti11*t4.i;
-        CONJFLIPC(c5)
-        PMC(d2,d5,c2,c5)
-        CONJFLIPC(c4)
-        PMC(d3,d4,c3,c4)
-        MULPMSIGNC (CH(i,k,1),WA(0,i),d2)
-        MULPMSIGNC (CH(i,k,2),WA(1,i),d3)
-        MULPMSIGNC (CH(i,k,3),WA(2,i),d4)
-        MULPMSIGNC (CH(i,k,4),WA(3,i),d5)
-        }
-  }
-
-static void X(6)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
-  const cmplx *wa)
-  {
-  const size_t cdim=6;
-  static const double taui= PSIGN 0.86602540378443864676;
-  cmplx ta1,ta2,ta3,a0,a1,a2,tb1,tb2,tb3,b0,b1,b2,d1,d2,d3,d4,d5;
-  size_t i, k;
-
-  if (ido==1)
-    for (k=0; k<l1; ++k)
-      {
-      PMC(ta1,ta3,CC(0,2,k),CC(0,4,k))
-      ta2.r = CC(0,0,k).r - .5*ta1.r;
-      ta2.i = CC(0,0,k).i - .5*ta1.i;
-      SCALEC(ta3,taui)
-      ADDC(a0,CC(0,0,k),ta1)
-      CONJFLIPC(ta3)
-      PMC(a1,a2,ta2,ta3)
-      PMC(tb1,tb3,CC(0,5,k),CC(0,1,k))
-      tb2.r = CC(0,3,k).r - .5*tb1.r;
-      tb2.i = CC(0,3,k).i - .5*tb1.i;
-      SCALEC(tb3,taui)
-      ADDC(b0,CC(0,3,k),tb1)
-      CONJFLIPC(tb3)
-      PMC(b1,b2,tb2,tb3)
-      PMC(CH(0,k,0),CH(0,k,3),a0,b0)
-      PMC(CH(0,k,4),CH(0,k,1),a1,b1)
-      PMC(CH(0,k,2),CH(0,k,5),a2,b2)
-      }
-  else
-    for (k=0; k<l1; ++k)
-      for (i=0; i<ido; ++i)
-        {
-        PMC(ta1,ta3,CC(i,2,k),CC(i,4,k))
-        ta2.r = CC(i,0,k).r - .5*ta1.r;
-        ta2.i = CC(i,0,k).i - .5*ta1.i;
-        SCALEC(ta3,taui)
-        ADDC(a0,CC(i,0,k),ta1)
-        CONJFLIPC(ta3)
-        PMC(a1,a2,ta2,ta3)
-        PMC(tb1,tb3,CC(i,5,k),CC(i,1,k))
-        tb2.r = CC(i,3,k).r - .5*tb1.r;
-        tb2.i = CC(i,3,k).i - .5*tb1.i;
-        SCALEC(tb3,taui)
-        ADDC(b0,CC(i,3,k),tb1)
-        CONJFLIPC(tb3)
-        PMC(b1,b2,tb2,tb3)
-        PMC(CH(i,k,0),d3,a0,b0)
-        PMC(d4,d1,a1,b1)
-        PMC(d2,d5,a2,b2)
-        MULPMSIGNC (CH(i,k,1),WA(0,i),d1)
-        MULPMSIGNC (CH(i,k,2),WA(1,i),d2)
-        MULPMSIGNC (CH(i,k,3),WA(2,i),d3)
-        MULPMSIGNC (CH(i,k,4),WA(3,i),d4)
-        MULPMSIGNC (CH(i,k,5),WA(4,i),d5)
-        }
-  }
-
-static void X(g)(size_t ido, size_t ip, size_t l1, const cmplx *cc, cmplx *ch,
-  const cmplx *wa)
-  {
-  const size_t cdim=ip;
-  cmplx *tarr=RALLOC(cmplx,2*ip);
-  cmplx *ccl=tarr, *wal=tarr+ip;
-  size_t i,j,k,l,jc,lc;
-  size_t ipph = (ip+1)/2;
-
-  for (i=1; i<ip; ++i)
-    wal[i]=wa[ido*(i-1)];
-  for (k=0; k<l1; ++k)
-    for (i=0; i<ido; ++i)
-      {
-      cmplx s=CC(i,0,k);
-      ccl[0] = CC(i,0,k);
-      for(j=1,jc=ip-1; j<ipph; ++j,--jc)
-        {
-        PMC (ccl[j],ccl[jc],CC(i,j,k),CC(i,jc,k))
-        ADDC (s,s,ccl[j])
-        }
-      CH(i,k,0) = s;
-      for (j=1, jc=ip-1; j<=ipph; ++j,--jc)
-        {
-        cmplx abr=ccl[0], abi={0.,0.};
-        size_t iang=0;
-        for (l=1,lc=ip-1; l<ipph; ++l,--lc)
-          {
-          iang+=j;
-          if (iang>ip) iang-=ip;
-          abr.r += ccl[l ].r*wal[iang].r;
-          abr.i += ccl[l ].i*wal[iang].r;
-          abi.r += ccl[lc].r*wal[iang].i;
-          abi.i += ccl[lc].i*wal[iang].i;
-          }
-#ifndef BACKWARD
-          { abi.i=-abi.i; abi.r=-abi.r; }
-#endif
-        CONJFLIPC(abi)
-        PMC(CH(i,k,j),CH(i,k,jc),abr,abi)
-        }
-      }
-
-  DEALLOC(tarr);
-
-  if (ido==1) return;
-
-  for (j=1; j<ip; ++j)
-    for (k=0; k<l1; ++k)
-      {
-      size_t idij=(j-1)*ido+1;
-      for(i=1; i<ido; ++i, ++idij)
-        {
-        cmplx t=CH(i,k,j);
-        MULPMSIGNC (CH(i,k,j),wa[idij],t)
-        }
-      }
-  }
-
-#undef PSIGN
-#undef PMSIGNC
-#undef MULPMSIGNC
diff --git a/libfftpack/libfftpack.dox b/libfftpack/libfftpack.dox
deleted file mode 100644
index 9ed2362..0000000
--- a/libfftpack/libfftpack.dox
+++ /dev/null
@@ -1,5 +0,0 @@
-/*! \mainpage Libfftpack documentation
-  <ul>
-  <li>\ref fftgroup "Programming interface"
-  </ul>
- */
diff --git a/libfftpack/ls_fft.c b/libfftpack/ls_fft.c
deleted file mode 100644
index b1c0c96..0000000
--- a/libfftpack/ls_fft.c
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- *  This file is part of libfftpack.
- *
- *  libfftpack is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libfftpack is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libfftpack; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*
- *  Copyright (C) 2005 Max-Planck-Society
- *  \author Martin Reinecke
- */
-
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-#include "bluestein.h"
-#include "fftpack.h"
-#include "ls_fft.h"
-
-complex_plan make_complex_plan (size_t length)
-  {
-  complex_plan plan = RALLOC(complex_plan_i,1);
-  size_t pfsum = prime_factor_sum(length);
-  double comp1 = (double)(length*pfsum);
-  double comp2 = 2*3*length*log(3.*length);
-  comp2*=3.; /* fudge factor that appears to give good overall performance */
-  plan->length=length;
-  plan->bluestein = (comp2<comp1);
-  if (plan->bluestein)
-    bluestein_i (length,&(plan->work),&(plan->worksize));
-  else
-    {
-    plan->worksize=4*length+15;
-    plan->work=RALLOC(double,4*length+15);
-    cffti(length, plan->work);
-    }
-  return plan;
-  }
-
-complex_plan copy_complex_plan (complex_plan plan)
-  {
-  if (!plan) return NULL;
-  {
-  complex_plan newplan = RALLOC(complex_plan_i,1);
-  *newplan = *plan;
-  newplan->work=RALLOC(double,newplan->worksize);
-  memcpy(newplan->work,plan->work,sizeof(double)*newplan->worksize);
-  return newplan;
-  }
-  }
-
-void kill_complex_plan (complex_plan plan)
-  {
-  DEALLOC(plan->work);
-  DEALLOC(plan);
-  }
-
-void complex_plan_forward (complex_plan plan, double *data)
-  {
-  if (plan->bluestein)
-    bluestein (plan->length, data, plan->work, -1);
-  else
-    cfftf (plan->length, data, plan->work);
-  }
-
-void complex_plan_backward (complex_plan plan, double *data)
-  {
-  if (plan->bluestein)
-    bluestein (plan->length, data, plan->work, 1);
-  else
-    cfftb (plan->length, data, plan->work);
-  }
-
-
-real_plan make_real_plan (size_t length)
-  {
-  real_plan plan = RALLOC(real_plan_i,1);
-  size_t pfsum = prime_factor_sum(length);
-  double comp1 = .5*length*pfsum;
-  double comp2 = 2*3*length*log(3.*length);
-  comp2*=3; /* fudge factor that appears to give good overall performance */
-  plan->length=length;
-  plan->bluestein = (comp2<comp1);
-  if (plan->bluestein)
-    bluestein_i (length,&(plan->work),&(plan->worksize));
-  else
-    {
-    plan->worksize=2*length+15;
-    plan->work=RALLOC(double,2*length+15);
-    rffti(length, plan->work);
-    }
-  return plan;
-  }
-
-real_plan copy_real_plan (real_plan plan)
-  {
-  if (!plan) return NULL;
-  {
-  real_plan newplan = RALLOC(real_plan_i,1);
-  *newplan = *plan;
-  newplan->work=RALLOC(double,newplan->worksize);
-  memcpy(newplan->work,plan->work,sizeof(double)*newplan->worksize);
-  return newplan;
-  }
-  }
-
-void kill_real_plan (real_plan plan)
-  {
-  DEALLOC(plan->work);
-  DEALLOC(plan);
-  }
-
-void real_plan_forward_fftpack (real_plan plan, double *data)
-  {
-  if (plan->bluestein)
-    {
-    size_t m;
-    size_t n=plan->length;
-    double *tmp = RALLOC(double,2*n);
-    for (m=0; m<n; ++m)
-      {
-      tmp[2*m] = data[m];
-      tmp[2*m+1] = 0.;
-      }
-    bluestein(n,tmp,plan->work,-1);
-    data[0] = tmp[0];
-    memcpy (data+1, tmp+2, (n-1)*sizeof(double));
-    DEALLOC(tmp);
-    }
-  else
-    rfftf (plan->length, data, plan->work);
-  }
-
-static void fftpack2halfcomplex (double *data, size_t n)
-  {
-  size_t m;
-  double *tmp = RALLOC(double,n);
-  tmp[0]=data[0];
-  for (m=1; m<(n+1)/2; ++m)
-    {
-    tmp[m]=data[2*m-1];
-    tmp[n-m]=data[2*m];
-    }
-  if (!(n&1))
-    tmp[n/2]=data[n-1];
-  memcpy (data,tmp,n*sizeof(double));
-  DEALLOC(tmp);
-  }
-
-static void halfcomplex2fftpack (double *data, size_t n)
-  {
-  size_t m;
-  double *tmp = RALLOC(double,n);
-  tmp[0]=data[0];
-  for (m=1; m<(n+1)/2; ++m)
-    {
-    tmp[2*m-1]=data[m];
-    tmp[2*m]=data[n-m];
-    }
-  if (!(n&1))
-    tmp[n-1]=data[n/2];
-  memcpy (data,tmp,n*sizeof(double));
-  DEALLOC(tmp);
-  }
-
-void real_plan_forward_fftw (real_plan plan, double *data)
-  {
-  real_plan_forward_fftpack (plan, data);
-  fftpack2halfcomplex (data,plan->length);
-  }
-
-void real_plan_backward_fftpack (real_plan plan, double *data)
-  {
-  if (plan->bluestein)
-    {
-    size_t m;
-    size_t n=plan->length;
-    double *tmp = RALLOC(double,2*n);
-    tmp[0]=data[0];
-    tmp[1]=0.;
-    memcpy (tmp+2,data+1, (n-1)*sizeof(double));
-    if ((n&1)==0) tmp[n+1]=0.;
-    for (m=2; m<n; m+=2)
-      {
-      tmp[2*n-m]=tmp[m];
-      tmp[2*n-m+1]=-tmp[m+1];
-      }
-    bluestein (n, tmp, plan->work, 1);
-    for (m=0; m<n; ++m)
-      data[m] = tmp[2*m];
-    DEALLOC(tmp);
-    }
-  else
-    rfftb (plan->length, data, plan->work);
-  }
-
-void real_plan_backward_fftw (real_plan plan, double *data)
-  {
-  halfcomplex2fftpack (data,plan->length);
-  real_plan_backward_fftpack (plan, data);
-  }
-
-void real_plan_forward_c (real_plan plan, double *data)
-  {
-  size_t m;
-  size_t n=plan->length;
-
-  if (plan->bluestein)
-    {
-    for (m=1; m<2*n; m+=2)
-      data[m]=0;
-    bluestein (plan->length, data, plan->work, -1);
-    data[1]=0;
-    for (m=2; m<n; m+=2)
-      {
-      double avg;
-      avg = 0.5*(data[2*n-m]+data[m]);
-      data[2*n-m] = data[m] = avg;
-      avg = 0.5*(data[2*n-m+1]-data[m+1]);
-      data[2*n-m+1] = avg;
-      data[m+1] = -avg;
-      }
-    if ((n&1)==0) data[n+1] = 0.;
-    }
-  else
-    {
-/* using "m+m" instead of "2*m" to avoid a nasty bug in Intel's compiler */
-    for (m=0; m<n; ++m) data[m+1] = data[m+m];
-    rfftf (n, data+1, plan->work);
-    data[0] = data[1];
-    data[1] = 0;
-    for (m=2; m<n; m+=2)
-      {
-      data[2*n-m]   =  data[m];
-      data[2*n-m+1] = -data[m+1];
-      }
-    if ((n&1)==0) data[n+1] = 0.;
-    }
-  }
-
-void real_plan_backward_c (real_plan plan, double *data)
-  {
-  size_t n=plan->length;
-
-  if (plan->bluestein)
-    {
-    size_t m;
-    data[1]=0;
-    for (m=2; m<n; m+=2)
-      {
-      double avg;
-      avg = 0.5*(data[2*n-m]+data[m]);
-      data[2*n-m] = data[m] = avg;
-      avg = 0.5*(data[2*n-m+1]-data[m+1]);
-      data[2*n-m+1] = avg;
-      data[m+1] = -avg;
-      }
-    if ((n&1)==0) data[n+1] = 0.;
-    bluestein (plan->length, data, plan->work, 1);
-    for (m=1; m<2*n; m+=2)
-      data[m]=0;
-    }
-  else
-    {
-    ptrdiff_t m;
-    data[1] = data[0];
-    rfftb (n, data+1, plan->work);
-    for (m=n-1; m>=0; --m)
-      {
-      data[2*m]   = data[m+1];
-      data[2*m+1] = 0.;
-      }
-    }
-  }
diff --git a/libfftpack/ls_fft.h b/libfftpack/ls_fft.h
deleted file mode 100644
index 2d555eb..0000000
--- a/libfftpack/ls_fft.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- *  This file is part of libfftpack.
- *
- *  libfftpack is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libfftpack is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libfftpack; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*! \file ls_fft.h
- *  Interface for the LevelS FFT package.
- *
- *  Copyright (C) 2004 Max-Planck-Society
- *  \author Martin Reinecke
- */
-
-#ifndef PLANCK_LS_FFT_H
-#define PLANCK_LS_FFT_H
-
-#include "c_utils.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*!\defgroup fftgroup FFT interface
-This package is intended to calculate one-dimensional real or complex FFTs
-with high accuracy and good efficiency even for lengths containing large
-prime factors.
-The code is written in C, but a Fortran wrapper exists as well.
-
-Before any FFT is executed, a plan must be generated for it. Plan creation
-is designed to be fast, so that there is no significant overhead if the
-plan is only used once or a few times.
-
-The main component of the code is based on Paul N. Swarztrauber's FFTPACK in the
-double precision incarnation by Hugh C. Pumphrey
-(http://www.netlib.org/fftpack/dp.tgz).
-
-I replaced the iterative sine and cosine calculations in radfg() and radbg()
-by an exact calculation, which slightly improves the transform accuracy for
-real FFTs with lengths containing large prime factors.
-
-Since FFTPACK becomes quite slow for FFT lengths with large prime factors
-(in the worst case of prime lengths it reaches \f$\mathcal{O}(n^2)\f$
-complexity), I implemented Bluestein's algorithm, which computes a FFT of length
-\f$n\f$ by several FFTs of length \f$n_2\ge 2n-1\f$ and a convolution. Since
-\f$n_2\f$ can be chosen to be highly composite, this algorithm is more efficient
-if \f$n\f$ has large prime factors. The longer FFTs themselves are then computed
-using the FFTPACK routines.
-Bluestein's algorithm was implemented according to the description on Wikipedia
-(<a href="http://en.wikipedia.org/wiki/Bluestein%27s_FFT_algorithm">
-http://en.wikipedia.org/wiki/Bluestein%27s_FFT_algorithm</a>).
-
-\b Thread-safety:
-All routines can be called concurrently; all information needed by
-<tt>ls_fft</tt> is stored in the plan variable. However, using the same plan
-variable on multiple threads simultaneously is not supported and will lead to
-data corruption.
-*/
-/*! \{ */
-
-typedef struct
-  {
-  double *work;
-  size_t length, worksize;
-  int bluestein;
-  } complex_plan_i;
-
-/*! The opaque handle type for complex-FFT plans. */
-typedef complex_plan_i * complex_plan;
-
-/*! Returns a plan for a complex FFT with \a length elements. */
-complex_plan make_complex_plan (size_t length);
-/*! Constructs a copy of \a plan. */
-complex_plan copy_complex_plan (complex_plan plan);
-/*! Destroys a plan for a complex FFT. */
-void kill_complex_plan (complex_plan plan);
-/*! Computes a complex forward FFT on \a data, using \a plan.
-    \a Data has the form <tt>r0, i0, r1, i1, ...,
-    r[length-1], i[length-1]</tt>. */
-void complex_plan_forward (complex_plan plan, double *data);
-/*! Computes a complex backward FFT on \a data, using \a plan.
-    \a Data has the form <tt>r0, i0, r1, i1, ...,
-    r[length-1], i[length-1]</tt>. */
-void complex_plan_backward (complex_plan plan, double *data);
-
-typedef struct
-  {
-  double *work;
-  size_t length, worksize;
-  int bluestein;
-  } real_plan_i;
-
-/*! The opaque handle type for real-FFT plans. */
-typedef real_plan_i * real_plan;
-
-/*! Returns a plan for a real FFT with \a length elements. */
-real_plan make_real_plan (size_t length);
-/*! Constructs a copy of \a plan. */
-real_plan copy_real_plan (real_plan plan);
-/*! Destroys a plan for a real FFT. */
-void kill_real_plan (real_plan plan);
-/*! Computes a real forward FFT on \a data, using \a plan
-    and assuming the FFTPACK storage scheme:
-    - on entry, \a data has the form <tt>r0, r1, ..., r[length-1]</tt>;
-    - on exit, it has the form <tt>r0, r1, i1, r2, i2, ...</tt>
-      (a total of \a length values). */
-void real_plan_forward_fftpack (real_plan plan, double *data);
-/*! Computes a real backward FFT on \a data, using \a plan
-    and assuming the FFTPACK storage scheme:
-    - on entry, \a data has the form <tt>r0, r1, i1, r2, i2, ...</tt>
-    (a total of \a length values);
-    - on exit, it has the form <tt>r0, r1, ..., r[length-1]</tt>. */
-void real_plan_backward_fftpack (real_plan plan, double *data);
-/*! Computes a real forward FFT on \a data, using \a plan
-    and assuming the FFTW halfcomplex storage scheme:
-    - on entry, \a data has the form <tt>r0, r1, ..., r[length-1]</tt>;
-    - on exit, it has the form <tt>r0, r1, r2, ..., i2, i1</tt>. */
-void real_plan_forward_fftw (real_plan plan, double *data);
-/*! Computes a real backward FFT on \a data, using \a plan
-    and assuming the FFTW halfcomplex storage scheme:
-    - on entry, \a data has the form <tt>r0, r1, r2, ..., i2, i1</tt>.
-    - on exit, it has the form <tt>r0, r1, ..., r[length-1]</tt>. */
-void real_plan_backward_fftw (real_plan plan, double *data);
-/*! Computes a real forward FFT on \a data, using \a plan
-    and assuming a full-complex storage scheme:
-    - on entry, \a data has the form <tt>r0, [ignored], r1, [ignored], ...,
-      r[length-1], [ignored]</tt>;
-    - on exit, it has the form <tt>r0, i0, r1, i1, ...,
-      r[length-1], i[length-1]</tt>. */
-void real_plan_forward_c (real_plan plan, double *data);
-/*! Computes a real backward FFT on \a data, using \a plan
-    and assuming a full-complex storage scheme:
-    - on entry, \a data has the form <tt>r0, i0, r1, i1, ...,
-      r[length-1], i[length-1]</tt>;
-    - on exit, it has the form <tt>r0, 0, r1, 0, ..., r[length-1], 0</tt>. */
-void real_plan_backward_c (real_plan plan, double *data);
-
-/*! \} */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/libfftpack/planck.make b/libfftpack/planck.make
deleted file mode 100644
index c171367..0000000
--- a/libfftpack/planck.make
+++ /dev/null
@@ -1,21 +0,0 @@
-PKG:=libfftpack
-
-SD:=$(SRCROOT)/$(PKG)
-OD:=$(BLDROOT)/$(PKG)
-
-FULL_INCLUDE+= -I$(SD)
-
-HDR_$(PKG):=$(SD)/*.h
-LIB_$(PKG):=$(LIBDIR)/libfftpack.a
-OBJ:=fftpack.o bluestein.o ls_fft.o
-OBJ:=$(OBJ:%=$(OD)/%)
-
-ODEP:=$(HDR_$(PKG)) $(HDR_c_utils)
-
-$(OD)/fftpack.o: $(SD)/fftpack_inc.c
-
-$(OBJ): $(ODEP) | $(OD)_mkdir
-$(LIB_$(PKG)): $(OBJ)
-
-all_hdr+=$(HDR_$(PKG))
-all_lib+=$(LIB_$(PKG))
diff --git a/libsharp/planck.make b/libsharp/planck.make
deleted file mode 100644
index 76d534f..0000000
--- a/libsharp/planck.make
+++ /dev/null
@@ -1,29 +0,0 @@
-PKG:=libsharp
-
-SD:=$(SRCROOT)/$(PKG)
-OD:=$(BLDROOT)/$(PKG)
-
-FULL_INCLUDE+= -I$(SD)
-
-HDR_$(PKG):=$(SD)/*.h
-LIB_$(PKG):=$(LIBDIR)/libsharp.a
-BIN:=sharp_testsuite
-LIBOBJ:=sharp_ylmgen_c.o sharp.o sharp_announce.o sharp_geomhelpers.o sharp_almhelpers.o sharp_core.o sharp_legendre.o sharp_legendre_roots.o sharp_legendre_table.o
-ALLOBJ:=$(LIBOBJ) sharp_testsuite.o
-LIBOBJ:=$(LIBOBJ:%=$(OD)/%)
-ALLOBJ:=$(ALLOBJ:%=$(OD)/%)
-
-ODEP:=$(HDR_$(PKG)) $(HDR_libfftpack) $(HDR_c_utils)
-$(OD)/sharp_core.o: $(SD)/sharp_core_inchelper.c $(SD)/sharp_core_inc.c $(SD)/sharp_core_inc2.c
-$(OD)/sharp.o: $(SD)/sharp_mpi.c
-BDEP:=$(LIB_$(PKG)) $(LIB_libfftpack) $(LIB_c_utils)
-
-$(LIB_$(PKG)): $(LIBOBJ)
-
-$(ALLOBJ): $(ODEP) | $(OD)_mkdir
-BIN:=$(BIN:%=$(BINDIR)/%)
-$(BIN): $(BINDIR)/% : $(OD)/%.o $(BDEP)
-
-all_hdr+=$(HDR_$(PKG))
-all_lib+=$(LIB_$(PKG))
-all_cbin+=$(BIN)
diff --git a/libsharp/sharp.c b/libsharp/sharp.c
index 1eb8857..b1b9277 100644
--- a/libsharp/sharp.c
+++ b/libsharp/sharp.c
@@ -30,7 +30,7 @@
  */
 
 #include <math.h>
-#include "ls_fft.h"
+#include "pocketfft/pocketfft.h"
 #include "sharp_ylmgen_c.h"
 #include "sharp_internal.h"
 #include "c_utils.h"
@@ -82,7 +82,7 @@ typedef struct
   double phi0_;
   dcmplx *shiftarr;
   int s_shift;
-  real_plan plan;
+  rfft_plan plan;
   int norot;
   } ringhelper;
 
@@ -94,7 +94,7 @@ static void ringhelper_init (ringhelper *self)
 
 static void ringhelper_destroy (ringhelper *self)
   {
-  if (self->plan) kill_real_plan(self->plan);
+  if (self->plan) destroy_rfft_plan(self->plan);
   DEALLOC(self->shiftarr);
   ringhelper_init(self);
   }
@@ -111,11 +111,11 @@ static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
       for (int m=0; m<=mmax; ++m)
         self->shiftarr[m] = cos(m*phi0) + _Complex_I*sin(m*phi0);
       }
-  if (!self->plan) self->plan=make_real_plan(nph);
-  if (nph!=(int)self->plan->length)
+  if (!self->plan) self->plan=make_rfft_plan(nph);
+  if (nph!=(int)rfft_length(self->plan))
     {
-    kill_real_plan(self->plan);
-    self->plan=make_real_plan(nph);
+    destroy_rfft_plan(self->plan);
+    self->plan=make_rfft_plan(nph);
     }
   }
 
@@ -323,7 +323,7 @@ static void ringhelper_phase2ring (ringhelper *self,
       }
     }
   data[1]=data[0];
-  real_plan_backward_fftpack (self->plan, &(data[1]));
+  rfft_backward (self->plan, &(data[1]), 1.);
   }
 
 static void ringhelper_ring2phase (ringhelper *self,
@@ -342,7 +342,7 @@ static void ringhelper_ring2phase (ringhelper *self,
   if (flags&SHARP_REAL_HARMONICS)
     wgt *= sqrt_two;
 
-  real_plan_forward_fftpack (self->plan, &(data[1]));
+  rfft_forward (self->plan, &(data[1]), 1.);
   data[0]=data[1];
   data[1]=data[nph+1]=0.;
 
diff --git a/libsharp/sharp.h b/libsharp/sharp.h
index 6722aee..9c5dd57 100644
--- a/libsharp/sharp.h
+++ b/libsharp/sharp.h
@@ -39,8 +39,5 @@
 #include <complex.h>
 
 #include "sharp_lowlevel.h"
-#include "sharp_legendre.h"
-#include "sharp_legendre_roots.h"
-#include "sharp_legendre_table.h"
 
 #endif
diff --git a/libsharp/sharp_geomhelpers.c b/libsharp/sharp_geomhelpers.c
index dbb44e0..0aed60d 100644
--- a/libsharp/sharp_geomhelpers.c
+++ b/libsharp/sharp_geomhelpers.c
@@ -34,7 +34,7 @@
 #include "sharp_geomhelpers.h"
 #include "sharp_legendre_roots.h"
 #include "c_utils.h"
-#include "ls_fft.h"
+#include "pocketfft/pocketfft.h"
 #include <stdio.h>
 
 void sharp_make_subset_healpix_geom_info (int nside, int stride, int nrings,
@@ -161,9 +161,9 @@ void sharp_make_fejer1_geom_info (int nrings, int ppring, double phi0,
     weight[2*k  ]=2./(1.-4.*k*k)*sin((k*pi)/nrings);
     }
   if ((nrings&1)==0) weight[nrings-1]=0.;
-  real_plan plan = make_real_plan(nrings);
-  real_plan_backward_fftpack(plan,weight);
-  kill_real_plan(plan);
+  rfft_plan plan = make_rfft_plan(nrings);
+  rfft_backward(plan,weight,1.);
+  destroy_rfft_plan(plan);
 
   for (int m=0; m<(nrings+1)/2; ++m)
     {
@@ -208,9 +208,9 @@ void sharp_make_cc_geom_info (int nrings, int ppring, double phi0,
   for (int k=1; k<=(n/2-1); ++k)
     weight[2*k-1]=2./(1.-4.*k*k) + dw;
   weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1. -dw*((2-(n&1))*n-1);
-  real_plan plan = make_real_plan(n);
-  real_plan_backward_fftpack(plan,weight);
-  kill_real_plan(plan);
+  rfft_plan plan = make_rfft_plan(n);
+  rfft_backward(plan,weight,1.);
+  destroy_rfft_plan(plan);
   weight[n]=weight[0];
 
   for (int m=0; m<(nrings+1)/2; ++m)
@@ -256,9 +256,9 @@ void sharp_make_fejer2_geom_info (int nrings, int ppring, double phi0,
   for (int k=1; k<=(n/2-1); ++k)
     weight[2*k-1]=2./(1.-4.*k*k);
   weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1.;
-  real_plan plan = make_real_plan(n);
-  real_plan_backward_fftpack(plan,weight);
-  kill_real_plan(plan);
+  rfft_plan plan = make_rfft_plan(n);
+  rfft_backward(plan,weight,1.);
+  destroy_rfft_plan(plan);
   for (int m=0; m<nrings; ++m)
     weight[m]=weight[m+1];
 
diff --git a/libsharp/sharp_legendre.c b/libsharp/sharp_legendre.c
deleted file mode 100644
index 24d69b6..0000000
--- a/libsharp/sharp_legendre.c
+++ /dev/null
@@ -1,1319 +0,0 @@
-/* DO NOT EDIT. md5sum of source: a8c5c18a7a19c378187dbf461d12eb5c *//*
-
-    NOTE NOTE NOTE
-
-    This file is edited in sharp_legendre.c.in which is then preprocessed.
-    Do not make manual  modifications to sharp_legendre.c.
-
-    NOTE NOTE NOTE
-
-*/
-
-
-/*
- *  This file is part of libsharp.
- *
- * Redistribution and use in source and binary forms, with or without
- * met:
- * 
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*! \file sharp_legendre.c.in
- *
- *  Copyright (C) 2015 University of Oslo
- *  \author Dag Sverre Seljebotn
- */
-
-#ifndef NO_LEGENDRE
-#if (VLEN==8)
-#error This code is not tested with MIC; please compile with -DNO_LEGENDRE
-/* ...or test it (it probably works) and remove this check */
-#endif
-
-#ifndef SHARP_LEGENDRE_CS
-#define SHARP_LEGENDRE_CS 4
-#endif
-
-#define MAX_CS 6
-#if (SHARP_LEGENDRE_CS > MAX_CS)
-#error (SHARP_LEGENDRE_CS > MAX_CS)
-#endif
-
-#include "sharp_legendre.h"
-#include "sharp_vecsupport.h"
-
-#include <stdlib.h>
-
-
-
-static void legendre_transform_vec1(double *recfacs, double *bl, ptrdiff_t lmax,
-                                              double xarr[(1) * VLEN],
-                                              double out[(1) * VLEN]) {
-    
-    Tv P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu(xarr + 0 * VLEN);
-    Pm1_0 = vload(1.0);
-    P_0 = x0;
-    b = vload(*bl);
-    y0 = vmul(Pm1_0, b);
-    
-    
-    b = vload(*(bl + 1));
-    
-    vfmaeq(y0, P_0, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload(*(bl + l));
-        R = vload(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq(P_0, W2, R);
-        vfmaeq(y0, P_0, b);
-        
-
-    }
-    
-    vstoreu(out + 0 * VLEN, y0);
-    
-}
-
-static void legendre_transform_vec2(double *recfacs, double *bl, ptrdiff_t lmax,
-                                              double xarr[(2) * VLEN],
-                                              double out[(2) * VLEN]) {
-    
-    Tv P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv P_1, Pm1_1, Pm2_1, x1, y1;
-    
-    Tv W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu(xarr + 0 * VLEN);
-    Pm1_0 = vload(1.0);
-    P_0 = x0;
-    b = vload(*bl);
-    y0 = vmul(Pm1_0, b);
-    
-    x1 = vloadu(xarr + 1 * VLEN);
-    Pm1_1 = vload(1.0);
-    P_1 = x1;
-    b = vload(*bl);
-    y1 = vmul(Pm1_1, b);
-    
-    
-    b = vload(*(bl + 1));
-    
-    vfmaeq(y0, P_0, b);
-    
-    vfmaeq(y1, P_1, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload(*(bl + l));
-        R = vload(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq(P_0, W2, R);
-        vfmaeq(y0, P_0, b);
-        
-        Pm2_1 = Pm1_1; Pm1_1 = P_1;
-        W1 = vmul(x1, Pm1_1);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_1);
-        P_1 = W1;
-        vfmaeq(P_1, W2, R);
-        vfmaeq(y1, P_1, b);
-        
-
-    }
-    
-    vstoreu(out + 0 * VLEN, y0);
-    
-    vstoreu(out + 1 * VLEN, y1);
-    
-}
-
-static void legendre_transform_vec3(double *recfacs, double *bl, ptrdiff_t lmax,
-                                              double xarr[(3) * VLEN],
-                                              double out[(3) * VLEN]) {
-    
-    Tv P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv P_1, Pm1_1, Pm2_1, x1, y1;
-    
-    Tv P_2, Pm1_2, Pm2_2, x2, y2;
-    
-    Tv W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu(xarr + 0 * VLEN);
-    Pm1_0 = vload(1.0);
-    P_0 = x0;
-    b = vload(*bl);
-    y0 = vmul(Pm1_0, b);
-    
-    x1 = vloadu(xarr + 1 * VLEN);
-    Pm1_1 = vload(1.0);
-    P_1 = x1;
-    b = vload(*bl);
-    y1 = vmul(Pm1_1, b);
-    
-    x2 = vloadu(xarr + 2 * VLEN);
-    Pm1_2 = vload(1.0);
-    P_2 = x2;
-    b = vload(*bl);
-    y2 = vmul(Pm1_2, b);
-    
-    
-    b = vload(*(bl + 1));
-    
-    vfmaeq(y0, P_0, b);
-    
-    vfmaeq(y1, P_1, b);
-    
-    vfmaeq(y2, P_2, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload(*(bl + l));
-        R = vload(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq(P_0, W2, R);
-        vfmaeq(y0, P_0, b);
-        
-        Pm2_1 = Pm1_1; Pm1_1 = P_1;
-        W1 = vmul(x1, Pm1_1);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_1);
-        P_1 = W1;
-        vfmaeq(P_1, W2, R);
-        vfmaeq(y1, P_1, b);
-        
-        Pm2_2 = Pm1_2; Pm1_2 = P_2;
-        W1 = vmul(x2, Pm1_2);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_2);
-        P_2 = W1;
-        vfmaeq(P_2, W2, R);
-        vfmaeq(y2, P_2, b);
-        
-
-    }
-    
-    vstoreu(out + 0 * VLEN, y0);
-    
-    vstoreu(out + 1 * VLEN, y1);
-    
-    vstoreu(out + 2 * VLEN, y2);
-    
-}
-
-static void legendre_transform_vec4(double *recfacs, double *bl, ptrdiff_t lmax,
-                                              double xarr[(4) * VLEN],
-                                              double out[(4) * VLEN]) {
-    
-    Tv P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv P_1, Pm1_1, Pm2_1, x1, y1;
-    
-    Tv P_2, Pm1_2, Pm2_2, x2, y2;
-    
-    Tv P_3, Pm1_3, Pm2_3, x3, y3;
-    
-    Tv W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu(xarr + 0 * VLEN);
-    Pm1_0 = vload(1.0);
-    P_0 = x0;
-    b = vload(*bl);
-    y0 = vmul(Pm1_0, b);
-    
-    x1 = vloadu(xarr + 1 * VLEN);
-    Pm1_1 = vload(1.0);
-    P_1 = x1;
-    b = vload(*bl);
-    y1 = vmul(Pm1_1, b);
-    
-    x2 = vloadu(xarr + 2 * VLEN);
-    Pm1_2 = vload(1.0);
-    P_2 = x2;
-    b = vload(*bl);
-    y2 = vmul(Pm1_2, b);
-    
-    x3 = vloadu(xarr + 3 * VLEN);
-    Pm1_3 = vload(1.0);
-    P_3 = x3;
-    b = vload(*bl);
-    y3 = vmul(Pm1_3, b);
-    
-    
-    b = vload(*(bl + 1));
-    
-    vfmaeq(y0, P_0, b);
-    
-    vfmaeq(y1, P_1, b);
-    
-    vfmaeq(y2, P_2, b);
-    
-    vfmaeq(y3, P_3, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload(*(bl + l));
-        R = vload(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq(P_0, W2, R);
-        vfmaeq(y0, P_0, b);
-        
-        Pm2_1 = Pm1_1; Pm1_1 = P_1;
-        W1 = vmul(x1, Pm1_1);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_1);
-        P_1 = W1;
-        vfmaeq(P_1, W2, R);
-        vfmaeq(y1, P_1, b);
-        
-        Pm2_2 = Pm1_2; Pm1_2 = P_2;
-        W1 = vmul(x2, Pm1_2);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_2);
-        P_2 = W1;
-        vfmaeq(P_2, W2, R);
-        vfmaeq(y2, P_2, b);
-        
-        Pm2_3 = Pm1_3; Pm1_3 = P_3;
-        W1 = vmul(x3, Pm1_3);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_3);
-        P_3 = W1;
-        vfmaeq(P_3, W2, R);
-        vfmaeq(y3, P_3, b);
-        
-
-    }
-    
-    vstoreu(out + 0 * VLEN, y0);
-    
-    vstoreu(out + 1 * VLEN, y1);
-    
-    vstoreu(out + 2 * VLEN, y2);
-    
-    vstoreu(out + 3 * VLEN, y3);
-    
-}
-
-static void legendre_transform_vec5(double *recfacs, double *bl, ptrdiff_t lmax,
-                                              double xarr[(5) * VLEN],
-                                              double out[(5) * VLEN]) {
-    
-    Tv P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv P_1, Pm1_1, Pm2_1, x1, y1;
-    
-    Tv P_2, Pm1_2, Pm2_2, x2, y2;
-    
-    Tv P_3, Pm1_3, Pm2_3, x3, y3;
-    
-    Tv P_4, Pm1_4, Pm2_4, x4, y4;
-    
-    Tv W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu(xarr + 0 * VLEN);
-    Pm1_0 = vload(1.0);
-    P_0 = x0;
-    b = vload(*bl);
-    y0 = vmul(Pm1_0, b);
-    
-    x1 = vloadu(xarr + 1 * VLEN);
-    Pm1_1 = vload(1.0);
-    P_1 = x1;
-    b = vload(*bl);
-    y1 = vmul(Pm1_1, b);
-    
-    x2 = vloadu(xarr + 2 * VLEN);
-    Pm1_2 = vload(1.0);
-    P_2 = x2;
-    b = vload(*bl);
-    y2 = vmul(Pm1_2, b);
-    
-    x3 = vloadu(xarr + 3 * VLEN);
-    Pm1_3 = vload(1.0);
-    P_3 = x3;
-    b = vload(*bl);
-    y3 = vmul(Pm1_3, b);
-    
-    x4 = vloadu(xarr + 4 * VLEN);
-    Pm1_4 = vload(1.0);
-    P_4 = x4;
-    b = vload(*bl);
-    y4 = vmul(Pm1_4, b);
-    
-    
-    b = vload(*(bl + 1));
-    
-    vfmaeq(y0, P_0, b);
-    
-    vfmaeq(y1, P_1, b);
-    
-    vfmaeq(y2, P_2, b);
-    
-    vfmaeq(y3, P_3, b);
-    
-    vfmaeq(y4, P_4, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload(*(bl + l));
-        R = vload(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq(P_0, W2, R);
-        vfmaeq(y0, P_0, b);
-        
-        Pm2_1 = Pm1_1; Pm1_1 = P_1;
-        W1 = vmul(x1, Pm1_1);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_1);
-        P_1 = W1;
-        vfmaeq(P_1, W2, R);
-        vfmaeq(y1, P_1, b);
-        
-        Pm2_2 = Pm1_2; Pm1_2 = P_2;
-        W1 = vmul(x2, Pm1_2);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_2);
-        P_2 = W1;
-        vfmaeq(P_2, W2, R);
-        vfmaeq(y2, P_2, b);
-        
-        Pm2_3 = Pm1_3; Pm1_3 = P_3;
-        W1 = vmul(x3, Pm1_3);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_3);
-        P_3 = W1;
-        vfmaeq(P_3, W2, R);
-        vfmaeq(y3, P_3, b);
-        
-        Pm2_4 = Pm1_4; Pm1_4 = P_4;
-        W1 = vmul(x4, Pm1_4);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_4);
-        P_4 = W1;
-        vfmaeq(P_4, W2, R);
-        vfmaeq(y4, P_4, b);
-        
-
-    }
-    
-    vstoreu(out + 0 * VLEN, y0);
-    
-    vstoreu(out + 1 * VLEN, y1);
-    
-    vstoreu(out + 2 * VLEN, y2);
-    
-    vstoreu(out + 3 * VLEN, y3);
-    
-    vstoreu(out + 4 * VLEN, y4);
-    
-}
-
-static void legendre_transform_vec6(double *recfacs, double *bl, ptrdiff_t lmax,
-                                              double xarr[(6) * VLEN],
-                                              double out[(6) * VLEN]) {
-    
-    Tv P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv P_1, Pm1_1, Pm2_1, x1, y1;
-    
-    Tv P_2, Pm1_2, Pm2_2, x2, y2;
-    
-    Tv P_3, Pm1_3, Pm2_3, x3, y3;
-    
-    Tv P_4, Pm1_4, Pm2_4, x4, y4;
-    
-    Tv P_5, Pm1_5, Pm2_5, x5, y5;
-    
-    Tv W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu(xarr + 0 * VLEN);
-    Pm1_0 = vload(1.0);
-    P_0 = x0;
-    b = vload(*bl);
-    y0 = vmul(Pm1_0, b);
-    
-    x1 = vloadu(xarr + 1 * VLEN);
-    Pm1_1 = vload(1.0);
-    P_1 = x1;
-    b = vload(*bl);
-    y1 = vmul(Pm1_1, b);
-    
-    x2 = vloadu(xarr + 2 * VLEN);
-    Pm1_2 = vload(1.0);
-    P_2 = x2;
-    b = vload(*bl);
-    y2 = vmul(Pm1_2, b);
-    
-    x3 = vloadu(xarr + 3 * VLEN);
-    Pm1_3 = vload(1.0);
-    P_3 = x3;
-    b = vload(*bl);
-    y3 = vmul(Pm1_3, b);
-    
-    x4 = vloadu(xarr + 4 * VLEN);
-    Pm1_4 = vload(1.0);
-    P_4 = x4;
-    b = vload(*bl);
-    y4 = vmul(Pm1_4, b);
-    
-    x5 = vloadu(xarr + 5 * VLEN);
-    Pm1_5 = vload(1.0);
-    P_5 = x5;
-    b = vload(*bl);
-    y5 = vmul(Pm1_5, b);
-    
-    
-    b = vload(*(bl + 1));
-    
-    vfmaeq(y0, P_0, b);
-    
-    vfmaeq(y1, P_1, b);
-    
-    vfmaeq(y2, P_2, b);
-    
-    vfmaeq(y3, P_3, b);
-    
-    vfmaeq(y4, P_4, b);
-    
-    vfmaeq(y5, P_5, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload(*(bl + l));
-        R = vload(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq(P_0, W2, R);
-        vfmaeq(y0, P_0, b);
-        
-        Pm2_1 = Pm1_1; Pm1_1 = P_1;
-        W1 = vmul(x1, Pm1_1);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_1);
-        P_1 = W1;
-        vfmaeq(P_1, W2, R);
-        vfmaeq(y1, P_1, b);
-        
-        Pm2_2 = Pm1_2; Pm1_2 = P_2;
-        W1 = vmul(x2, Pm1_2);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_2);
-        P_2 = W1;
-        vfmaeq(P_2, W2, R);
-        vfmaeq(y2, P_2, b);
-        
-        Pm2_3 = Pm1_3; Pm1_3 = P_3;
-        W1 = vmul(x3, Pm1_3);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_3);
-        P_3 = W1;
-        vfmaeq(P_3, W2, R);
-        vfmaeq(y3, P_3, b);
-        
-        Pm2_4 = Pm1_4; Pm1_4 = P_4;
-        W1 = vmul(x4, Pm1_4);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_4);
-        P_4 = W1;
-        vfmaeq(P_4, W2, R);
-        vfmaeq(y4, P_4, b);
-        
-        Pm2_5 = Pm1_5; Pm1_5 = P_5;
-        W1 = vmul(x5, Pm1_5);
-        W2 = W1;
-        W2 = vsub(W2, Pm2_5);
-        P_5 = W1;
-        vfmaeq(P_5, W2, R);
-        vfmaeq(y5, P_5, b);
-        
-
-    }
-    
-    vstoreu(out + 0 * VLEN, y0);
-    
-    vstoreu(out + 1 * VLEN, y1);
-    
-    vstoreu(out + 2 * VLEN, y2);
-    
-    vstoreu(out + 3 * VLEN, y3);
-    
-    vstoreu(out + 4 * VLEN, y4);
-    
-    vstoreu(out + 5 * VLEN, y5);
-    
-}
-
-
-
-static void legendre_transform_vec1_s(float *recfacs, float *bl, ptrdiff_t lmax,
-                                              float xarr[(1) * VLEN_s],
-                                              float out[(1) * VLEN_s]) {
-    
-    Tv_s P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv_s W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu_s(xarr + 0 * VLEN_s);
-    Pm1_0 = vload_s(1.0);
-    P_0 = x0;
-    b = vload_s(*bl);
-    y0 = vmul_s(Pm1_0, b);
-    
-    
-    b = vload_s(*(bl + 1));
-    
-    vfmaeq_s(y0, P_0, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload_s(*(bl + l));
-        R = vload_s(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul_s(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq_s(P_0, W2, R);
-        vfmaeq_s(y0, P_0, b);
-        
-
-    }
-    
-    vstoreu_s(out + 0 * VLEN_s, y0);
-    
-}
-
-static void legendre_transform_vec2_s(float *recfacs, float *bl, ptrdiff_t lmax,
-                                              float xarr[(2) * VLEN_s],
-                                              float out[(2) * VLEN_s]) {
-    
-    Tv_s P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv_s P_1, Pm1_1, Pm2_1, x1, y1;
-    
-    Tv_s W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu_s(xarr + 0 * VLEN_s);
-    Pm1_0 = vload_s(1.0);
-    P_0 = x0;
-    b = vload_s(*bl);
-    y0 = vmul_s(Pm1_0, b);
-    
-    x1 = vloadu_s(xarr + 1 * VLEN_s);
-    Pm1_1 = vload_s(1.0);
-    P_1 = x1;
-    b = vload_s(*bl);
-    y1 = vmul_s(Pm1_1, b);
-    
-    
-    b = vload_s(*(bl + 1));
-    
-    vfmaeq_s(y0, P_0, b);
-    
-    vfmaeq_s(y1, P_1, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload_s(*(bl + l));
-        R = vload_s(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul_s(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq_s(P_0, W2, R);
-        vfmaeq_s(y0, P_0, b);
-        
-        Pm2_1 = Pm1_1; Pm1_1 = P_1;
-        W1 = vmul_s(x1, Pm1_1);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_1);
-        P_1 = W1;
-        vfmaeq_s(P_1, W2, R);
-        vfmaeq_s(y1, P_1, b);
-        
-
-    }
-    
-    vstoreu_s(out + 0 * VLEN_s, y0);
-    
-    vstoreu_s(out + 1 * VLEN_s, y1);
-    
-}
-
-static void legendre_transform_vec3_s(float *recfacs, float *bl, ptrdiff_t lmax,
-                                              float xarr[(3) * VLEN_s],
-                                              float out[(3) * VLEN_s]) {
-    
-    Tv_s P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv_s P_1, Pm1_1, Pm2_1, x1, y1;
-    
-    Tv_s P_2, Pm1_2, Pm2_2, x2, y2;
-    
-    Tv_s W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu_s(xarr + 0 * VLEN_s);
-    Pm1_0 = vload_s(1.0);
-    P_0 = x0;
-    b = vload_s(*bl);
-    y0 = vmul_s(Pm1_0, b);
-    
-    x1 = vloadu_s(xarr + 1 * VLEN_s);
-    Pm1_1 = vload_s(1.0);
-    P_1 = x1;
-    b = vload_s(*bl);
-    y1 = vmul_s(Pm1_1, b);
-    
-    x2 = vloadu_s(xarr + 2 * VLEN_s);
-    Pm1_2 = vload_s(1.0);
-    P_2 = x2;
-    b = vload_s(*bl);
-    y2 = vmul_s(Pm1_2, b);
-    
-    
-    b = vload_s(*(bl + 1));
-    
-    vfmaeq_s(y0, P_0, b);
-    
-    vfmaeq_s(y1, P_1, b);
-    
-    vfmaeq_s(y2, P_2, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload_s(*(bl + l));
-        R = vload_s(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul_s(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq_s(P_0, W2, R);
-        vfmaeq_s(y0, P_0, b);
-        
-        Pm2_1 = Pm1_1; Pm1_1 = P_1;
-        W1 = vmul_s(x1, Pm1_1);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_1);
-        P_1 = W1;
-        vfmaeq_s(P_1, W2, R);
-        vfmaeq_s(y1, P_1, b);
-        
-        Pm2_2 = Pm1_2; Pm1_2 = P_2;
-        W1 = vmul_s(x2, Pm1_2);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_2);
-        P_2 = W1;
-        vfmaeq_s(P_2, W2, R);
-        vfmaeq_s(y2, P_2, b);
-        
-
-    }
-    
-    vstoreu_s(out + 0 * VLEN_s, y0);
-    
-    vstoreu_s(out + 1 * VLEN_s, y1);
-    
-    vstoreu_s(out + 2 * VLEN_s, y2);
-    
-}
-
-static void legendre_transform_vec4_s(float *recfacs, float *bl, ptrdiff_t lmax,
-                                              float xarr[(4) * VLEN_s],
-                                              float out[(4) * VLEN_s]) {
-    
-    Tv_s P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv_s P_1, Pm1_1, Pm2_1, x1, y1;
-    
-    Tv_s P_2, Pm1_2, Pm2_2, x2, y2;
-    
-    Tv_s P_3, Pm1_3, Pm2_3, x3, y3;
-    
-    Tv_s W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu_s(xarr + 0 * VLEN_s);
-    Pm1_0 = vload_s(1.0);
-    P_0 = x0;
-    b = vload_s(*bl);
-    y0 = vmul_s(Pm1_0, b);
-    
-    x1 = vloadu_s(xarr + 1 * VLEN_s);
-    Pm1_1 = vload_s(1.0);
-    P_1 = x1;
-    b = vload_s(*bl);
-    y1 = vmul_s(Pm1_1, b);
-    
-    x2 = vloadu_s(xarr + 2 * VLEN_s);
-    Pm1_2 = vload_s(1.0);
-    P_2 = x2;
-    b = vload_s(*bl);
-    y2 = vmul_s(Pm1_2, b);
-    
-    x3 = vloadu_s(xarr + 3 * VLEN_s);
-    Pm1_3 = vload_s(1.0);
-    P_3 = x3;
-    b = vload_s(*bl);
-    y3 = vmul_s(Pm1_3, b);
-    
-    
-    b = vload_s(*(bl + 1));
-    
-    vfmaeq_s(y0, P_0, b);
-    
-    vfmaeq_s(y1, P_1, b);
-    
-    vfmaeq_s(y2, P_2, b);
-    
-    vfmaeq_s(y3, P_3, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload_s(*(bl + l));
-        R = vload_s(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul_s(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq_s(P_0, W2, R);
-        vfmaeq_s(y0, P_0, b);
-        
-        Pm2_1 = Pm1_1; Pm1_1 = P_1;
-        W1 = vmul_s(x1, Pm1_1);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_1);
-        P_1 = W1;
-        vfmaeq_s(P_1, W2, R);
-        vfmaeq_s(y1, P_1, b);
-        
-        Pm2_2 = Pm1_2; Pm1_2 = P_2;
-        W1 = vmul_s(x2, Pm1_2);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_2);
-        P_2 = W1;
-        vfmaeq_s(P_2, W2, R);
-        vfmaeq_s(y2, P_2, b);
-        
-        Pm2_3 = Pm1_3; Pm1_3 = P_3;
-        W1 = vmul_s(x3, Pm1_3);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_3);
-        P_3 = W1;
-        vfmaeq_s(P_3, W2, R);
-        vfmaeq_s(y3, P_3, b);
-        
-
-    }
-    
-    vstoreu_s(out + 0 * VLEN_s, y0);
-    
-    vstoreu_s(out + 1 * VLEN_s, y1);
-    
-    vstoreu_s(out + 2 * VLEN_s, y2);
-    
-    vstoreu_s(out + 3 * VLEN_s, y3);
-    
-}
-
-static void legendre_transform_vec5_s(float *recfacs, float *bl, ptrdiff_t lmax,
-                                              float xarr[(5) * VLEN_s],
-                                              float out[(5) * VLEN_s]) {
-    
-    Tv_s P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv_s P_1, Pm1_1, Pm2_1, x1, y1;
-    
-    Tv_s P_2, Pm1_2, Pm2_2, x2, y2;
-    
-    Tv_s P_3, Pm1_3, Pm2_3, x3, y3;
-    
-    Tv_s P_4, Pm1_4, Pm2_4, x4, y4;
-    
-    Tv_s W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu_s(xarr + 0 * VLEN_s);
-    Pm1_0 = vload_s(1.0);
-    P_0 = x0;
-    b = vload_s(*bl);
-    y0 = vmul_s(Pm1_0, b);
-    
-    x1 = vloadu_s(xarr + 1 * VLEN_s);
-    Pm1_1 = vload_s(1.0);
-    P_1 = x1;
-    b = vload_s(*bl);
-    y1 = vmul_s(Pm1_1, b);
-    
-    x2 = vloadu_s(xarr + 2 * VLEN_s);
-    Pm1_2 = vload_s(1.0);
-    P_2 = x2;
-    b = vload_s(*bl);
-    y2 = vmul_s(Pm1_2, b);
-    
-    x3 = vloadu_s(xarr + 3 * VLEN_s);
-    Pm1_3 = vload_s(1.0);
-    P_3 = x3;
-    b = vload_s(*bl);
-    y3 = vmul_s(Pm1_3, b);
-    
-    x4 = vloadu_s(xarr + 4 * VLEN_s);
-    Pm1_4 = vload_s(1.0);
-    P_4 = x4;
-    b = vload_s(*bl);
-    y4 = vmul_s(Pm1_4, b);
-    
-    
-    b = vload_s(*(bl + 1));
-    
-    vfmaeq_s(y0, P_0, b);
-    
-    vfmaeq_s(y1, P_1, b);
-    
-    vfmaeq_s(y2, P_2, b);
-    
-    vfmaeq_s(y3, P_3, b);
-    
-    vfmaeq_s(y4, P_4, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload_s(*(bl + l));
-        R = vload_s(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul_s(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq_s(P_0, W2, R);
-        vfmaeq_s(y0, P_0, b);
-        
-        Pm2_1 = Pm1_1; Pm1_1 = P_1;
-        W1 = vmul_s(x1, Pm1_1);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_1);
-        P_1 = W1;
-        vfmaeq_s(P_1, W2, R);
-        vfmaeq_s(y1, P_1, b);
-        
-        Pm2_2 = Pm1_2; Pm1_2 = P_2;
-        W1 = vmul_s(x2, Pm1_2);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_2);
-        P_2 = W1;
-        vfmaeq_s(P_2, W2, R);
-        vfmaeq_s(y2, P_2, b);
-        
-        Pm2_3 = Pm1_3; Pm1_3 = P_3;
-        W1 = vmul_s(x3, Pm1_3);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_3);
-        P_3 = W1;
-        vfmaeq_s(P_3, W2, R);
-        vfmaeq_s(y3, P_3, b);
-        
-        Pm2_4 = Pm1_4; Pm1_4 = P_4;
-        W1 = vmul_s(x4, Pm1_4);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_4);
-        P_4 = W1;
-        vfmaeq_s(P_4, W2, R);
-        vfmaeq_s(y4, P_4, b);
-        
-
-    }
-    
-    vstoreu_s(out + 0 * VLEN_s, y0);
-    
-    vstoreu_s(out + 1 * VLEN_s, y1);
-    
-    vstoreu_s(out + 2 * VLEN_s, y2);
-    
-    vstoreu_s(out + 3 * VLEN_s, y3);
-    
-    vstoreu_s(out + 4 * VLEN_s, y4);
-    
-}
-
-static void legendre_transform_vec6_s(float *recfacs, float *bl, ptrdiff_t lmax,
-                                              float xarr[(6) * VLEN_s],
-                                              float out[(6) * VLEN_s]) {
-    
-    Tv_s P_0, Pm1_0, Pm2_0, x0, y0;
-    
-    Tv_s P_1, Pm1_1, Pm2_1, x1, y1;
-    
-    Tv_s P_2, Pm1_2, Pm2_2, x2, y2;
-    
-    Tv_s P_3, Pm1_3, Pm2_3, x3, y3;
-    
-    Tv_s P_4, Pm1_4, Pm2_4, x4, y4;
-    
-    Tv_s P_5, Pm1_5, Pm2_5, x5, y5;
-    
-    Tv_s W1, W2, b, R;
-    ptrdiff_t l;
-
-    
-    x0 = vloadu_s(xarr + 0 * VLEN_s);
-    Pm1_0 = vload_s(1.0);
-    P_0 = x0;
-    b = vload_s(*bl);
-    y0 = vmul_s(Pm1_0, b);
-    
-    x1 = vloadu_s(xarr + 1 * VLEN_s);
-    Pm1_1 = vload_s(1.0);
-    P_1 = x1;
-    b = vload_s(*bl);
-    y1 = vmul_s(Pm1_1, b);
-    
-    x2 = vloadu_s(xarr + 2 * VLEN_s);
-    Pm1_2 = vload_s(1.0);
-    P_2 = x2;
-    b = vload_s(*bl);
-    y2 = vmul_s(Pm1_2, b);
-    
-    x3 = vloadu_s(xarr + 3 * VLEN_s);
-    Pm1_3 = vload_s(1.0);
-    P_3 = x3;
-    b = vload_s(*bl);
-    y3 = vmul_s(Pm1_3, b);
-    
-    x4 = vloadu_s(xarr + 4 * VLEN_s);
-    Pm1_4 = vload_s(1.0);
-    P_4 = x4;
-    b = vload_s(*bl);
-    y4 = vmul_s(Pm1_4, b);
-    
-    x5 = vloadu_s(xarr + 5 * VLEN_s);
-    Pm1_5 = vload_s(1.0);
-    P_5 = x5;
-    b = vload_s(*bl);
-    y5 = vmul_s(Pm1_5, b);
-    
-    
-    b = vload_s(*(bl + 1));
-    
-    vfmaeq_s(y0, P_0, b);
-    
-    vfmaeq_s(y1, P_1, b);
-    
-    vfmaeq_s(y2, P_2, b);
-    
-    vfmaeq_s(y3, P_3, b);
-    
-    vfmaeq_s(y4, P_4, b);
-    
-    vfmaeq_s(y5, P_5, b);
-    
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload_s(*(bl + l));
-        R = vload_s(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        
-        Pm2_0 = Pm1_0; Pm1_0 = P_0;
-        W1 = vmul_s(x0, Pm1_0);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_0);
-        P_0 = W1;
-        vfmaeq_s(P_0, W2, R);
-        vfmaeq_s(y0, P_0, b);
-        
-        Pm2_1 = Pm1_1; Pm1_1 = P_1;
-        W1 = vmul_s(x1, Pm1_1);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_1);
-        P_1 = W1;
-        vfmaeq_s(P_1, W2, R);
-        vfmaeq_s(y1, P_1, b);
-        
-        Pm2_2 = Pm1_2; Pm1_2 = P_2;
-        W1 = vmul_s(x2, Pm1_2);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_2);
-        P_2 = W1;
-        vfmaeq_s(P_2, W2, R);
-        vfmaeq_s(y2, P_2, b);
-        
-        Pm2_3 = Pm1_3; Pm1_3 = P_3;
-        W1 = vmul_s(x3, Pm1_3);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_3);
-        P_3 = W1;
-        vfmaeq_s(P_3, W2, R);
-        vfmaeq_s(y3, P_3, b);
-        
-        Pm2_4 = Pm1_4; Pm1_4 = P_4;
-        W1 = vmul_s(x4, Pm1_4);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_4);
-        P_4 = W1;
-        vfmaeq_s(P_4, W2, R);
-        vfmaeq_s(y4, P_4, b);
-        
-        Pm2_5 = Pm1_5; Pm1_5 = P_5;
-        W1 = vmul_s(x5, Pm1_5);
-        W2 = W1;
-        W2 = vsub_s(W2, Pm2_5);
-        P_5 = W1;
-        vfmaeq_s(P_5, W2, R);
-        vfmaeq_s(y5, P_5, b);
-        
-
-    }
-    
-    vstoreu_s(out + 0 * VLEN_s, y0);
-    
-    vstoreu_s(out + 1 * VLEN_s, y1);
-    
-    vstoreu_s(out + 2 * VLEN_s, y2);
-    
-    vstoreu_s(out + 3 * VLEN_s, y3);
-    
-    vstoreu_s(out + 4 * VLEN_s, y4);
-    
-    vstoreu_s(out + 5 * VLEN_s, y5);
-    
-}
-
-
-
-
-
-void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax) {
-    /* (l - 1) / l, for l >= 2 */
-    ptrdiff_t l;
-    r[0] = 0;
-    r[1] = 1;
-    for (l = 2; l <= lmax; ++l) {
-        r[l] = (double)(l - 1) / (double)l;
-    }
-}
-
-void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax) {
-    /* (l - 1) / l, for l >= 2 */
-    ptrdiff_t l;
-    r[0] = 0;
-    r[1] = 1;
-    for (l = 2; l <= lmax; ++l) {
-        r[l] = (float)(l - 1) / (float)l;
-    }
-}
-
-
-/*
-  Compute sum_l b_l P_l(x_i) for all i. 
- */
-
-#define LEN (SHARP_LEGENDRE_CS * VLEN)
-#define LEN_s (SHARP_LEGENDRE_CS * VLEN_s)
-
-
-void sharp_legendre_transform(double *bl,
-                                   double *recfac,
-                                   ptrdiff_t lmax,
-                                   double *x, double *out, ptrdiff_t nx) {
-    double xchunk[MAX_CS * VLEN], outchunk[MAX_CS * LEN];
-    int compute_recfac;
-    ptrdiff_t i, j, len;
-
-    compute_recfac = (recfac == NULL);
-    if (compute_recfac) {
-        recfac = malloc(sizeof(double) * (lmax + 1));
-        sharp_legendre_transform_recfac(recfac, lmax);
-    }
-
-    for (j = 0; j != LEN; ++j) xchunk[j] = 0;
-
-    for (i = 0; i < nx; i += LEN) {
-        len = (i + (LEN) <= nx) ? (LEN) : (nx - i);
-        for (j = 0; j != len; ++j) xchunk[j] = x[i + j];
-        switch ((len + VLEN - 1) / VLEN) {
-          case 6: legendre_transform_vec6(recfac, bl, lmax, xchunk, outchunk); break;
-          case 5: legendre_transform_vec5(recfac, bl, lmax, xchunk, outchunk); break;
-          case 4: legendre_transform_vec4(recfac, bl, lmax, xchunk, outchunk); break;
-          case 3: legendre_transform_vec3(recfac, bl, lmax, xchunk, outchunk); break;
-          case 2: legendre_transform_vec2(recfac, bl, lmax, xchunk, outchunk); break;
-          case 1:
-          case 0:
-              legendre_transform_vec1(recfac, bl, lmax, xchunk, outchunk); break;
-        }
-        for (j = 0; j != len; ++j) out[i + j] = outchunk[j];
-    }
-    if (compute_recfac) {
-        free(recfac);
-    }
-}
-
-void sharp_legendre_transform_s(float *bl,
-                                   float *recfac,
-                                   ptrdiff_t lmax,
-                                   float *x, float *out, ptrdiff_t nx) {
-    float xchunk[MAX_CS * VLEN_s], outchunk[MAX_CS * LEN_s];
-    int compute_recfac;
-    ptrdiff_t i, j, len;
-
-    compute_recfac = (recfac == NULL);
-    if (compute_recfac) {
-        recfac = malloc(sizeof(float) * (lmax + 1));
-        sharp_legendre_transform_recfac_s(recfac, lmax);
-    }
-
-    for (j = 0; j != LEN_s; ++j) xchunk[j] = 0;
-
-    for (i = 0; i < nx; i += LEN_s) {
-        len = (i + (LEN_s) <= nx) ? (LEN_s) : (nx - i);
-        for (j = 0; j != len; ++j) xchunk[j] = x[i + j];
-        switch ((len + VLEN_s - 1) / VLEN_s) {
-          case 6: legendre_transform_vec6_s(recfac, bl, lmax, xchunk, outchunk); break;
-          case 5: legendre_transform_vec5_s(recfac, bl, lmax, xchunk, outchunk); break;
-          case 4: legendre_transform_vec4_s(recfac, bl, lmax, xchunk, outchunk); break;
-          case 3: legendre_transform_vec3_s(recfac, bl, lmax, xchunk, outchunk); break;
-          case 2: legendre_transform_vec2_s(recfac, bl, lmax, xchunk, outchunk); break;
-          case 1:
-          case 0:
-              legendre_transform_vec1_s(recfac, bl, lmax, xchunk, outchunk); break;
-        }
-        for (j = 0; j != len; ++j) out[i + j] = outchunk[j];
-    }
-    if (compute_recfac) {
-        free(recfac);
-    }
-}
-
-
-#endif
\ No newline at end of file
diff --git a/libsharp/sharp_legendre.c.in b/libsharp/sharp_legendre.c.in
deleted file mode 100644
index cd65012..0000000
--- a/libsharp/sharp_legendre.c.in
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
-
-    NOTE NOTE NOTE
-
-    This file is edited in sharp_legendre.c.in which is then preprocessed.
-    Do not make manual  modifications to sharp_legendre.c.
-
-    NOTE NOTE NOTE
-
-*/
-
-
-/*
- *  This file is part of libsharp.
- *
- * Redistribution and use in source and binary forms, with or without
- * met:
- * 
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*! \file sharp_legendre.c.in
- *
- *  Copyright (C) 2015 University of Oslo
- *  \author Dag Sverre Seljebotn
- */
-
-#ifndef NO_LEGENDRE
-#if (VLEN==8)
-#error This code is not tested with MIC; please compile with -DNO_LEGENDRE
-/* ...or test it (it probably works) and remove this check */
-#endif
-
-#ifndef SHARP_LEGENDRE_CS
-#define SHARP_LEGENDRE_CS 4
-#endif
-
-#define MAX_CS 6
-#if (SHARP_LEGENDRE_CS > MAX_CS)
-#error (SHARP_LEGENDRE_CS > MAX_CS)
-#endif
-
-#include "sharp_legendre.h"
-#include "sharp_vecsupport.h"
-
-#include <stdlib.h>
-
-/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
-/*{ for cs in range(1, 7) }*/
-static void legendre_transform_vec{{cs}}{{T}}({{scalar}} *recfacs, {{scalar}} *bl, ptrdiff_t lmax,
-                                              {{scalar}} xarr[({{cs}}) * VLEN{{T}}],
-                                              {{scalar}} out[({{cs}}) * VLEN{{T}}]) {
-    /*{ for i in range(cs) }*/
-    Tv{{T}} P_{{i}}, Pm1_{{i}}, Pm2_{{i}}, x{{i}}, y{{i}};
-    /*{ endfor }*/
-    Tv{{T}} W1, W2, b, R;
-    ptrdiff_t l;
-
-    /*{ for i in range(cs) }*/
-    x{{i}} = vloadu{{T}}(xarr + {{i}} * VLEN{{T}});
-    Pm1_{{i}} = vload{{T}}(1.0);
-    P_{{i}} = x{{i}};
-    b = vload{{T}}(*bl);
-    y{{i}} = vmul{{T}}(Pm1_{{i}}, b);
-    /*{ endfor }*/
-    
-    b = vload{{T}}(*(bl + 1));
-    /*{ for i in range(cs) }*/
-    vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
-    /*{ endfor }*/
-
-    for (l = 2; l <= lmax; ++l) {
-        b = vload{{T}}(*(bl + l));
-        R = vload{{T}}(*(recfacs + l));
-        
-        /* 
-           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
-        */
-        /*{ for i in range(cs) }*/
-        Pm2_{{i}} = Pm1_{{i}}; Pm1_{{i}} = P_{{i}};
-        W1 = vmul{{T}}(x{{i}}, Pm1_{{i}});
-        W2 = W1;
-        W2 = vsub{{T}}(W2, Pm2_{{i}});
-        P_{{i}} = W1;
-        vfmaeq{{T}}(P_{{i}}, W2, R);
-        vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
-        /*{ endfor }*/
-
-    }
-    /*{ for i in range(cs) }*/
-    vstoreu{{T}}(out + {{i}} * VLEN{{T}}, y{{i}});
-    /*{ endfor }*/
-}
-/*{ endfor }*/
-/*{ endfor }*/
-
-
-/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
-void sharp_legendre_transform_recfac{{T}}({{scalar}} *r, ptrdiff_t lmax) {
-    /* (l - 1) / l, for l >= 2 */
-    ptrdiff_t l;
-    r[0] = 0;
-    r[1] = 1;
-    for (l = 2; l <= lmax; ++l) {
-        r[l] = ({{scalar}})(l - 1) / ({{scalar}})l;
-    }
-}
-/*{ endfor }*/
-
-/*
-  Compute sum_l b_l P_l(x_i) for all i. 
- */
-
-#define LEN (SHARP_LEGENDRE_CS * VLEN)
-#define LEN_s (SHARP_LEGENDRE_CS * VLEN_s)
-
-/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
-void sharp_legendre_transform{{T}}({{scalar}} *bl,
-                                   {{scalar}} *recfac,
-                                   ptrdiff_t lmax,
-                                   {{scalar}} *x, {{scalar}} *out, ptrdiff_t nx) {
-    {{scalar}} xchunk[MAX_CS * VLEN{{T}}], outchunk[MAX_CS * LEN{{T}}];
-    int compute_recfac;
-    ptrdiff_t i, j, len;
-
-    compute_recfac = (recfac == NULL);
-    if (compute_recfac) {
-        recfac = malloc(sizeof({{scalar}}) * (lmax + 1));
-        sharp_legendre_transform_recfac{{T}}(recfac, lmax);
-    }
-
-    for (j = 0; j != LEN{{T}}; ++j) xchunk[j] = 0;
-
-    for (i = 0; i < nx; i += LEN{{T}}) {
-        len = (i + (LEN{{T}}) <= nx) ? (LEN{{T}}) : (nx - i);
-        for (j = 0; j != len; ++j) xchunk[j] = x[i + j];
-        switch ((len + VLEN{{T}} - 1) / VLEN{{T}}) {
-          case 6: legendre_transform_vec6{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
-          case 5: legendre_transform_vec5{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
-          case 4: legendre_transform_vec4{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
-          case 3: legendre_transform_vec3{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
-          case 2: legendre_transform_vec2{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
-          case 1:
-          case 0:
-              legendre_transform_vec1{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
-        }
-        for (j = 0; j != len; ++j) out[i + j] = outchunk[j];
-    }
-    if (compute_recfac) {
-        free(recfac);
-    }
-}
-/*{ endfor }*/
-
-#endif
diff --git a/libsharp/sharp_legendre.h b/libsharp/sharp_legendre.h
deleted file mode 100644
index cfd8aee..0000000
--- a/libsharp/sharp_legendre.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- *  This file is part of libsharp.
- *
- * Redistribution and use in source and binary forms, with or without
- * met:
- * 
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*! \file sharp_legendre.h
- *  Interface for the Legendre transform parts of the spherical transform library.
- *
- *  Copyright (C) 2015 University of Oslo
- *  \author Dag Sverre Seljebotn
- */
-
-#ifndef SHARP_LEGENDRE_H
-#define SHARP_LEGENDRE_H
-
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef NO_LEGENDRE
-
-void sharp_legendre_transform(double *bl, double *recfac, ptrdiff_t lmax, double *x,
-                              double *out, ptrdiff_t nx);
-void sharp_legendre_transform_s(float *bl, float *recfac, ptrdiff_t lmax, float *x,
-                                float *out, ptrdiff_t nx);
-void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax);
-void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax);
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/libsharp/sharp_legendre_table.c b/libsharp/sharp_legendre_table.c
deleted file mode 100644
index 7fef085..0000000
--- a/libsharp/sharp_legendre_table.c
+++ /dev/null
@@ -1,1467 +0,0 @@
-/*
-
-This file originated as a concatenation of files from libpsht. Further refactoring
-could be carried out to make the code use libsharp conventions instead for SSE etc.;
-
-*/
-
-
-/*
-sse_utils.h
-*/
-/*
- *  This file is part of libc_utils.
- *
- *  libc_utils is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libc_utils is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libc_utils; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*! \file sse_utils.h
- *  SSE/SSE2/SSE3-related functionality
- *
- *  Copyright (C) 2010,2011 Max-Planck-Society
- *  \author Martin Reinecke
- */
-
-
-#if (defined(__SSE__))
-
-#include <xmmintrin.h>
-
-
-typedef __m128 v4sf; /* vector of 4 floats (SSE1) */
-
-typedef union {
-  float f[4];
-  v4sf v;
-} V4SF;
-
-static inline v4sf build_v4sf (float a, float b, float c, float d)
-  { return _mm_set_ps(d,c,b,a); }
-static inline void read_v4sf (v4sf v, float *a, float *b, float *c, float *d)
-  {
-  V4SF tmp;
-  tmp.v = v;
-  if (a) *a=tmp.f[0];
-  if (b) *b=tmp.f[1];
-  if (c) *c=tmp.f[2];
-  if (d) *d=tmp.f[3];
-  }
-
-
-#endif
-
-#if (defined(__SSE2__))
-
-#include <emmintrin.h>
-
-
-typedef __m128d v2df; /* vector of 2 doubles (SSE2) */
-
-typedef union {
-  double d[2];
-  v2df v;
-} V2DF;
-
-typedef struct {
-  v2df a,b;
-} v2df2;
-typedef struct {
-  V2DF a,b;
-} V2DF2;
-
-#define V2DF_SIGNMASK _mm_set1_pd(-0.0)
-
-static inline v2df build_v2df (double a, double b)
-  { return _mm_set_pd(b,a); }
-static inline void read_v2df (v2df v, double *a, double *b)
-  { _mm_store_sd(a,v); _mm_storeh_pd(b,v); }
-
-static inline int v2df_any_gt (v2df a, v2df b)
-  {
-  return (_mm_movemask_pd(_mm_cmpgt_pd(_mm_andnot_pd(V2DF_SIGNMASK,a),b))!=0);
-  }
-static inline int v2df_all_ge (v2df a, v2df b)
-  {
-  return (_mm_movemask_pd(_mm_cmplt_pd(_mm_andnot_pd(V2DF_SIGNMASK,a),b))==0);
-  }
-static inline V2DF to_V2DF (v2df x)
-  { V2DF X; X.v=x; return X; }
-static inline V2DF2 to_V2DF2 (v2df2 x)
-  { V2DF2 X; X.a.v=x.a; X.b.v=x.b; return X; }
-static inline v2df2 to_v2df2 (V2DF2 X)
-  { v2df2 x; x.a=X.a.v; x.b=X.b.v; return x; }
-static inline v2df2 zero_v2df2(void)
-  { v2df2 x; x.a=x.b=_mm_setzero_pd(); return x; }
-
-
-#endif
-
-#if (defined(__SSE3__))
-
-#include <pmmintrin.h>
-
-#endif
-
-
-
-/*
-ylmgen_c.h
-*/
-/*
- *  This file is part of libpsht.
- *
- *  libpsht is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libpsht is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libpsht; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libpsht is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*! \file ylmgen_c.h
- *  Code for efficient calculation of Y_lm(phi=0,theta)
- *
- *  Copyright (C) 2005-2011 Max-Planck-Society
- *  \author Martin Reinecke
- */
-
-typedef double ylmgen_dbl2[2];
-typedef double ylmgen_dbl3[3];
-
-typedef struct
-  {
-  double cth_crit;
-  int mdist_crit;
-  /* members depending on m and m' */
-  int s, m, mlo, mhi, cosPow, sinPow;
-  long double prefactor;
-  ylmgen_dbl3 *fx;
-  int preMinus_p, preMinus_m;
-  } sylmgen_d;
-
-typedef struct
-  {
-  double fsmall, fbig, eps, cth_crit;
-  int lmax, mmax, smax, m_cur, ith, nth, m_crit, spinrec;
-  /*! The index of the first non-negligible Y_lm value. */
-  int *firstl;
-  double *cf, *mfac, *t1fac, *t2fac, *th, *cth, *sth, *logsth;
-  ylmgen_dbl2 *recfac;
-  double *lamfact;
-  /*! Points to an array of size [0..lmax] containing the Y_lm values. */
-  double *ylm;
-  /*! Points to an array of size [0..lmax] containing the lambda_w
-      and lambda_x values for spin>0 transforms. */
-  ylmgen_dbl2 **lambda_wx;
-  long double *logsum, *lc05, *ls05;
-  double *flm1, *flm2, *xl;
-
-  sylmgen_d **sylm;
-
-  int *lwx_uptodate;
-  int ylm_uptodate;
-
-#ifdef __SSE2__
-  int ith1, ith2;
-  /*! Points to an array of size [0..lmax] containing the Y_lm values. */
-  v2df *ylm_sse2;
-  /*! Points to an array of size [0..lmax] containing the lambda_w
-      and lambda_x values for spin>0 transforms. */
-  v2df2 **lambda_wx_sse2;
-  int *lwx_uptodate_sse2;
-  int ylm_uptodate_sse2;
-#endif
-
-  int recfac_uptodate, lamfact_uptodate;
-  } Ylmgen_C;
-
-/*! Creates a generator which will calculate Y_lm(theta,phi=0)
-    up to \a l=l_max and \a m=m_max. It may regard Y_lm whose absolute
-    magnitude is smaller than \a epsilon as zero. If \a spinrec is nonzero,
-    the spin-1 and spin-2 Y_lm will be calculated by recursion from the spin-0
-    ones, otherwise Wigner d matrix elements will be used. */
-static void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int s_max, int spinrec,
-   double epsilon);
-
-/*! Passes am array \a theta of \a nth colatitudes that will be used in
-    subsequent calls. The individual angles will be referenced by their
-    index in the array, starting with 0.
-    \note The array can be freed or reused after the call. */
-static void Ylmgen_set_theta (Ylmgen_C *gen, const double *theta, int nth);
-
-/*! Deallocates a generator previously initialised by Ylmgen_init(). */
-static void Ylmgen_destroy (Ylmgen_C *gen);
-
-/*! Prepares the object for the calculation at \a theta and \a m. */
-static void Ylmgen_prepare (Ylmgen_C *gen, int ith, int m);
-
-/*! Recalculates (if necessary) the Y_lm values. */
-static void Ylmgen_recalc_Ylm (Ylmgen_C *gen);
-/*! Recalculates (if necessary) the lambda_w and lambda_x values for spin >0
-    transforms. */
-/*static void Ylmgen_recalc_lambda_wx (Ylmgen_C *gen, int spin);*/
-
-#ifdef __SSE2__
-/*! Prepares the object for the calculation at \a theta, \a theta2 and \a m. */
-static void Ylmgen_prepare_sse2 (Ylmgen_C *gen, int ith1, int ith2, int m);
-
-/*! Recalculates (if necessary) the Y_lm values. */
-static void Ylmgen_recalc_Ylm_sse2 (Ylmgen_C *gen);
-/*! Recalculates (if necessary) the lambda_w and lambda_x values for spin >0
-    transforms. */
-/*static void Ylmgen_recalc_lambda_wx_sse2 (Ylmgen_C *gen, int spin);*/
-#endif
-
-/*! Returns a pointer to an array with lmax+1 entries containing normalisation
-    factors that must be applied to Y_lm values computed for \a spin with the
-    given \a spinrec flag. The array must be deallocated (using free()) by the
-    user. */
-/*static double *Ylmgen_get_norm (int lmax, int spin, int spinrec);*/
-
-
-/*
-ylmgen_c.c
-*/
-
-/*
- *  This file is part of libpsht.
- *
- *  libpsht is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libpsht is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libpsht; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libpsht is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*
- *  Code for efficient calculation of Y_lm(theta,phi=0)
- *
- *  Copyright (C) 2005-2011 Max-Planck-Society
- *  Author: Martin Reinecke
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include "c_utils.h"
-
-enum { large_exponent2=90, minscale=-4, maxscale=11 };
-
-/*static void sylmgen_init (sylmgen_d *gen, const Ylmgen_C *ygen, int spin)
-  {
-  int i;
-  UTIL_ASSERT(spin>=1,"incorrect spin");
-  gen->s=spin;
-  gen->m=gen->mlo=gen->mhi=-1234567890;
-  ALLOC(gen->fx,ylmgen_dbl3,ygen->lmax+2);
-
-  for (i=0; i<ygen->lmax+2; ++i)
-    gen->fx[i][0]=gen->fx[i][1]=gen->fx[i][2]=0.;
-
-  gen->cth_crit = 2.;
-  gen->mdist_crit = ygen->lmax+1;
-  }*/
-
-static void sylmgen_destroy (sylmgen_d *gen)
-  { DEALLOC(gen->fx); }
-
-/*static void sylmgen_prepare (sylmgen_d *gen, const Ylmgen_C *ygen, int m_)
-  {
-  int mlo_, mhi_, ms_similar, l;
-
-  if (m_==gen->m) return;
-  UTIL_ASSERT(m_>=0,"incorrect m");
-
-  mlo_=m_; mhi_=gen->s;
-  if (mhi_<mlo_) SWAP(mhi_,mlo_,int);
-  ms_similar = ((gen->mhi==mhi_) && (gen->mlo==mlo_));
-
-  gen->m=m_;
-  gen->mlo = mlo_; gen->mhi = mhi_;
-
-  if (!ms_similar)
-    {
-    for (l=gen->mhi; l<ygen->lmax; ++l)
-      {
-      double t = ygen->flm1[l+gen->m]*ygen->flm1[l-gen->m]
-                *ygen->flm1[l+gen->s]*ygen->flm1[l-gen->s];
-      double lt = 2*l+1;
-      double l1 = l+1;
-      gen->fx[l+1][0]=l1*lt*t;
-      gen->fx[l+1][1]=gen->m*gen->s*ygen->xl[l]*ygen->xl[l+1];
-      t = ygen->flm2[l+gen->m]*ygen->flm2[l-gen->m]
-         *ygen->flm2[l+gen->s]*ygen->flm2[l-gen->s];
-      gen->fx[l+1][2]=t*l1*ygen->xl[l];
-      }
-    gen->prefactor = 0.5L*(ygen->logsum[2*gen->mhi]
-      -ygen->logsum[gen->mhi+gen->mlo]-ygen->logsum[gen->mhi-gen->mlo]);
-    }
-
-  gen->preMinus_p = gen->preMinus_m = 0;
-  if (gen->mhi==gen->m)
-    {
-    gen->cosPow = gen->mhi+gen->s; gen->sinPow = gen->mhi-gen->s;
-    gen->preMinus_p = gen->preMinus_m = ((gen->mhi-gen->s)&1);
-    }
-  else
-    {
-    gen->cosPow = gen->mhi+gen->m; gen->sinPow = gen->mhi-gen->m;
-    gen->preMinus_m = ((gen->mhi+gen->m)&1);
-    }
-    }*/
-
-/*static void sylmgen_recalc (sylmgen_d *gen, const Ylmgen_C *ygen, int ith,
-  ylmgen_dbl2 *res, int *firstl)
-  {
-  const double ln2     = 0.6931471805599453094172321214581766;
-  const double inv_ln2 = 1.4426950408889634073599246810018921;
-  int l=gen->mhi;
-  int lmax = ygen->lmax;
-  ylmgen_dbl3 *fy = gen->fx;
-  const double fsmall = ygen->fsmall, fbig = ygen->fbig, eps = ygen->eps;
-  const double cth = ygen->cth[ith];
-  long double logvalp = inv_ln2*(gen->prefactor
-    + ygen->lc05[ith]*gen->cosPow + ygen->ls05[ith]*gen->sinPow);
-  long double logvalm = inv_ln2*(gen->prefactor
-    + ygen->lc05[ith]*gen->sinPow + ygen->ls05[ith]*gen->cosPow);
-  int scalep = (int)(logvalp/large_exponent2)-minscale;
-  int scalem = (int)(logvalm/large_exponent2)-minscale;
-  double rec1p=0., rec1m=0.;
-  double rec2p = exp(ln2*(double)(logvalp-(scalep+minscale)*large_exponent2));
-  double rec2m = exp(ln2*(double)(logvalm-(scalem+minscale)*large_exponent2));
-  double corfacp,corfacm;
-  double tp,tm;
-
-  if ((abs(gen->m-gen->s)>=gen->mdist_crit)&&(fabs(cth)>=gen->cth_crit))
-    { *firstl=ygen->lmax+1; return; }
-
-  if (gen->preMinus_p)
-    rec2p=-rec2p;
-  if (gen->preMinus_m)
-    rec2m=-rec2m;
-  if (gen->s&1)
-    rec2p=-rec2p;
-
-  / iterate until we reach the realm of IEEE numbers /
-  while((scalem<0)&&(scalep<0))
-    {
-    if (++l>lmax) break;
-    rec1p = (cth - fy[l][1])*fy[l][0]*rec2p - fy[l][2]*rec1p;
-    rec1m = (cth + fy[l][1])*fy[l][0]*rec2m - fy[l][2]*rec1m;
-    if (++l>lmax) break;
-    rec2p = (cth - fy[l][1])*fy[l][0]*rec1p - fy[l][2]*rec2p;
-    rec2m = (cth + fy[l][1])*fy[l][0]*rec1m - fy[l][2]*rec2m;
-
-    while (fabs(rec2p)>fbig)
-      { rec1p *= fsmall; rec2p *= fsmall; ++scalep; }
-    while (fabs(rec2m)>fbig)
-      { rec1m *= fsmall; rec2m *= fsmall; ++scalem; }
-    }
-
-  corfacp = (scalep<0) ? 0. : ygen->cf[scalep];
-  corfacm = (scalem<0) ? 0. : ygen->cf[scalem];
-
-  if (l<=lmax)
-    {
-    while (1)
-      {
-      if ((fabs(rec2p*corfacp)>eps) || (fabs(rec2m*corfacm)>eps))
-        break;
-      if (++l>lmax) break;
-      rec1p = (cth - fy[l][1])*fy[l][0]*rec2p - fy[l][2]*rec1p;
-      rec1m = (cth + fy[l][1])*fy[l][0]*rec2m - fy[l][2]*rec1m;
-      if ((fabs(rec1p*corfacp)>eps) || (fabs(rec1m*corfacm)>eps))
-        { SWAP(rec1p,rec2p,double); SWAP(rec1m,rec2m,double); break; }
-      if (++l>lmax) break;
-      rec2p = (cth - fy[l][1])*fy[l][0]*rec1p - fy[l][2]*rec2p;
-      rec2m = (cth + fy[l][1])*fy[l][0]*rec1m - fy[l][2]*rec2m;
-
-      if ((fabs(rec2p)>fbig) || (fabs(rec2m)>fbig))
-        {
-        while (fabs(rec2p)>fbig)
-          { rec1p *= fsmall; rec2p *= fsmall; ++scalep; }
-        while (fabs(rec2m)>fbig)
-          { rec1m *= fsmall; rec2m *= fsmall; ++scalem; }
-        corfacp = (scalep<0) ? 0. : ygen->cf[scalep];
-        corfacm = (scalem<0) ? 0. : ygen->cf[scalem];
-        }
-      }
-    }
-
-  *firstl=l;
-  if (l>lmax)
-    {
-    gen->mdist_crit=abs(gen->m-gen->s);
-    gen->cth_crit= fabs(cth);
-    return;
-    }
-
-  tp = rec2p*corfacp; tm = rec2m*corfacm;
-  res[l][0]=tp+tm;
-  res[l][1]=tm-tp;
-
-  while (1)
-    {
-    if ((fabs(tp)>eps) && (fabs(tm)>eps))
-      break;
-    if (++l>lmax) break;
-    rec1p = (cth - fy[l][1])*fy[l][0]*rec2p - fy[l][2]*rec1p;
-    rec1m = (cth + fy[l][1])*fy[l][0]*rec2m - fy[l][2]*rec1m;
-    tp=rec1p*corfacp; tm=rec1m*corfacm;
-    res[l][0]=tp+tm; res[l][1]=tm-tp;
-    if ((fabs(tp)>eps) && (fabs(tm)>eps))
-      { SWAP(rec1p,rec2p,double); SWAP(rec1m,rec2m,double); break; }
-    if (++l>lmax) break;
-    rec2p = (cth - fy[l][1])*fy[l][0]*rec1p - fy[l][2]*rec2p;
-    rec2m = (cth + fy[l][1])*fy[l][0]*rec1m - fy[l][2]*rec2m;
-    tp=rec2p*corfacp; tm=rec2m*corfacm;
-    res[l][0]=tp+tm; res[l][1]=tm-tp;
-
-    if ((fabs(rec2p)>fbig) || (fabs(rec2m)>fbig))
-      {
-      while (fabs(rec2p)>fbig)
-        { rec1p *= fsmall; rec2p *= fsmall; ++scalep; }
-      while (fabs(rec2m)>fbig)
-        { rec1m *= fsmall; rec2m *= fsmall; ++scalem; }
-      corfacp = (scalep<0) ? 0. : ygen->cf[scalep];
-      corfacm = (scalem<0) ? 0. : ygen->cf[scalem];
-      }
-    }
-
-  rec1p *= corfacp; rec2p *= corfacp;
-  rec1m *= corfacm; rec2m *= corfacm;
-
-  for (;l<lmax-1;l+=2)
-    {
-    rec1p = (cth - fy[l+1][1])*fy[l+1][0]*rec2p - fy[l+1][2]*rec1p;
-    rec1m = (cth + fy[l+1][1])*fy[l+1][0]*rec2m - fy[l+1][2]*rec1m;
-    res[l+1][0] = rec1p+rec1m; res[l+1][1] = rec1m-rec1p;
-    rec2p = (cth - fy[l+2][1])*fy[l+2][0]*rec1p - fy[l+2][2]*rec2p;
-    rec2m = (cth + fy[l+2][1])*fy[l+2][0]*rec1m - fy[l+2][2]*rec2m;
-    res[l+2][0] = rec2p+rec2m; res[l+2][1] = rec2m-rec2p;
-    }
-  while (1)
-    {
-    if (++l>lmax) break;
-    rec1p = (cth - fy[l][1])*fy[l][0]*rec2p - fy[l][2]*rec1p;
-    rec1m = (cth + fy[l][1])*fy[l][0]*rec2m - fy[l][2]*rec1m;
-    res[l][0] = rec1p+rec1m; res[l][1] = rec1m-rec1p;
-    if (++l>lmax) break;
-    rec2p = (cth - fy[l][1])*fy[l][0]*rec1p - fy[l][2]*rec2p;
-    rec2m = (cth + fy[l][1])*fy[l][0]*rec1m - fy[l][2]*rec2m;
-    res[l][0] = rec2p+rec2m; res[l][1] = rec2m-rec2p;
-    }
-    }*/
-
-#ifdef __SSE2__
-
-#define ADVANCE(L,ap,am,bp,bm) \
-  { \
-  v2df f0=_mm_set1_pd(fy[L][0]), \
-       f1=_mm_set1_pd(fy[L][1]), \
-       f2=_mm_set1_pd(fy[L][2]); \
-  ap = _mm_sub_pd( \
-       _mm_mul_pd(_mm_sub_pd(cth,f1),_mm_mul_pd(f0,bp)), \
-       _mm_mul_pd(f2,ap)); \
-  am = _mm_sub_pd( \
-       _mm_mul_pd(_mm_add_pd(cth,f1),_mm_mul_pd(f0,bm)), \
-       _mm_mul_pd(f2,am)); \
-  }
-
-#define RENORMX(r1,r2,corf,sca,scb) \
-  do \
-    { \
-    double rec1a, rec1b, rec2a, rec2b, corfaca, corfacb; \
-    read_v2df (r1, &rec1a, &rec1b); read_v2df (r2, &rec2a, &rec2b); \
-    read_v2df (corf, &corfaca, &corfacb); \
-    while (fabs(rec2a)>fbig) \
-      { \
-      rec1a*=fsmall; rec2a*=fsmall; ++sca; \
-      corfaca = (sca<0) ? 0. : ygen->cf[sca]; \
-      } \
-    while (fabs(rec2b)>fbig) \
-      { \
-      rec1b*=fsmall; rec2b*=fsmall; ++scb; \
-      corfacb = (scb<0) ? 0. : ygen->cf[scb]; \
-      } \
-    r1=build_v2df(rec1a,rec1b); r2=build_v2df(rec2a,rec2b); \
-    corf=build_v2df(corfaca,corfacb); \
-    } \
-  while(0)
-
-#define RENORM \
-  RENORMX(rec1p,rec2p,corfacp,scale1p,scale2p); \
-  RENORMX(rec1m,rec2m,corfacm,scale1m,scale2m)
-
-/*static void sylmgen_recalc_sse2 (sylmgen_d *gen, const Ylmgen_C *ygen,
-  int ith1, int ith2, v2df2 *res, int *firstl)
-  {
-  const double ln2     = 0.6931471805599453094172321214581766;
-  const double inv_ln2 = 1.4426950408889634073599246810018921;
-  int l=gen->mhi;
-  int lmax = ygen->lmax;
-  ylmgen_dbl3 *fy = gen->fx;
-  const double fbig=ygen->fbig, fsmall=ygen->fsmall;
-  v2df eps2    = build_v2df(ygen->eps,ygen->eps);
-  const double cth1=ygen->cth[ith1], cth2=ygen->cth[ith2];
-  v2df cth = build_v2df(cth1,cth2);
-  long double
-    logval1p = inv_ln2*(gen->prefactor
-             + ygen->lc05[ith1]*gen->cosPow + ygen->ls05[ith1]*gen->sinPow),
-    logval2p = inv_ln2*(gen->prefactor
-             + ygen->lc05[ith2]*gen->cosPow + ygen->ls05[ith2]*gen->sinPow),
-    logval1m = inv_ln2*(gen->prefactor
-             + ygen->lc05[ith1]*gen->sinPow + ygen->ls05[ith1]*gen->cosPow),
-    logval2m = inv_ln2*(gen->prefactor
-             + ygen->lc05[ith2]*gen->sinPow + ygen->ls05[ith2]*gen->cosPow);
-
-  int scale1p = (int)(logval1p/large_exponent2)-minscale,
-      scale2p = (int)(logval2p/large_exponent2)-minscale,
-      scale1m = (int)(logval1m/large_exponent2)-minscale,
-      scale2m = (int)(logval2m/large_exponent2)-minscale;
-
-  v2df rec1p =_mm_setzero_pd(), rec1m=_mm_setzero_pd();
-  v2df rec2p = build_v2df(
-    exp(ln2*(double)(logval1p-(scale1p+minscale)*large_exponent2)),
-    exp(ln2*(double)(logval2p-(scale2p+minscale)*large_exponent2)));
-  v2df rec2m = build_v2df(
-    exp(ln2*(double)(logval1m-(scale1m+minscale)*large_exponent2)),
-    exp(ln2*(double)(logval2m-(scale2m+minscale)*large_exponent2)));
-  v2df corfacp=build_v2df((scale1p<0) ? 0. : ygen->cf[scale1p],
-                          (scale2p<0) ? 0. : ygen->cf[scale2p]),
-       corfacm=build_v2df((scale1m<0) ? 0. : ygen->cf[scale1m],
-                          (scale2m<0) ? 0. : ygen->cf[scale2m]);
-  v2df tp,tm;
-
-  if ((abs(gen->m-gen->s)>=gen->mdist_crit)
-     &&(fabs(ygen->cth[ith1])>=gen->cth_crit)
-     &&(fabs(ygen->cth[ith2])>=gen->cth_crit))
-    { *firstl=ygen->lmax+1; return; }
-
-  if (gen->preMinus_p)
-    rec2p = _mm_xor_pd (rec2p,V2DF_SIGNMASK); / negate /
-  if (gen->preMinus_m)
-    rec2m = _mm_xor_pd (rec2m,V2DF_SIGNMASK); / negate /
-  if (gen->s&1)
-    rec2p = _mm_xor_pd (rec2p,V2DF_SIGNMASK); / negate /
-
-  / iterate until we reach the realm of IEEE numbers /
-  while((scale1m<0)&&(scale1p<0)&&(scale2m<0)&&(scale2p<0))
-    {
-    if (++l>lmax) break;
-    ADVANCE (l,rec1p,rec1m,rec2p,rec2m)
-    if (++l>lmax) break;
-    ADVANCE (l,rec2p,rec2m,rec1p,rec1m)
-
-    RENORM;
-    }
-
-  if (l<=lmax)
-    {
-    while (1)
-      {
-      if (v2df_any_gt(_mm_mul_pd(rec2p,corfacp),eps2) ||
-          v2df_any_gt(_mm_mul_pd(rec2m,corfacm),eps2))
-        break;
-      if (++l>lmax) break;
-      ADVANCE (l,rec1p,rec1m,rec2p,rec2m)
-      if (v2df_any_gt(_mm_mul_pd(rec1p,corfacp),eps2) ||
-          v2df_any_gt(_mm_mul_pd(rec1m,corfacm),eps2))
-        { SWAP(rec1p,rec2p,v2df); SWAP(rec1m,rec2m,v2df); break; }
-      if (++l>lmax) break;
-      ADVANCE (l,rec2p,rec2m,rec1p,rec1m)
-
-      RENORM;
-      }
-    }
-
-  *firstl=l;
-  if (l>lmax)
-    {
-    gen->mdist_crit=abs(gen->m-gen->s);
-    gen->cth_crit= (fabs(cth1)<fabs(cth2)) ? fabs(cth1) : fabs(cth2);
-    return;
-    }
-  tp = _mm_mul_pd(rec2p,corfacp); tm = _mm_mul_pd(rec2m,corfacm);
-  res[l].a=_mm_add_pd(tp,tm);
-  res[l].b=_mm_sub_pd(tm,tp);
-
-  while (1)
-    {
-    if (v2df_all_ge(tp,eps2) && v2df_all_ge(tm,eps2))
-      break;
-    if (++l>lmax) break;
-    ADVANCE(l,rec1p,rec1m,rec2p,rec2m)
-    tp=_mm_mul_pd(rec1p,corfacp); tm=_mm_mul_pd(rec1m,corfacm);
-    res[l].a=_mm_add_pd(tp,tm); res[l].b=_mm_sub_pd(tm,tp);
-    if (v2df_all_ge(tp,eps2) && v2df_all_ge(tm,eps2))
-      { SWAP(rec1p,rec2p,v2df); SWAP(rec1m,rec2m,v2df); break; }
-    if (++l>lmax) break;
-    ADVANCE (l,rec2p,rec2m,rec1p,rec1m)
-    tp=_mm_mul_pd(rec2p,corfacp); tm=_mm_mul_pd(rec2m,corfacm);
-    res[l].a=_mm_add_pd(tp,tm); res[l].b=_mm_sub_pd(tm,tp);
-
-    RENORM;
-    }
-
-  rec1p = _mm_mul_pd(rec1p,corfacp); rec2p = _mm_mul_pd(rec2p,corfacp);
-  rec1m = _mm_mul_pd(rec1m,corfacm); rec2m = _mm_mul_pd(rec2m,corfacm);
-
-  for (;l<lmax-1;l+=2)
-    {
-    ADVANCE(l+1,rec1p,rec1m,rec2p,rec2m)
-    res[l+1].a=_mm_add_pd(rec1p,rec1m); res[l+1].b=_mm_sub_pd(rec1m,rec1p);
-    ADVANCE(l+2,rec2p,rec2m,rec1p,rec1m)
-    res[l+2].a=_mm_add_pd(rec2p,rec2m); res[l+2].b=_mm_sub_pd(rec2m,rec2p);
-    }
-  while (1)
-    {
-    if (++l>lmax) break;
-    ADVANCE(l,rec1p,rec1m,rec2p,rec2m)
-    res[l].a=_mm_add_pd(rec1p,rec1m); res[l].b=_mm_sub_pd(rec1m,rec1p);
-    if (++l>lmax) break;
-    ADVANCE(l,rec2p,rec2m,rec1p,rec1m)
-    res[l].a=_mm_add_pd(rec2p,rec2m); res[l].b=_mm_sub_pd(rec2m,rec2p);
-    }
-  }
-*/
-#endif
-
-static void Ylmgen_init (Ylmgen_C *gen, int l_max, int m_max, int s_max, int spinrec,
-  double epsilon)
-  {
-  int m;
-  const double inv_sqrt4pi = 0.2820947917738781434740397257803862929220;
-  const double inv_ln2 = 1.4426950408889634073599246810018921;
-
-  gen->fsmall = ldexp(1.,-large_exponent2);
-  gen->fbig   = ldexp(1., large_exponent2);
-  gen->eps = epsilon;
-  gen->cth_crit = 2.;
-  gen->ith = -1;
-  gen->nth = 0;
-  gen->lmax = l_max;
-  gen->mmax = m_max;
-  gen->smax = s_max;
-  gen->spinrec = spinrec;
-  gen->m_cur = -1;
-  gen->m_crit = gen->mmax+1;
-  gen->firstl = RALLOC(int,gen->smax+1);
-  for (m=0; m<=gen->smax; ++m) gen->firstl[m]=-1;
-  gen->cf = RALLOC(double,maxscale-minscale+1);
-  for (m=0; m<(maxscale-minscale+1); ++m)
-    gen->cf[m] = ldexp(1.,(m+minscale)*large_exponent2);
-  gen->recfac = RALLOC(ylmgen_dbl2,gen->lmax+1);
-  gen->mfac = RALLOC(double,gen->mmax+1);
-  gen->mfac[0] = 1;
-  for (m=1; m<=gen->mmax; ++m)
-    gen->mfac[m] = gen->mfac[m-1]*sqrt((2*m+1.)/(2*m));
-  for (m=0; m<=gen->mmax; ++m)
-    gen->mfac[m] = inv_ln2*log(inv_sqrt4pi*gen->mfac[m]);
-
-  gen->t1fac = RALLOC(double,gen->lmax+1);
-  for (m=0; m<=gen->lmax; ++m)
-    gen->t1fac[m] = sqrt(4.*(m+1)*(m+1)-1.);
-  gen->t2fac = RALLOC(double,gen->lmax+gen->mmax+1);
-  for (m=0; m<=gen->lmax+gen->mmax; ++m)
-    gen->t2fac[m] = 1./sqrt(m+1.);
-
-  gen->lamfact = RALLOC(double,gen->lmax+1);
-  gen->ylm = RALLOC(double,gen->lmax+1);
-  ALLOC(gen->lambda_wx,ylmgen_dbl2 *,gen->smax+1);
-  for (m=0; m<=gen->smax; ++m)
-    gen->lambda_wx[m]=NULL;
-
-  gen->sylm = RALLOC(sylmgen_d *,gen->smax+1);
-  for (m=0; m<=gen->smax; ++m)
-    gen->sylm[m]=NULL;
-
-  gen->ylm_uptodate = 0;
-  gen->lwx_uptodate = RALLOC(int,gen->smax+1);
-  SET_ARRAY(gen->lwx_uptodate,0,gen->smax+1,0);
-  gen->recfac_uptodate = 0;
-  gen->lamfact_uptodate = 0;
-
-  gen->th = gen->cth = gen->sth = gen->logsth = NULL;
-
-#ifdef __SSE2__
-  gen->ith1 = gen->ith2 = -1;
-  gen->ylm_sse2 = RALLOC(v2df,gen->lmax+1);
-  ALLOC(gen->lambda_wx_sse2,v2df2 *,gen->smax+1);
-  for (m=0; m<=gen->smax; ++m)
-    gen->lambda_wx_sse2[m]=NULL;
-  gen->ylm_uptodate_sse2 = 0;
-  gen->lwx_uptodate_sse2 = RALLOC(int,gen->smax+1);
-  SET_ARRAY(gen->lwx_uptodate_sse2,0,gen->smax+1,0);
-#endif
-
-  ALLOC(gen->logsum,long double,2*gen->lmax+1);
-  gen->lc05 = gen->ls05 = NULL;
-  ALLOC(gen->flm1,double,2*gen->lmax+1);
-  ALLOC(gen->flm2,double,2*gen->lmax+1);
-  ALLOC(gen->xl,double,gen->lmax+1);
-
-  gen->logsum[0] = 0.;
-  for (m=1; m<2*gen->lmax+1; ++m)
-    gen->logsum[m] = gen->logsum[m-1]+logl((long double)m);
-  for (m=0; m<2*gen->lmax+1; ++m)
-    {
-    gen->flm1[m] = sqrt(1./(m+1.));
-    gen->flm2[m] = sqrt(m/(m+1.));
-    }
-
-  gen->xl[0]=0;
-  for (m=1; m<gen->lmax+1; ++m) gen->xl[m]=1./m;
-  }
-
-static void Ylmgen_destroy (Ylmgen_C *gen)
-  {
-  int m;
-
-  DEALLOC(gen->firstl);
-  DEALLOC(gen->cf);
-  DEALLOC(gen->recfac);
-  DEALLOC(gen->mfac);
-  DEALLOC(gen->t1fac);
-  DEALLOC(gen->t2fac);
-  DEALLOC(gen->lamfact);
-  DEALLOC(gen->ylm);
-  DEALLOC(gen->lwx_uptodate);
-  for (m=0; m<=gen->smax; ++m)
-    DEALLOC(gen->lambda_wx[m]);
-  DEALLOC(gen->lambda_wx);
-  for (m=0; m<=gen->smax; ++m)
-    if (gen->sylm[m])
-      {
-      sylmgen_destroy (gen->sylm[m]);
-      DEALLOC(gen->sylm[m]);
-      }
-  DEALLOC(gen->sylm);
-  DEALLOC(gen->th);
-  DEALLOC(gen->cth);
-  DEALLOC(gen->sth);
-  DEALLOC(gen->logsth);
-  DEALLOC(gen->logsum);
-  DEALLOC(gen->lc05);
-  DEALLOC(gen->ls05);
-  DEALLOC(gen->flm1);
-  DEALLOC(gen->flm2);
-  DEALLOC(gen->xl);
-#ifdef __SSE2__
-  DEALLOC(gen->ylm_sse2);
-  for (m=0; m<=gen->smax; ++m)
-    DEALLOC(gen->lambda_wx_sse2[m]);
-  DEALLOC(gen->lambda_wx_sse2);
-  DEALLOC(gen->lwx_uptodate_sse2);
-#endif
-  }
-
-static void Ylmgen_set_theta (Ylmgen_C *gen, const double *theta, int nth)
-  {
-  const double inv_ln2 = 1.4426950408889634073599246810018921;
-  int m;
-  DEALLOC(gen->th);
-  DEALLOC(gen->cth);
-  DEALLOC(gen->sth);
-  DEALLOC(gen->logsth);
-  DEALLOC(gen->lc05);
-  DEALLOC(gen->ls05);
-  gen->th = RALLOC(double,nth);
-  gen->cth = RALLOC(double,nth);
-  gen->sth = RALLOC(double,nth);
-  gen->logsth = RALLOC(double,nth);
-  gen->lc05 = RALLOC(long double,nth);
-  gen->ls05 = RALLOC(long double,nth);
-  for (m=0; m<nth; ++m)
-    {
-    const double pi = 3.141592653589793238462643383279502884197;
-    double th=theta[m];
-    UTIL_ASSERT ((th>=0.)&&(th<=pi),"bad theta angle specified");
-    /* tiny adjustments to make sure cos and sin (theta/2) are positive */
-    if (th==0.) th=1e-16;
-    if (ABSAPPROX(th,pi,1e-15)) th=pi-1e-15;
-    gen->th[m] = th;
-    gen->cth[m] = cos(th);
-    gen->sth[m] = sin(th);
-    gen->logsth[m] = inv_ln2*log(gen->sth[m]);
-    gen->lc05[m]=logl(cosl(0.5L*th));
-    gen->ls05[m]=logl(sinl(0.5L*th));
-    }
-
-  gen->nth = nth;
-  gen->ith = -1;
-#ifdef __SSE2__
-  gen->ith1 = gen->ith2 = -1;
-#endif
-  }
-
-static void Ylmgen_prepare (Ylmgen_C *gen, int ith, int m)
-  {
-  if ((ith==gen->ith) && (m==gen->m_cur)) return;
-
-  gen->ylm_uptodate = 0;
-  SET_ARRAY(gen->lwx_uptodate,0,gen->smax+1,0);
-
-  gen->ith = ith;
-
-  if (m!=gen->m_cur)
-    {
-    gen->recfac_uptodate = 0;
-    gen->lamfact_uptodate = 0;
-    gen->m_cur = m;
-    }
-  }
-
-static void Ylmgen_recalc_recfac (Ylmgen_C *gen)
-  {
-  double f_old=1;
-  int l, m;
-
-  if (gen->recfac_uptodate) return;
-  gen->recfac_uptodate = 1;
-
-  m = gen->m_cur;
-  for (l=m; l<=gen->lmax; ++l)
-    {
-    gen->recfac[l][0] = gen->t1fac[l]*gen->t2fac[l+m]*gen->t2fac[l-m];
-    gen->recfac[l][1] = gen->recfac[l][0]/f_old;
-    f_old = gen->recfac[l][0];
-    }
-  }
-
-/*static void Ylmgen_recalc_lamfact (Ylmgen_C *gen)
-  {
-  int l, m;
-
-  if (gen->lamfact_uptodate) return;
-  gen->lamfact_uptodate = 1;
-
-  m = gen->m_cur;
-  gen->lamfact[m] = 0;
-  for (l=m+1; l<=gen->lmax; ++l)
-    gen->lamfact[l] = sqrt((2*l+1.)/(2*l-1.) * (l*l-m*m));
-    }*/
-
-#define RENORMALIZE_SCALAR \
-  do \
-    { \
-    while (fabs(lam_2)>fbig) \
-      { lam_1*=fsmall; lam_2*=fsmall; ++scale; } \
-    corfac = (scale<0) ? 0. : gen->cf[scale]; \
-    } \
-  while(0)
-
-static void Ylmgen_recalc_Ylm (Ylmgen_C *gen)
-  {
-  const double ln2 = 0.6931471805599453094172321214581766;
-
-  double logval,lam_1,lam_2,corfac;
-  double eps=gen->eps, fbig=gen->fbig, fsmall=gen->fsmall;
-  ylmgen_dbl2 *recfac = gen->recfac;
-  int lmax=gen->lmax;
-  int scale,l;
-  int m = gen->m_cur;
-  double cth=gen->cth[gen->ith], sth=gen->sth[gen->ith];
-  double *result = gen->ylm;
-
-  if (gen->ylm_uptodate) return;
-  gen->ylm_uptodate=1;
-
-  if (((m>=gen->m_crit)&&(fabs(cth)>=gen->cth_crit)) || ((m>0)&&(sth==0)))
-    { gen->firstl[0]=gen->lmax+1; return; }
-
-  Ylmgen_recalc_recfac(gen);
-
-  logval = gen->mfac[m];
-  if (m>0) logval += m*gen->logsth[gen->ith];
-  scale = (int) (logval/large_exponent2)-minscale;
-  corfac = (scale<0) ? 0. : gen->cf[scale];
-
-  lam_1 = 0;
-  lam_2 = exp(ln2*(logval-(scale+minscale)*large_exponent2));
-  if (m&1) lam_2 = -lam_2;
-
-  l=m;
-  if (scale<0)
-    {
-    while (1)
-      {
-      if (++l>lmax) break;
-      lam_1 = cth*lam_2*recfac[l-1][0] - lam_1*recfac[l-1][1];
-      if (++l>lmax) break;
-      lam_2 = cth*lam_1*recfac[l-1][0] - lam_2*recfac[l-1][1];
-      if (fabs(lam_2)>fbig)
-        {
-        RENORMALIZE_SCALAR;
-        if (scale>=0) break;
-        }
-      }
-    }
-
-  lam_1*=corfac;
-  lam_2*=corfac;
-
-  if (l<=lmax)
-    {
-    while (1)
-      {
-      if (fabs(lam_2)>eps) break;
-      if (++l>lmax) break;
-      lam_1 = cth*lam_2*recfac[l-1][0] - lam_1*recfac[l-1][1];
-      if (fabs(lam_1)>eps)
-        { double x=lam_1; lam_1=lam_2; lam_2=x; break; }
-      if (++l>lmax) break;
-      lam_2 = cth*lam_1*recfac[l-1][0] - lam_2*recfac[l-1][1];
-      }
-    }
-
-  gen->firstl[0]=l;
-  if (l>lmax)
-    { gen->m_crit=m; gen->cth_crit=fabs(cth); return; }
-
-  for(;l<lmax-3;l+=4)
-    {
-    result[l]=lam_2;
-    lam_1 = cth*lam_2*recfac[l][0] - lam_1*recfac[l][1];
-    result[l+1] = lam_1;
-    lam_2 = cth*lam_1*recfac[l+1][0] - lam_2*recfac[l+1][1];
-    result[l+2]=lam_2;
-    lam_1 = cth*lam_2*recfac[l+2][0] - lam_1*recfac[l+2][1];
-    result[l+3] = lam_1;
-    lam_2 = cth*lam_1*recfac[l+3][0] - lam_2*recfac[l+3][1];
-    }
-
-  while (1)
-    {
-    result[l]=lam_2;
-    if (++l>lmax) break;
-    lam_1 = cth*lam_2*recfac[l-1][0] - lam_1*recfac[l-1][1];
-    result[l] = lam_1;
-    if (++l>lmax) break;
-    lam_2 = cth*lam_1*recfac[l-1][0] - lam_2*recfac[l-1][1];
-    }
-  }
-
-
-/*
-static void Ylmgen_recalc_lambda_wx1 (Ylmgen_C *gen)
-  {
-  if (gen->lwx_uptodate[1]) return;
-  Ylmgen_recalc_Ylm(gen);
-  gen->firstl[1] = gen->firstl[0];
-  if (gen->firstl[1]>gen->lmax) return;
-  Ylmgen_recalc_lamfact(gen);
-  gen->lwx_uptodate[1] = 1;
-
-  {
-  double cth=gen->cth[gen->ith];
-  double xsth=1./(gen->sth[gen->ith]);
-  double m=gen->m_cur;
-  double m_on_sth = m*xsth;
-  double lam_lm=0;
-  ylmgen_dbl2 *lambda_wx = gen->lambda_wx[1];
-  int l;
-  double ell;
-  for (ell=l=gen->firstl[1]; l<=gen->lmax; ++l, ell+=1.)
-    {
-    double lam_lm1m=lam_lm;
-    lam_lm=gen->ylm[l];
-    lambda_wx[l][0] = xsth*(gen->lamfact[l]*lam_lm1m - ell*cth*lam_lm);
-    lambda_wx[l][1] = m_on_sth*lam_lm;
-    }
-  }
-  }
-
-static void Ylmgen_recalc_lambda_wx2 (Ylmgen_C *gen)
-  {
-  if (gen->lwx_uptodate[2]) return;
-  Ylmgen_recalc_Ylm(gen);
-  gen->firstl[2] = gen->firstl[0];
-  if (gen->firstl[2]>gen->lmax) return;
-  Ylmgen_recalc_lamfact(gen);
-  gen->lwx_uptodate[2] = 1;
-
-  {
-  double cth=gen->cth[gen->ith];
-  double sth=gen->sth[gen->ith];
-  double m=gen->m_cur;
-  double one_on_s2 = 1./(sth*sth);
-  double two_on_s2 = 2*one_on_s2;
-  double two_c_on_s2 = cth * two_on_s2;
-  double m2 = m*m;
-  double two_m_on_s2 = m*two_on_s2;
-  double lam_lm=0;
-  ylmgen_dbl2 *lambda_wx = gen->lambda_wx[2];
-  int l;
-  double ell;
-  for (ell=l=gen->firstl[2]; l<=gen->lmax; ++l, ell+=1.)
-    {
-    double lam_lm1m=lam_lm;
-    lam_lm=gen->ylm[l];
-    {
-    const double t1  = lam_lm1m*gen->lamfact[l];
-    const double a_w = (m2-ell)*two_on_s2 - ell*(ell-1.);
-    const double a_x = cth*(ell-1.)*lam_lm;
-    lambda_wx[l][0] = a_w*lam_lm + t1*two_c_on_s2;
-    lambda_wx[l][1] = two_m_on_s2 * (t1-a_x);
-    }
-    }
-  }
-  }
-
-void Ylmgen_recalc_lambda_wx (Ylmgen_C *gen, int spin)
-  {
-  UTIL_ASSERT ((spin>0) && (spin<=gen->smax),
-    "invalid spin in Ylmgen_recalc_lambda_wx");
-
-  if (!gen->lambda_wx[spin])
-    gen->lambda_wx[spin]=RALLOC(ylmgen_dbl2,gen->lmax+1);
-
-  if (gen->spinrec && spin==1) { Ylmgen_recalc_lambda_wx1(gen); return; }
-  if (gen->spinrec && spin==2) { Ylmgen_recalc_lambda_wx2(gen); return; }
-
-  if (!gen->sylm[spin])
-    {
-    gen->sylm[spin]=RALLOC(sylmgen_d,1);
-    sylmgen_init(gen->sylm[spin],gen,spin);
-    }
-  if (gen->lwx_uptodate[spin]) return;
-  sylmgen_prepare(gen->sylm[spin],gen,gen->m_cur);
-  sylmgen_recalc(gen->sylm[spin],gen,gen->ith,gen->lambda_wx[spin],
-    &gen->firstl[spin]);
-  gen->lwx_uptodate[spin] = 1;
-  }
-*/
-#ifdef __SSE2__
-
-static void Ylmgen_prepare_sse2 (Ylmgen_C *gen, int ith1, int ith2, int m)
-  {
-  if ((ith1==gen->ith1) && (ith2==gen->ith2) && (m==gen->m_cur)) return;
-
-  gen->ylm_uptodate_sse2 = 0;
-  SET_ARRAY(gen->lwx_uptodate_sse2,0,gen->smax+1,0);
-
-  gen->ith1 = ith1; gen->ith2 = ith2;
-
-  if (m!=gen->m_cur)
-    {
-    gen->recfac_uptodate = gen->lamfact_uptodate = 0;
-    gen->m_cur = m;
-    }
-  }
-
-
-#define RENORMALIZE \
-  do \
-    { \
-    double lam1a, lam1b, lam2a, lam2b, corfaca, corfacb; \
-    read_v2df (lam_1, &lam1a, &lam1b); read_v2df (lam_2, &lam2a, &lam2b); \
-    read_v2df (corfac, &corfaca, &corfacb); \
-    while (fabs(lam2a)>fbig) \
-      { \
-      lam1a*=fsmall; lam2a*=fsmall; ++scale1; \
-      corfaca = (scale1<0) ? 0. : gen->cf[scale1]; \
-      } \
-    while (fabs(lam2b)>fbig) \
-      { \
-      lam1b*=fsmall; lam2b*=fsmall; ++scale2; \
-      corfacb = (scale2<0) ? 0. : gen->cf[scale2]; \
-      } \
-    lam_1=build_v2df(lam1a,lam1b); lam_2=build_v2df(lam2a,lam2b); \
-    corfac=build_v2df(corfaca,corfacb); \
-    } \
-  while(0)
-#define GETPRE(prea,preb,lv) \
-  { \
-  prea=_mm_mul_pd(_mm_set1_pd(recfac[lv][0]),cth); \
-  preb=_mm_set1_pd(recfac[lv][1]); \
-  }
-#define NEXTSTEP(prea,preb,prec,pred,reca,recb,lv) \
-  { \
-  preb = _mm_mul_pd(preb,reca); \
-  prea = _mm_mul_pd(prea,recb); \
-  prec = _mm_set1_pd(recfac[lv][0]); \
-  pred = _mm_set1_pd(recfac[lv][1]); \
-  reca = _mm_sub_pd(prea,preb); \
-  prec = _mm_mul_pd(cth,prec); \
-  }
-
-static void Ylmgen_recalc_Ylm_sse2 (Ylmgen_C *gen)
-  {
-  const double ln2 = 0.6931471805599453094172321214581766;
-
-  v2df lam_1,lam_2,corfac;
-  double logval1,logval2;
-  double eps=gen->eps, fbig=gen->fbig, fsmall=gen->fsmall;
-  v2df eps2=build_v2df(eps,eps);
-  v2df fbig2=build_v2df(fbig,fbig);
-  ylmgen_dbl2 *recfac = gen->recfac;
-  int lmax=gen->lmax;
-  int scale1,scale2,l;
-  int m = gen->m_cur;
-  double cth1=gen->cth[gen->ith1], cth2=gen->cth[gen->ith2];
-  v2df cth=build_v2df(cth1,cth2);
-  v2df *result = gen->ylm_sse2;
-  v2df pre0,pre1,pre2,pre3;
-
-  if (gen->ylm_uptodate_sse2) return;
-  gen->ylm_uptodate_sse2=1;
-
-  if ((m>=gen->m_crit)&&(fabs(cth1)>=gen->cth_crit)&&(fabs(cth2)>=gen->cth_crit))
-    { gen->firstl[0]=gen->lmax+1; return; }
-
-  Ylmgen_recalc_recfac(gen);
-
-  logval1 = logval2 = gen->mfac[m];
-  if (m>0) logval1 += m*gen->logsth[gen->ith1];
-  if (m>0) logval2 += m*gen->logsth[gen->ith2];
-  scale1 = (int) (logval1/large_exponent2)-minscale;
-  scale2 = (int) (logval2/large_exponent2)-minscale;
-  corfac = build_v2df((scale1<0) ? 0. : gen->cf[scale1],
-                      (scale2<0) ? 0. : gen->cf[scale2]);
-
-  lam_1 = _mm_setzero_pd();
-  lam_2 = build_v2df(exp(ln2*(logval1-(scale1+minscale)*large_exponent2)),
-                     exp(ln2*(logval2-(scale2+minscale)*large_exponent2)));
-  if (m&1) lam_2 = _mm_xor_pd (lam_2,V2DF_SIGNMASK); /* negate */
-
-  l=m;
-  if ((scale1<0) && (scale2<0))
-    {
-    GETPRE(pre0,pre1,l)
-    while (1)
-      {
-      if (++l>lmax) break;
-      NEXTSTEP(pre0,pre1,pre2,pre3,lam_1,lam_2,l)
-      if (++l>lmax) break;
-      NEXTSTEP(pre2,pre3,pre0,pre1,lam_2,lam_1,l)
-      if (v2df_any_gt(lam_2,fbig2))
-        {
-        RENORMALIZE;
-        if ((scale1>=0) || (scale2>=0)) break;
-        }
-      }
-    }
-
-  if (l<=lmax)
-    {
-    GETPRE(pre0,pre1,l)
-    while (1)
-      {
-      v2df t1;
-      result[l]=t1=_mm_mul_pd(lam_2,corfac);
-      if (v2df_any_gt(t1,eps2))
-        break;
-      if (++l>lmax) break;
-      NEXTSTEP(pre0,pre1,pre2,pre3,lam_1,lam_2,l)
-
-      result[l]=t1=_mm_mul_pd(lam_1,corfac);
-      if (v2df_any_gt(t1,eps2))
-        { v2df tmp=lam_1;lam_1=lam_2;lam_2=tmp; break; }
-      if (++l>lmax) break;
-      NEXTSTEP(pre2,pre3,pre0,pre1,lam_2,lam_1,l)
-
-      if (v2df_any_gt(lam_2,fbig2))
-        RENORMALIZE;
-      }
-    }
-
-  gen->firstl[0]=l;
-  if (l>lmax)
-    {
-    gen->m_crit=m;
-    gen->cth_crit= (fabs(cth1)<fabs(cth2)) ? fabs(cth1) : fabs(cth2);
-    return;
-    }
-
-  GETPRE(pre0,pre1,l)
-  while (1)
-    {
-    v2df t1;
-    result[l]=t1=_mm_mul_pd(lam_2,corfac);
-    if (v2df_all_ge(t1,eps2))
-      break;
-    if (++l>lmax) return;
-    NEXTSTEP(pre0,pre1,pre2,pre3,lam_1,lam_2,l)
-
-    result[l]=t1=_mm_mul_pd(lam_1,corfac);
-    if (v2df_all_ge(t1,eps2))
-      { v2df tmp=lam_1;lam_1=lam_2;lam_2=tmp; break; }
-    if (++l>lmax) return;
-    NEXTSTEP(pre2,pre3,pre0,pre1,lam_2,lam_1,l)
-
-    if (v2df_any_gt(lam_2,fbig2))
-      RENORMALIZE;
-    }
-
-  lam_1 = _mm_mul_pd (lam_1,corfac);
-  lam_2 = _mm_mul_pd (lam_2,corfac);
-
-  GETPRE(pre0,pre1,l)
-  for(;l<lmax-2;l+=2)
-    {
-    result[l]=lam_2;
-    NEXTSTEP(pre0,pre1,pre2,pre3,lam_1,lam_2,l+1)
-    result[l+1]=lam_1;
-    NEXTSTEP(pre2,pre3,pre0,pre1,lam_2,lam_1,l+2)
-    }
-
-  while (1)
-    {
-    result[l]=lam_2;
-    if (++l>lmax) break;
-    NEXTSTEP(pre0,pre1,pre2,pre3,lam_1,lam_2,l)
-    result[l] = lam_1;
-    if (++l>lmax) break;
-    NEXTSTEP(pre2,pre3,pre0,pre1,lam_2,lam_1,l)
-    }
-  }
-
-/*static void Ylmgen_recalc_lambda_wx1_sse2 (Ylmgen_C *gen)
-  {
-  if (gen->lwx_uptodate_sse2[1]) return;
-  Ylmgen_recalc_Ylm_sse2(gen);
-  gen->firstl[1] = gen->firstl[0];
-  if (gen->firstl[1]>gen->lmax) return;
-  Ylmgen_recalc_lamfact(gen);
-  gen->lwx_uptodate_sse2[1] = 1;
-
-  {
-  v2df cth=build_v2df(gen->cth[gen->ith1],gen->cth[gen->ith2]);
-  v2df xsth=build_v2df(1./gen->sth[gen->ith1],1./gen->sth[gen->ith2]);
-  v2df m=build_v2df(gen->m_cur,gen->m_cur);
-  v2df m_on_sth = _mm_mul_pd(m,xsth);
-  v2df lam_lm=_mm_setzero_pd();
-  v2df2 *lambda_wx = gen->lambda_wx_sse2[1];
-  int l;
-  v2df ell=build_v2df(gen->firstl[1],gen->firstl[1]);
-  v2df uno=_mm_set1_pd(1.);
-  for (l=gen->firstl[1]; l<=gen->lmax; ++l, ell=_mm_add_pd(ell,uno))
-    {
-    v2df lamfact=_mm_load1_pd(&gen->lamfact[l]);
-    v2df lam_lm1m=lam_lm;
-    lam_lm=gen->ylm_sse2[l];
-    lambda_wx[l].a = _mm_mul_pd(xsth,_mm_sub_pd(_mm_mul_pd(lamfact,lam_lm1m),
-                     _mm_mul_pd(_mm_mul_pd(ell,cth),lam_lm)));
-    lambda_wx[l].b = _mm_mul_pd(m_on_sth,lam_lm);
-    }
-  }
-  }
-
-static void Ylmgen_recalc_lambda_wx2_sse2 (Ylmgen_C *gen)
-  {
-  if (gen->lwx_uptodate_sse2[2]) return;
-  Ylmgen_recalc_Ylm_sse2(gen);
-  gen->firstl[2] = gen->firstl[0];
-  if (gen->firstl[2]>gen->lmax) return;
-  Ylmgen_recalc_lamfact(gen);
-  gen->lwx_uptodate_sse2[2] = 1;
-
-  {
-  v2df cth=build_v2df(gen->cth[gen->ith1],gen->cth[gen->ith2]);
-  v2df sth=build_v2df(gen->sth[gen->ith1],gen->sth[gen->ith2]);
-  v2df m=build_v2df(gen->m_cur,gen->m_cur);
-  v2df uno=_mm_set1_pd(1.);
-  v2df one_on_s2 = _mm_div_pd(uno,_mm_mul_pd(sth,sth));
-  v2df two_on_s2 = _mm_mul_pd(_mm_set1_pd(2.),one_on_s2);
-  v2df two_c_on_s2 = _mm_mul_pd(cth,two_on_s2);
-  v2df m2 = _mm_mul_pd(m,m);
-  v2df two_m_on_s2 = _mm_mul_pd(m,two_on_s2);
-  v2df lam_lm=_mm_setzero_pd();
-  v2df2 *lambda_wx = gen->lambda_wx_sse2[2];
-  int l;
-  v2df ell=build_v2df(gen->firstl[2],gen->firstl[2]);
-  for (l=gen->firstl[2]; l<=gen->lmax; ++l, ell=_mm_add_pd(ell,uno))
-    {
-    v2df lamfact=_mm_load1_pd(&gen->lamfact[l]);
-    v2df lam_lm1m=lam_lm;
-    lam_lm=gen->ylm_sse2[l];
-    {
-    const v2df t1  = _mm_mul_pd(lam_lm1m,lamfact);
-    const v2df ellm1 = _mm_sub_pd(ell,uno);
-    const v2df a_w = _mm_sub_pd
-      (_mm_mul_pd(_mm_sub_pd(m2,ell),two_on_s2),_mm_mul_pd(ell,ellm1));
-    const v2df a_x = _mm_mul_pd(_mm_mul_pd(cth,ellm1),lam_lm);
-    lambda_wx[l].a =
-      _mm_add_pd(_mm_mul_pd(a_w,lam_lm),_mm_mul_pd(t1,two_c_on_s2));
-    lambda_wx[l].b = _mm_mul_pd(two_m_on_s2,_mm_sub_pd(t1,a_x));
-    }
-    }
-  }
-  }*/
-
-/*static void Ylmgen_recalc_lambda_wx_sse2 (Ylmgen_C *gen, int spin)
-  {
-  UTIL_ASSERT ((spin>0) && (spin<=gen->smax),
-    "invalid spin in Ylmgen_recalc_lambda_wx_sse2");
-
-  if (!gen->lambda_wx_sse2[spin])
-    gen->lambda_wx_sse2[spin]=RALLOC(v2df2,gen->lmax+1);
-
-  if (gen->spinrec && spin==1) { Ylmgen_recalc_lambda_wx1_sse2(gen); return; }
-  if (gen->spinrec && spin==2) { Ylmgen_recalc_lambda_wx2_sse2(gen); return; }
-
-  if (!gen->sylm[spin])
-    {
-    gen->sylm[spin]=RALLOC(sylmgen_d,1);
-    sylmgen_init(gen->sylm[spin],gen,spin);
-    }
-  if (gen->lwx_uptodate_sse2[spin]) return;
-  sylmgen_prepare(gen->sylm[spin],gen,gen->m_cur);
-  sylmgen_recalc_sse2(gen->sylm[spin],gen,gen->ith1,gen->ith2,
-    gen->lambda_wx_sse2[spin],&gen->firstl[spin]);
-  gen->lwx_uptodate_sse2[spin] = 1;
-  }*/
-
-#endif /* __SSE2__ */
-
-/*
-double *Ylmgen_get_norm (int lmax, int spin, int spinrec)
-  {
-  const double pi = 3.141592653589793238462643383279502884197;
-  double *res=RALLOC(double,lmax+1);
-  int l;
-  double spinsign;
-  / sign convention for H=1 (LensPix paper) /
-#if 1
-  spinsign = (spin>0) ? -1.0 : 1.0;
-#else
-  spinsign = 1.0;
-#endif
-
-  if (spin==0)
-    {
-    for (l=0; l<=lmax; ++l)
-      res[l]=1.;
-    return res;
-    }
-
-  if ((!spinrec) || (spin>=3))
-    {
-    spinsign = (spin&1) ? -spinsign : spinsign;
-    for (l=0; l<=lmax; ++l)
-      res[l] = (l<spin) ? 0. : spinsign*0.5*sqrt((2*l+1)/(4*pi));
-    return res;
-    }
-
-  if (spin==1)
-    {
-    for (l=0; l<=lmax; ++l)
-      res[l] = (l<spin) ? 0. : spinsign*sqrt(1./((l+1.)*l));
-    return res;
-    }
-
-  if (spin==2)
-    {
-    for (l=0; l<=lmax; ++l)
-      res[l] = (l<spin) ? 0. : spinsign*sqrt(1./((l+2.)*(l+1.)*l*(l-1.)));
-    return res;
-    }
-
-  UTIL_FAIL ("error in Ylmgen_get_norm");
-  return NULL;
-  }
-*/
-
-
-
-/*
-New high-level wrapper
-*/
-#include "sharp_legendre_table.h"
-#include <stdio.h>
-
-void sharp_normalized_associated_legendre_table(
-  ptrdiff_t m,
-  int spin,
-  ptrdiff_t lmax,
-  ptrdiff_t ntheta,
-  double *theta,
-  ptrdiff_t theta_stride,
-  ptrdiff_t l_stride,
-  ptrdiff_t spin_stride,
-  double *out
-) {
-    if (spin != 0) UTIL_FAIL ("sharp_normalized_associated_legendre_table: only spin=0 has been implemented so far");
-
-    Ylmgen_C ctx;
-    ptrdiff_t itheta, l, lmin;
-
-    Ylmgen_init(&ctx, lmax, lmax, 0, 0, 1e-300);
-    Ylmgen_set_theta(&ctx, theta, ntheta);
-
-    itheta = 0;
-    #ifdef __SSE2__
-    for (; itheta < ntheta - 1; itheta += 2) {
-        Ylmgen_prepare_sse2(&ctx, itheta, itheta + 1, m);
-        Ylmgen_recalc_Ylm_sse2(&ctx);
-        lmin = IMIN(*ctx.firstl, lmax + 1);
-        for (l = m; l < lmin; ++l) {
-            out[itheta * theta_stride + (l - m) * l_stride + spin * spin_stride] = 0;
-            out[(itheta + 1) * theta_stride + (l - m) * l_stride + spin * spin_stride] = 0;
-        }
-        for (l = IMAX(lmin, m); l <= lmax; ++l) {
-            double v1, v2;
-            read_v2df(ctx.ylm_sse2[l], &v1, &v2);
-            out[itheta * theta_stride + (l - m) * l_stride + spin * spin_stride] = v1;
-            out[(itheta + 1) * theta_stride + (l - m) * l_stride + spin * spin_stride] = v2;
-        }
-    }
-    #endif
-    for (; itheta < ntheta; itheta += 1) {
-        Ylmgen_prepare(&ctx, itheta, m);
-        Ylmgen_recalc_Ylm(&ctx);
-        lmin = IMIN(*ctx.firstl, lmax + 1);
-        for (l = m; l < lmin; ++l) {
-            out[itheta * theta_stride + (l - m) * l_stride + spin * spin_stride] = 0;
-        }
-        for (l = IMAX(lmin, m); l <= lmax; ++l) {
-            out[itheta * theta_stride + (l - m) * l_stride + spin * spin_stride] = ctx.ylm[l];
-        }
-    }
-    Ylmgen_destroy(&ctx);
-}
diff --git a/libsharp/sharp_legendre_table.h b/libsharp/sharp_legendre_table.h
deleted file mode 100644
index de4cdd6..0000000
--- a/libsharp/sharp_legendre_table.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- *  This file is part of libsharp.
- *
- * Redistribution and use in source and binary forms, with or without
- * met:
- * 
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*! \file sharp_legendre_table.h
- *  Interface for computing tables of the normalized associated Legendre transform
- *
- *  Copyright (C) 2017 Dag Sverre Seljebotn
- *  \author Dag Sverre Seljebotn
- *
- *  Note: This code was mainly copied from libpsht; only a small high-level wrapper added
- */
-
-#ifndef SHARP_LEGENDRE_TABLE_H
-#define SHARP_LEGENDRE_TABLE_H
-
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef NO_LEGENDRE_TABLE
-
-
-/*! Returns a table of the normalized associated Legendre polynomials. m is a single
-    fixed argument and a table for multiple l and cos(theta) is provided.
-    (Internally, sin(theta) is also used for part of the computation, making theta
-    the most convenient argument.)
-
-    NOTE: Support for spin-weighted Legendre functions is on the TODO-list. Only spin=0
-    is supported now.
-
-    \param m The m-value to compute a table for; must be >= 0
-    \param spin The spin parameter; pass 0 for the regular associated Legendre functions.
-                NOTE: This is present for future compatability, currently only 0 is supported.
-    \param lmax A table will be provided for l = m .. lmax
-    \param ntheta How many theta values to evaluate for
-    \param theta Contiguous 1D array of theta values
-    \param theta_stride See below
-    \param l_stride See below
-    \param spin_stride See below. "ispin" will always be 0 if spin==0, or 0 for positive spin
-                       and 1 for the corresponding negative spin otherwise.
-    \param out Contiguous 3D array that will receive the output. Each output entry
-               is assigned to out[itheta * theta_stride + (l - m) * l_stride + ispin * spin_stride].
- */
-void sharp_normalized_associated_legendre_table(
-  ptrdiff_t m,
-  int spin,
-  ptrdiff_t lmax,
-  ptrdiff_t ntheta,
-  /* contiguous 1D array of theta values to compute for,
-     contains ntheta values */
-  double *theta,
-  /* contiguous 2D array, in "theta-major ordering". Has `ntheta`
-     rows and `ncols` columns. Indexed as out[itheta * ncols + (l - m)].
-     If `ncols > lmax - m` then those entries are not accessed.
-  */
-  ptrdiff_t theta_stride,
-  ptrdiff_t l_stride,
-  ptrdiff_t spin_stride,
-  double *out
-);
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/m4/m4_ax_create_pkgconfig_info.m4 b/m4/m4_ax_create_pkgconfig_info.m4
new file mode 100644
index 0000000..308e64f
--- /dev/null
+++ b/m4/m4_ax_create_pkgconfig_info.m4
@@ -0,0 +1,351 @@
+# ============================================================================
+#  http://www.gnu.org/software/autoconf-archive/ax_create_pkgconfig_info.html
+# ============================================================================
+#
+# SYNOPSIS
+#
+#   AX_CREATE_PKGCONFIG_INFO [(outputfile, [requires [,libs [,summary [,cflags [, ldflags]]]]])]
+#
+# DESCRIPTION
+#
+#   Defaults:
+#
+#     $1 = $PACKAGE_NAME.pc
+#     $2 = (empty)
+#     $3 = $PACKAGE_LIBS $LIBS (as set at that point in configure.ac)
+#     $4 = $PACKAGE_SUMMARY (or $1 Library)
+#     $5 = $PACKAGE_CFLAGS (as set at the point in configure.ac)
+#     $6 = $PACKAGE_LDFLAGS (as set at the point in configure.ac)
+#
+#     PACKAGE_NAME defaults to $PACKAGE if not set.
+#     PACKAGE_LIBS defaults to -l$PACKAGE_NAME if not set.
+#
+#   The resulting file is called $PACKAGE.pc.in / $PACKAGE.pc
+#
+#   You will find this macro most useful in conjunction with
+#   ax_spec_defaults that can read good initializers from the .spec file. In
+#   consequencd, most of the generatable installable stuff can be made from
+#   information being updated in a single place for the whole project.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
+#   Copyright (c) 2008 Sven Verdoolaege <skimo@kotnet.org>
+#
+#   This program is free software; you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation; either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 12
+
+AC_DEFUN([AX_CREATE_PKGCONFIG_INFO],[dnl
+AS_VAR_PUSHDEF([PKGCONFIG_suffix],[ax_create_pkgconfig_suffix])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_libdir],[ax_create_pkgconfig_libdir])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_libfile],[ax_create_pkgconfig_libfile])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_libname],[ax_create_pkgconfig_libname])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_version],[ax_create_pkgconfig_version])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_description],[ax_create_pkgconfig_description])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_requires],[ax_create_pkgconfig_requires])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_pkglibs],[ax_create_pkgconfig_pkglibs])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_libs],[ax_create_pkgconfig_libs])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_ldflags],[ax_create_pkgconfig_ldflags])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_cppflags],[ax_create_pkgconfig_cppflags])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_generate],[ax_create_pkgconfig_generate])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_src_libdir],[ax_create_pkgconfig_src_libdir])dnl
+AS_VAR_PUSHDEF([PKGCONFIG_src_headers],[ax_create_pkgconfig_src_headers])dnl
+
+# we need the expanded forms...
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+AC_MSG_CHECKING(our pkgconfig libname)
+test ".$PKGCONFIG_libname" != "." || \
+PKGCONFIG_libname="ifelse($1,,${PACKAGE_NAME},`basename $1 .pc`)"
+test ".$PKGCONFIG_libname" != "." || \
+PKGCONFIG_libname="$PACKAGE"
+PKGCONFIG_libname=`eval echo "$PKGCONFIG_libname"`
+PKGCONFIG_libname=`eval echo "$PKGCONFIG_libname"`
+AC_MSG_RESULT($PKGCONFIG_libname)
+
+AC_MSG_CHECKING(our pkgconfig version)
+test ".$PKGCONFIG_version" != "." || \
+PKGCONFIG_version="${PACKAGE_VERSION}"
+test ".$PKGCONFIG_version" != "." || \
+PKGCONFIG_version="$VERSION"
+PKGCONFIG_version=`eval echo "$PKGCONFIG_version"`
+PKGCONFIG_version=`eval echo "$PKGCONFIG_version"`
+AC_MSG_RESULT($PKGCONFIG_version)
+
+AC_MSG_CHECKING(our pkgconfig_libdir)
+test ".$pkgconfig_libdir" = "." && \
+pkgconfig_libdir='${libdir}/pkgconfig'
+PKGCONFIG_libdir=`eval echo "$pkgconfig_libdir"`
+PKGCONFIG_libdir=`eval echo "$PKGCONFIG_libdir"`
+PKGCONFIG_libdir=`eval echo "$PKGCONFIG_libdir"`
+AC_MSG_RESULT($pkgconfig_libdir)
+test "$pkgconfig_libdir" != "$PKGCONFIG_libdir" && (
+AC_MSG_RESULT(expanded our pkgconfig_libdir... $PKGCONFIG_libdir))
+AC_SUBST([pkgconfig_libdir])
+
+AC_MSG_CHECKING(our pkgconfig_libfile)
+test ".$pkgconfig_libfile" != "." || \
+pkgconfig_libfile="ifelse($1,,$PKGCONFIG_libname.pc,`basename $1`)"
+PKGCONFIG_libfile=`eval echo "$pkgconfig_libfile"`
+PKGCONFIG_libfile=`eval echo "$PKGCONFIG_libfile"`
+AC_MSG_RESULT($pkgconfig_libfile)
+test "$pkgconfig_libfile" != "$PKGCONFIG_libfile" && (
+AC_MSG_RESULT(expanded our pkgconfig_libfile... $PKGCONFIG_libfile))
+AC_SUBST([pkgconfig_libfile])
+
+AC_MSG_CHECKING(our package / suffix)
+PKGCONFIG_suffix="$program_suffix"
+test ".$PKGCONFIG_suffix" != .NONE || PKGCONFIG_suffix=""
+AC_MSG_RESULT(${PACKAGE_NAME} / ${PKGCONFIG_suffix})
+
+AC_MSG_CHECKING(our pkgconfig description)
+PKGCONFIG_description="ifelse($4,,$PACKAGE_SUMMARY,$4)"
+test ".$PKGCONFIG_description" != "." || \
+PKGCONFIG_description="$PKGCONFIG_libname Library"
+PKGCONFIG_description=`eval echo "$PKGCONFIG_description"`
+PKGCONFIG_description=`eval echo "$PKGCONFIG_description"`
+AC_MSG_RESULT($PKGCONFIG_description)
+
+AC_MSG_CHECKING(our pkgconfig requires)
+PKGCONFIG_requires="ifelse($2,,$PACKAGE_REQUIRES,$2)"
+PKGCONFIG_requires=`eval echo "$PKGCONFIG_requires"`
+PKGCONFIG_requires=`eval echo "$PKGCONFIG_requires"`
+AC_MSG_RESULT($PKGCONFIG_requires)
+
+AC_MSG_CHECKING(our pkgconfig ext libs)
+PKGCONFIG_pkglibs="$PACKAGE_LIBS"
+test ".$PKGCONFIG_pkglibs" != "." || PKGCONFIG_pkglibs="-l$PKGCONFIG_libname"
+PKGCONFIG_libs="ifelse($3,,$PKGCONFIG_pkglibs $LIBS,$3)"
+PKGCONFIG_libs=`eval echo "$PKGCONFIG_libs"`
+PKGCONFIG_libs=`eval echo "$PKGCONFIG_libs"`
+AC_MSG_RESULT($PKGCONFIG_libs)
+
+AC_MSG_CHECKING(our pkgconfig cppflags)
+PKGCONFIG_cppflags="ifelse($5,,$PACKAGE_CFLAGS,$5)"
+PKGCONFIG_cppflags=`eval echo "$PKGCONFIG_cppflags"`
+PKGCONFIG_cppflags=`eval echo "$PKGCONFIG_cppflags"`
+AC_MSG_RESULT($PKGCONFIG_cppflags)
+
+AC_MSG_CHECKING(our pkgconfig ldflags)
+PKGCONFIG_ldflags="ifelse($6,,$PACKAGE_LDFLAGS,$5)"
+PKGCONFIG_ldflags=`eval echo "$PKGCONFIG_ldflags"`
+PKGCONFIG_ldflags=`eval echo "$PKGCONFIG_ldflags"`
+AC_MSG_RESULT($PKGCONFIG_ldflags)
+
+test ".$PKGCONFIG_generate" != "." || \
+PKGCONFIG_generate="ifelse($1,,$PKGCONFIG_libname.pc,$1)"
+PKGCONFIG_generate=`eval echo "$PKGCONFIG_generate"`
+PKGCONFIG_generate=`eval echo "$PKGCONFIG_generate"`
+test "$pkgconfig_libfile" != "$PKGCONFIG_generate" && (
+AC_MSG_RESULT(generate the pkgconfig later... $PKGCONFIG_generate))
+
+if test ".$PKGCONFIG_src_libdir" = "." ; then
+PKGCONFIG_src_libdir=`pwd`
+PKGCONFIG_src_libdir=`AS_DIRNAME("$PKGCONFIG_src_libdir/$PKGCONFIG_generate")`
+test ! -d $PKGCONFIG_src_libdir/src || \
+PKGCONFIG_src_libdir="$PKGCONFIG_src_libdir/src"
+case ".$objdir" in
+*libs) PKGCONFIG_src_libdir="$PKGCONFIG_src_libdir/$objdir" ;; esac
+AC_MSG_RESULT(noninstalled pkgconfig -L $PKGCONFIG_src_libdir)
+fi
+
+if test ".$PKGCONFIG_src_headers" = "." ; then
+PKGCONFIG_src_headers=`pwd`
+v="$ac_top_srcdir" ;
+test ".$v" != "." || v="$ax_spec_dir"
+test ".$v" != "." || v="$srcdir"
+case "$v" in /*) PKGCONFIG_src_headers="" ;; esac
+PKGCONFIG_src_headers=`AS_DIRNAME("$PKGCONFIG_src_headers/$v/x")`
+test ! -d $PKGCONFIG_src_headers/incl[]ude || \
+PKGCONFIG_src_headers="$PKGCONFIG_src_headers/incl[]ude"
+AC_MSG_RESULT(noninstalled pkgconfig -I $PKGCONFIG_src_headers)
+fi
+
+
+dnl AC_CONFIG_COMMANDS crap disallows to use $PKGCONFIG_libfile here...
+AC_CONFIG_COMMANDS([$ax_create_pkgconfig_generate],[
+pkgconfig_generate="$ax_create_pkgconfig_generate"
+if test ! -f "$pkgconfig_generate.in"
+then generate="true"
+elif grep ' generated by configure ' $pkgconfig_generate.in >/dev/null
+then generate="true"
+else generate="false";
+fi
+if $generate ; then
+AC_MSG_NOTICE(creating $pkgconfig_generate.in)
+cat > $pkgconfig_generate.in <<AXEOF
+# generated by configure / remove this line to disable regeneration
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+bindir=@bindir@
+libdir=@libdir@
+datarootdir=@datarootdir@
+datadir=@datadir@
+sysconfdir=@sysconfdir@
+includedir=@includedir@
+package=@PACKAGE@
+suffix=@suffix@
+
+Name: @PACKAGE_NAME@
+Description: @PACKAGE_DESCRIPTION@
+Version: @PACKAGE_VERSION@
+Requires: @PACKAGE_REQUIRES@
+Libs: -L\${libdir} @LDFLAGS@ @LIBS@
+Cflags: -I\${includedir} @CPPFLAGS@
+AXEOF
+fi # DONE generate $pkgconfig_generate.in
+AC_MSG_NOTICE(creating $pkgconfig_generate)
+cat >conftest.sed <<AXEOF
+s|@prefix@|${pkgconfig_prefix}|
+s|@exec_prefix@|${pkgconfig_execprefix}|
+s|@bindir@|${pkgconfig_bindir}|
+s|@libdir@|${pkgconfig_libdir}|
+s|@datarootdir@|${pkgconfig_datarootdir}|
+s|@datadir@|${pkgconfig_datadir}|
+s|@sysconfdir@|${pkgconfig_sysconfdir}|
+s|@includedir@|${pkgconfig_includedir}|
+s|@suffix@|${pkgconfig_suffix}|
+s|@PACKAGE@|${pkgconfig_package}|
+s|@PACKAGE_NAME@|${pkgconfig_libname}|
+s|@PACKAGE_DESCRIPTION@|${pkgconfig_description}|
+s|@PACKAGE_VERSION@|${pkgconfig_version}|
+s|@PACKAGE_REQUIRES@|${pkgconfig_requires}|
+s|@LIBS@|${pkgconfig_libs}|
+s|@LDFLAGS@|${pkgconfig_ldflags}|
+s|@CPPFLAGS@|${pkgconfig_cppflags}|
+AXEOF
+sed -f conftest.sed  $pkgconfig_generate.in > $pkgconfig_generate
+if test ! -s $pkgconfig_generate ; then
+    AC_MSG_ERROR([$pkgconfig_generate is empty])
+fi ; rm conftest.sed # DONE generate $pkgconfig_generate
+pkgconfig_uninstalled=`echo $pkgconfig_generate |sed 's/.pc$/-uninstalled.pc/'`
+AC_MSG_NOTICE(creating $pkgconfig_uninstalled)
+cat >conftest.sed <<AXEOF
+s|@prefix@|${pkgconfig_prefix}|
+s|@exec_prefix@|${pkgconfig_execprefix}|
+s|@bindir@|${pkgconfig_bindir}|
+s|@libdir@|${pkgconfig_src_libdir}|
+s|@datarootdir@|${pkgconfig_datarootdir}|
+s|@datadir@|${pkgconfig_datadir}|
+s|@sysconfdir@|${pkgconfig_sysconfdir}|
+s|@includedir@|${pkgconfig_src_headers}|
+s|@suffix@|${pkgconfig_suffix}|
+s|@PACKAGE@|${pkgconfig_package}|
+s|@PACKAGE_NAME@|${pkgconfig_libname}|
+s|@PACKAGE_DESCRIPTION@|${pkgconfig_description}|
+s|@PACKAGE_VERSION@|${pkgconfig_version}|
+s|@PACKAGE_REQUIRES@|${pkgconfig_requires}|
+s|@LIBS@|${pkgconfig_libs}|
+s|@LDFLAGS@|${pkgconfig_ldflags}|
+s|@CPPFLAGS@|${pkgconfig_cppflags}|
+AXEOF
+sed -f conftest.sed $pkgconfig_generate.in > $pkgconfig_uninstalled
+if test ! -s $pkgconfig_uninstalled ; then
+    AC_MSG_ERROR([$pkgconfig_uninstalled is empty])
+fi ; rm conftest.sed # DONE generate $pkgconfig_uninstalled
+           pkgconfig_requires_add=`echo ${pkgconfig_requires}`
+if test ".$pkgconfig_requires_add" != "." ; then
+           pkgconfig_requires_add="pkg-config $pkgconfig_requires_add"
+    else   pkgconfig_requires_add=":" ; fi
+pkgconfig_uninstalled=`echo $pkgconfig_generate |sed 's/.pc$/-uninstalled.sh/'`
+AC_MSG_NOTICE(creating $pkgconfig_uninstalled)
+cat >conftest.sed <<AXEOF
+s|@prefix@|\"${pkgconfig_prefix}\"|
+s|@exec_prefix@|\"${pkgconfig_execprefix}\"|
+s|@bindir@|\"${pkgconfig_bindir}\"|
+s|@libdir@|\"${pkgconfig_src_libdir}\"|
+s|@datarootdir@|\"${pkgconfig_datarootdir}\"|
+s|@datadir@|\"${pkgconfig_datadir}\"|
+s|@sysconfdir@|\"${pkgconfig_sysconfdir}\"|
+s|@includedir@|\"${pkgconfig_src_headers}\"|
+s|@suffix@|\"${pkgconfig_suffix}\"|
+s|@PACKAGE@|\"${pkgconfig_package}\"|
+s|@PACKAGE_NAME@|\"${pkgconfig_libname}\"|
+s|@PACKAGE_DESCRIPTION@|\"${pkgconfig_description}\"|
+s|@PACKAGE_VERSION@|\"${pkgconfig_version}\"|
+s|@PACKAGE_REQUIRES@|\"${pkgconfig_requires}\"|
+s|@LIBS@|\"${pkgconfig_libs}\"|
+s|@LDFLAGS@|\"${pkgconfig_ldflags}\"|
+s|@CPPFLAGS@|\"${pkgconfig_cppflags}\"|
+s>Name:>for option\\; do case \"\$option\" in --list-all|--name) echo >
+s>Description: *>\\;\\; --help) pkg-config --help \\; echo Buildscript Of >
+s>Version: *>\\;\\; --modversion|--version) echo >
+s>Requires:>\\;\\; --requires) echo $pkgconfig_requires_add>
+s>Libs: *>\\;\\; --libs) echo >
+s>Cflags: *>\\;\\; --cflags) echo >
+/--libs)/a\\
+       $pkgconfig_requires_add
+/--cflags)/a\\
+       $pkgconfig_requires_add\\
+;; --variable=*) eval echo '\$'\`echo \$option | sed -e 's/.*=//'\`\\
+;; --uninstalled) exit 0 \\
+;; *) ;; esac done
+AXEOF
+sed -f conftest.sed  $pkgconfig_generate.in > $pkgconfig_uninstalled
+if test ! -s $pkgconfig_uninstalled ; then
+    AC_MSG_ERROR([$pkgconfig_uninstalled is empty])
+fi ; rm conftest.sed # DONE generate $pkgconfig_uninstalled
+],[
+dnl AC_CONFIG_COMMANDS crap, the AS_PUSHVAR defines are invalid here...
+ax_create_pkgconfig_generate="$ax_create_pkgconfig_generate"
+pkgconfig_prefix='$prefix'
+pkgconfig_execprefix='$exec_prefix'
+pkgconfig_bindir='$bindir'
+pkgconfig_libdir='$libdir'
+pkgconfig_includedir='$includedir'
+pkgconfig_datarootdir='$datarootdir'
+pkgconfig_datadir='$datadir'
+pkgconfig_sysconfdir='$sysconfdir'
+pkgconfig_suffix='$ax_create_pkgconfig_suffix'
+pkgconfig_package='$PACKAGE_NAME'
+pkgconfig_libname='$ax_create_pkgconfig_libname'
+pkgconfig_description='$ax_create_pkgconfig_description'
+pkgconfig_version='$ax_create_pkgconfig_version'
+pkgconfig_requires='$ax_create_pkgconfig_requires'
+pkgconfig_libs='$ax_create_pkgconfig_libs'
+pkgconfig_ldflags='$ax_create_pkgconfig_ldflags'
+pkgconfig_cppflags='$ax_create_pkgconfig_cppflags'
+pkgconfig_src_libdir='$ax_create_pkgconfig_src_libdir'
+pkgconfig_src_headers='$ax_create_pkgconfig_src_headers'
+])dnl
+AS_VAR_POPDEF([PKGCONFIG_suffix])dnl
+AS_VAR_POPDEF([PKGCONFIG_libdir])dnl
+AS_VAR_POPDEF([PKGCONFIG_libfile])dnl
+AS_VAR_POPDEF([PKGCONFIG_libname])dnl
+AS_VAR_POPDEF([PKGCONFIG_version])dnl
+AS_VAR_POPDEF([PKGCONFIG_description])dnl
+AS_VAR_POPDEF([PKGCONFIG_requires])dnl
+AS_VAR_POPDEF([PKGCONFIG_pkglibs])dnl
+AS_VAR_POPDEF([PKGCONFIG_libs])dnl
+AS_VAR_POPDEF([PKGCONFIG_ldflags])dnl
+AS_VAR_POPDEF([PKGCONFIG_cppflags])dnl
+AS_VAR_POPDEF([PKGCONFIG_generate])dnl
+AS_VAR_POPDEF([PKGCONFIG_src_libdir])dnl
+AS_VAR_POPDEF([PKGCONFIG_src_headers])dnl
+])
diff --git a/python/fake_pyrex/Pyrex/Distutils/__init__.py b/python/fake_pyrex/Pyrex/Distutils/__init__.py
deleted file mode 100644
index 51c8e16..0000000
--- a/python/fake_pyrex/Pyrex/Distutils/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# work around broken setuptools monkey patching
diff --git a/python/fake_pyrex/Pyrex/Distutils/build_ext.py b/python/fake_pyrex/Pyrex/Distutils/build_ext.py
deleted file mode 100644
index 4f846f6..0000000
--- a/python/fake_pyrex/Pyrex/Distutils/build_ext.py
+++ /dev/null
@@ -1 +0,0 @@
-build_ext = "yes, it's there!"
diff --git a/python/fake_pyrex/Pyrex/__init__.py b/python/fake_pyrex/Pyrex/__init__.py
deleted file mode 100644
index 51c8e16..0000000
--- a/python/fake_pyrex/Pyrex/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# work around broken setuptools monkey patching
diff --git a/python/fake_pyrex/README b/python/fake_pyrex/README
deleted file mode 100644
index cf3f3ff..0000000
--- a/python/fake_pyrex/README
+++ /dev/null
@@ -1,2 +0,0 @@
-This directory is here to fool setuptools into building .pyx files
-even if Pyrex is not installed. See ../setup.py.
\ No newline at end of file
diff --git a/python/libsharp/__init__.py b/python/libsharp/__init__.py
deleted file mode 100644
index dd0fa41..0000000
--- a/python/libsharp/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .libsharp import *
diff --git a/python/libsharp/libsharp.pxd b/python/libsharp/libsharp.pxd
deleted file mode 100644
index 27a4608..0000000
--- a/python/libsharp/libsharp.pxd
+++ /dev/null
@@ -1,92 +0,0 @@
-cdef extern from "sharp.h":
-
-    void sharp_legendre_transform_s(float *bl, float *recfac, ptrdiff_t lmax, float *x,
-                                    float *out, ptrdiff_t nx)
-    void sharp_legendre_transform(double *bl, double *recfac, ptrdiff_t lmax, double *x,
-                                  double *out, ptrdiff_t nx)
-    void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax)
-    void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax)
-    void sharp_legendre_roots(int n, double *x, double *w)
-
-    # sharp_lowlevel.h
-    ctypedef struct sharp_alm_info:
-      # Maximum \a l index of the array
-      int lmax
-      # Number of different \a m values in this object
-      int nm
-      # Array with \a nm entries containing the individual m values
-      int *mval
-      # Combination of flags from sharp_almflags
-      int flags
-      # Array with \a nm entries containing the (hypothetical) indices of
-      #   the coefficients with quantum numbers 0,\a mval[i]
-      long *mvstart
-      # Stride between a_lm and a_(l+1),m
-      long stride
-
-    ctypedef struct sharp_geom_info:
-        pass
-
-    void sharp_make_alm_info (int lmax, int mmax, int stride,
-                             ptrdiff_t *mvstart, sharp_alm_info **alm_info)
-
-    void sharp_make_geom_info (int nrings, int *nph, ptrdiff_t *ofs,
-                               int *stride, double *phi0, double *theta,
-                               double *wgt, sharp_geom_info **geom_info)
-
-    void sharp_destroy_alm_info(sharp_alm_info *info)
-    void sharp_destroy_geom_info(sharp_geom_info *info)
-
-    ptrdiff_t sharp_map_size(sharp_geom_info *info)
-    ptrdiff_t sharp_alm_count(sharp_alm_info *self)
-
-
-    ctypedef enum sharp_jobtype:
-        SHARP_YtW
-        SHARP_Yt
-        SHARP_WY
-        SHARP_Y
-
-    ctypedef enum:
-        SHARP_DP
-        SHARP_ADD
-
-    void sharp_execute(sharp_jobtype type_,
-                       int spin,
-                       void *alm,
-                       void *map,
-                       sharp_geom_info *geom_info,
-                       sharp_alm_info *alm_info,
-                       int ntrans,
-                       int flags,
-                       double *time,
-                       unsigned long long *opcnt) nogil
-
-    ctypedef enum:
-        SHARP_ERROR_NO_MPI
-
-    int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
-        void *alm, void *map, sharp_geom_info *geom_info,
-        sharp_alm_info *alm_info, int ntrans, int flags, double *time,
-        unsigned long long *opcnt) nogil
-
-    void sharp_normalized_associated_legendre_table(int m, int spin, int lmax, int ntheta,
-        double *theta, int theta_stride, int l_stride, int spin_stride, double *out) nogil
-
-
-cdef extern from "sharp_geomhelpers.h":
-    void sharp_make_subset_healpix_geom_info(
-        int nside, int stride, int nrings,
-        int *rings, double *weight, sharp_geom_info **geom_info)
-    void sharp_make_gauss_geom_info(
-        int nrings, int nphi, double phi0,
-        int stride_lon, int stride_lat, sharp_geom_info **geom_info)
-
-cdef extern from "sharp_almhelpers.h":
-    void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
-        sharp_alm_info **alm_info)
-    void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
-        sharp_alm_info **alm_info)
-    void sharp_make_mmajor_real_packed_alm_info (int lmax, int stride,
-        int nm, const int *ms, sharp_alm_info **alm_info)
-
diff --git a/python/libsharp/libsharp.pyx b/python/libsharp/libsharp.pyx
deleted file mode 100644
index dfefc93..0000000
--- a/python/libsharp/libsharp.pyx
+++ /dev/null
@@ -1,324 +0,0 @@
-import numpy as np
-cimport numpy as np
-cimport cython
-
-__all__ = ['legendre_transform', 'legendre_roots', 'sht', 'synthesis', 'adjoint_synthesis',
-           'analysis', 'adjoint_analysis', 'healpix_grid', 'triangular_order', 'rectangular_order',
-           'packed_real_order', 'normalized_associated_legendre_table']
-
-
-def legendre_transform(x, bl, out=None):
-    if out is None:
-        out = np.empty_like(x)
-    if out.shape[0] == 0:
-        return out
-    elif x.dtype == np.float64:
-        if bl.dtype != np.float64:
-            bl = bl.astype(np.float64)
-        return _legendre_transform(x, bl, out=out)
-    elif x.dtype == np.float32:
-        if bl.dtype != np.float32:
-            bl = bl.astype(np.float32)
-        return _legendre_transform_s(x, bl, out=out)
-    else:
-        raise ValueError("unsupported dtype")
-
-
-def _legendre_transform(double[::1] x, double[::1] bl, double[::1] out):
-    if out.shape[0] != x.shape[0]:
-        raise ValueError('x and out must have same shape')
-    sharp_legendre_transform(&bl[0], NULL, bl.shape[0] - 1, &x[0], &out[0], x.shape[0])
-    return np.asarray(out)
-
-
-def _legendre_transform_s(float[::1] x, float[::1] bl, float[::1] out):
-    if out.shape[0] != x.shape[0]:
-        raise ValueError('x and out must have same shape')
-    sharp_legendre_transform_s(&bl[0], NULL, bl.shape[0] - 1, &x[0], &out[0], x.shape[0])
-    return np.asarray(out)
-
-
-def legendre_roots(n):
-    x = np.empty(n, np.double)
-    w = np.empty(n, np.double)
-    cdef double[::1] x_buf = x, w_buf = w
-    if not (x_buf.shape[0] == w_buf.shape[0] == n):
-        raise AssertionError()
-    if n > 0:
-        sharp_legendre_roots(n, &x_buf[0], &w_buf[0])
-    return x, w
-
-
-JOBTYPE_TO_CONST = {
-    'Y': SHARP_Y,
-    'Yt': SHARP_Yt,
-    'WY': SHARP_WY,
-    'YtW': SHARP_YtW
-}
-
-def sht(jobtype, geom_info ginfo, alm_info ainfo, double[:, :, ::1] input,
-        int spin=0, comm=None, add=False):
-    cdef void *comm_ptr
-    cdef int flags = SHARP_DP | (SHARP_ADD if add else 0)
-    cdef int r
-    cdef sharp_jobtype jobtype_i
-    cdef double[:, :, ::1] output_buf
-    cdef int ntrans = input.shape[0]
-    cdef int ntotcomp = ntrans * input.shape[1]
-    cdef int i, j
-
-    if spin == 0 and input.shape[1] != 1:
-        raise ValueError('For spin == 0, we need input.shape[1] == 1')
-    elif spin != 0 and input.shape[1] != 2:
-        raise ValueError('For spin != 0, we need input.shape[1] == 2')
-
-
-    cdef size_t[::1] ptrbuf = np.empty(2 * ntotcomp, dtype=np.uintp)
-    cdef double **alm_ptrs = <double**>&ptrbuf[0]
-    cdef double **map_ptrs = <double**>&ptrbuf[ntotcomp]
-
-    try:
-        jobtype_i = JOBTYPE_TO_CONST[jobtype]
-    except KeyError:
-        raise ValueError('jobtype must be one of: %s' % ', '.join(sorted(JOBTYPE_TO_CONST.keys())))
-
-    if jobtype_i == SHARP_Y or jobtype_i == SHARP_WY:
-        output = np.empty((input.shape[0], input.shape[1], ginfo.local_size()), dtype=np.float64)
-        output_buf = output
-        for i in range(input.shape[0]):
-            for j in range(input.shape[1]):
-                alm_ptrs[i * input.shape[1] + j] = &input[i, j, 0]
-                map_ptrs[i * input.shape[1] + j] = &output_buf[i, j, 0]
-    else:
-        output = np.empty((input.shape[0], input.shape[1], ainfo.local_size()), dtype=np.float64)
-        output_buf = output
-        for i in range(input.shape[0]):
-            for j in range(input.shape[1]):
-                alm_ptrs[i * input.shape[1] + j] = &output_buf[i, j, 0]
-                map_ptrs[i * input.shape[1] + j] = &input[i, j, 0]
-
-    if comm is None:
-        with nogil:
-            sharp_execute (
-                jobtype_i,
-                geom_info=ginfo.ginfo, alm_info=ainfo.ainfo,
-                spin=spin, alm=alm_ptrs, map=map_ptrs,
-                ntrans=ntrans, flags=flags, time=NULL, opcnt=NULL)
-    else:
-        from mpi4py import MPI
-        if not isinstance(comm, MPI.Comm):
-            raise TypeError('comm must be an mpi4py communicator')
-        from .libsharp_mpi import _addressof
-        comm_ptr = <void*><size_t>_addressof(comm)
-        with nogil:
-            r = sharp_execute_mpi_maybe (
-                comm_ptr, jobtype_i,
-                geom_info=ginfo.ginfo, alm_info=ainfo.ainfo,
-                spin=spin, alm=alm_ptrs, map=map_ptrs,
-                ntrans=ntrans, flags=flags, time=NULL, opcnt=NULL)
-        if r == SHARP_ERROR_NO_MPI:
-            raise Exception('MPI requested, but not available')
-
-    return output
-
-
-def synthesis(*args, **kw):
-    return sht('Y', *args, **kw)
-
-def adjoint_synthesis(*args, **kw):
-    return sht('Yt', *args, **kw)
-
-def analysis(*args, **kw):
-    return sht('YtW', *args, **kw)
-
-def adjoint_analysis(*args, **kw):
-    return sht('WY', *args, **kw)
-
-
-#
-# geom_info
-#
-class NotInitializedError(Exception):
-    pass
-
-
-cdef class geom_info:
-    cdef sharp_geom_info *ginfo
-
-    def __cinit__(self, *args, **kw):
-        self.ginfo = NULL
-
-    def local_size(self):
-        if self.ginfo == NULL:
-            raise NotInitializedError()
-        return sharp_map_size(self.ginfo)
-
-    def __dealloc__(self):
-        if self.ginfo != NULL:
-            sharp_destroy_geom_info(self.ginfo)
-        self.ginfo = NULL
-
-
-cdef class healpix_grid(geom_info):
-
-    _weight_cache = {}  # { (nside, 'T'/'Q'/'U') -> numpy array of ring weights cached from file }
-
-    def __init__(self, int nside, stride=1, int[::1] rings=None, double[::1] weights=None):
-        if weights is not None and weights.shape[0] != 2 * nside:
-            raise ValueError('weights must have length 2 * nside')
-        sharp_make_subset_healpix_geom_info(nside, stride,
-                                            nrings=4 * nside - 1 if rings is None else rings.shape[0],
-                                            rings=NULL if rings is None else &rings[0],
-                                            weight=NULL if weights is None else &weights[0],
-                                            geom_info=&self.ginfo)
-
-    @classmethod
-    def load_ring_weights(cls, nside, fields):
-        """
-        Loads HEALPix ring weights from file. The environment variable
-        HEALPIX should be set, and this routine will look in the `data`
-        subdirectory.
-
-        Parameters
-        ----------
-
-        nside: int
-            HEALPix nside parameter
-
-        fields: tuple of str
-            Which weights to extract; pass ('T',) to only get scalar
-            weights back, or ('T', 'Q', 'U') to get all the weights
-
-        Returns
-        -------
-
-        List of NumPy arrays, according to fields parameter.
-
-        """
-        import os
-        from astropy.io import fits
-        data_path = os.path.join(os.environ['HEALPIX'], 'data')
-        fits_field_names = {
-            'T': 'TEMPERATURE WEIGHTS',
-            'Q': 'Q-POLARISATION WEIGHTS',
-            'U': 'U-POLARISATION WEIGHTS'}
-
-        must_load = [field for field in fields if (nside, field) not in cls._weight_cache]
-
-        if must_load:
-            hdulist = fits.open(os.path.join(data_path, 'weight_ring_n%05d.fits' % nside))
-            try:
-                for field in must_load:
-                    w = hdulist[1].data.field(fits_field_names[field]).ravel().astype(np.double)
-                    w += 1
-                    cls._weight_cache[nside, field] = w
-            finally:
-                hdulist.close()
-        return [cls._weight_cache[(nside, field)].copy() for field in fields]
-
-#
-# alm_info
-#
-
-
-cdef class alm_info:
-    cdef sharp_alm_info *ainfo
-
-    def __cinit__(self, *args, **kw):
-        self.ainfo = NULL
-
-    def local_size(self):
-        if self.ainfo == NULL:
-            raise NotInitializedError()
-        return sharp_alm_count(self.ainfo)
-
-    def mval(self):
-        if self.ainfo == NULL:
-            raise NotInitializedError()
-        return np.asarray(<int[:self.ainfo.nm]> self.ainfo.mval)
-
-    def mvstart(self):
-        if self.ainfo == NULL:
-            raise NotInitializedError()
-        return np.asarray(<long[:self.ainfo.nm]> self.ainfo.mvstart)
-
-    def __dealloc__(self):
-        if self.ainfo != NULL:
-            sharp_destroy_alm_info(self.ainfo)
-        self.ainfo = NULL
-
-    @cython.boundscheck(False)
-    def almxfl(self, np.ndarray[double, ndim=3, mode='c'] alm, np.ndarray[double, ndim=2, mode='c'] fl):
-        """Multiply Alm by a Ell based array
-
-
-        Parameters
-        ----------
-        alm : np.ndarray
-            input alm, 3 dimensions = (different signal x polarizations x lm-ordering)
-        fl : np.ndarray
-            either 1 dimension, e.g. gaussian beam, or 2 dimensions e.g. a polarized beam
-
-        Returns
-        -------
-        None, it modifies alms in-place
-
-        """
-        cdef int mvstart = 0
-        cdef bint has_multiple_beams = alm.shape[2] > 1 and fl.shape[1] > 1
-        cdef int f, i_m, m, num_ells, i_l, i_signal, i_pol, i_mv
-
-        for i_m in range(self.ainfo.nm):
-            m = self.ainfo.mval[i_m]
-            f = 1 if (m==0) else 2
-            num_ells = self.ainfo.lmax + 1 - m
-
-            if not has_multiple_beams:
-                for i_signal in range(alm.shape[0]):
-                    for i_pol in range(alm.shape[1]):
-                        for i_l in range(num_ells):
-                            l = m + i_l
-                            for i_mv in range(mvstart + f*i_l, mvstart + f*i_l +f):
-                                alm[i_signal, i_pol, i_mv] *= fl[l, 0]
-            else:
-                for i_signal in range(alm.shape[0]):
-                    for i_pol in range(alm.shape[1]):
-                        for i_l in range(num_ells):
-                            l = m + i_l
-                            for i_mv in range(mvstart + f*i_l, mvstart + f*i_l +f):
-                                alm[i_signal, i_pol, i_mv] *= fl[l, i_pol]
-            mvstart += f * num_ells
-
-cdef class triangular_order(alm_info):
-    def __init__(self, int lmax, mmax=None, stride=1):
-        mmax = mmax if mmax is not None else lmax
-        sharp_make_triangular_alm_info(lmax, mmax, stride, &self.ainfo)
-
-
-cdef class rectangular_order(alm_info):
-    def __init__(self, int lmax, mmax=None, stride=1):
-        mmax = mmax if mmax is not None else lmax
-        sharp_make_rectangular_alm_info(lmax, mmax, stride, &self.ainfo)
-
-
-cdef class packed_real_order(alm_info):
-    def __init__(self, int lmax, stride=1, int[::1] ms=None):
-        sharp_make_mmajor_real_packed_alm_info(lmax=lmax, stride=stride,
-                                               nm=lmax + 1 if ms is None else ms.shape[0],
-                                               ms=NULL if ms is None else &ms[0],
-                                               alm_info=&self.ainfo)
-
-#
-# 
-#
-
-@cython.boundscheck(False)
-def normalized_associated_legendre_table(int lmax, int m, theta):
-    cdef double[::1] theta_ = np.ascontiguousarray(theta, dtype=np.double)
-    out = np.zeros((theta_.shape[0], lmax - m + 1), np.double)
-    cdef double[:, ::1] out_ = out
-    if lmax < m:
-        raise ValueError("lmax < m")
-    with nogil:
-        sharp_normalized_associated_legendre_table(m, 0, lmax, theta_.shape[0], &theta_[0], lmax - m + 1, 1, 1, &out_[0,0])
-    return out
diff --git a/python/libsharp/libsharp_mpi.pyx b/python/libsharp/libsharp_mpi.pyx
deleted file mode 100644
index e819a77..0000000
--- a/python/libsharp/libsharp_mpi.pyx
+++ /dev/null
@@ -1,17 +0,0 @@
-cdef extern from "mpi.h":
-    ctypedef void *MPI_Comm
-
-cdef extern from "Python.h":
-    object PyLong_FromVoidPtr(void*)
-
-cdef extern:
-    ctypedef class mpi4py.MPI.Comm [object PyMPICommObject]:
-        cdef MPI_Comm ob_mpi
-        cdef unsigned flags
-
-# For compatibility with mpi4py <= 1.3.1
-# Newer versions could use the MPI._addressof function
-def _addressof(Comm comm):
-    cdef void *ptr = NULL
-    ptr = <void*>&comm.ob_mpi
-    return PyLong_FromVoidPtr(ptr)
diff --git a/python/libsharp/tests/__init__.py b/python/libsharp/tests/__init__.py
deleted file mode 100644
index 1bb8bf6..0000000
--- a/python/libsharp/tests/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# empty
diff --git a/python/libsharp/tests/test_legendre.py b/python/libsharp/tests/test_legendre.py
deleted file mode 100644
index 0129b29..0000000
--- a/python/libsharp/tests/test_legendre.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import numpy as np
-from scipy.special import legendre
-from scipy.special import p_roots
-import libsharp
-
-from numpy.testing import assert_allclose
-
-
-def check_legendre_transform(lmax, ntheta):
-    l = np.arange(lmax + 1)
-    if lmax >= 1:
-        sigma = -np.log(1e-3) / lmax / (lmax + 1)
-        bl = np.exp(-sigma*l*(l+1))
-        bl *= (2 * l + 1)
-    else:
-        bl = np.asarray([1], dtype=np.double)
-
-    theta = np.linspace(0, np.pi, ntheta, endpoint=True)
-    x = np.cos(theta)
-
-    # Compute truth using scipy.special.legendre
-    P = np.zeros((ntheta, lmax + 1))
-    for l in range(lmax + 1):
-        P[:, l] = legendre(l)(x)
-    y0 = np.dot(P, bl)
-
-
-    # double-precision
-    y = libsharp.legendre_transform(x, bl)
-
-    assert_allclose(y, y0, rtol=1e-12, atol=1e-12)
-
-    # single-precision
-    y32 = libsharp.legendre_transform(x.astype(np.float32), bl)
-    assert_allclose(y, y0, rtol=1e-5, atol=1e-5)
-
-
-def test_legendre_transform():
-    nthetas_to_try = [0, 9, 17, 19] + list(np.random.randint(500, size=20))
-    for ntheta in nthetas_to_try:
-        for lmax in [0, 1, 2, 3, 20] + list(np.random.randint(50, size=4)):
-            yield check_legendre_transform, lmax, ntheta
-
-def check_legendre_roots(n):
-    xs, ws = ([], []) if n == 0 else p_roots(n) # from SciPy
-    xl, wl = libsharp.legendre_roots(n)
-    assert_allclose(xs, xl, rtol=1e-14, atol=1e-14)
-    assert_allclose(ws, wl, rtol=1e-14, atol=1e-14)
-
-def test_legendre_roots():
-    """
-    Test the Legendre root-finding algorithm from libsharp by comparing it with
-    the SciPy version.
-    """
-    yield check_legendre_roots, 0
-    yield check_legendre_roots, 1
-    yield check_legendre_roots, 32
-    yield check_legendre_roots, 33
diff --git a/python/libsharp/tests/test_legendre_table.py b/python/libsharp/tests/test_legendre_table.py
deleted file mode 100644
index eb02df2..0000000
--- a/python/libsharp/tests/test_legendre_table.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from __future__ import print_function
-import numpy as np
-
-from numpy.testing import assert_almost_equal
-from nose.tools import eq_, ok_
-
-from libsharp import normalized_associated_legendre_table
-from scipy.special import sph_harm, p_roots
-
-def test_compare_legendre_table_with_scipy():
-    def test(theta, m, lmax):
-        Plm = normalized_associated_legendre_table(lmax, m, theta)
-
-        Plm_p = sph_harm(m, np.arange(m, lmax + 1), 0, theta)[None, :]
-        if not np.allclose(Plm_p, Plm):
-            print(Plm_p)
-            print(Plm)
-        return ok_, np.allclose(Plm_p, Plm)
-
-    yield test(np.pi/2, 0, 10)
-    yield test(np.pi/4, 0, 10)
-    yield test(3 * np.pi/4, 0, 10)
-    yield test(np.pi/4, 1, 4)
-    yield test(np.pi/4, 2, 4)
-    yield test(np.pi/4, 50, 50)
-    yield test(np.pi/2, 49, 50)
-
-
-def test_legendre_table_wrapper_logic():
-    # tests the SSE 2 logic in the high-level wrapper by using an odd number of thetas
-    theta = np.asarray([np.pi/2, np.pi/4, 3 * np.pi / 4])
-    m = 3
-    lmax = 10
-    Plm = normalized_associated_legendre_table(lmax, m, theta)
-    assert np.allclose(Plm[1, :], normalized_associated_legendre_table(lmax, m, np.pi/4)[0, :])
-    assert np.allclose(Plm[2, :], normalized_associated_legendre_table(lmax, m, 3 * np.pi/4)[0, :])
diff --git a/python/libsharp/tests/test_sht.py b/python/libsharp/tests/test_sht.py
deleted file mode 100644
index 63ccf20..0000000
--- a/python/libsharp/tests/test_sht.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import numpy as np
-from numpy.testing import assert_allclose
-import libsharp
-
-from mpi4py import MPI
-
-
-def test_basic():
-    lmax = 10
-    nside = 8
-    rank = MPI.COMM_WORLD.Get_rank()
-    ms = np.arange(rank, lmax + 1, MPI.COMM_WORLD.Get_size(), dtype=np.int32)
-    
-    order = libsharp.packed_real_order(lmax, ms=ms)
-    grid = libsharp.healpix_grid(nside)
-
-    
-    alm = np.zeros(order.local_size())
-    if rank == 0:
-        alm[0] = 1
-    elif rank == 1:
-        alm[0] = 1
-
-
-    map = libsharp.synthesis(grid, order, np.repeat(alm[None, None, :], 3, 0), comm=MPI.COMM_WORLD)
-    assert np.all(map[2, :] == map[1, :]) and np.all(map[1, :] == map[0, :])
-    map = map[0, 0, :]
-    print(rank, "shape", map.shape)
-    print(rank, "mean", map.mean())
-
-if __name__=="__main__":
-    test_basic()
diff --git a/python/libsharp/tests/test_smoothing_noise_pol_mpi.py b/python/libsharp/tests/test_smoothing_noise_pol_mpi.py
deleted file mode 100644
index 2cdff95..0000000
--- a/python/libsharp/tests/test_smoothing_noise_pol_mpi.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# This test needs to be run with:
-
-# mpirun -np X python test_smoothing_noise_pol_mpi.py
-
-from mpi4py import MPI
-
-import numpy as np
-
-import healpy as hp
-
-import libsharp
-
-mpi = True
-rank = MPI.COMM_WORLD.Get_rank()
-
-nside = 256
-npix = hp.nside2npix(nside)
-
-np.random.seed(100)
-input_map = np.random.normal(size=(3, npix))
-fwhm_deg = 10
-lmax = 512
-
-nrings = 4 * nside - 1  # four missing pixels
-
-if rank == 0:
-    print("total rings", nrings)
-
-n_mpi_processes = MPI.COMM_WORLD.Get_size()
-rings_per_process = nrings // n_mpi_processes + 1
-# ring indices are 1-based
-
-ring_indices_emisphere = np.arange(2*nside, dtype=np.int32) + 1
-local_ring_indices = ring_indices_emisphere[rank::n_mpi_processes]
-
-# to improve performance, simmetric rings north/south need to be in the same rank
-# therefore we use symmetry to create the full ring indexing
-
-if local_ring_indices[-1] == 2 * nside:
-    # has equator ring
-    local_ring_indices = np.concatenate(
-      [local_ring_indices[:-1],
-       nrings - local_ring_indices[::-1] + 1]
-    )
-else:
-    # does not have equator ring
-    local_ring_indices = np.concatenate(
-      [local_ring_indices,
-       nrings - local_ring_indices[::-1] + 1]
-    )
-
-print("rank", rank, "n_rings", len(local_ring_indices))
-
-if not mpi:
-    local_ring_indices = None
-grid = libsharp.healpix_grid(nside, rings=local_ring_indices)
-
-# returns start index of the ring and number of pixels
-startpix, ringpix, _, _, _ = hp.ringinfo(nside, local_ring_indices.astype(np.int64))
-
-local_npix = grid.local_size()
-
-def expand_pix(startpix, ringpix, local_npix):
-    """Turn first pixel index and number of pixel in full array of pixels
-
-    to be optimized with cython or numba
-    """
-    local_pix = np.empty(local_npix, dtype=np.int64)
-    i = 0
-    for start, num in zip(startpix, ringpix):
-        local_pix[i:i+num] = np.arange(start, start+num)
-        i += num
-    return local_pix
-
-local_pix = expand_pix(startpix, ringpix, local_npix)
-
-local_map = input_map[:, local_pix]
-
-local_hitmap = np.zeros(npix)
-local_hitmap[local_pix] = 1
-hp.write_map("hitmap_{}.fits".format(rank), local_hitmap, overwrite=True)
-
-print("rank", rank, "npix", npix, "local_npix", local_npix, "local_map len", len(local_map), "unique pix", len(np.unique(local_pix)))
-
-local_m_indices = np.arange(rank, lmax + 1, MPI.COMM_WORLD.Get_size(), dtype=np.int32)
-if not mpi:
-    local_m_indices = None
-
-order = libsharp.packed_real_order(lmax, ms=local_m_indices) 
-local_nl = order.local_size()
-print("rank", rank, "local_nl", local_nl, "mval", order.mval())
-
-mpi_comm = MPI.COMM_WORLD if mpi else None
-
-# map2alm
-# maps in libsharp are 3D, 2nd dimension is IQU, 3rd is pixel
-
-alm_sharp_I = libsharp.analysis(grid, order,
-                                np.ascontiguousarray(local_map[0].reshape((1, 1, -1))),
-                                spin=0, comm=mpi_comm)
-alm_sharp_P = libsharp.analysis(grid, order,
-                                np.ascontiguousarray(local_map[1:].reshape((1, 2, -1))),
-                                spin=2, comm=mpi_comm)
-
-beam = hp.gauss_beam(fwhm=np.radians(fwhm_deg), lmax=lmax, pol=True)
-
-print("Smooth")
-# smooth in place (zonca implemented this function)
-order.almxfl(alm_sharp_I, np.ascontiguousarray(beam[:, 0:1]))
-order.almxfl(alm_sharp_P, np.ascontiguousarray(beam[:, (1, 2)]))
-
-# alm2map
-
-new_local_map_I = libsharp.synthesis(grid, order, alm_sharp_I, spin=0, comm=mpi_comm)
-new_local_map_P = libsharp.synthesis(grid, order, alm_sharp_P, spin=2, comm=mpi_comm)
-
-# Transfer map to first process for writing
-
-local_full_map = np.zeros(input_map.shape, dtype=np.float64)
-local_full_map[0, local_pix] = new_local_map_I
-local_full_map[1:, local_pix] = new_local_map_P
-
-output_map = np.zeros(input_map.shape, dtype=np.float64) if rank == 0 else None
-mpi_comm.Reduce(local_full_map, output_map, root=0, op=MPI.SUM)
-
-if rank == 0:
-    # hp.write_map("sharp_smoothed_map.fits", output_map, overwrite=True)
-    # hp_smoothed = hp.alm2map(hp.map2alm(input_map, lmax=lmax), nside=nside) # transform only
-    hp_smoothed = hp.smoothing(input_map, fwhm=np.radians(fwhm_deg), lmax=lmax)
-    std_diff = (hp_smoothed-output_map).std()
-    print("Std of difference between libsharp and healpy", std_diff)
-    # hp.write_map(
-    #     "healpy_smoothed_map.fits",
-    #     hp_smoothed,
-    #     overwrite=True
-    # )
-    assert std_diff < 1e-5
diff --git a/python/setup.py b/python/setup.py
deleted file mode 100644
index 788d7a6..0000000
--- a/python/setup.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#! /usr/bin/env python
-
-descr   = """Spherical Harmionic transforms package
-
-Python API for the libsharp spherical harmonic transforms library
-"""
-
-import os
-import sys
-
-DISTNAME            = 'libsharp'
-DESCRIPTION         = 'libsharp library for fast Spherical Harmonic Transforms'
-LONG_DESCRIPTION    = descr
-MAINTAINER          = 'Dag Sverre Seljebotn',
-MAINTAINER_EMAIL    = 'd.s.seljebotn@astro.uio.no',
-URL                 = 'http://sourceforge.net/projects/libsharp/'
-LICENSE             = 'GPL'
-DOWNLOAD_URL        = "http://sourceforge.net/projects/libsharp/"
-VERSION             = '0.1'
-
-# Add our fake Pyrex at the end of the Python search path
-# in order to fool setuptools into allowing compilation of
-# pyx files to C files. Importing Cython.Distutils then
-# makes Cython the tool of choice for this rather than
-# (the possibly nonexisting) Pyrex.
-project_path = os.path.split(__file__)[0]
-sys.path.append(os.path.join(project_path, 'fake_pyrex'))
-
-from setuptools import setup, find_packages, Extension
-from Cython.Build import cythonize
-import numpy as np
-
-libsharp = os.environ.get('LIBSHARP', None)
-libsharp_include = os.environ.get('LIBSHARP_INCLUDE', libsharp and os.path.join(libsharp, 'include'))
-libsharp_lib = os.environ.get('LIBSHARP_LIB', libsharp and os.path.join(libsharp, 'lib'))
-
-if libsharp_include is None or libsharp_lib is None:
-    sys.stderr.write('Please set LIBSHARP environment variable to the install directly of libsharp, '
-                     'this script will refer to the lib and include sub-directories. Alternatively '
-                     'set LIBSHARP_INCLUDE and LIBSHARP_LIB\n')
-    sys.exit(1)
-
-if __name__ == "__main__":
-    setup(install_requires = ['numpy'],
-          packages = find_packages(),
-          test_suite="nose.collector",
-          # Well, technically zipping the package will work, but since it's
-          # all compiled code it'll just get unzipped again at runtime, which
-          # is pointless:
-          zip_safe = False,
-          name = DISTNAME,
-          version = VERSION,
-          maintainer = MAINTAINER,
-          maintainer_email = MAINTAINER_EMAIL,
-          description = DESCRIPTION,
-          license = LICENSE,
-          url = URL,
-          download_url = DOWNLOAD_URL,
-          long_description = LONG_DESCRIPTION,
-          classifiers =
-            [ 'Development Status :: 3 - Alpha',
-              'Environment :: Console',
-              'Intended Audience :: Developers',
-              'Intended Audience :: Science/Research',
-              'License :: OSI Approved :: GNU General Public License (GPL)',
-              'Topic :: Scientific/Engineering'],
-          ext_modules = cythonize([
-              Extension("libsharp.libsharp",
-                        ["libsharp/libsharp.pyx"],
-                        libraries=["sharp", "fftpack", "c_utils"],
-                        include_dirs=[libsharp_include, np.get_include()],
-                        library_dirs=[libsharp_lib],
-                        extra_link_args=["-fopenmp"],
-              ),
-              Extension("libsharp.libsharp_mpi",
-                        ["libsharp/libsharp_mpi.pyx"],
-                        libraries=["sharp", "fftpack", "c_utils"],
-                        include_dirs=[libsharp_include, np.get_include()],
-                        library_dirs=[libsharp_lib],
-                        extra_link_args=["-fopenmp"],
-              ),
-              ]),
-          )
diff --git a/runjinja.py b/runjinja.py
deleted file mode 100755
index fb06737..0000000
--- a/runjinja.py
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Preprocesses foo.c.in to foo.c. Reads STDIN and writes STDOUT.
-"""
-
-import sys
-import hashlib
-from jinja2 import Template, Environment
-
-env = Environment(block_start_string='/*{',
-                  block_end_string='}*/',
-                  variable_start_string='{{',
-                  variable_end_string='}}')
-
-extra_vars = dict(len=len)
-input = sys.stdin.read()
-sys.stdout.write('/* DO NOT EDIT. md5sum of source: %s */' % hashlib.md5(input.encode()).hexdigest())
-sys.stdout.write(env.from_string(input).render(**extra_vars))