Changed building procedure for sharp to avoid the compiler conflict

2017-01-16 12:07:51 +01:00 · 2017-01-16 12:07:51 +01:00 · eedea45bc1
commit eedea45bc1
parent 8b2f0f7557
79 changed files with 35 additions and 16346 deletions
--- a/external/external_build.cmake
+++ b/external/external_build.cmake
@ -31,12 +31,11 @@ SET(CONFIGURE_CPP_FLAGS "")
 SET(CONFIGURE_LDFLAGS "")
 if (ENABLE_SHARP)
-  SET(SHARP_SOURCE ${CMAKE_SOURCE_DIR}/external/sharp)
+  SET(DEP_BUILD ${CMAKE_SOURCE_DIR}/sharp-prefix/src/sharp/libsharp/auto)
  SET(DEP_BUILD ${CMAKE_SOURCE_DIR}/external/sharp/auto)
  ExternalProject_Add(sharp
-    SOURCE_DIR ${SHARP_SOURCE}
+    URL ${CMAKE_SOURCE_DIR}/external/libsharp-6077806.tar.gz
    BUILD_IN_SOURCE 1 
-    CONFIGURE_COMMAND ${SHARP_SOURCE}/configure "CC=${CMAKE_C_COMPILER}" "CXX=${CMAKE_CXX_COMPILER}" --prefix=${DEP_BUILD}
+    CONFIGURE_COMMAND autoconf && ./configure "CC=${CMAKE_C_COMPILER}" "CXX=${CMAKE_CXX_COMPILER}" --prefix=${DEP_BUILD}
    BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
    INSTALL_COMMAND echo "No install"
  )
--- a/external/libsharp-6077806.tar.gz
+++ b/external/libsharp-6077806.tar.gz
--- a/external/sharp/COPYING
+++ b/external/sharp/COPYING
@ -1,339 +0,0 @@
                    GNU GENERAL PUBLIC LICENSE
                       Version 2, June 1991
 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.
                            Preamble
  The licenses for most software are designed to take away your
 freedom to share and change it.  By contrast, the GNU General Public
 License is intended to guarantee your freedom to share and change free
 software--to make sure the software is free for all its users.  This
 General Public License applies to most of the Free Software
 Foundation's software and to any other program whose authors commit to
 using it.  (Some other Free Software Foundation software is covered by
 the GNU Lesser General Public License instead.)  You can apply it to
 your programs, too.
  When we speak of free software, we are referring to freedom, not
 price.  Our General Public Licenses are designed to make sure that you
 have the freedom to distribute copies of free software (and charge for
 this service if you wish), that you receive source code or can get it
 if you want it, that you can change the software or use pieces of it
 in new free programs; and that you know you can do these things.
  To protect your rights, we need to make restrictions that forbid
 anyone to deny you these rights or to ask you to surrender the rights.
 These restrictions translate to certain responsibilities for you if you
 distribute copies of the software, or if you modify it.
  For example, if you distribute copies of such a program, whether
 gratis or for a fee, you must give the recipients all the rights that
 you have.  You must make sure that they, too, receive or can get the
 source code.  And you must show them these terms so they know their
 rights.
  We protect your rights with two steps: (1) copyright the software, and
 (2) offer you this license which gives you legal permission to copy,
 distribute and/or modify the software.
  Also, for each author's protection and ours, we want to make certain
 that everyone understands that there is no warranty for this free
 software.  If the software is modified by someone else and passed on, we
 want its recipients to know that what they have is not the original, so
 that any problems introduced by others will not reflect on the original
 authors' reputations.
  Finally, any free program is threatened constantly by software
 patents.  We wish to avoid the danger that redistributors of a free
 program will individually obtain patent licenses, in effect making the
 program proprietary.  To prevent this, we have made it clear that any
 patent must be licensed for everyone's free use or not licensed at all.
  The precise terms and conditions for copying, distribution and
 modification follow.
                    GNU GENERAL PUBLIC LICENSE
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
  0. This License applies to any program or other work which contains
 a notice placed by the copyright holder saying it may be distributed
 under the terms of this General Public License.  The "Program", below,
 refers to any such program or work, and a "work based on the Program"
 means either the Program or any derivative work under copyright law:
 that is to say, a work containing the Program or a portion of it,
 either verbatim or with modifications and/or translated into another
 language.  (Hereinafter, translation is included without limitation in
 the term "modification".)  Each licensee is addressed as "you".
 Activities other than copying, distribution and modification are not
 covered by this License; they are outside its scope.  The act of
 running the Program is not restricted, and the output from the Program
 is covered only if its contents constitute a work based on the
 Program (independent of having been made by running the Program).
 Whether that is true depends on what the Program does.
  1. You may copy and distribute verbatim copies of the Program's
 source code as you receive it, in any medium, provided that you
 conspicuously and appropriately publish on each copy an appropriate
 copyright notice and disclaimer of warranty; keep intact all the
 notices that refer to this License and to the absence of any warranty;
 and give any other recipients of the Program a copy of this License
 along with the Program.
 You may charge a fee for the physical act of transferring a copy, and
 you may at your option offer warranty protection in exchange for a fee.
  2. You may modify your copy or copies of the Program or any portion
 of it, thus forming a work based on the Program, and copy and
 distribute such modifications or work under the terms of Section 1
 above, provided that you also meet all of these conditions:
    a) You must cause the modified files to carry prominent notices
    stating that you changed the files and the date of any change.
    b) You must cause any work that you distribute or publish, that in
    whole or in part contains or is derived from the Program or any
    part thereof, to be licensed as a whole at no charge to all third
    parties under the terms of this License.
    c) If the modified program normally reads commands interactively
    when run, you must cause it, when started running for such
    interactive use in the most ordinary way, to print or display an
    announcement including an appropriate copyright notice and a
    notice that there is no warranty (or else, saying that you provide
    a warranty) and that users may redistribute the program under
    these conditions, and telling the user how to view a copy of this
    License.  (Exception: if the Program itself is interactive but
    does not normally print such an announcement, your work based on
    the Program is not required to print an announcement.)
 These requirements apply to the modified work as a whole.  If
 identifiable sections of that work are not derived from the Program,
 and can be reasonably considered independent and separate works in
 themselves, then this License, and its terms, do not apply to those
 sections when you distribute them as separate works.  But when you
 distribute the same sections as part of a whole which is a work based
 on the Program, the distribution of the whole must be on the terms of
 this License, whose permissions for other licensees extend to the
 entire whole, and thus to each and every part regardless of who wrote it.
 Thus, it is not the intent of this section to claim rights or contest
 your rights to work written entirely by you; rather, the intent is to
 exercise the right to control the distribution of derivative or
 collective works based on the Program.
 In addition, mere aggregation of another work not based on the Program
 with the Program (or with a work based on the Program) on a volume of
 a storage or distribution medium does not bring the other work under
 the scope of this License.
  3. You may copy and distribute the Program (or a work based on it,
 under Section 2) in object code or executable form under the terms of
 Sections 1 and 2 above provided that you also do one of the following:
    a) Accompany it with the complete corresponding machine-readable
    source code, which must be distributed under the terms of Sections
    1 and 2 above on a medium customarily used for software interchange; or,
    b) Accompany it with a written offer, valid for at least three
    years, to give any third party, for a charge no more than your
    cost of physically performing source distribution, a complete
    machine-readable copy of the corresponding source code, to be
    distributed under the terms of Sections 1 and 2 above on a medium
    customarily used for software interchange; or,
    c) Accompany it with the information you received as to the offer
    to distribute corresponding source code.  (This alternative is
    allowed only for noncommercial distribution and only if you
    received the program in object code or executable form with such
    an offer, in accord with Subsection b above.)
 The source code for a work means the preferred form of the work for
 making modifications to it.  For an executable work, complete source
 code means all the source code for all modules it contains, plus any
 associated interface definition files, plus the scripts used to
 control compilation and installation of the executable.  However, as a
 special exception, the source code distributed need not include
 anything that is normally distributed (in either source or binary
 form) with the major components (compiler, kernel, and so on) of the
 operating system on which the executable runs, unless that component
 itself accompanies the executable.
 If distribution of executable or object code is made by offering
 access to copy from a designated place, then offering equivalent
 access to copy the source code from the same place counts as
 distribution of the source code, even though third parties are not
 compelled to copy the source along with the object code.
  4. You may not copy, modify, sublicense, or distribute the Program
 except as expressly provided under this License.  Any attempt
 otherwise to copy, modify, sublicense or distribute the Program is
 void, and will automatically terminate your rights under this License.
 However, parties who have received copies, or rights, from you under
 this License will not have their licenses terminated so long as such
 parties remain in full compliance.
  5. You are not required to accept this License, since you have not
 signed it.  However, nothing else grants you permission to modify or
 distribute the Program or its derivative works.  These actions are
 prohibited by law if you do not accept this License.  Therefore, by
 modifying or distributing the Program (or any work based on the
 Program), you indicate your acceptance of this License to do so, and
 all its terms and conditions for copying, distributing or modifying
 the Program or works based on it.
  6. Each time you redistribute the Program (or any work based on the
 Program), the recipient automatically receives a license from the
 original licensor to copy, distribute or modify the Program subject to
 these terms and conditions.  You may not impose any further
 restrictions on the recipients' exercise of the rights granted herein.
 You are not responsible for enforcing compliance by third parties to
 this License.
  7. If, as a consequence of a court judgment or allegation of patent
 infringement or for any other reason (not limited to patent issues),
 conditions are imposed on you (whether by court order, agreement or
 otherwise) that contradict the conditions of this License, they do not
 excuse you from the conditions of this License.  If you cannot
 distribute so as to satisfy simultaneously your obligations under this
 License and any other pertinent obligations, then as a consequence you
 may not distribute the Program at all.  For example, if a patent
 license would not permit royalty-free redistribution of the Program by
 all those who receive copies directly or indirectly through you, then
 the only way you could satisfy both it and this License would be to
 refrain entirely from distribution of the Program.
 If any portion of this section is held invalid or unenforceable under
 any particular circumstance, the balance of the section is intended to
 apply and the section as a whole is intended to apply in other
 circumstances.
 It is not the purpose of this section to induce you to infringe any
 patents or other property right claims or to contest validity of any
 such claims; this section has the sole purpose of protecting the
 integrity of the free software distribution system, which is
 implemented by public license practices.  Many people have made
 generous contributions to the wide range of software distributed
 through that system in reliance on consistent application of that
 system; it is up to the author/donor to decide if he or she is willing
 to distribute software through any other system and a licensee cannot
 impose that choice.
 This section is intended to make thoroughly clear what is believed to
 be a consequence of the rest of this License.
  8. If the distribution and/or use of the Program is restricted in
 certain countries either by patents or by copyrighted interfaces, the
 original copyright holder who places the Program under this License
 may add an explicit geographical distribution limitation excluding
 those countries, so that distribution is permitted only in or among
 countries not thus excluded.  In such case, this License incorporates
 the limitation as if written in the body of this License.
  9. The Free Software Foundation may publish revised and/or new versions
 of the General Public License from time to time.  Such new versions will
 be similar in spirit to the present version, but may differ in detail to
 address new problems or concerns.
 Each version is given a distinguishing version number.  If the Program
 specifies a version number of this License which applies to it and "any
 later version", you have the option of following the terms and conditions
 either of that version or of any later version published by the Free
 Software Foundation.  If the Program does not specify a version number of
 this License, you may choose any version ever published by the Free Software
 Foundation.
  10. If you wish to incorporate parts of the Program into other free
 programs whose distribution conditions are different, write to the author
 to ask for permission.  For software which is copyrighted by the Free
 Software Foundation, write to the Free Software Foundation; we sometimes
 make exceptions for this.  Our decision will be guided by the two goals
 of preserving the free status of all derivatives of our free software and
 of promoting the sharing and reuse of software generally.
                            NO WARRANTY
  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
 FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
 OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
 PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
 OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
 TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
 PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
 REPAIR OR CORRECTION.
  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
 WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
 REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
 INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
 OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
 TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
 YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
 PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGES.
                     END OF TERMS AND CONDITIONS
            How to Apply These Terms to Your New Programs
  If you develop a new program, and you want it to be of the greatest
 possible use to the public, the best way to achieve this is to make it
 free software which everyone can redistribute and change under these terms.
  To do so, attach the following notices to the program.  It is safest
 to attach them to the start of each source file to most effectively
 convey the exclusion of warranty; and each file should have at least
 the "copyright" line and a pointer to where the full notice is found.
    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 Also add information on how to contact you by electronic and paper mail.
 If the program is interactive, make it output a short notice like this
 when it starts in an interactive mode:
    Gnomovision version 69, Copyright (C) year name of author
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.
 The hypothetical commands `show w' and `show c' should show the appropriate
 parts of the General Public License.  Of course, the commands you use may
 be called something other than `show w' and `show c'; they could even be
 mouse-clicks or menu items--whatever suits your program.
 You should also get your employer (if you work as a programmer) or your
 school, if any, to sign a "copyright disclaimer" for the program, if
 necessary.  Here is a sample; alter the names:
  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
  `Gnomovision' (which makes passes at compilers) written by James Hacker.
  <signature of Ty Coon>, 1 April 1989
  Ty Coon, President of Vice
 This General Public License does not permit incorporating your program into
 proprietary programs.  If your program is a subroutine library, you may
 consider it more useful to permit linking proprietary applications with the
 library.  If this is what you want to do, use the GNU Lesser General
 Public License instead of this License.
--- a/external/sharp/Makefile
+++ b/external/sharp/Makefile
@ -1,78 +0,0 @@
 # Name of the build configuration; selects config/config.$(SHARP_TARGET)
 # below. Defaults to "auto" when not supplied by the caller.
 SHARP_TARGET?=auto
 # NOTE(review): with the ?= default above, this guard can only fire when
 # SHARP_TARGET was explicitly set to an empty value -- confirm it is still
 # wanted.
 ifndef SHARP_TARGET
  SHARP_TARGET:=$(error SHARP_TARGET undefined. Please see README.compilation for help)UNDEFINED
 endif
 # First rule in the file, hence what a bare "make" builds.
 default: compile_all
 # Directory make was invoked from; used as the root for the config includes.
 SRCROOT:=$(shell pwd)
 include $(SRCROOT)/config/config.$(SHARP_TARGET)
 include $(SRCROOT)/config/rules.common
 # Start these lists empty before the per-directory fragments are included;
 # presumably each planck.make appends to them -- verify in those fragments.
 all_hdr:=
 all_lib:=
 all_cbin:=
 FULL_INCLUDE:=
 include c_utils/planck.make
 include libfftpack/planck.make
 include libsharp/planck.make
 include docsrc/planck.make
 # Python extension modules; the MPI variant is listed only when MPI_CFLAGS
 # is non-empty.
 CYTHON_MODULES=python/libsharp/libsharp.so $(if $(MPI_CFLAGS), python/libsharp/libsharp_mpi.so)
 # Static pattern rule: every entry of all_lib is created from its
 # prerequisites with $(ARCREATE). $(LIBDIR)_mkdir is an order-only
 # prerequisite (it follows the "|").
 $(all_lib): %: | $(LIBDIR)_mkdir
 	@echo "#  creating library $*"
 	$(ARCREATE) $@ $^
 # Static pattern rule: every entry of all_cbin is linked from its
 # prerequisites with $(CL); $(BINDIR)_mkdir is order-only.
 $(all_cbin): %: | $(BINDIR)_mkdir
 	@echo "#  linking C binary $*"
 	$(CL) -o $@ $^ $(CLFLAGS)
 # Main build target: all C binaries plus the copied headers.
 compile_all: $(all_cbin) hdrcopy
 # Remove everything below $(INCDIR), if that directory exists.
 hdrclean:
 	@if [ -d $(INCDIR) ]; then rm -rf $(INCDIR)/* ; fi
 # Copy the headers listed in all_hdr into $(INCDIR), preserving timestamps
 # and modes (cp -p); does nothing when the list is empty.
 hdrcopy: | $(INCDIR)_mkdir
 	@if [ "$(all_hdr)" ]; then cp -p $(all_hdr) $(INCDIR); fi
 # Lets each binary be requested by its bare file name, e.g.
 # "make sharp_testsuite", by making that name depend on $(BINDIR)/<name>.
 $(notdir $(all_cbin)) : % : $(BINDIR)/%
 # Run sharp_testsuite's "acctest" mode followed by three "test" invocations
 # (healpix, fejer1, gauss). The commands are chained with &&, so the first
 # failing run aborts the target.
 test: compile_all
 	$(BINDIR)/sharp_testsuite acctest && \
 	$(BINDIR)/sharp_testsuite test healpix 2048 -1 1024 -1 0 1 && \
 	$(BINDIR)/sharp_testsuite test fejer1 2047 -1 -1 4096 2 1 && \
 	$(BINDIR)/sharp_testsuite test gauss 2047 -1 -1 4096 0 2
 # Run one healpix case and then a series of gauss cases whose first and
 # fourth numeric arguments roughly double from run to run (63/128 up to
 # 8191/16384). Also chained with &&.
 perftest: compile_all
 	$(BINDIR)/sharp_testsuite test healpix 2048 -1 1024 -1 0 1 && \
 	$(BINDIR)/sharp_testsuite test gauss 63 -1 -1 128 0 1 && \
 	$(BINDIR)/sharp_testsuite test gauss 127 -1 -1 256 0 1 && \
 	$(BINDIR)/sharp_testsuite test gauss 255 -1 -1 512 0 1 && \
 	$(BINDIR)/sharp_testsuite test gauss 511 -1 -1 1024 0 1 && \
 	$(BINDIR)/sharp_testsuite test gauss 1023 -1 -1 2048 0 1 && \
 	$(BINDIR)/sharp_testsuite test gauss 2047 -1 -1 4096 0 1 && \
 	$(BINDIR)/sharp_testsuite test gauss 4095 -1 -1 8192 0 1 && \
 	$(BINDIR)/sharp_testsuite test gauss 8191 -1 -1 16384 0 1
 # Generate a .c file from its .c.in template with ./runjinja.py. The grep
 # looks for the template's md5sum inside the existing output file and skips
 # regeneration when it is found.
 %.c: %.c.in
 # Only do this if the md5sum changed, in order to avoid Python and Jinja
 # dependency when not modifying the c.in file
 	grep `md5sum $< | cut -d ' ' -f 1` $@ || ./runjinja.py < $< > $@
 # Delete the generated libsharp/sharp_legendre.c; "|| exit 0" keeps the
 # target successful when the file does not exist.
 genclean:
 	rm libsharp/sharp_legendre.c || exit 0
 # Build each Cython extension: translate the .pyx with cython, compile the
 # resulting .c file, then link a shared object against sharp, fftpack and
 # c_utils. The $(error ...) line aborts the recipe when PIC_CFLAGS is unset.
 $(CYTHON_MODULES): %.so: %.pyx
 ifndef PIC_CFLAGS
 	$(error Python extension must be built using the --enable-pic configure option.)
 endif
 	cython $<
 	$(CC) $(DEBUG_CFLAGS) $(OPENMP_CFLAGS) $(PIC_CFLAGS) `python-config --cflags` -I$(INCDIR) -o $(<:.pyx=.o) -c $(<:.pyx=.c)
 	$(CL) -shared $(<:.pyx=.o) $(OPENMP_CFLAGS) $(CYTHON_OBJ) -L$(LIBDIR) -lsharp -lfftpack -lc_utils -L`python-config --prefix`/lib `python-config --ldflags` -o $@
 # Libraries, copied headers and all Cython extension modules.
 python: $(all_lib) hdrcopy $(CYTHON_MODULES)
 # Run the Python test file with nosetests from inside the python/ directory.
 pytest: python
 	cd python && nosetests --nocapture libsharp/tests/test_sht.py
--- a/external/sharp/README.md
+++ b/external/sharp/README.md
@ -1,43 +0,0 @@
 # Libsharp
 *IMPORTANT NOTE*: It appears that the default branch upon cloning from
 github.com/dagss/libsharp was an outdated 'dagss' branch instead of
 the 'master' branch. To get the latest copy,
 please do `git checkout master; git pull`. New clones are no longer affected.
 ## Paper
 https://arxiv.org/abs/1303.4945
 ## Compilation
 GNU make is required for compilation.
 Libsharp compilation has been successfully tested with GNU and Intel compilers.
 When using gcc, version 4.x is required [1].
 Since libsharp was written in standard C99, other compilers should work fine,
 but SSE2/AVX support will most likely be deactivated.
 If you obtained libsharp directly from the git repository, you will also
 need a copy of the GNU autotools. In this case, run "autoconf" in libsharp's
 main directory before any other steps.
 For libsharp releases distributed as a .tar.gz file, this step is not necessary.
 Afterwards, simply run "./configure"; if this fails, please refer to the output
 of "./configure --help" for additional hints and, if necessary, provide
 additional flags to the configure script.
 Once the script finishes successfully, run "make"
 (or "gmake"). This should install the compilation products in the
 subdirectory "auto/".
 Documentation can be created by the command "(g)make doc".
 However this requires the doxygen application to be installed
 on your system.
 The documentation will be created in the subdirectory doc/.
 [1] Some versions of the gcc 4.4.x release series contain a bug which causes
 the compiler to crash during libsharp compilation. This appears to be fixed
 in the gcc 4.4.7 release. It is possible to work around this problem by adding
 the compiler flag "-fno-tree-fre" after the other optimization flags - the
 configure script should do this automatically.
--- a/external/sharp/c_utils/c_utils.c
+++ b/external/sharp/c_utils/c_utils.c
@ -1,81 +0,0 @@
 /*
 *  This file is part of libc_utils.
 *
 *  libc_utils is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libc_utils is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libc_utils; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*
 *  Convenience functions
 *
 *  Copyright (C) 2008, 2009, 2010, 2011, 2012 Max-Planck-Society
 *  Author: Martin Reinecke
 */
 #include <stdio.h>
 #include "c_utils.h"
 /* Report a fatal error on stderr -- "<file>, <line> (<func>):" followed by
    the message on its own line -- and terminate the process with exit
    status 1. This function does not return. */
 void util_fail_ (const char *file, int line, const char *func, const char *msg)
 {
   fprintf(stderr,
           "%s, %i (%s):\n%s\n",
           file, line, func, msg);
   exit(1);
 }
 /* Print a warning on stderr in the same layout as util_fail_():
    "<file>, <line> (<func>):" followed by the message on its own line.
    Execution continues afterwards. */
 void util_warn_ (const char *file, int line, const char *func, const char *msg)
 {
   fprintf(stderr,
           "%s, %i (%s):\n%s\n",
           file, line, func, msg);
 }
 /* Adjusts a requested allocation size so that it does not end up close to a
    multiple of a high power of two (the "critical stride", 4096 bytes here),
    adding a few more bytes if necessary. This lowers the probability that two
    arrays differ by a multiple of the critical stride in their starting
    address, which in turn lowers the risk of cache line contention.

    Requests smaller than half the critical stride are returned unchanged.
    Otherwise, if the size plus a fixed 32-byte overhead (presumably the
    allocator's bookkeeping -- confirm) lies within two 64-byte cache lines
    above a multiple of the stride, two cache lines are added. */
 static size_t manipsize(size_t sz)
 {
   const size_t stride = 4096;
   const size_t line_bytes = 64;
   const size_t extra = 32;
   size_t adjusted = sz;

   if ((sz >= stride/2) && (((sz+extra)%stride) <= (2*line_bytes)))
     adjusted += 2*line_bytes;
   return adjusted;
 }
 #ifdef __SSE__
 #include <xmmintrin.h>
 #endif
 /* Allocate sz bytes (after adjustment by manipsize()).
    A request for 0 bytes yields NULL. When __SSE__ is defined the block comes
    from _mm_malloc() with 16-byte alignment, otherwise from malloc().
    Allocation failure is reported through UTIL_ASSERT. */
 void *util_malloc_ (size_t sz)
 {
   void *block;

   if (sz==0)
     return NULL;
 #ifdef __SSE__
   block = _mm_malloc(manipsize(sz),16);
   UTIL_ASSERT(block,"_mm_malloc() failed");
 #else
   block = malloc(manipsize(sz));
   UTIL_ASSERT(block,"malloc() failed");
 #endif
   return block;
 }
 /* Release a block obtained from util_malloc_(); NULL is ignored. Uses the
    deallocator matching the allocator chosen above. */
 void util_free_ (void *ptr)
 {
   if (ptr==NULL)
     return;
 #ifdef __SSE__
   _mm_free(ptr);
 #else
   free(ptr);
 #endif
 }
--- a/external/sharp/c_utils/c_utils.h
+++ b/external/sharp/c_utils/c_utils.h
@ -1,147 +0,0 @@
 /*
 *  This file is part of libc_utils.
 *
 *  libc_utils is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libc_utils is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libc_utils; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file c_utils.h
 *  Convenience functions
 *
 *  Copyright (C) 2008, 2009, 2010, 2011 Max-Planck-Society
 *  \author Martin Reinecke
 *  \note This file should only be included from .c files, NOT from .h files.
 */
 #ifndef PLANCK_C_UTILS_H
 #define PLANCK_C_UTILS_H
 #include <math.h>
 #include <stdlib.h>
 #include <stddef.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*! Prints \a file, \a line, \a func and \a msg to stderr and terminates the
     program with exit status 1. Normally invoked through UTIL_ASSERT or
     UTIL_FAIL. */
 void util_fail_ (const char *file, int line, const char *func, const char *msg);
 /*! Prints \a file, \a line, \a func and \a msg to stderr and returns.
     Normally invoked through UTIL_WARN. */
 void util_warn_ (const char *file, int line, const char *func, const char *msg);
 /*! Allocates at least \a sz bytes. Returns NULL when \a sz is 0; an
     allocation failure terminates the program. The result must be released
     with util_free_(). */
 void *util_malloc_ (size_t sz);
 /*! Releases memory obtained from util_malloc_(). Passing NULL is allowed
     and does nothing. */
 void util_free_ (void *ptr);
 /* Name of the enclosing function, used in diagnostics. __func__ is
    guaranteed by C99 and by C++11, and is also provided by GNU-compatible
    compilers in older language modes; any other compiler falls back to a
    fixed placeholder string. */
 #if defined (__GNUC__) || \
     (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
     (defined (__cplusplus) && (__cplusplus >= 201103L))
 #define UTIL_FUNC_NAME__ __func__
 #else
 #define UTIL_FUNC_NAME__ "unknown"
 #endif
 /*! \def UTIL_ASSERT(cond,msg)
    If \a cond is false, print an error message containing function name,
    source file name and line number of the call, as well as \a msg;
    then exit the program with an error status.
    The expansion is wrapped in do { } while(0) so that it is a single
    statement and cannot capture a following \c else. */
 #define UTIL_ASSERT(cond,msg) \
  do { \
    if(!(cond)) util_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg); \
    } while(0)
 /*! \def UTIL_WARN(cond,msg)
    If \a cond is false, print a warning containing function name,
    source file name and line number of the call, as well as \a msg.
    Execution continues afterwards. Wrapped in do { } while(0) for the same
    reason as UTIL_ASSERT. */
 #define UTIL_WARN(cond,msg) \
  do { \
    if(!(cond)) util_warn_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg); \
    } while(0)
 /*! \def UTIL_FAIL(msg)
    Print an error message containing function name,
    source file name and line number of the call, as well as \a msg;
    then exit the program with an error status. */
 #define UTIL_FAIL(msg) \
  util_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg)
 /*! \def ALLOC(ptr,type,num)
    Allocate space for \a num objects of type \a type. Make sure that the
    allocation succeeded, else stop the program with an error. Return the
    resulting pointer in \a ptr.
    \note The byte count is computed as \a num * sizeof(\a type) without an
    overflow check. */
 #define ALLOC(ptr,type,num) \
  do { (ptr)=(type *)util_malloc_((num)*sizeof(type)); } while (0)
 /*! \def RALLOC(type,num)
    Allocate space for \a num objects of type \a type. Make sure that the
    allocation succeeded, else stop the program with an error. Cast the
    resulting pointer to \a (type*). */
 #define RALLOC(type,num) \
  ((type *)util_malloc_((num)*sizeof(type)))
 /*! \def DEALLOC(ptr)
    Deallocate \a ptr. It must have been allocated using \a ALLOC or
    \a RALLOC. \a ptr is set to NULL afterwards. */
 #define DEALLOC(ptr) \
  do { util_free_(ptr); (ptr)=NULL; } while(0)
 /*! \def RESIZE(ptr,type,num)
    Free \a ptr and allocate fresh space for \a num objects of type \a type
    in its place. The previous contents are NOT copied over. */
 #define RESIZE(ptr,type,num) \
  do { util_free_(ptr); ALLOC(ptr,type,num); } while(0)
 /*! \def GROW(ptr,type,sz_old,sz_new)
    If \a sz_new exceeds \a sz_old, replace \a ptr by a fresh allocation of
    2*\a sz_new objects (via RESIZE, so the old contents are discarded) and
    store that new capacity in \a sz_old. \a sz_old must therefore be an
    assignable variable. */
 #define GROW(ptr,type,sz_old,sz_new) \
  do { \
    if ((sz_new)>(sz_old)) \
      { RESIZE(ptr,type,2*(sz_new));sz_old=2*(sz_new); } \
  } while(0)
 /*! \def SET_ARRAY(ptr,i1,i2,val)
    Set the entries \a ptr[i1] ... \a ptr[i2-1] to \a val.
    \note \a i2 and \a val are re-evaluated on every loop iteration, so they
    should be free of side effects. */
 #define SET_ARRAY(ptr,i1,i2,val) \
  do { \
    ptrdiff_t cnt_; \
    for (cnt_=(i1);cnt_<(i2);++cnt_) (ptr)[cnt_]=(val); \
    } while(0)
 /*! \def COPY_ARRAY(src,dest,i1,i2)
    Copy the entries \a src[i1] ... \a src[i2-1] to
    \a dest[i1] ... \a dest[i2-1].
    \note The copy is done element by element in ascending index order;
    \a i2 is re-evaluated on every iteration. */
 #define COPY_ARRAY(src,dest,i1,i2) \
  do { \
    ptrdiff_t cnt_; \
    for (cnt_=(i1);cnt_<(i2);++cnt_) (dest)[cnt_]=(src)[cnt_]; \
    } while(0)
 /*! \def ALLOC2D(ptr,type,num1,num2)
    Allocate a \a num1 x \a num2 array of \a type, addressable as
    \a ptr[i][j]. Two allocations are made: an array of \a num1 row pointers
    and one contiguous block of \a num1 * \a num2 elements; each row pointer
    is set to the start of its row inside that block.
    \note NOTE(review): for \a num1 == 0 the row-pointer allocation yields
    NULL and \a ptr[0] is then written to -- callers apparently must pass
    \a num1 > 0; confirm. */
 #define ALLOC2D(ptr,type,num1,num2) \
  do { \
    size_t cnt_, num1_=(num1), num2_=(num2); \
    ALLOC((ptr),type *,num1_); \
    ALLOC((ptr)[0],type,num1_*num2_); \
    for (cnt_=1; cnt_<num1_; ++cnt_) \
      (ptr)[cnt_]=(ptr)[cnt_-1]+num2_; \
    } while(0)
 /*! \def DEALLOC2D(ptr)
    Release an array created with ALLOC2D: first the contiguous element
    block (\a ptr[0]), then the row-pointer array itself. A NULL \a ptr is
    accepted. \a ptr is NULL afterwards. */
 #define DEALLOC2D(ptr) \
  do { if(ptr) DEALLOC((ptr)[0]); DEALLOC(ptr); } while(0)
 /*! \def FAPPROX(a,b,eps)
    True if \a a and \a b differ by less than \a eps relative to |\a b|.
    Because the comparison is strict, the result is false when \a b is 0. */
 #define FAPPROX(a,b,eps) \
  (fabs((a)-(b))<((eps)*fabs(b)))
 /*! \def ABSAPPROX(a,b,eps)
    True if the absolute difference of \a a and \a b is less than \a eps. */
 #define ABSAPPROX(a,b,eps) \
  (fabs((a)-(b))<(eps))
 /*! \def IMAX(a,b)
    The larger of \a a and \a b. Each argument may be evaluated twice. */
 #define IMAX(a,b) \
  (((a)>(b)) ? (a) : (b))
 /*! \def IMIN(a,b)
    The smaller of \a a and \a b. Each argument may be evaluated twice. */
 #define IMIN(a,b) \
  (((a)<(b)) ? (a) : (b))
 /*! \def SWAP(a,b,type)
    Exchange the values of \a a and \a b using a temporary of type \a type. */
 #define SWAP(a,b,type) \
  do { type tmp_=(a); (a)=(b); (b)=tmp_; } while(0)
 /*! \def CHECK_STACK_ALIGN(align)
    Emit a warning (via UTIL_WARN) if the address of a local \c double is not
    a multiple of \a align. The test masks the address with \a align - 1, so
    \a align is expected to be a power of two. The argument is parenthesized
    so that compound expressions are handled correctly. */
 #define CHECK_STACK_ALIGN(align) \
  do { \
    double foo; \
    UTIL_WARN((((size_t)(&foo))&((align)-1))==0, \
      "WARNING: stack not sufficiently aligned!"); \
    } while(0)
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/c_utils/memusage.c
+++ b/external/sharp/c_utils/memusage.c
@ -1,68 +0,0 @@
 /*
 *  This file is part of libc_utils.
 *
 *  libc_utils is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libc_utils is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libc_utils; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*
 *  Functionality for measuring memory consumption
 *
 *  Copyright (C) 2012 Max-Planck-Society
 *  Author: Martin Reinecke
 */
 #include <stdio.h>
 #include <string.h>
 #include "memusage.h"
 /* Return the value of the second field of /proc/self/statm multiplied by
    4096, or -1.0 if the file cannot be opened or the field cannot be
    parsed. */
 double residentSetSize(void)
  {
  FILE *statm = fopen("/proc/self/statm","r");
  double res;
  int nconv;
  if (!statm) return -1.0;
  /* The first field is skipped (assignment suppressed), the second one is
     stored. fscanf() returns the number of assigned items, so exactly 1
     means success; 0 (matching failure) and EOF are both errors. */
  nconv = fscanf(statm,"%*f %lf",&res);
  fclose(statm);
  if (nconv!=1) return -1.0;
  /* NOTE(review): the factor 4096 presumably is the page size in bytes;
     confirm for platforms that use a different page size. */
  return (res*4096);
  }
 /* Scan /proc/self/status word by word for the "VmHWM:" entry and return
    the number that follows it multiplied by 1024. Returns -1.0 if the
    file cannot be opened, the entry is not found before the end of the
    file, or the unit after the number is not "kB". */
 double VmHWM(void)
  {
  char token[1024];
  double hwm;
  FILE *status = fopen("/proc/self/status", "r");
  if (status==NULL) return -1.0;
  for (;;)
    {
    /* a negative result means end of file or read error */
    if (fscanf(status,"%1023s",token)<0)
      break;
    if (strncmp(token, "VmHWM:", 6)!=0)
      continue;
    if (fscanf(status,"%lf%2s",&hwm,token)<0)
      break;
    /* only a value given in kB is accepted */
    if (strncmp(token, "kB", 2)!=0)
      break;
    fclose(status);
    return hwm*1024;
    }
  fclose(status);
  return -1.0;
  }
--- a/external/sharp/c_utils/memusage.h
+++ b/external/sharp/c_utils/memusage.h
@ -1,51 +0,0 @@
 /*
 *  This file is part of libc_utils.
 *
 *  libc_utils is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libc_utils is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libc_utils; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file memusage.h
 *  Functionality for measuring memory consumption
 *
 *  Copyright (C) 2012 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef PLANCK_MEMUSAGE_H
 #define PLANCK_MEMUSAGE_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*! Returns the current resident set size in bytes.
    \note Currently only supported on Linux. Returns -1 if unsupported. */
 double residentSetSize(void);
 /*! Returns the high water mark of the resident set size in bytes.
    \note Currently only supported on Linux. Returns -1 if unsupported. */
 double VmHWM(void);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/c_utils/planck.make
+++ b/external/sharp/c_utils/planck.make
@ -1,18 +0,0 @@
 # Build description of the c_utils package. SRCROOT, BLDROOT and LIBDIR
 # are not defined in this fragment and must already be set when it is read.
 PKG:=c_utils
 # Source and object directories of the package.
 SD:=$(SRCROOT)/$(PKG)
 OD:=$(BLDROOT)/$(PKG)
 # Add the package source directory to the include path list.
 FULL_INCLUDE+= -I$(SD)
 HDR_$(PKG):=$(SD)/*.h
 LIB_$(PKG):=$(LIBDIR)/libc_utils.a
 # Object file names, then the same names prefixed with the object directory.
 OBJ:=c_utils.o walltime_c.o memusage.o
 OBJ:=$(OBJ:%=$(OD)/%)
 # Every object depends on all package headers; the object directory is an
 # order-only prerequisite (after the "|").
 $(OBJ): $(HDR_$(PKG)) | $(OD)_mkdir
 $(LIB_$(PKG)): $(OBJ)
 # Append this package's headers and library to the global lists.
 all_hdr+=$(HDR_$(PKG))
 all_lib+=$(LIB_$(PKG))
--- a/external/sharp/c_utils/walltime_c.c
+++ b/external/sharp/c_utils/walltime_c.c
@ -1,54 +0,0 @@
 /*
 *  This file is part of libc_utils.
 *
 *  libc_utils is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libc_utils is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libc_utils; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*
 *  Functionality for reading wall clock time
 *
 *  Copyright (C) 2010, 2011 Max-Planck-Society
 *  Author: Martin Reinecke
 */
 #if defined (_OPENMP)
 #include <omp.h>
 #elif defined (USE_MPI)
 #include "mpi.h"
 #else
 #include <sys/time.h>
 #include <stdlib.h>
 #endif
 #include "walltime_c.h"
 /* Return a wall clock reading in seconds. The timer is chosen at compile
    time: omp_get_wtime() when _OPENMP is defined, else MPI_Wtime() when
    USE_MPI is defined, else gettimeofday(). */
 double wallTime(void)
  {
 #if defined (_OPENMP)
  double now = omp_get_wtime();
  return now;
 #elif defined (USE_MPI)
  double now = MPI_Wtime();
  return now;
 #else
  struct timeval tv;
  double seconds, micros;
  gettimeofday(&tv, NULL);
  /* combine whole seconds and microseconds into one double */
  seconds = (double)tv.tv_sec;
  micros = (double)tv.tv_usec;
  return seconds + 1e-6*micros;
 #endif
  }
--- a/external/sharp/c_utils/walltime_c.h
+++ b/external/sharp/c_utils/walltime_c.h
@ -1,54 +0,0 @@
 /*
 *  This file is part of libc_utils.
 *
 *  libc_utils is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libc_utils is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libc_utils; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file walltime_c.h
 *  Functionality for reading wall clock time
 *
 *  Copyright (C) 2010 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef PLANCK_WALLTIME_C_H
 #define PLANCK_WALLTIME_C_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*! Returns an approximation of the current wall time (in seconds).
    The first available of the following timers will be used:
    <ul>
    <li> \a omp_get_wtime(), if OpenMP is available
    <li> \a MPI_Wtime(), if MPI is available
    <li> \a gettimeofday() otherwise
    </ul>
    \note Only useful for measuring time differences.
    \note This function has an execution time between 10 and 100 nanoseconds. */
 double wallTime(void);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/config/config.auto.in
+++ b/external/sharp/config/config.auto.in
@ -1,12 +0,0 @@
# Template for the generated make configuration. The placeholders between
# at-signs are filled in by the configure script.
@SILENT_RULE@
 # CC and CL are both set to the same compiler program.
 CC=@CC@
 CL=@CC@
 # CCFLAGS is CCFLAGS_NO_C with -c appended.
 CCFLAGS_NO_C=@CCFLAGS_NO_C@
 CCFLAGS=$(CCFLAGS_NO_C) -c
 # Link flags: search the current directory and LIBDIR, add the configured
 # flags and the math library.
 CLFLAGS=-L. -L$(LIBDIR) @LDCCFLAGS@ -lm
 DEBUG_CFLAGS=@DEBUG_CFLAGS@
 MPI_CFLAGS=@MPI_CFLAGS@
 OPENMP_CFLAGS=@OPENMP_CFLAGS@
 PIC_CFLAGS=@PIC_CFLAGS@
 # Command used to create static archives.
 ARCREATE=@ARCREATE@
--- a/external/sharp/config/rules.common
+++ b/external/sharp/config/rules.common
@ -1,33 +0,0 @@
 # Directory layout, derived from SRCROOT and SHARP_TARGET (neither is
 # defined in this file).
 BLDROOT   = $(SRCROOT)/build.$(SHARP_TARGET)
 PREFIX    = $(SRCROOT)/$(SHARP_TARGET)
 BINDIR    = $(PREFIX)/bin
 INCDIR    = $(PREFIX)/include
 LIBDIR    = $(PREFIX)/lib
 DOCDIR    = $(SRCROOT)/doc
 PYTHONDIR = $(SRCROOT)/python/libsharp
 # do not use any suffix rules
 .SUFFIXES:
 # do not use any default rules
 .DEFAULT:
 # These targets do not produce a file of their own name. Declaring them
 # phony keeps a stray file called e.g. "clean" from making them look up
 # to date.
 .PHONY: echo_config clean distclean
 # Print the active configuration name.
 echo_config:
	@echo using configuration \'$(SHARP_TARGET)\'
 # Compile a C source into the matching object below BLDROOT. echo_config is
 # an order-only prerequisite, so it never forces a recompilation.
 $(BLDROOT)/%.o : $(SRCROOT)/%.c | echo_config
	@echo "#  compiling $*.c"
	cd $(@D) && $(CC) $(FULL_INCLUDE) -I$(BLDROOT) $(CCFLAGS) $<
 # Same for C++ sources.
 $(BLDROOT)/%.o : $(SRCROOT)/%.cc | echo_config
	@echo "#  compiling $*.cc"
	cd $(@D) && $(CXX) $(FULL_INCLUDE) -I$(BLDROOT) $(CXXCFLAGS) $<
 # "<dir>_mkdir" creates <dir> if it does not exist yet.
 %_mkdir:
	@if [ ! -d $* ]; then mkdir -p $* ; fi
 # Remove build products, installed files, documentation and configure
 # leftovers.
 # NOTE(review): PREFIX is $(SRCROOT)/$(SHARP_TARGET); if both were empty the
 # rm -rf below would target "/" -- confirm callers always set them.
 clean:
	rm -rf $(BLDROOT) $(PREFIX) $(DOCDIR) autom4te.cache/ config.log config.status
	rm -rf $(PYTHONDIR)/*.c $(PYTHONDIR)/*.o $(PYTHONDIR)/*.so
 # Additionally remove the generated configuration.
 distclean: clean
	rm -f config/config.auto
--- a/external/sharp/configure
+++ b/external/sharp/configure
--- a/external/sharp/configure.ac
+++ b/external/sharp/configure.ac
@ -1,113 +0,0 @@
 AC_INIT(config/config.auto.in)
 # Build a system identifier from the output of uname -s and uname -r,
 # or use "unknown" when no uname program is found.
 AC_CHECK_PROG([uname_found],[uname],[1],[0])
 if test $uname_found -eq 0 ; then
    echo "No uname found; setting system type to unknown."
    system="unknown"
 else
    system=`uname -s`-`uname -r`
 fi
 # Classify the C compiler as gcc, icc or unknown. The probe program only
 # compiles when __INTEL_COMPILER is defined, which sets ICC to yes.
 AC_LANG([C])
 AC_TRY_COMPILE([], [@%:@ifndef __INTEL_COMPILER
 choke me
@%:@endif], [ICC=[yes]], [ICC=[no]])
 # A compiler detected as icc is not treated as gcc.
 if test "x$ICC" = xyes; then GCC=no; fi
 CCTYPE=unknown
 # The operands are quoted and prefixed: GCC can be empty for compilers
 # that are not GNU C, and an unquoted empty operand turns the test into a
 # malformed expression.
 if test "x$GCC" = xyes; then CCTYPE=gcc; fi
 if test "x$ICC" = xyes; then CCTYPE=icc; fi
 AC_OPENMP
 # Make output is silent unless --enable-noisy-make is given.
 SILENT_RULE=".SILENT:"
 AC_ARG_ENABLE(noisy-make,
  [  --enable-noisy-make     enable detailed make output],
  [if test "$enableval" = yes; then
     SILENT_RULE=""
   fi])
 # MPI support is off unless --enable-mpi is given.
 ENABLE_MPI=no
 AC_ARG_ENABLE(mpi,
  [  --enable-mpi            enable generation of MPI-parallel code],
  [if test "$enableval" = yes; then
     ENABLE_MPI=yes
   fi])
 # Debugging symbols are off unless --enable-debug is given.
 ENABLE_DEBUG=no
 AC_ARG_ENABLE(debug,
  [  --enable-debug          enable generation of debugging symbols],
  [if test "$enableval" = yes; then
     ENABLE_DEBUG=yes
   fi])
 # Position independent code is off unless --enable-pic is given.
 ENABLE_PIC=no
 AC_ARG_ENABLE(pic,
  [  --enable-pic            enable generation of position independent code],
  [if test "$enableval" = yes; then
     ENABLE_PIC=yes
   fi])
 # Choose the base compiler flags by compiler type. For gcc the version
 # string is inspected: versions starting with 4.3 through 4.9 also get
 # -march=native, and versions starting with 4.4 also get -fno-tree-fre.
 # Any other compiler type falls back to plain -O2.
 case $CCTYPE in
  gcc)
    CCFLAGS="-O3 -fno-tree-vectorize -ffast-math -fomit-frame-pointer -std=c99 -pedantic -Wextra -Wall -Wno-unknown-pragmas -Wshadow -Wmissing-prototypes -Wfatal-errors"
    GCCVERSION="`$CC -dumpversion 2>&1`"
    echo "Using gcc version $GCCVERSION"
    AC_SUBST(GCCVERSION)
    changequote(,)
    gcc43=`echo $GCCVERSION | grep -c '^4\.[3456789]'`
    gcc44=`echo $GCCVERSION | grep -c '^4\.4'`
    changequote([,])
    if test $gcc43 -gt 0; then
      CCFLAGS="$CCFLAGS -march=native"
    fi
    if test $gcc44 -gt 0; then
      CCFLAGS="$CCFLAGS -fno-tree-fre"
    fi
    ;;
  icc)
    CCFLAGS="-O3 -xHOST -std=c99 -ip -Wbrief -Wall -vec-report0 -openmp-report0 -wd383,981,1419,1572"
    ;;
  *)
    CCFLAGS="-O2"
    # Don't do anything now
    ;;
 esac
 # Select the command that creates static archives: libtool on Darwin
 # systems, ar everywhere else.
 case $system in
  Darwin-*)
    ARCREATE="libtool -static -o"
    ;;
  *)
    ARCREATE="ar cr"
    ;;
 esac
 # Translate the enabled options into their flag variables; a disabled
 # option leaves its variable unset here.
 if test $ENABLE_DEBUG = yes; then
  DEBUG_CFLAGS="-g"
 fi
 if test $ENABLE_PIC = yes; then
  PIC_CFLAGS="-fPIC"
 fi
 if test $ENABLE_MPI = yes; then
  MPI_CFLAGS="-DUSE_MPI"
 fi
 # Combine the base flags with the optional ones. The compile flags
 # additionally receive CPPFLAGS, the link flags are LDFLAGS followed by
 # the combined compiler flags.
 CCFLAGS="$CCFLAGS $DEBUG_CFLAGS $OPENMP_CFLAGS $PIC_CFLAGS $MPI_CFLAGS"
 CCFLAGS_NO_C="$CCFLAGS $CPPFLAGS"
 LDCCFLAGS="$LDFLAGS $CCFLAGS"
 # Register the variables for substitution and generate config/config.auto.
 AC_SUBST(SILENT_RULE)
 AC_SUBST(CC)
 AC_SUBST(CCFLAGS_NO_C)
 AC_SUBST(LDCCFLAGS)
 AC_SUBST(DEBUG_CFLAGS)
 AC_SUBST(MPI_CFLAGS)
 AC_SUBST(OPENMP_CFLAGS)
 AC_SUBST(PIC_CFLAGS)
 AC_SUBST(ARCREATE)
 AC_OUTPUT(config/config.auto)
--- a/external/sharp/docsrc/c_utils.dox
+++ b/external/sharp/docsrc/c_utils.dox
@ -1,290 +0,0 @@
 # Doxyfile 1.8.1
 #---------------------------------------------------------------------------
 # Project related configuration options
 #---------------------------------------------------------------------------
 DOXYFILE_ENCODING      = UTF-8
 PROJECT_NAME           = "LevelS C support library"
 PROJECT_NUMBER         = 0.1
 PROJECT_BRIEF          =
 PROJECT_LOGO           =
 OUTPUT_DIRECTORY       = .
 CREATE_SUBDIRS         = NO
 OUTPUT_LANGUAGE        = English
 BRIEF_MEMBER_DESC      = NO
 REPEAT_BRIEF           = YES
 ABBREVIATE_BRIEF       =
 ALWAYS_DETAILED_SEC    = NO
 INLINE_INHERITED_MEMB  = NO
 FULL_PATH_NAMES        = NO
 STRIP_FROM_PATH        =
 STRIP_FROM_INC_PATH    =
 SHORT_NAMES            = NO
 JAVADOC_AUTOBRIEF      = NO
 QT_AUTOBRIEF           = NO
 MULTILINE_CPP_IS_BRIEF = NO
 INHERIT_DOCS           = YES
 SEPARATE_MEMBER_PAGES  = NO
 TAB_SIZE               = 8
 ALIASES                =
 TCL_SUBST              =
 OPTIMIZE_OUTPUT_FOR_C  = YES
 OPTIMIZE_OUTPUT_JAVA   = NO
 OPTIMIZE_FOR_FORTRAN   = NO
 OPTIMIZE_OUTPUT_VHDL   = NO
 EXTENSION_MAPPING      =
 MARKDOWN_SUPPORT       = YES
 BUILTIN_STL_SUPPORT    = NO
 CPP_CLI_SUPPORT        = NO
 SIP_SUPPORT            = NO
 IDL_PROPERTY_SUPPORT   = YES
 DISTRIBUTE_GROUP_DOC   = NO
 SUBGROUPING            = YES
 INLINE_GROUPED_CLASSES = NO
 INLINE_SIMPLE_STRUCTS  = NO
 TYPEDEF_HIDES_STRUCT   = NO
 SYMBOL_CACHE_SIZE      = 0
 LOOKUP_CACHE_SIZE      = 0
 #---------------------------------------------------------------------------
 # Build related configuration options
 #---------------------------------------------------------------------------
 EXTRACT_ALL            = NO
 EXTRACT_PRIVATE        = NO
 EXTRACT_PACKAGE        = NO
 EXTRACT_STATIC         = NO
 EXTRACT_LOCAL_CLASSES  = YES
 EXTRACT_LOCAL_METHODS  = NO
 EXTRACT_ANON_NSPACES   = NO
 HIDE_UNDOC_MEMBERS     = YES
 HIDE_UNDOC_CLASSES     = YES
 HIDE_FRIEND_COMPOUNDS  = YES
 HIDE_IN_BODY_DOCS      = NO
 INTERNAL_DOCS          = NO
 CASE_SENSE_NAMES       = YES
 HIDE_SCOPE_NAMES       = NO
 SHOW_INCLUDE_FILES     = YES
 FORCE_LOCAL_INCLUDES   = NO
 INLINE_INFO            = YES
 SORT_MEMBER_DOCS       = NO
 SORT_BRIEF_DOCS        = NO
 SORT_MEMBERS_CTORS_1ST = NO
 SORT_GROUP_NAMES       = NO
 SORT_BY_SCOPE_NAME     = NO
 STRICT_PROTO_MATCHING  = NO
 GENERATE_TODOLIST      = YES
 GENERATE_TESTLIST      = YES
 GENERATE_BUGLIST       = YES
 GENERATE_DEPRECATEDLIST= YES
 ENABLED_SECTIONS       =
 MAX_INITIALIZER_LINES  = 30
 SHOW_USED_FILES        = YES
 SHOW_FILES             = YES
 SHOW_NAMESPACES        = YES
 FILE_VERSION_FILTER    =
 LAYOUT_FILE            =
 CITE_BIB_FILES         =
 #---------------------------------------------------------------------------
 # configuration options related to warning and progress messages
 #---------------------------------------------------------------------------
 QUIET                  = YES
 WARNINGS               = YES
 WARN_IF_UNDOCUMENTED   = YES
 WARN_IF_DOC_ERROR      = YES
 WARN_NO_PARAMDOC       = NO
 WARN_FORMAT            = "$file:$line: $text"
 WARN_LOGFILE           =
 #---------------------------------------------------------------------------
 # configuration options related to the input files
 #---------------------------------------------------------------------------
 INPUT                  = ../c_utils
 INPUT_ENCODING         = UTF-8
 FILE_PATTERNS          = *.h \
                         *.c \
                         *.dox
 RECURSIVE              = YES
 EXCLUDE                =
 EXCLUDE_SYMLINKS       = NO
 EXCLUDE_PATTERNS       =
 EXCLUDE_SYMBOLS        =
 EXAMPLE_PATH           =
 EXAMPLE_PATTERNS       =
 EXAMPLE_RECURSIVE      = NO
 IMAGE_PATH             =
 INPUT_FILTER           =
 FILTER_PATTERNS        =
 FILTER_SOURCE_FILES    = NO
 FILTER_SOURCE_PATTERNS =
 #---------------------------------------------------------------------------
 # configuration options related to source browsing
 #---------------------------------------------------------------------------
 SOURCE_BROWSER         = YES
 INLINE_SOURCES         = NO
 STRIP_CODE_COMMENTS    = NO
 REFERENCED_BY_RELATION = NO
 REFERENCES_RELATION    = NO
 REFERENCES_LINK_SOURCE = YES
 USE_HTAGS              = NO
 VERBATIM_HEADERS       = YES
 #---------------------------------------------------------------------------
 # configuration options related to the alphabetical class index
 #---------------------------------------------------------------------------
 ALPHABETICAL_INDEX     = YES
 COLS_IN_ALPHA_INDEX    = 5
 IGNORE_PREFIX          =
 #---------------------------------------------------------------------------
 # configuration options related to the HTML output
 #---------------------------------------------------------------------------
 GENERATE_HTML          = YES
 HTML_OUTPUT            = htmldoc
 HTML_FILE_EXTENSION    = .html
 HTML_HEADER            =
 HTML_FOOTER            = footer.html
 HTML_STYLESHEET        =
 HTML_EXTRA_FILES       =
 HTML_COLORSTYLE_HUE    = 220
 HTML_COLORSTYLE_SAT    = 100
 HTML_COLORSTYLE_GAMMA  = 80
 HTML_TIMESTAMP         = YES
 HTML_DYNAMIC_SECTIONS  = NO
 HTML_INDEX_NUM_ENTRIES = 100
 GENERATE_DOCSET        = NO
 DOCSET_FEEDNAME        = "Doxygen generated docs"
 DOCSET_BUNDLE_ID       = org.doxygen.Project
 DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
 DOCSET_PUBLISHER_NAME  = Publisher
 GENERATE_HTMLHELP      = NO
 CHM_FILE               =
 HHC_LOCATION           =
 GENERATE_CHI           = NO
 CHM_INDEX_ENCODING     =
 BINARY_TOC             = NO
 TOC_EXPAND             = NO
 GENERATE_QHP           = NO
 QCH_FILE               =
 QHP_NAMESPACE          = org.doxygen.Project
 QHP_VIRTUAL_FOLDER     = doc
 QHP_CUST_FILTER_NAME   =
 QHP_CUST_FILTER_ATTRS  =
 QHP_SECT_FILTER_ATTRS  =
 QHG_LOCATION           =
 GENERATE_ECLIPSEHELP   = NO
 ECLIPSE_DOC_ID         = org.doxygen.Project
 DISABLE_INDEX          = NO
 GENERATE_TREEVIEW      = NO
 ENUM_VALUES_PER_LINE   = 4
 TREEVIEW_WIDTH         = 250
 EXT_LINKS_IN_WINDOW    = NO
 FORMULA_FONTSIZE       = 10
 FORMULA_TRANSPARENT    = YES
 USE_MATHJAX            = NO
 MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
 MATHJAX_EXTENSIONS     =
 SEARCHENGINE           = NO
 SERVER_BASED_SEARCH    = NO
 #---------------------------------------------------------------------------
 # configuration options related to the LaTeX output
 #---------------------------------------------------------------------------
 GENERATE_LATEX         = NO
 LATEX_OUTPUT           = latex
 LATEX_CMD_NAME         = latex
 MAKEINDEX_CMD_NAME     = makeindex
 COMPACT_LATEX          = YES
 PAPER_TYPE             = a4wide
 EXTRA_PACKAGES         =
 LATEX_HEADER           =
 LATEX_FOOTER           =
 PDF_HYPERLINKS         = YES
 USE_PDFLATEX           = YES
 LATEX_BATCHMODE        = NO
 LATEX_HIDE_INDICES     = NO
 LATEX_SOURCE_CODE      = NO
 LATEX_BIB_STYLE        = plain
 #---------------------------------------------------------------------------
 # configuration options related to the RTF output
 #---------------------------------------------------------------------------
 GENERATE_RTF           = NO
 RTF_OUTPUT             = rtf
 COMPACT_RTF            = NO
 RTF_HYPERLINKS         = NO
 RTF_STYLESHEET_FILE    =
 RTF_EXTENSIONS_FILE    =
 #---------------------------------------------------------------------------
 # configuration options related to the man page output
 #---------------------------------------------------------------------------
 GENERATE_MAN           = NO
 MAN_OUTPUT             = man
 MAN_EXTENSION          = .3
 MAN_LINKS              = NO
 #---------------------------------------------------------------------------
 # configuration options related to the XML output
 #---------------------------------------------------------------------------
 GENERATE_XML           = NO
 XML_OUTPUT             = xml
 XML_SCHEMA             =
 XML_DTD                =
 XML_PROGRAMLISTING     = YES
 #---------------------------------------------------------------------------
 # configuration options for the AutoGen Definitions output
 #---------------------------------------------------------------------------
 GENERATE_AUTOGEN_DEF   = NO
 #---------------------------------------------------------------------------
 # configuration options related to the Perl module output
 #---------------------------------------------------------------------------
 GENERATE_PERLMOD       = NO
 PERLMOD_LATEX          = NO
 PERLMOD_PRETTY         = YES
 PERLMOD_MAKEVAR_PREFIX =
 #---------------------------------------------------------------------------
 # Configuration options related to the preprocessor
 #---------------------------------------------------------------------------
 ENABLE_PREPROCESSING   = YES
 MACRO_EXPANSION        = NO
 EXPAND_ONLY_PREDEF     = NO
 SEARCH_INCLUDES        = YES
 INCLUDE_PATH           =
 INCLUDE_FILE_PATTERNS  =
 PREDEFINED             =
 EXPAND_AS_DEFINED      =
 SKIP_FUNCTION_MACROS   = YES
 #---------------------------------------------------------------------------
 # Configuration::additions related to external references
 #---------------------------------------------------------------------------
 TAGFILES               =
 GENERATE_TAGFILE       = c_utils.tag
 ALLEXTERNALS           = NO
 EXTERNAL_GROUPS        = YES
 PERL_PATH              = /usr/bin/perl
 #---------------------------------------------------------------------------
 # Configuration options related to the dot tool
 #---------------------------------------------------------------------------
 CLASS_DIAGRAMS         = YES
 MSCGEN_PATH            =
 HIDE_UNDOC_RELATIONS   = YES
 HAVE_DOT               = NO
 DOT_NUM_THREADS        = 0
 DOT_FONTNAME           = FreeSans
 DOT_FONTSIZE           = 10
 DOT_FONTPATH           =
 CLASS_GRAPH            = YES
 COLLABORATION_GRAPH    = YES
 GROUP_GRAPHS           = YES
 UML_LOOK               = NO
 UML_LIMIT_NUM_FIELDS   = 10
 TEMPLATE_RELATIONS     = YES
 INCLUDE_GRAPH          = NO
 INCLUDED_BY_GRAPH      = NO
 CALL_GRAPH             = NO
 CALLER_GRAPH           = NO
 GRAPHICAL_HIERARCHY    = NO
 DIRECTORY_GRAPH        = YES
 DOT_IMAGE_FORMAT       = png
 INTERACTIVE_SVG        = NO
 DOT_PATH               =
 DOTFILE_DIRS           =
 MSCFILE_DIRS           =
 DOT_GRAPH_MAX_NODES    = 50
 MAX_DOT_GRAPH_DEPTH    = 0
 DOT_TRANSPARENT        = NO
 DOT_MULTI_TARGETS      = NO
 GENERATE_LEGEND        = YES
 DOT_CLEANUP            = YES
--- a/external/sharp/docsrc/footer.html
+++ b/external/sharp/docsrc/footer.html
@ -1,5 +0,0 @@
 <hr><address style="align: right;"><small>
 Generated on $datetime for $projectname
 </small></address>
 </body>
 </html>
--- a/external/sharp/docsrc/index_code.html
+++ b/external/sharp/docsrc/index_code.html
@ -1,15 +0,0 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
 <html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
 <title>Libsharp source code documentation</title>
 </head><body>
 <H1>Libsharp source code documentation</H1>
 <H2>C interfaces</H2>
 <ul>
 <li><a href="c_utils/index.html">C support library</a>
 <li><a href="libfftpack/index.html">FFT interface</a>
 <li><a href="libsharp/index.html">Library for spherical harmonic transforms</a>
 </ul>
 </body>
 </html>
--- a/external/sharp/docsrc/libfftpack.dox
+++ b/external/sharp/docsrc/libfftpack.dox
@ -1,290 +0,0 @@
 # Doxyfile 1.8.1
 #---------------------------------------------------------------------------
 # Project related configuration options
 #---------------------------------------------------------------------------
 DOXYFILE_ENCODING      = UTF-8
 PROJECT_NAME           = "LevelS FFT library"
 PROJECT_NUMBER         = 0.1
 PROJECT_BRIEF          =
 PROJECT_LOGO           =
 OUTPUT_DIRECTORY       = .
 CREATE_SUBDIRS         = NO
 OUTPUT_LANGUAGE        = English
 BRIEF_MEMBER_DESC      = NO
 REPEAT_BRIEF           = YES
 ABBREVIATE_BRIEF       =
 ALWAYS_DETAILED_SEC    = NO
 INLINE_INHERITED_MEMB  = NO
 FULL_PATH_NAMES        = NO
 STRIP_FROM_PATH        =
 STRIP_FROM_INC_PATH    =
 SHORT_NAMES            = NO
 JAVADOC_AUTOBRIEF      = NO
 QT_AUTOBRIEF           = NO
 MULTILINE_CPP_IS_BRIEF = NO
 INHERIT_DOCS           = YES
 SEPARATE_MEMBER_PAGES  = NO
 TAB_SIZE               = 8
 ALIASES                =
 TCL_SUBST              =
 OPTIMIZE_OUTPUT_FOR_C  = YES
 OPTIMIZE_OUTPUT_JAVA   = NO
 OPTIMIZE_FOR_FORTRAN   = NO
 OPTIMIZE_OUTPUT_VHDL   = NO
 EXTENSION_MAPPING      =
 MARKDOWN_SUPPORT       = YES
 BUILTIN_STL_SUPPORT    = NO
 CPP_CLI_SUPPORT        = NO
 SIP_SUPPORT            = NO
 IDL_PROPERTY_SUPPORT   = YES
 DISTRIBUTE_GROUP_DOC   = NO
 SUBGROUPING            = YES
 INLINE_GROUPED_CLASSES = NO
 INLINE_SIMPLE_STRUCTS  = NO
 TYPEDEF_HIDES_STRUCT   = NO
 SYMBOL_CACHE_SIZE      = 0
 LOOKUP_CACHE_SIZE      = 0
 #---------------------------------------------------------------------------
 # Build related configuration options
 #---------------------------------------------------------------------------
 EXTRACT_ALL            = NO
 EXTRACT_PRIVATE        = NO
 EXTRACT_PACKAGE        = NO
 EXTRACT_STATIC         = NO
 EXTRACT_LOCAL_CLASSES  = YES
 EXTRACT_LOCAL_METHODS  = NO
 EXTRACT_ANON_NSPACES   = NO
 HIDE_UNDOC_MEMBERS     = YES
 HIDE_UNDOC_CLASSES     = YES
 HIDE_FRIEND_COMPOUNDS  = YES
 HIDE_IN_BODY_DOCS      = NO
 INTERNAL_DOCS          = NO
 CASE_SENSE_NAMES       = YES
 HIDE_SCOPE_NAMES       = NO
 SHOW_INCLUDE_FILES     = YES
 FORCE_LOCAL_INCLUDES   = NO
 INLINE_INFO            = YES
 SORT_MEMBER_DOCS       = NO
 SORT_BRIEF_DOCS        = NO
 SORT_MEMBERS_CTORS_1ST = NO
 SORT_GROUP_NAMES       = NO
 SORT_BY_SCOPE_NAME     = NO
 STRICT_PROTO_MATCHING  = NO
 GENERATE_TODOLIST      = YES
 GENERATE_TESTLIST      = YES
 GENERATE_BUGLIST       = YES
 GENERATE_DEPRECATEDLIST= YES
 ENABLED_SECTIONS       =
 MAX_INITIALIZER_LINES  = 30
 SHOW_USED_FILES        = YES
 SHOW_FILES             = YES
 SHOW_NAMESPACES        = YES
 FILE_VERSION_FILTER    =
 LAYOUT_FILE            =
 CITE_BIB_FILES         =
 #---------------------------------------------------------------------------
 # configuration options related to warning and progress messages
 #---------------------------------------------------------------------------
 QUIET                  = YES
 WARNINGS               = YES
 WARN_IF_UNDOCUMENTED   = YES
 WARN_IF_DOC_ERROR      = YES
 WARN_NO_PARAMDOC       = NO
 WARN_FORMAT            = "$file:$line: $text"
 WARN_LOGFILE           =
 #---------------------------------------------------------------------------
 # configuration options related to the input files
 #---------------------------------------------------------------------------
 INPUT                  = ../libfftpack
 INPUT_ENCODING         = UTF-8
 FILE_PATTERNS          = *.h \
                         *.c \
                         *.dox
 RECURSIVE              = YES
 EXCLUDE                =
 EXCLUDE_SYMLINKS       = NO
 EXCLUDE_PATTERNS       =
 EXCLUDE_SYMBOLS        =
 EXAMPLE_PATH           =
 EXAMPLE_PATTERNS       =
 EXAMPLE_RECURSIVE      = NO
 IMAGE_PATH             =
 INPUT_FILTER           =
 FILTER_PATTERNS        =
 FILTER_SOURCE_FILES    = NO
 FILTER_SOURCE_PATTERNS =
 #---------------------------------------------------------------------------
 # configuration options related to source browsing
 #---------------------------------------------------------------------------
 SOURCE_BROWSER         = YES
 INLINE_SOURCES         = NO
 STRIP_CODE_COMMENTS    = NO
 REFERENCED_BY_RELATION = NO
 REFERENCES_RELATION    = NO
 REFERENCES_LINK_SOURCE = YES
 USE_HTAGS              = NO
 VERBATIM_HEADERS       = YES
 #---------------------------------------------------------------------------
 # configuration options related to the alphabetical class index
 #---------------------------------------------------------------------------
 ALPHABETICAL_INDEX     = YES
 COLS_IN_ALPHA_INDEX    = 5
 IGNORE_PREFIX          =
 #---------------------------------------------------------------------------
 # configuration options related to the HTML output
 #---------------------------------------------------------------------------
 GENERATE_HTML          = YES
 HTML_OUTPUT            = htmldoc
 HTML_FILE_EXTENSION    = .html
 HTML_HEADER            =
 HTML_FOOTER            = footer.html
 HTML_STYLESHEET        =
 HTML_EXTRA_FILES       =
 HTML_COLORSTYLE_HUE    = 220
 HTML_COLORSTYLE_SAT    = 100
 HTML_COLORSTYLE_GAMMA  = 80
 HTML_TIMESTAMP         = YES
 HTML_DYNAMIC_SECTIONS  = NO
 HTML_INDEX_NUM_ENTRIES = 100
 GENERATE_DOCSET        = NO
 DOCSET_FEEDNAME        = "Doxygen generated docs"
 DOCSET_BUNDLE_ID       = org.doxygen.Project
 DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
 DOCSET_PUBLISHER_NAME  = Publisher
 GENERATE_HTMLHELP      = NO
 CHM_FILE               =
 HHC_LOCATION           =
 GENERATE_CHI           = NO
 CHM_INDEX_ENCODING     =
 BINARY_TOC             = NO
 TOC_EXPAND             = NO
 GENERATE_QHP           = NO
 QCH_FILE               =
 QHP_NAMESPACE          = org.doxygen.Project
 QHP_VIRTUAL_FOLDER     = doc
 QHP_CUST_FILTER_NAME   =
 QHP_CUST_FILTER_ATTRS  =
 QHP_SECT_FILTER_ATTRS  =
 QHG_LOCATION           =
 GENERATE_ECLIPSEHELP   = NO
 ECLIPSE_DOC_ID         = org.doxygen.Project
 DISABLE_INDEX          = NO
 GENERATE_TREEVIEW      = NO
 ENUM_VALUES_PER_LINE   = 4
 TREEVIEW_WIDTH         = 250
 EXT_LINKS_IN_WINDOW    = NO
 FORMULA_FONTSIZE       = 10
 FORMULA_TRANSPARENT    = YES
 USE_MATHJAX            = NO
 MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
 MATHJAX_EXTENSIONS     =
 SEARCHENGINE           = NO
 SERVER_BASED_SEARCH    = NO
 #---------------------------------------------------------------------------
 # configuration options related to the LaTeX output
 #---------------------------------------------------------------------------
 GENERATE_LATEX         = NO
 LATEX_OUTPUT           = latex
 LATEX_CMD_NAME         = latex
 MAKEINDEX_CMD_NAME     = makeindex
 COMPACT_LATEX          = YES
 PAPER_TYPE             = a4wide
 EXTRA_PACKAGES         =
 LATEX_HEADER           =
 LATEX_FOOTER           =
 PDF_HYPERLINKS         = YES
 USE_PDFLATEX           = YES
 LATEX_BATCHMODE        = NO
 LATEX_HIDE_INDICES     = NO
 LATEX_SOURCE_CODE      = NO
 LATEX_BIB_STYLE        = plain
 #---------------------------------------------------------------------------
 # configuration options related to the RTF output
 #---------------------------------------------------------------------------
 GENERATE_RTF           = NO
 RTF_OUTPUT             = rtf
 COMPACT_RTF            = NO
 RTF_HYPERLINKS         = NO
 RTF_STYLESHEET_FILE    =
 RTF_EXTENSIONS_FILE    =
 #---------------------------------------------------------------------------
 # configuration options related to the man page output
 #---------------------------------------------------------------------------
 GENERATE_MAN           = NO
 MAN_OUTPUT             = man
 MAN_EXTENSION          = .3
 MAN_LINKS              = NO
 #---------------------------------------------------------------------------
 # configuration options related to the XML output
 #---------------------------------------------------------------------------
 GENERATE_XML           = NO
 XML_OUTPUT             = xml
 XML_SCHEMA             =
 XML_DTD                =
 XML_PROGRAMLISTING     = YES
 #---------------------------------------------------------------------------
 # configuration options for the AutoGen Definitions output
 #---------------------------------------------------------------------------
 GENERATE_AUTOGEN_DEF   = NO
 #---------------------------------------------------------------------------
 # configuration options related to the Perl module output
 #---------------------------------------------------------------------------
 GENERATE_PERLMOD       = NO
 PERLMOD_LATEX          = NO
 PERLMOD_PRETTY         = YES
 PERLMOD_MAKEVAR_PREFIX =
 #---------------------------------------------------------------------------
 # Configuration options related to the preprocessor
 #---------------------------------------------------------------------------
 ENABLE_PREPROCESSING   = YES
 MACRO_EXPANSION        = NO
 EXPAND_ONLY_PREDEF     = NO
 SEARCH_INCLUDES        = YES
 INCLUDE_PATH           =
 INCLUDE_FILE_PATTERNS  =
 PREDEFINED             =
 EXPAND_AS_DEFINED      =
 SKIP_FUNCTION_MACROS   = YES
 #---------------------------------------------------------------------------
 # Configuration::additions related to external references
 #---------------------------------------------------------------------------
 TAGFILES               = c_utils.tag=../c_utils
 GENERATE_TAGFILE       = libfftpack.tag
 ALLEXTERNALS           = NO
 EXTERNAL_GROUPS        = YES
 PERL_PATH              = /usr/bin/perl
 #---------------------------------------------------------------------------
 # Configuration options related to the dot tool
 #---------------------------------------------------------------------------
 CLASS_DIAGRAMS         = YES
 MSCGEN_PATH            =
 HIDE_UNDOC_RELATIONS   = YES
 HAVE_DOT               = NO
 DOT_NUM_THREADS        = 0
 DOT_FONTNAME           = FreeSans
 DOT_FONTSIZE           = 10
 DOT_FONTPATH           =
 CLASS_GRAPH            = YES
 COLLABORATION_GRAPH    = YES
 GROUP_GRAPHS           = YES
 UML_LOOK               = NO
 UML_LIMIT_NUM_FIELDS   = 10
 TEMPLATE_RELATIONS     = YES
 INCLUDE_GRAPH          = NO
 INCLUDED_BY_GRAPH      = NO
 CALL_GRAPH             = NO
 CALLER_GRAPH           = NO
 GRAPHICAL_HIERARCHY    = NO
 DIRECTORY_GRAPH        = YES
 DOT_IMAGE_FORMAT       = png
 INTERACTIVE_SVG        = NO
 DOT_PATH               =
 DOTFILE_DIRS           =
 MSCFILE_DIRS           =
 DOT_GRAPH_MAX_NODES    = 50
 MAX_DOT_GRAPH_DEPTH    = 0
 DOT_TRANSPARENT        = NO
 DOT_MULTI_TARGETS      = NO
 GENERATE_LEGEND        = YES
 DOT_CLEANUP            = YES
--- a/external/sharp/docsrc/libsharp.dox
+++ b/external/sharp/docsrc/libsharp.dox
@ -1,291 +0,0 @@
 # Doxyfile 1.8.1
 #---------------------------------------------------------------------------
 # Project related configuration options
 #---------------------------------------------------------------------------
 DOXYFILE_ENCODING      = UTF-8
 PROJECT_NAME           = "LevelS SHT library"
 PROJECT_NUMBER         = 0.1
 PROJECT_BRIEF          =
 PROJECT_LOGO           =
 OUTPUT_DIRECTORY       = .
 CREATE_SUBDIRS         = NO
 OUTPUT_LANGUAGE        = English
 BRIEF_MEMBER_DESC      = NO
 REPEAT_BRIEF           = YES
 ABBREVIATE_BRIEF       =
 ALWAYS_DETAILED_SEC    = NO
 INLINE_INHERITED_MEMB  = NO
 FULL_PATH_NAMES        = NO
 STRIP_FROM_PATH        =
 STRIP_FROM_INC_PATH    =
 SHORT_NAMES            = NO
 JAVADOC_AUTOBRIEF      = NO
 QT_AUTOBRIEF           = NO
 MULTILINE_CPP_IS_BRIEF = NO
 INHERIT_DOCS           = YES
 SEPARATE_MEMBER_PAGES  = NO
 TAB_SIZE               = 8
 ALIASES                =
 TCL_SUBST              =
 OPTIMIZE_OUTPUT_FOR_C  = YES
 OPTIMIZE_OUTPUT_JAVA   = NO
 OPTIMIZE_FOR_FORTRAN   = NO
 OPTIMIZE_OUTPUT_VHDL   = NO
 EXTENSION_MAPPING      =
 MARKDOWN_SUPPORT       = YES
 BUILTIN_STL_SUPPORT    = NO
 CPP_CLI_SUPPORT        = NO
 SIP_SUPPORT            = NO
 IDL_PROPERTY_SUPPORT   = YES
 DISTRIBUTE_GROUP_DOC   = NO
 SUBGROUPING            = YES
 INLINE_GROUPED_CLASSES = NO
 INLINE_SIMPLE_STRUCTS  = NO
 TYPEDEF_HIDES_STRUCT   = NO
 SYMBOL_CACHE_SIZE      = 0
 LOOKUP_CACHE_SIZE      = 0
 #---------------------------------------------------------------------------
 # Build related configuration options
 #---------------------------------------------------------------------------
 EXTRACT_ALL            = NO
 EXTRACT_PRIVATE        = NO
 EXTRACT_PACKAGE        = NO
 EXTRACT_STATIC         = NO
 EXTRACT_LOCAL_CLASSES  = YES
 EXTRACT_LOCAL_METHODS  = NO
 EXTRACT_ANON_NSPACES   = NO
 HIDE_UNDOC_MEMBERS     = YES
 HIDE_UNDOC_CLASSES     = YES
 HIDE_FRIEND_COMPOUNDS  = YES
 HIDE_IN_BODY_DOCS      = NO
 INTERNAL_DOCS          = NO
 CASE_SENSE_NAMES       = YES
 HIDE_SCOPE_NAMES       = NO
 SHOW_INCLUDE_FILES     = YES
 FORCE_LOCAL_INCLUDES   = NO
 INLINE_INFO            = YES
 SORT_MEMBER_DOCS       = NO
 SORT_BRIEF_DOCS        = NO
 SORT_MEMBERS_CTORS_1ST = NO
 SORT_GROUP_NAMES       = NO
 SORT_BY_SCOPE_NAME     = NO
 STRICT_PROTO_MATCHING  = NO
 GENERATE_TODOLIST      = YES
 GENERATE_TESTLIST      = YES
 GENERATE_BUGLIST       = YES
 GENERATE_DEPRECATEDLIST= YES
 ENABLED_SECTIONS       =
 MAX_INITIALIZER_LINES  = 30
 SHOW_USED_FILES        = YES
 SHOW_FILES             = YES
 SHOW_NAMESPACES        = YES
 FILE_VERSION_FILTER    =
 LAYOUT_FILE            =
 CITE_BIB_FILES         =
 #---------------------------------------------------------------------------
 # configuration options related to warning and progress messages
 #---------------------------------------------------------------------------
 QUIET                  = YES
 WARNINGS               = YES
 WARN_IF_UNDOCUMENTED   = YES
 WARN_IF_DOC_ERROR      = YES
 WARN_NO_PARAMDOC       = NO
 WARN_FORMAT            = "$file:$line: $text"
 WARN_LOGFILE           =
 #---------------------------------------------------------------------------
 # configuration options related to the input files
 #---------------------------------------------------------------------------
 INPUT                  = ../libsharp
 INPUT_ENCODING         = UTF-8
 FILE_PATTERNS          = *.h \
                         *.c \
                         *.dox
 RECURSIVE              = YES
 EXCLUDE                =
 EXCLUDE_SYMLINKS       = NO
 EXCLUDE_PATTERNS       =
 EXCLUDE_SYMBOLS        =
 EXAMPLE_PATH           =
 EXAMPLE_PATTERNS       =
 EXAMPLE_RECURSIVE      = NO
 IMAGE_PATH             =
 INPUT_FILTER           =
 FILTER_PATTERNS        =
 FILTER_SOURCE_FILES    = NO
 FILTER_SOURCE_PATTERNS =
 #---------------------------------------------------------------------------
 # configuration options related to source browsing
 #---------------------------------------------------------------------------
 SOURCE_BROWSER         = YES
 INLINE_SOURCES         = NO
 STRIP_CODE_COMMENTS    = NO
 REFERENCED_BY_RELATION = NO
 REFERENCES_RELATION    = NO
 REFERENCES_LINK_SOURCE = YES
 USE_HTAGS              = NO
 VERBATIM_HEADERS       = YES
 #---------------------------------------------------------------------------
 # configuration options related to the alphabetical class index
 #---------------------------------------------------------------------------
 ALPHABETICAL_INDEX     = YES
 COLS_IN_ALPHA_INDEX    = 5
 IGNORE_PREFIX          =
 #---------------------------------------------------------------------------
 # configuration options related to the HTML output
 #---------------------------------------------------------------------------
 GENERATE_HTML          = YES
 HTML_OUTPUT            = htmldoc
 HTML_FILE_EXTENSION    = .html
 HTML_HEADER            =
 HTML_FOOTER            = footer.html
 HTML_STYLESHEET        =
 HTML_EXTRA_FILES       =
 HTML_COLORSTYLE_HUE    = 220
 HTML_COLORSTYLE_SAT    = 100
 HTML_COLORSTYLE_GAMMA  = 80
 HTML_TIMESTAMP         = YES
 HTML_DYNAMIC_SECTIONS  = NO
 HTML_INDEX_NUM_ENTRIES = 100
 GENERATE_DOCSET        = NO
 DOCSET_FEEDNAME        = "Doxygen generated docs"
 DOCSET_BUNDLE_ID       = org.doxygen.Project
 DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
 DOCSET_PUBLISHER_NAME  = Publisher
 GENERATE_HTMLHELP      = NO
 CHM_FILE               =
 HHC_LOCATION           =
 GENERATE_CHI           = NO
 CHM_INDEX_ENCODING     =
 BINARY_TOC             = NO
 TOC_EXPAND             = NO
 GENERATE_QHP           = NO
 QCH_FILE               =
 QHP_NAMESPACE          = org.doxygen.Project
 QHP_VIRTUAL_FOLDER     = doc
 QHP_CUST_FILTER_NAME   =
 QHP_CUST_FILTER_ATTRS  =
 QHP_SECT_FILTER_ATTRS  =
 QHG_LOCATION           =
 GENERATE_ECLIPSEHELP   = NO
 ECLIPSE_DOC_ID         = org.doxygen.Project
 DISABLE_INDEX          = NO
 GENERATE_TREEVIEW      = NO
 ENUM_VALUES_PER_LINE   = 4
 TREEVIEW_WIDTH         = 250
 EXT_LINKS_IN_WINDOW    = NO
 FORMULA_FONTSIZE       = 10
 FORMULA_TRANSPARENT    = YES
 USE_MATHJAX            = NO
 MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
 MATHJAX_EXTENSIONS     =
 SEARCHENGINE           = NO
 SERVER_BASED_SEARCH    = NO
 #---------------------------------------------------------------------------
 # configuration options related to the LaTeX output
 #---------------------------------------------------------------------------
 GENERATE_LATEX         = NO
 LATEX_OUTPUT           = latex
 LATEX_CMD_NAME         = latex
 MAKEINDEX_CMD_NAME     = makeindex
 COMPACT_LATEX          = YES
 PAPER_TYPE             = a4wide
 EXTRA_PACKAGES         =
 LATEX_HEADER           =
 LATEX_FOOTER           =
 PDF_HYPERLINKS         = YES
 USE_PDFLATEX           = YES
 LATEX_BATCHMODE        = NO
 LATEX_HIDE_INDICES     = NO
 LATEX_SOURCE_CODE      = NO
 LATEX_BIB_STYLE        = plain
 #---------------------------------------------------------------------------
 # configuration options related to the RTF output
 #---------------------------------------------------------------------------
 GENERATE_RTF           = NO
 RTF_OUTPUT             = rtf
 COMPACT_RTF            = NO
 RTF_HYPERLINKS         = NO
 RTF_STYLESHEET_FILE    =
 RTF_EXTENSIONS_FILE    =
 #---------------------------------------------------------------------------
 # configuration options related to the man page output
 #---------------------------------------------------------------------------
 GENERATE_MAN           = NO
 MAN_OUTPUT             = man
 MAN_EXTENSION          = .3
 MAN_LINKS              = NO
 #---------------------------------------------------------------------------
 # configuration options related to the XML output
 #---------------------------------------------------------------------------
 GENERATE_XML           = NO
 XML_OUTPUT             = xml
 XML_SCHEMA             =
 XML_DTD                =
 XML_PROGRAMLISTING     = YES
 #---------------------------------------------------------------------------
 # configuration options for the AutoGen Definitions output
 #---------------------------------------------------------------------------
 GENERATE_AUTOGEN_DEF   = NO
 #---------------------------------------------------------------------------
 # configuration options related to the Perl module output
 #---------------------------------------------------------------------------
 GENERATE_PERLMOD       = NO
 PERLMOD_LATEX          = NO
 PERLMOD_PRETTY         = YES
 PERLMOD_MAKEVAR_PREFIX =
 #---------------------------------------------------------------------------
 # Configuration options related to the preprocessor
 #---------------------------------------------------------------------------
 ENABLE_PREPROCESSING   = YES
 MACRO_EXPANSION        = NO
 EXPAND_ONLY_PREDEF     = NO
 SEARCH_INCLUDES        = YES
 INCLUDE_PATH           =
 INCLUDE_FILE_PATTERNS  =
 PREDEFINED             =
 EXPAND_AS_DEFINED      =
 SKIP_FUNCTION_MACROS   = YES
 #---------------------------------------------------------------------------
 # Configuration options related to external references
 #---------------------------------------------------------------------------
 TAGFILES               = libfftpack.tag=../libfftpack \
                         c_utils.tag=../c_utils
 GENERATE_TAGFILE       = libsharp.tag
 ALLEXTERNALS           = NO
 EXTERNAL_GROUPS        = YES
 PERL_PATH              = /usr/bin/perl
 #---------------------------------------------------------------------------
 # Configuration options related to the dot tool
 #---------------------------------------------------------------------------
 CLASS_DIAGRAMS         = YES
 MSCGEN_PATH            =
 HIDE_UNDOC_RELATIONS   = YES
 HAVE_DOT               = NO
 DOT_NUM_THREADS        = 0
 DOT_FONTNAME           = FreeSans
 DOT_FONTSIZE           = 10
 DOT_FONTPATH           =
 CLASS_GRAPH            = YES
 COLLABORATION_GRAPH    = YES
 GROUP_GRAPHS           = YES
 UML_LOOK               = NO
 UML_LIMIT_NUM_FIELDS   = 10
 TEMPLATE_RELATIONS     = YES
 INCLUDE_GRAPH          = NO
 INCLUDED_BY_GRAPH      = NO
 CALL_GRAPH             = NO
 CALLER_GRAPH           = NO
 GRAPHICAL_HIERARCHY    = NO
 DIRECTORY_GRAPH        = YES
 DOT_IMAGE_FORMAT       = png
 INTERACTIVE_SVG        = NO
 DOT_PATH               =
 DOTFILE_DIRS           =
 MSCFILE_DIRS           =
 DOT_GRAPH_MAX_NODES    = 50
 MAX_DOT_GRAPH_DEPTH    = 0
 DOT_TRANSPARENT        = NO
 DOT_MULTI_TARGETS      = NO
 GENERATE_LEGEND        = YES
 DOT_CLEANUP            = YES
--- a/external/sharp/docsrc/planck.make
+++ b/external/sharp/docsrc/planck.make
@ -1,20 +0,0 @@
 # Documentation targets for the docsrc package.
 # SRCROOT, DOCDIR and the $(DOCDIR)_mkdir rule are not defined here; they are
 # expected to be provided by the makefile that includes this fragment.
 PKG:=docsrc
 # None of these targets creates a file named after itself.
 .PHONY: docsrc_idx docsrc_code_doc docsrc_clean doc
 # Install the landing page of the generated documentation.
 docsrc_idx: $(DOCDIR)_mkdir
 	cp $(SRCROOT)/docsrc/index_code.html $(DOCDIR)/index.html
 # Run doxygen for each package and move the resulting htmldoc directory to
 # $(DOCDIR)/<package>. The order matters: the later .dox files list the tag
 # files generated by the earlier ones in TAGFILES.
 # Everything after the cd is guarded with && so that a failed cd can never
 # cause the rm/mv commands to run in an unrelated directory.
 docsrc_code_doc: $(DOCDIR)_mkdir docsrc_idx
 	cd $(SRCROOT)/docsrc && { \
 	for i in c_utils libfftpack libsharp; do \
 	  doxygen $${i}.dox; \
 	  rm -rf $(DOCDIR)/$${i}; mv htmldoc $(DOCDIR)/$${i}; \
 	done; \
 	rm -f *.tag; \
 	}
 # Remove intermediate doxygen products from the docsrc directory.
 docsrc_clean:
 	cd $(SRCROOT)/docsrc && rm -f *.tag
 	cd $(SRCROOT)/docsrc && rm -rf htmldoc
 doc: docsrc_code_doc
--- a/external/sharp/fortran/sharp.f90
+++ b/external/sharp/fortran/sharp.f90
@ -1,286 +0,0 @@
 module sharp
  ! Fortran interface to the sharp C routines: constants, handle types,
  ! raw C bindings and convenience wrappers.
  use iso_c_binding
  implicit none
  ! Flag value for the `flags` argument of the alm_info constructors.
  integer, parameter :: SHARP_PACKED = 1
  ! Job types, passed as the `type` argument of sharp_execute.
  enum, bind(c)
     enumerator :: SHARP_YtW = 0, &
                   SHARP_Y = 1, &
                   SHARP_Yt = 2, &
                   SHARP_WY = 3, &
                   SHARP_ALM2MAP_DERIV1 = 4
  end enum
  ! Job flags. Each one is a single distinct bit (bits 4 to 7), so they can
  ! be combined with a bitwise OR.
  integer, parameter :: SHARP_DP             = ishft(1, 4), &
                        SHARP_ADD            = ishft(1, 5), &
                        SHARP_REAL_HARMONICS = ishft(1, 6), &
                        SHARP_NO_FFT         = ishft(1, 7)
  ! Handle to a C-side alm_info object.
  !   handle  : pointer filled in by the C constructor
  !   n_local : value reported by sharp_alm_count for this handle
  type sharp_alm_info
     type(c_ptr)         :: handle
     integer(c_intptr_t) :: n_local
  end type sharp_alm_info
  ! Handle to a C-side geom_info object.
  !   handle  : pointer filled in by the C constructor
  !   n_local : value reported by sharp_map_size for this handle
  type sharp_geom_info
     type(c_ptr)         :: handle
     integer(c_intptr_t) :: n_local
  end type sharp_geom_info
  ! Explicit interfaces for the C routines used by this module. Routines
  ! declared with a plain bind(c) are callable under their own name; those
  ! prefixed with c_ are wrapped by module procedures further below.
  interface
     ! ---- alm_info ----
     ! General constructor: nm entries in mval/mvstart; the new object is
     ! returned through alm_info.
     subroutine sharp_make_general_alm_info( &
         lmax, nm, stride, mval, mvstart, flags, alm_info) bind(c)
       use iso_c_binding
       integer(c_int), value, intent(in)    :: lmax, nm, stride, flags
       integer(c_int), intent(in)           :: mval(nm)
       integer(c_intptr_t), intent(in)     :: mvstart(nm)
       type(c_ptr), intent(out)             :: alm_info
     end subroutine sharp_make_general_alm_info
     ! Binding for the C symbol sharp_make_mmajor_real_packed_alm_info.
     ! ms is optional; the module wrapper omits it to request m=0..lmax.
     subroutine c_sharp_make_mmajor_real_packed_alm_info( &
         lmax, stride, nm, ms, alm_info) bind(c, name='sharp_make_mmajor_real_packed_alm_info')
       use iso_c_binding
       integer(c_int), value, intent(in)    :: lmax, nm, stride
       integer(c_int), intent(in), optional :: ms(nm)
       type(c_ptr), intent(out)             :: alm_info
     end subroutine c_sharp_make_mmajor_real_packed_alm_info
     ! Binding for sharp_alm_count; its result is stored as n_local of
     ! sharp_alm_info by the wrapper.
     function c_sharp_alm_count(alm_info) bind(c, name='sharp_alm_count')
       use iso_c_binding
       integer(c_intptr_t)           :: c_sharp_alm_count
       type(c_ptr), value, intent(in) :: alm_info
     end function c_sharp_alm_count
     ! Binding for sharp_destroy_alm_info; the handle is passed by value.
     subroutine c_sharp_destroy_alm_info(alm_info) bind(c, name='sharp_destroy_alm_info')
       use iso_c_binding
       type(c_ptr), value                   :: alm_info
     end subroutine c_sharp_destroy_alm_info
     ! ---- geom_info ----
     ! Constructor for a HEALPix geometry restricted to nrings rings.
     ! rings and weight are optional; the module wrapper omits rings and
     ! passes nrings = 4*nside-1 when no ring subset is requested.
     subroutine sharp_make_subset_healpix_geom_info ( &
          nside, stride, nrings, rings, weight, geom_info) bind(c)
       use iso_c_binding
       integer(c_int), value, intent(in)    :: nside, stride, nrings
       integer(c_int), intent(in), optional :: rings(nrings)
       real(c_double), intent(in), optional :: weight(2 * nside)
       type(c_ptr), intent(out)             :: geom_info
     end subroutine sharp_make_subset_healpix_geom_info
     ! Binding for sharp_destroy_geom_info; the handle is passed by value.
     subroutine c_sharp_destroy_geom_info(geom_info) bind(c, name='sharp_destroy_geom_info')
       use iso_c_binding
       type(c_ptr), value                   :: geom_info
     end subroutine c_sharp_destroy_geom_info
     ! Binding for sharp_map_size; its result is stored as n_local of
     ! sharp_geom_info by the wrapper.
     function c_sharp_map_size(info) bind(c, name='sharp_map_size')
       use iso_c_binding
       integer(c_intptr_t) :: c_sharp_map_size
       type(c_ptr), value   :: info
     end function c_sharp_map_size
     ! ---- execute ----
     ! Binding for sharp_execute. alm and map are arrays of C pointers
     ! (one entry per map, as built by sharp_execute_d); time and opcnt are
     ! optional outputs.
     subroutine c_sharp_execute(type, spin, alm, map, geom_info, alm_info, ntrans, &
                                flags, time, opcnt) bind(c, name='sharp_execute')
       use iso_c_binding
       integer(c_int), value                        :: type, spin, ntrans, flags
       type(c_ptr), value                           :: alm_info, geom_info
       real(c_double), intent(out), optional        :: time
       integer(c_long_long), intent(out), optional  :: opcnt
       type(c_ptr), intent(in)                      :: alm(*), map(*)
     end subroutine c_sharp_execute
     ! Binding for sharp_execute_mpi_fortran: same arguments as
     ! c_sharp_execute plus a leading communicator passed as a plain integer.
     subroutine c_sharp_execute_mpi(comm, type, spin, alm, map, geom_info, alm_info, ntrans, &
                                    flags, time, opcnt) bind(c, name='sharp_execute_mpi_fortran')
       use iso_c_binding
       integer(c_int), value                        :: comm, type, spin, ntrans, flags
       type(c_ptr), value                           :: alm_info, geom_info
       real(c_double), intent(out), optional        :: time
       integer(c_long_long), intent(out), optional  :: opcnt
       type(c_ptr), intent(in)                      :: alm(*), map(*)
     end subroutine c_sharp_execute_mpi
     ! ---- Legendre transforms ----
     ! Double precision binding: bl has lmax+1 entries, x and out have nx
     ! entries each; recfac is optional and is never supplied by the module
     ! wrappers.
     subroutine c_sharp_legendre_transform(bl, recfac, lmax, x, out, nx) &
          bind(c, name='sharp_legendre_transform')
       use iso_c_binding
       integer(c_intptr_t), value :: lmax, nx
       real(c_double) :: bl(lmax + 1), x(nx), out(nx)
       real(c_double), optional :: recfac(lmax + 1)
     end subroutine c_sharp_legendre_transform
     ! Single precision counterpart of c_sharp_legendre_transform.
     subroutine c_sharp_legendre_transform_s(bl, recfac, lmax, x, out, nx) &
          bind(c, name='sharp_legendre_transform_s')
       use iso_c_binding
       integer(c_intptr_t), value :: lmax, nx
       real(c_float) :: bl(lmax + 1), x(nx), out(nx)
       real(c_float), optional :: recfac(lmax + 1)
     end subroutine c_sharp_legendre_transform_s
  end interface
  ! Generic name for the transform driver; only a double precision
  ! implementation is provided.
  interface sharp_execute
     module procedure sharp_execute_d
  end interface
  ! Generic name resolving to the double or single precision wrapper
  ! according to the kind of the actual arguments.
  interface sharp_legendre_transform
     module procedure sharp_legendre_transform_d, sharp_legendre_transform_s
  end interface sharp_legendre_transform
 contains
  ! alm info
  ! Build an m-major, real, packed alm_info with stride 1 and record its
  ! local coefficient count in alm_info%n_local.
  !   lmax     : maximum multipole
  !   ms       : optional list of m values handled locally; when absent the
  !              C routine is asked for lmax+1 values without a list, which
  !              selects m=0..lmax
  !   alm_info : resulting handle
  subroutine sharp_make_mmajor_real_packed_alm_info(lmax, ms, alm_info)
    use iso_c_binding
    integer(c_int), value, intent(in)    :: lmax
    integer(c_int), intent(in), optional :: ms(:)
    type(sharp_alm_info), intent(out)    :: alm_info
    !--
    integer(c_int), allocatable          :: m_list(:)
    integer(c_int)                       :: n_m
    if (.not. present(ms)) then
       call c_sharp_make_mmajor_real_packed_alm_info(lmax=lmax, stride=1, nm=lmax + 1, &
                                                     alm_info=alm_info%handle)
    else
       ! Hand the C routine a private copy of the m list (presumably so it
       ! always receives a contiguous buffer).
       n_m = size(ms)
       allocate(m_list(n_m))
       m_list(:) = ms
       call c_sharp_make_mmajor_real_packed_alm_info(lmax=lmax, stride=1, nm=n_m, &
                                                     ms=m_list, alm_info=alm_info%handle)
       deallocate(m_list)
    end if
    alm_info%n_local = c_sharp_alm_count(alm_info%handle)
  end subroutine sharp_make_mmajor_real_packed_alm_info
  ! Release the C-side alm_info object and reset the handle to null.
  ! A handle that is already null (for instance after a previous destroy) is
  ! left alone instead of being handed to the C destructor again.
  subroutine sharp_destroy_alm_info(alm_info)
    use iso_c_binding
    type(sharp_alm_info), intent(inout) :: alm_info
    if (c_associated(alm_info%handle)) then
       call c_sharp_destroy_alm_info(alm_info%handle)
       alm_info%handle = c_null_ptr
    end if
  end subroutine sharp_destroy_alm_info
  ! geom info
  ! Build a HEALPix geom_info with stride 1 and record its local map size
  ! in geom_info%n_local.
  !   nside     : HEALPix resolution parameter
  !   rings     : optional subset of rings handled locally; when absent the
  !               C routine is called with nrings = 4*nside-1 and no list
  !   weight    : optional array of 2*nside values, forwarded as is
  !   geom_info : resulting handle
  subroutine sharp_make_healpix_geom_info(nside, rings, weight, geom_info)
    integer(c_int), value                :: nside
    integer(c_int), optional             :: rings(:)
    real(c_double), intent(in), optional :: weight(2 * nside)
    type(sharp_geom_info), intent(out)   :: geom_info
    !--
    integer(c_int) :: n_sel
    integer(c_int), allocatable :: ring_list(:)
    if (.not. present(rings)) then
       call sharp_make_subset_healpix_geom_info(nside=nside, stride=1, &
                                                nrings=4 * nside - 1, &
                                                weight=weight, &
                                                geom_info=geom_info%handle)
    else
       ! Hand the C routine a private copy of the ring list (presumably so
       ! it always receives a contiguous buffer).
       n_sel = size(rings)
       allocate(ring_list(n_sel))
       ring_list(:) = rings
       call sharp_make_subset_healpix_geom_info(nside=nside, stride=1, &
                                                nrings=n_sel, rings=ring_list, &
                                                weight=weight, &
                                                geom_info=geom_info%handle)
       deallocate(ring_list)
    end if
    geom_info%n_local = c_sharp_map_size(geom_info%handle)
  end subroutine sharp_make_healpix_geom_info
  ! Release the C-side geom_info object and reset the handle to null.
  ! A handle that is already null (for instance after a previous destroy) is
  ! left alone instead of being handed to the C destructor again.
  subroutine sharp_destroy_geom_info(geom_info)
    use iso_c_binding
    type(sharp_geom_info), intent(inout) :: geom_info
    if (c_associated(geom_info%handle)) then
       call c_sharp_destroy_geom_info(geom_info%handle)
       geom_info%handle = c_null_ptr
    end if
  end subroutine sharp_destroy_geom_info
  ! Run a double precision transform job.
  !
  ! Currently the only mode supported is stacked (not interleaved) maps.
  !
  ! Note that passing the exact dimensions of alm/map is necessary; it
  ! prevents the caller from doing overly elaborate slicing before passing
  ! the arrays in.
  !
  ! Usage:
  !
  ! The alm array must have shape exactly alm(alm_info%n_local, nmaps).
  ! The map array must have shape exactly map(geom_info%n_local, nmaps).
  !
  ! Arguments:
  !   type, spin : job type (one of the SHARP_* enumerators) and spin
  !   nmaps      : number of columns in alm and map; forwarded as ntrans when
  !                spin is 0, and as nmaps/2 otherwise
  !   add        : optional; when present and true the SHARP_ADD flag is set
  !   time, opcnt: optional outputs forwarded to the C routine
  !   comm       : optional communicator; when present the MPI variant of
  !                the C routine is called
  subroutine sharp_execute_d(type, spin, nmaps, alm, alm_info, map, geom_info, &
                             add, time, opcnt, comm)
    use iso_c_binding
    ! NOTE(review): nothing from the mpi module is referenced in this
    ! routine; the use statement is kept so build requirements are unchanged.
    use mpi
    implicit none
    integer(c_int), value                        :: type, spin, nmaps
    integer(c_int), optional                     :: comm
    logical, value, optional                     :: add  ! should add instead of replace out
    type(sharp_alm_info)                         :: alm_info
    type(sharp_geom_info)                        :: geom_info
    real(c_double), intent(out), optional        :: time
    integer(c_long_long), intent(out), optional  :: opcnt
    real(c_double), target, intent(inout)        :: alm(0:alm_info%n_local - 1, 1:nmaps)
    real(c_double), target, intent(inout)        :: map(0:geom_info%n_local - 1, 1:nmaps)
    !--
    integer(c_int)         :: mod_flags, ntrans, k
    type(c_ptr), target    :: alm_ptr(nmaps)
    type(c_ptr), target    :: map_ptr(nmaps)
    mod_flags = SHARP_DP
    ! Fortran does not guarantee short-circuit evaluation, so an absent
    ! optional must not appear in the same expression as present().
    if (present(add)) then
       if (add) mod_flags = ior(mod_flags, int(SHARP_ADD, c_int))
    end if
    if (spin == 0) then
       ntrans = nmaps
    else
       ntrans = nmaps / 2
    end if
    ! Set up pointer tables with one entry per map column. Entries stay null
    ! when the corresponding local size is zero, because element (0, k)
    ! does not exist in that case.
    alm_ptr(:) = c_null_ptr
    map_ptr(:) = c_null_ptr
    do k = 1, nmaps
       if (alm_info%n_local > 0) alm_ptr(k) = c_loc(alm(0, k))
       if (geom_info%n_local > 0) map_ptr(k) = c_loc(map(0, k))
    end do
    if (present(comm)) then
      call c_sharp_execute_mpi(comm, type, spin, alm_ptr, map_ptr, &
          geom_info=geom_info%handle, &
          alm_info=alm_info%handle, &
          ntrans=ntrans, &
          flags=mod_flags, &
          time=time, &
          opcnt=opcnt)
    else
      call c_sharp_execute(type, spin, alm_ptr, map_ptr, &
          geom_info=geom_info%handle, &
          alm_info=alm_info%handle, &
          ntrans=ntrans, &
          flags=mod_flags, &
          time=time, &
          opcnt=opcnt)
    end if
  end subroutine sharp_execute_d
  ! Double precision Legendre transform wrapper.
  !   bl  : coefficients; its size minus one is passed to C as lmax
  !   x   : evaluation points
  !   out : result array, one entry per element of x
  ! The optional recfac argument of the C routine is not supplied.
  subroutine sharp_legendre_transform_d(bl, x, out)
    use iso_c_binding
    real(c_double) :: bl(:)
    real(c_double) :: x(:), out(size(x))
    !--
    integer(c_intptr_t) :: lmax, nx
    lmax = int(size(bl) - 1, c_intptr_t)
    nx = int(size(x), c_intptr_t)
    call c_sharp_legendre_transform(bl, lmax=lmax, x=x, out=out, nx=nx)
  end subroutine sharp_legendre_transform_d
  ! Single precision Legendre transform wrapper.
  !   bl  : coefficients; its size minus one is passed to C as lmax
  !   x   : evaluation points
  !   out : result array, one entry per element of x
  ! The optional recfac argument of the C routine is not supplied.
  subroutine sharp_legendre_transform_s(bl, x, out)
    use iso_c_binding
    real(c_float) :: bl(:)
    real(c_float) :: x(:), out(size(x))
    !--
    integer(c_intptr_t) :: lmax, nx
    lmax = int(size(bl) - 1, c_intptr_t)
    nx = int(size(x), c_intptr_t)
    call c_sharp_legendre_transform_s(bl, lmax=lmax, x=x, out=out, nx=nx)
  end subroutine sharp_legendre_transform_s
 end module
--- a/external/sharp/fortran/test_sharp.f90
+++ b/external/sharp/fortran/test_sharp.f90
@ -1,84 +0,0 @@
 program test_sharp
  ! MPI smoke test for the sharp module: distributes the m values and the
  ! HEALPix rings round-robin over the ranks, runs one SHARP_Y job with
  ! spin 0 on a single map, prints the arrays, and finally exercises the
  ! Legendre transform wrappers.
  use mpi
  use sharp
  ! Every iso_c_binding kind used below is imported explicitly rather than
  ! being picked up indirectly through `use sharp`.
  use iso_c_binding, only : c_int, c_float, c_double
  implicit none
  integer, parameter :: lmax = 2, nside = 2
  type(sharp_alm_info) :: alm_info
  type(sharp_geom_info) :: geom_info
  ! Buffers are dimensioned for the full problem; each rank only uses the
  ! leading n_local entries reported by its alm_info / geom_info.
  real(c_double), dimension(0:(lmax + 1)**2 - 1, 1:1) :: alm
  real(c_double), dimension(0:12*nside**2 - 1, 1:1) :: map
  integer(c_int), dimension(1:lmax + 1) :: ms
  integer(c_int), dimension(1:4 * nside - 1) :: rings
  integer(c_int) :: nm, m, nrings, iring
  integer :: nodecount, rank, ierr
  call MPI_Init(ierr)
  call MPI_Comm_size(MPI_COMM_WORLD, nodecount, ierr)
  call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
  ! Round-robin distribution of m = 0..lmax over the ranks.
  nm = 0
  do m = rank, lmax, nodecount
     nm = nm + 1
     ms(nm) = m
  end do
  ! Round-robin distribution of rings 1..4*nside-1 over the ranks.
  nrings = 0
  do iring = rank + 1, 4 * nside - 1, nodecount
     nrings = nrings + 1
     rings(nrings) = iring
  end do
  ! Only the first local coefficient on rank 0 is non-zero.
  alm = 0
  map = 0
  if (rank == 0) then
    alm(0, 1) = 1
  end if
  print *, ms(1:nm)
  call sharp_make_mmajor_real_packed_alm_info(lmax, ms=ms(1:nm), alm_info=alm_info)
  print *, 'alm_info%n_local', alm_info%n_local
  call sharp_make_healpix_geom_info(nside, rings=rings(1:nrings), geom_info=geom_info)
  print *, 'geom_info%n_local', geom_info%n_local
  print *, 'execute'
  call sharp_execute(SHARP_Y, 0, 1, alm, alm_info, map, geom_info, comm=MPI_COMM_WORLD)
  print *, alm
  print *, map
  call sharp_destroy_alm_info(alm_info)
  call sharp_destroy_geom_info(geom_info)
  print *, 'DONE'
  call MPI_Finalize(ierr)
  ! The Legendre transform test makes no MPI calls, so it may run after
  ! MPI_Finalize; it is executed by every rank.
  print *, 'LEGENDRE TRANSFORMS'
  call test_legendre_transforms()
 contains
  ! Calls the generic sharp_legendre_transform once in double and once in
  ! single precision with bl(l) = 1/(l+1) and x(i) = 1/i, printing both
  ! results.
  subroutine test_legendre_transforms()
    integer, parameter :: lmax = 20, nx=10
    real(c_double) :: bl(0:lmax)
    real(c_double) :: x(nx), out(nx)
    real(c_float) :: out_s(nx)
    !--
    integer :: l, i
    do l = 0, lmax
       bl(l) = 1.0 / real(l + 1, c_double)
    end do
    do i = 1, nx
       x(i) = 1 / real(i, c_double)
    end do
    out = 0
    call sharp_legendre_transform(bl, x, out)
    print *, out
    call sharp_legendre_transform(real(bl, c_float), real(x, c_float), out_s)
    print *, out_s
  end subroutine test_legendre_transforms
 end program test_sharp
--- a/external/sharp/libfftpack/README
+++ b/external/sharp/libfftpack/README
@ -1,34 +0,0 @@
 ls_fft description:
 This package is intended to calculate one-dimensional real or complex FFTs
 with high accuracy and good efficiency even for lengths containing large
 prime factors.
 The code is written in C, but a Fortran wrapper exists as well.
 Before any FFT is executed, a plan must be generated for it. Plan creation
 is designed to be fast, so that there is no significant overhead if the
 plan is only used once or a few times.
 The main component of the code is based on Paul N. Swarztrauber's FFTPACK in the
 double precision incarnation by Hugh C. Pumphrey
 (http://www.netlib.org/fftpack/dp.tgz).
 I replaced the iterative sine and cosine calculations in radfg() and radbg()
 by an exact calculation, which slightly improves the transform accuracy for
 real FFTs with lengths containing large prime factors.
 Since FFTPACK becomes quite slow for FFT lengths with large prime factors
 (in the worst case of prime lengths it reaches O(n*n) complexity), I
 implemented Bluestein's algorithm, which computes an FFT of length n by
 several FFTs of length n2>=2*n-1 and a convolution. Since n2 can be chosen
 to be highly composite, this algorithm is more efficient if n has large
 prime factors. The longer FFTs themselves are then computed using the FFTPACK
 routines.
 Bluestein's algorithm was implemented according to the description at
 http://en.wikipedia.org/wiki/Bluestein's_FFT_algorithm.
 Thread-safety:
 All routines can be called concurrently; all information needed by ls_fft
 is stored in the plan variable. However, using the same plan variable on
 multiple threads simultaneously is not supported and will lead to data
 corruption.
--- a/external/sharp/libfftpack/bluestein.c
+++ b/external/sharp/libfftpack/bluestein.c
@ -1,173 +0,0 @@
 /*
 *  This file is part of libfftpack.
 *
 *  libfftpack is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libfftpack is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libfftpack; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*
 *  Copyright (C) 2005, 2006, 2007, 2008 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #include <math.h>
 #include <stdlib.h>
 #include "fftpack.h"
 #include "bluestein.h"
 /* Returns the sum of all prime factors of n, counted with multiplicity
    (e.g. 12 = 2*2*3 gives 7). Returns 0 for n==0 and n==1.
    The trial-division bound is checked with integer arithmetic (x <= n/x),
    which is exact over the whole size_t range. */
 size_t prime_factor_sum (size_t n)
  {
  size_t result=0,x,tmp;
  /* 0 is divisible by 2 forever; without this guard the loop below would
     never terminate. */
  if (n==0) return 0;
  /* strip all factors of 2 */
  while (((tmp=(n>>1))<<1)==n)
    { result+=2; n=tmp; }
  /* odd trial divisors up to sqrt(n); n shrinks as factors are removed */
  for (x=3; x<=n/x; x+=2)
    while ((tmp=(n/x))*x==n)
      {
      result+=x;
      n=tmp;
      }
  /* whatever remains above 1 is a single prime factor */
  if (n>1) result+=n;
  return result;
  }
 /* Returns the smallest number of the form 2^a * 3^b * 5^c that is >= n.
    Values of n up to 6 are returned unchanged. */
 static size_t good_size(size_t n)
  {
  size_t best=2*n, pow2, pow23, cand;
  if (n<=6) return n;
  /* Enumerate candidates below the current best; every hit that is >= n
     lowers the bound, which also tightens the enclosing loops. */
  pow2=1;
  while (pow2<best)
    {
    pow23=pow2;
    while (pow23<best)
      {
      cand=pow23;
      while (cand<best)
        {
        if (cand>=n) best=cand;
        cand*=5;
        }
      pow23*=3;
      }
    pow2*=2;
    }
  return best;
  }
 /* Prepares the work storage for Bluestein transforms of length n.
    On return *tstorage points to a freshly allocated array of *worksize
    doubles laid out as:
      [0..1]         padded length n2, stored as a size_t
      bk  (2*n)      chirp factors exp(i*pi*m*m/n), interleaved re/im
      bkf (2*n2)     zero-padded bk, scaled by 1/n2 and Fourier transformed
      work           cffti() work area for length n2
    The remainder of the array is scratch space used by bluestein(). */
 void bluestein_i (size_t n, double **tstorage, size_t *worksize)
  {
  static const double pi=3.14159265358979323846;
  size_t n2=good_size(n*2-1);
  size_t total=2+2*n+8*n2+16;
  size_t k, idx, coeff;
  double angle, xn2;
  double *store, *bk, *bkf, *work;
  double pibyn=pi/n;
  *worksize=total;
  store=RALLOC(double,total);
  *tstorage=store;
  ((size_t *)store)[0]=n2;
  bk  = store+2;
  bkf = bk+2*n;
  work= bkf+2*n2;
 /* initialize b_k; coeff tracks k*k modulo 2*n via the running sum of odd
    numbers, which keeps the angle argument small */
  bk[0] = 1;
  bk[1] = 0;
  coeff=0;
  for (k=1; k<n; ++k)
    {
    coeff+=2*k-1;
    if (coeff>=2*n) coeff-=2*n;
    angle = pibyn*coeff;
    bk[2*k] = cos(angle);
    bk[2*k+1] = sin(angle);
    }
 /* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */
  xn2 = 1./n2;
  bkf[0] = bk[0]*xn2;
  bkf[1] = bk[1]*xn2;
  /* each entry is also mirrored to the upper end of the padded array */
  for (k=1; k<n; ++k)
    {
    idx=2*k;
    bkf[idx]   = bkf[2*n2-idx]   = bk[idx]   *xn2;
    bkf[idx+1] = bkf[2*n2-idx+1] = bk[idx+1] *xn2;
    }
  idx=2*n;
  while (idx<=(2*n2-2*n+1))
    {
    bkf[idx]=0.;
    ++idx;
    }
  cffti (n2,work);
  cfftf (n2,bkf,work);
  }
 /* Performs an in-place complex FFT of length n on data (n interleaved
    re/im pairs) using the storage prepared by bluestein_i() for the same n.
    isign>0 and isign<=0 select the two opposite transform directions. */
 void bluestein (size_t n, double *data, double *tstorage, int isign)
  {
  size_t n2=*((size_t *)tstorage);
  size_t k, ir, ii;
  double *bk   = tstorage+2;
  double *bkf  = bk+2*n;
  double *work = bkf+2*n2;
  double *akf  = tstorage+2+2*n+6*n2+16;
 /* initialize a_k and FFT it */
  if (isign>0)
    for (k=0; k<n; ++k)
      {
      ir=2*k; ii=ir+1;
      akf[ir] = data[ir]*bk[ir] - data[ii]*bk[ii];
      akf[ii] = data[ir]*bk[ii] + data[ii]*bk[ir];
      }
  else
    for (k=0; k<n; ++k)
      {
      ir=2*k; ii=ir+1;
      akf[ir] = data[ir]*bk[ir] + data[ii]*bk[ii];
      akf[ii] =-data[ir]*bk[ii] + data[ii]*bk[ir];
      }
  /* zero padding up to the transform length n2 */
  for (ir=2*n; ir<2*n2; ++ir)
    akf[ir]=0;
  cfftf (n2,akf,work);
 /* do the convolution */
  if (isign>0)
    for (k=0; k<n2; ++k)
      {
      double ar, ai;
      ir=2*k; ii=ir+1;
      ar=akf[ir]; ai=akf[ii];
      akf[ir] =  ar*bkf[ir] + ai*bkf[ii];
      akf[ii] = -ar*bkf[ii] + ai*bkf[ir];
      }
  else
    for (k=0; k<n2; ++k)
      {
      double ar, ai;
      ir=2*k; ii=ir+1;
      ar=akf[ir]; ai=akf[ii];
      akf[ir] = ar*bkf[ir] - ai*bkf[ii];
      akf[ii] = ar*bkf[ii] + ai*bkf[ir];
      }
 /* inverse FFT */
  cfftb (n2,akf,work);
 /* multiply by b_k* */
  if (isign>0)
    for (k=0; k<n; ++k)
      {
      ir=2*k; ii=ir+1;
      data[ir] = bk[ir]*akf[ir] - bk[ii]*akf[ii];
      data[ii] = bk[ii]*akf[ir] + bk[ir]*akf[ii];
      }
  else
    for (k=0; k<n; ++k)
      {
      ir=2*k; ii=ir+1;
      data[ir] = bk[ir]*akf[ir] + bk[ii]*akf[ii];
      data[ii] =-bk[ii]*akf[ir] + bk[ir]*akf[ii];
      }
  }
--- a/external/sharp/libfftpack/bluestein.h
+++ b/external/sharp/libfftpack/bluestein.h
@ -1,48 +0,0 @@
 /*
 *  This file is part of libfftpack.
 *
 *  libfftpack is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libfftpack is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libfftpack; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*
 *  Copyright (C) 2005 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef PLANCK_BLUESTEIN_H
 #define PLANCK_BLUESTEIN_H
 #include "c_utils.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* Returns the sum of all prime factors of n. */
 size_t prime_factor_sum (size_t n);
 /* Allocates and initialises the storage needed for Bluestein transforms of
    length n. On return *tstorage points to the new array and *worksize
    holds its length in doubles. The array is allocated inside this routine;
    releasing it is presumably the caller's responsibility. */
 void bluestein_i (size_t n, double **tstorage, size_t *worksize);
 /* Transforms data in place, where data holds n complex values stored as
    interleaved real/imaginary doubles. tstorage must be the array produced
    by bluestein_i() for the same n. isign>0 selects one transform
    direction, any other value the opposite one. */
 void bluestein (size_t n, double *data, double *tstorage, int isign);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/libfftpack/fftpack.c
+++ b/external/sharp/libfftpack/fftpack.c
@ -1,833 +0,0 @@
 /*
 *  This file is part of libfftpack.
 *
 *  libfftpack is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libfftpack is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libfftpack; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*
  fftpack.c : A set of FFT routines in C.
  Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
  (Version 4, 1985).
  C port by Martin Reinecke (2010)
 */
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include "fftpack.h"
 /* Index helpers. They expand in place and expect ido (plus l1 or cdim) and
    the arrays wa / ch / cc to be in scope where they are used.
    WA(x,i): element i of twiddle row x, rows being ido elements apart. */
 #define WA(x,i) wa[(i)+(x)*ido]
 /* Flattened three-index access. This first layout (ch with l1 as middle
    extent, cc with cdim as middle extent) is the one seen by the complex
    passes pulled in from fftpack_inc.c below. */
 #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
 #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
 /* scalar butterfly: a=c+d, b=c-d */
 #define PM(a,b,c,d) { a=c+d; b=c-d; }
 /* same butterfly on cmplx values (members .r and .i) */
 #define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
 /* a = b+c on cmplx values */
 #define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; }
 /* scale both components of a by the real factor b */
 #define SCALEC(a,b) { a.r*=b; a.i*=b; }
 /* (r,i) -> (-i,r), i.e. a is multiplied by the imaginary unit */
 #define CONJFLIPC(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; }
 /* (a+ib) = conj(c+id) * (e+if) */
 #define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; }
 /* complex number stored as two consecutive doubles */
 typedef struct {
  double r,i;
 } cmplx;
 #define CONCAT(a,b) a ## b
 /* fftpack_inc.c is included twice. X(arg) names the functions it defines:
    passb<arg> while BACKWARD is defined, passf<arg> on the second include. */
 #define X(arg) CONCAT(passb,arg)
 #define BACKWARD
 #include "fftpack_inc.c"
 #undef BACKWARD
 #undef X
 #define X(arg) CONCAT(passf,arg)
 #include "fftpack_inc.c"
 #undef X
 #undef CC
 #undef CH
 /* Layout for the forward real passes radf2..radf5: cc is addressed with l1
    as the middle extent, ch with cdim (the radix) as the middle extent. */
 #define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))]
 #define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))]
 /* Radix-2 pass of the forward real transform; rfftf1 selects it when the
    current factor is 2. Reads cc, writes ch, takes twiddle factors from wa. */
 static void radf2 (size_t ido, size_t l1, const double *cc, double *ch,
  const double *wa)
  {
  const size_t cdim=2;
  size_t i, k, ic;
  double ti2, tr2;
  /* element 0 of every k: plain sum and difference, no twiddle involved */
  for (k=0; k<l1; k++)
    PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1))
  /* even ido: the last element (index ido-1) is handled separately */
  if ((ido&1)==0)
    for (k=0; k<l1; k++)
      {
      CH(    0,1,k) = -CC(ido-1,k,1);
      CH(ido-1,0,k) =  CC(ido-1,k,0);
      }
  /* with ido<=2 the loop below would have no iterations */
  if (ido<=2) return;
  for (k=0; k<l1; k++)
    for (i=2; i<ido; i+=2)
      {
      /* ic is the mirrored index ido-i used for the second output half */
      ic=ido-i;
      MULPM (tr2,ti2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
      PM (CH(i-1,0,k),CH(ic-1,1,k),CC(i-1,k,0),tr2)
      PM (CH(i  ,0,k),CH(ic  ,1,k),ti2,CC(i  ,k,0))
      }
  }
 /* Radix-3 pass of the forward real transform; rfftf1 selects it when the
    current factor is 3. Reads cc, writes ch, takes twiddle factors from wa. */
 static void radf3(size_t ido, size_t l1, const double *cc, double *ch,
  const double *wa)
  {
  const size_t cdim=3;
  /* taur = -1/2 and taui = sqrt(3)/2, i.e. cos and sin of 120 degrees */
  static const double taur=-0.5, taui=0.86602540378443864676;
  size_t i, k, ic;
  double ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3;
  /* element 0 of every k needs no twiddle factors */
  for (k=0; k<l1; k++)
    {
    cr2=CC(0,k,1)+CC(0,k,2);
    CH(0,0,k) = CC(0,k,0)+cr2;
    CH(0,2,k) = taui*(CC(0,k,2)-CC(0,k,1));
    CH(ido-1,1,k) = CC(0,k,0)+taur*cr2;
    }
  if (ido==1) return;
  for (k=0; k<l1; k++)
    for (i=2; i<ido; i+=2)
      {
      ic=ido-i;
      /* apply the twiddle factors to inputs 1 and 2 */
      MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
      MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
      cr2=dr2+dr3;
      ci2=di2+di3;
      CH(i-1,0,k) = CC(i-1,k,0)+cr2;
      CH(i  ,0,k) = CC(i  ,k,0)+ci2;
      tr2 = CC(i-1,k,0)+taur*cr2;
      ti2 = CC(i  ,k,0)+taur*ci2;
      tr3 = taui*(di2-di3);
      ti3 = taui*(dr3-dr2);
      PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr3)
      PM(CH(i  ,2,k),CH(ic  ,1,k),ti3,ti2)
      }
  }
 /* Radix-4 pass of the forward real transform; rfftf1 selects it when the
    current factor is 4. Reads cc, writes ch, takes twiddle factors from wa. */
 static void radf4(size_t ido, size_t l1, const double *cc, double *ch,
  const double *wa)
  {
  const size_t cdim=4;
  /* hsqt2 = sqrt(2)/2 */
  static const double hsqt2=0.70710678118654752440;
  size_t i, k, ic;
  double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
  /* element 0 of every k needs no twiddle factors */
  for (k=0; k<l1; k++)
    {
    PM (tr1,CH(0,2,k),CC(0,k,3),CC(0,k,1))
    PM (tr2,CH(ido-1,1,k),CC(0,k,0),CC(0,k,2))
    PM (CH(0,0,k),CH(ido-1,3,k),tr2,tr1)
    }
  /* even ido: the last element (index ido-1) is handled separately */
  if ((ido&1)==0)
    for (k=0; k<l1; k++)
      {
      ti1=-hsqt2*(CC(ido-1,k,1)+CC(ido-1,k,3));
      tr1= hsqt2*(CC(ido-1,k,1)-CC(ido-1,k,3));
      PM (CH(ido-1,0,k),CH(ido-1,2,k),CC(ido-1,k,0),tr1)
      PM (CH(    0,3,k),CH(    0,1,k),ti1,CC(ido-1,k,2))
      }
  /* with ido<=2 the loop below would have no iterations */
  if (ido<=2) return;
  for (k=0; k<l1; k++)
    for (i=2; i<ido; i+=2)
      {
      ic=ido-i;
      /* apply the twiddle factors to inputs 1..3 */
      MULPM(cr2,ci2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
      MULPM(cr3,ci3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
      MULPM(cr4,ci4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
      PM(tr1,tr4,cr4,cr2)
      PM(ti1,ti4,ci2,ci4)
      PM(tr2,tr3,CC(i-1,k,0),cr3)
      PM(ti2,ti3,CC(i  ,k,0),ci3)
      PM(CH(i-1,0,k),CH(ic-1,3,k),tr2,tr1)
      PM(CH(i  ,0,k),CH(ic  ,3,k),ti1,ti2)
      PM(CH(i-1,2,k),CH(ic-1,1,k),tr3,ti4)
      PM(CH(i  ,2,k),CH(ic  ,1,k),tr4,ti3)
      }
  }
 /* Radix-5 pass of the forward real transform; rfftf1 selects it when the
    current factor is 5. Reads cc, writes ch, takes twiddle factors from wa. */
 static void radf5(size_t ido, size_t l1, const double *cc, double *ch,
  const double *wa)
  {
  const size_t cdim=5;
  /* (tr11,ti11) and (tr12,ti12): cos/sin of 72 and 144 degrees */
  static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
                      tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
  size_t i, k, ic;
  double ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3,
         dr4, dr5, cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
  /* element 0 of every k needs no twiddle factors */
  for (k=0; k<l1; k++)
    {
    PM (cr2,ci5,CC(0,k,4),CC(0,k,1))
    PM (cr3,ci4,CC(0,k,3),CC(0,k,2))
    CH(0,0,k)=CC(0,k,0)+cr2+cr3;
    CH(ido-1,1,k)=CC(0,k,0)+tr11*cr2+tr12*cr3;
    CH(0,2,k)=ti11*ci5+ti12*ci4;
    CH(ido-1,3,k)=CC(0,k,0)+tr12*cr2+tr11*cr3;
    CH(0,4,k)=ti12*ci5-ti11*ci4;
    }
  if (ido==1) return;
  for (k=0; k<l1;++k)
    for (i=2; i<ido; i+=2)
      {
      ic=ido-i;
      /* apply the twiddle factors to inputs 1..4 */
      MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
      MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
      MULPM (dr4,di4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
      MULPM (dr5,di5,WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4))
      PM(cr2,ci5,dr5,dr2)
      PM(ci2,cr5,di2,di5)
      PM(cr3,ci4,dr4,dr3)
      PM(ci3,cr4,di3,di4)
      CH(i-1,0,k)=CC(i-1,k,0)+cr2+cr3;
      CH(i  ,0,k)=CC(i  ,k,0)+ci2+ci3;
      tr2=CC(i-1,k,0)+tr11*cr2+tr12*cr3;
      ti2=CC(i  ,k,0)+tr11*ci2+tr12*ci3;
      tr3=CC(i-1,k,0)+tr12*cr2+tr11*cr3;
      ti3=CC(i  ,k,0)+tr12*ci2+tr11*ci3;
      MULPM(tr5,tr4,cr5,cr4,ti11,ti12)
      MULPM(ti5,ti4,ci5,ci4,ti11,ti12)
      PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr5)
      PM(CH(i  ,2,k),CH(ic  ,1,k),ti5,ti2)
      PM(CH(i-1,4,k),CH(ic-1,3,k),tr3,tr4)
      PM(CH(i  ,4,k),CH(ic  ,3,k),ti4,ti3)
      }
  }
 #undef CH
 #undef CC
 /* Layouts for the generic-radix real passes. CH and C1 use l1 as the middle
    extent, CC uses cdim; C2 and CH2 view cc and ch as rows of idl1 elements.
    CC and C1 address the same array cc, as do CH and CH2 for ch. */
 #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
 #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
 #define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))]
 #define C2(a,b) cc[(a)+idl1*(b)]
 #define CH2(a,b) ch[(a)+idl1*(b)]
 /* Generic-radix pass of the forward real transform; rfftf1 uses it for every
    factor other than 2, 3, 4 and 5 and passes idl1 = ido*l1.
    Both cc and ch are read and written. When ido==1 the input is taken from
    ch (it is copied into cc first); otherwise it is taken from cc. The result
    is left in cc in either case. A temporary table of 2*ip doubles is
    allocated with RALLOC and released with DEALLOC. */
 static void radfg(size_t ido, size_t ip, size_t l1, size_t idl1,
  double *cc, double *ch, const double *wa)
  {
  const size_t cdim=ip;
  static const double twopi=6.28318530717958647692;
  size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
  double ai1, ai2, ar1, ar2, arg;
  double *csarr;
  size_t aidx;
  ipph=(ip+1)/ 2;
  if(ido!=1)
    {
    /* row 0 is copied unchanged, rows 1..ip-1 are multiplied by the twiddles */
    memcpy(ch,cc,idl1*sizeof(double));
    for(j=1; j<ip; j++)
      for(k=0; k<l1; k++)
        {
        CH(0,k,j)=C1(0,k,j);
        idij=(j-1)*ido+1;
        for(i=2; i<ido; i+=2,idij+=2)
          MULPM(CH(i-1,k,j),CH(i,k,j),wa[idij-1],wa[idij],C1(i-1,k,j),C1(i,k,j))
        }
    /* combine row j with its mirror row jc=ip-j */
    for(j=1,jc=ip-1; j<ipph; j++,jc--)
      for(k=0; k<l1; k++)
        for(i=2; i<ido; i+=2)
          {
          PM(C1(i-1,k,j),C1(i  ,k,jc),CH(i-1,k,jc),CH(i-1,k,j ))
          PM(C1(i  ,k,j),C1(i-1,k,jc),CH(i  ,k,j ),CH(i  ,k,jc))
          }
    }
  else
    memcpy(cc,ch,idl1*sizeof(double));
  for(j=1,jc=ip-1; j<ipph; j++,jc--)
    for(k=0; k<l1; k++)
      PM(C1(0,k,j),C1(0,k,jc),CH(0,k,jc),CH(0,k,j))
  /* csarr[2*m], csarr[2*m+1] = cos, sin of m*2*pi/ip for m=0..ip-1; the upper
     half is filled by symmetry (same cosine, negated sine) */
  csarr=RALLOC(double,2*ip);
  arg=twopi / ip;
  csarr[0]=1.;
  csarr[1]=0.;
  csarr[2]=csarr[2*ip-2]=cos(arg);
  csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
  for (i=2; i<=ip/2; ++i)
    {
    csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
    csarr[2*i+1]=sin(i*arg);
    csarr[2*ip-2*i+1]=-csarr[2*i+1];
    }
  for(l=1,lc=ip-1; l<ipph; l++,lc--)
    {
    ar1=csarr[2*l];
    ai1=csarr[2*l+1];
    for(ik=0; ik<idl1; ik++)
      {
      CH2(ik,l)=C2(ik,0)+ar1*C2(ik,1);
      CH2(ik,lc)=ai1*C2(ik,ip-1);
      }
    /* aidx walks the table in steps of 2*l, wrapping at 2*ip */
    aidx=2*l;
    for(j=2,jc=ip-2; j<ipph; j++,jc--)
      {
      aidx+=2*l;
      if (aidx>=2*ip) aidx-=2*ip;
      ar2=csarr[aidx];
      ai2=csarr[aidx+1];
      for(ik=0; ik<idl1; ik++)
        {
        CH2(ik,l )+=ar2*C2(ik,j );
        CH2(ik,lc)+=ai2*C2(ik,jc);
        }
      }
    }
  DEALLOC(csarr);
  for(j=1; j<ipph; j++)
    for(ik=0; ik<idl1; ik++)
      CH2(ik,0)+=C2(ik,j);
  /* write the result back into cc using the CC (cdim-middle) layout */
  for(k=0; k<l1; k++)
    memcpy(&CC(0,0,k),&CH(0,k,0),ido*sizeof(double));
  for(j=1; j<ipph; j++)
    {
    jc=ip-j;
    j2=2*j;
    for(k=0; k<l1; k++)
      {
      CC(ido-1,j2-1,k) = CH(0,k,j );
      CC(0    ,j2  ,k) = CH(0,k,jc);
      }
    }
  if(ido==1) return;
  for(j=1; j<ipph; j++)
    {
    jc=ip-j;
    j2=2*j;
    for(k=0; k<l1; k++)
      for(i=2; i<ido; i+=2)
        {
        ic=ido-i;
        PM (CC(i-1,j2,k),CC(ic-1,j2-1,k),CH(i-1,k,j ),CH(i-1,k,jc))
        PM (CC(i  ,j2,k),CC(ic  ,j2-1,k),CH(i  ,k,jc),CH(i  ,k,j ))
        }
    }
  }
 #undef CC
 #undef CH
 /* Layout for the backward real passes: ch is addressed with l1 as the middle
    extent, cc with cdim (the radix) as the middle extent. */
 #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
 #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
 /* Radix-2 pass of the backward real transform; rfftb1 selects it when the
    current factor is 2. Reads cc, writes ch, takes twiddle factors from wa. */
 static void radb2(size_t ido, size_t l1, const double *cc, double *ch,
  const double *wa)
  {
  const size_t cdim=2;
  size_t i, k, ic;
  double ti2, tr2;
  /* element 0 of every k: plain sum and difference, no twiddle involved */
  for (k=0; k<l1; k++)
    PM (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(ido-1,1,k))
  /* even ido: the last element (index ido-1) is handled separately */
  if ((ido&1)==0)
    for (k=0; k<l1; k++)
      {
      CH(ido-1,k,0) =  2*CC(ido-1,0,k);
      CH(ido-1,k,1) = -2*CC(0    ,1,k);
      }
  /* with ido<=2 the loop below would have no iterations */
  if (ido<=2) return;
  for (k=0; k<l1;++k)
    for (i=2; i<ido; i+=2)
      {
      ic=ido-i;
      PM (CH(i-1,k,0),tr2,CC(i-1,0,k),CC(ic-1,1,k))
      PM (ti2,CH(i  ,k,0),CC(i  ,0,k),CC(ic  ,1,k))
      /* the difference term is multiplied by the twiddle factor on output */
      MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ti2,tr2)
      }
  }
 /* Radix-3 pass of the backward real transform; rfftb1 selects it when the
    current factor is 3. Reads cc, writes ch, takes twiddle factors from wa. */
 static void radb3(size_t ido, size_t l1, const double *cc, double *ch,
  const double *wa)
  {
  const size_t cdim=3;
  /* taur = -1/2 and taui = sqrt(3)/2, i.e. cos and sin of 120 degrees */
  static const double taur=-0.5, taui=0.86602540378443864676;
  size_t i, k, ic;
  double ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
  /* element 0 of every k needs no twiddle factors */
  for (k=0; k<l1; k++)
    {
    tr2=2*CC(ido-1,1,k);
    cr2=CC(0,0,k)+taur*tr2;
    CH(0,k,0)=CC(0,0,k)+tr2;
    ci3=2*taui*CC(0,2,k);
    PM (CH(0,k,2),CH(0,k,1),cr2,ci3);
    }
  if (ido==1) return;
  for (k=0; k<l1; k++)
    for (i=2; i<ido; i+=2)
      {
      ic=ido-i;
      tr2=CC(i-1,2,k)+CC(ic-1,1,k);
      ti2=CC(i  ,2,k)-CC(ic  ,1,k);
      cr2=CC(i-1,0,k)+taur*tr2;
      ci2=CC(i  ,0,k)+taur*ti2;
      CH(i-1,k,0)=CC(i-1,0,k)+tr2;
      CH(i  ,k,0)=CC(i  ,0,k)+ti2;
      cr3=taui*(CC(i-1,2,k)-CC(ic-1,1,k));
      ci3=taui*(CC(i  ,2,k)+CC(ic  ,1,k));
      PM(dr3,dr2,cr2,ci3)
      PM(di2,di3,ci2,cr3)
      /* outputs 1 and 2 are multiplied by their twiddle factors */
      MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
      MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
      }
  }
 /* Radix-4 pass of the backward real transform; rfftb1 selects it when the
    current factor is 4. Reads cc, writes ch, takes twiddle factors from wa. */
 static void radb4(size_t ido, size_t l1, const double *cc, double *ch,
  const double *wa)
  {
  const size_t cdim=4;
  static const double sqrt2=1.41421356237309504880;
  size_t i, k, ic;
  double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
  /* element 0 of every k needs no twiddle factors */
  for (k=0; k<l1; k++)
    {
    PM (tr2,tr1,CC(0,0,k),CC(ido-1,3,k))
    tr3=2*CC(ido-1,1,k);
    tr4=2*CC(0,2,k);
    PM (CH(0,k,0),CH(0,k,2),tr2,tr3)
    PM (CH(0,k,3),CH(0,k,1),tr1,tr4)
    }
  /* even ido: the last element (index ido-1) is handled separately */
  if ((ido&1)==0)
    for (k=0; k<l1; k++)
      {
      PM (ti1,ti2,CC(0    ,3,k),CC(0    ,1,k))
      PM (tr2,tr1,CC(ido-1,0,k),CC(ido-1,2,k))
      CH(ido-1,k,0)=tr2+tr2;
      CH(ido-1,k,1)=sqrt2*(tr1-ti1);
      CH(ido-1,k,2)=ti2+ti2;
      CH(ido-1,k,3)=-sqrt2*(tr1+ti1);
      }
  /* with ido<=2 the loop below would have no iterations */
  if (ido<=2) return;
  for (k=0; k<l1;++k)
    for (i=2; i<ido; i+=2)
      {
      ic=ido-i;
      PM (tr2,tr1,CC(i-1,0,k),CC(ic-1,3,k))
      PM (ti1,ti2,CC(i  ,0,k),CC(ic  ,3,k))
      PM (tr4,ti3,CC(i  ,2,k),CC(ic  ,1,k))
      PM (tr3,ti4,CC(i-1,2,k),CC(ic-1,1,k))
      PM (CH(i-1,k,0),cr3,tr2,tr3)
      PM (CH(i  ,k,0),ci3,ti2,ti3)
      PM (cr4,cr2,tr1,tr4)
      PM (ci2,ci4,ti1,ti4)
      /* outputs 1..3 are multiplied by their twiddle factors */
      MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ci2,cr2)
      MULPM (CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),ci3,cr3)
      MULPM (CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),ci4,cr4)
      }
  }
 /* Radix-5 pass of the backward real transform; rfftb1 selects it when the
    current factor is 5. Reads cc, writes ch, takes twiddle factors from wa. */
 static void radb5(size_t ido, size_t l1, const double *cc, double *ch,
  const double *wa)
  {
  const size_t cdim=5;
  /* (tr11,ti11) and (tr12,ti12): cos/sin of 72 and 144 degrees */
  static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
                      tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
  size_t i, k, ic;
  double ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4,
         ti2, ti3, ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
  /* element 0 of every k needs no twiddle factors */
  for (k=0; k<l1; k++)
    {
    ti5=2*CC(0,2,k);
    ti4=2*CC(0,4,k);
    tr2=2*CC(ido-1,1,k);
    tr3=2*CC(ido-1,3,k);
    CH(0,k,0)=CC(0,0,k)+tr2+tr3;
    cr2=CC(0,0,k)+tr11*tr2+tr12*tr3;
    cr3=CC(0,0,k)+tr12*tr2+tr11*tr3;
    MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
    PM(CH(0,k,4),CH(0,k,1),cr2,ci5)
    PM(CH(0,k,3),CH(0,k,2),cr3,ci4)
    }
  if (ido==1) return;
  for (k=0; k<l1;++k)
    for (i=2; i<ido; i+=2)
      {
      ic=ido-i;
      PM(tr2,tr5,CC(i-1,2,k),CC(ic-1,1,k))
      PM(ti5,ti2,CC(i  ,2,k),CC(ic  ,1,k))
      PM(tr3,tr4,CC(i-1,4,k),CC(ic-1,3,k))
      PM(ti4,ti3,CC(i  ,4,k),CC(ic  ,3,k))
      CH(i-1,k,0)=CC(i-1,0,k)+tr2+tr3;
      CH(i  ,k,0)=CC(i  ,0,k)+ti2+ti3;
      cr2=CC(i-1,0,k)+tr11*tr2+tr12*tr3;
      ci2=CC(i  ,0,k)+tr11*ti2+tr12*ti3;
      cr3=CC(i-1,0,k)+tr12*tr2+tr11*tr3;
      ci3=CC(i  ,0,k)+tr12*ti2+tr11*ti3;
      MULPM(cr5,cr4,tr5,tr4,ti11,ti12)
      MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
      PM(dr4,dr3,cr3,ci4)
      PM(di3,di4,ci3,cr4)
      PM(dr5,dr2,cr2,ci5)
      PM(di2,di5,ci2,cr5)
      /* outputs 1..4 are multiplied by their twiddle factors */
      MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
      MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
      MULPM(CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),di4,dr4)
      MULPM(CH(i,k,4),CH(i-1,k,4),WA(3,i-2),WA(3,i-1),di5,dr5)
      }
  }
 /* Generic-radix pass of the backward real transform; rfftb1 uses it for
    every factor other than 2, 3, 4 and 5 and passes idl1 = ido*l1.
    Input is read from cc; both cc and ch are used as work space. When ido==1
    the function returns early with the result in ch; otherwise the result is
    copied/twiddled back into cc at the end (rfftb1 chooses its pointer swap
    accordingly). A temporary table of 2*ip doubles is allocated with RALLOC
    and released with DEALLOC. Relies on the C1/C2/CH2 macros still being
    defined from the radfg section. */
 static void radbg(size_t ido, size_t ip, size_t l1, size_t idl1,
  double *cc, double *ch, const double *wa)
  {
  const size_t cdim=ip;
  static const double twopi=6.28318530717958647692;
  size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
  double ai1, ai2, ar1, ar2, arg;
  double *csarr;
  size_t aidx;
  ipph=(ip+1)/ 2;
  /* unpack cc (cdim-middle layout) into ch (l1-middle layout) */
  for(k=0; k<l1; k++)
    memcpy(&CH(0,k,0),&CC(0,0,k),ido*sizeof(double));
  for(j=1; j<ipph; j++)
    {
    jc=ip-j;
    j2=2*j;
    for(k=0; k<l1; k++)
      {
      CH(0,k,j )=2*CC(ido-1,j2-1,k);
      CH(0,k,jc)=2*CC(0    ,j2  ,k);
      }
    }
  if(ido!=1)
    for(j=1,jc=ip-1; j<ipph; j++,jc--)
      for(k=0; k<l1; k++)
        for(i=2; i<ido; i+=2)
          {
          ic=ido-i;
          PM (CH(i-1,k,j ),CH(i-1,k,jc),CC(i-1,2*j,k),CC(ic-1,2*j-1,k))
          PM (CH(i  ,k,jc),CH(i  ,k,j ),CC(i  ,2*j,k),CC(ic  ,2*j-1,k))
          }
  /* csarr[2*m], csarr[2*m+1] = cos, sin of m*2*pi/ip for m=0..ip-1; the upper
     half is filled by symmetry (same cosine, negated sine) */
  csarr=RALLOC(double,2*ip);
  arg=twopi/ip;
  csarr[0]=1.;
  csarr[1]=0.;
  csarr[2]=csarr[2*ip-2]=cos(arg);
  csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
  for (i=2; i<=ip/2; ++i)
    {
    csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
    csarr[2*i+1]=sin(i*arg);
    csarr[2*ip-2*i+1]=-csarr[2*i+1];
    }
  for(l=1; l<ipph; l++)
    {
    lc=ip-l;
    ar1=csarr[2*l];
    ai1=csarr[2*l+1];
    for(ik=0; ik<idl1; ik++)
      {
      C2(ik,l)=CH2(ik,0)+ar1*CH2(ik,1);
      C2(ik,lc)=ai1*CH2(ik,ip-1);
      }
    /* aidx walks the table in steps of 2*l, wrapping at 2*ip */
    aidx=2*l;
    for(j=2; j<ipph; j++)
      {
      jc=ip-j;
      aidx+=2*l;
      if (aidx>=2*ip) aidx-=2*ip;
      ar2=csarr[aidx];
      ai2=csarr[aidx+1];
      for(ik=0; ik<idl1; ik++)
        {
        C2(ik,l )+=ar2*CH2(ik,j );
        C2(ik,lc)+=ai2*CH2(ik,jc);
        }
      }
    }
  DEALLOC(csarr);
  for(j=1; j<ipph; j++)
    for(ik=0; ik<idl1; ik++)
      CH2(ik,0)+=CH2(ik,j);
  for(j=1,jc=ip-1; j<ipph; j++,jc--)
    for(k=0; k<l1; k++)
      PM (CH(0,k,jc),CH(0,k,j),C1(0,k,j),C1(0,k,jc))
  /* for ido==1 the result stays in ch */
  if(ido==1)
    return;
  for(j=1,jc=ip-1; j<ipph; j++,jc--)
    for(k=0; k<l1; k++)
      for(i=2; i<ido; i+=2)
        {
        PM (CH(i-1,k,jc),CH(i-1,k,j ),C1(i-1,k,j),C1(i  ,k,jc))
        PM (CH(i  ,k,j ),CH(i  ,k,jc),C1(i  ,k,j),C1(i-1,k,jc))
        }
  /* copy row 0 back into cc and apply the twiddle factors to rows 1..ip-1 */
  memcpy(cc,ch,idl1*sizeof(double));
  for(j=1; j<ip; j++)
    for(k=0; k<l1; k++)
      {
      C1(0,k,j)=CH(0,k,j);
      idij=(j-1)*ido+1;
      for(i=2; i<ido; i+=2,idij+=2)
        MULPM (C1(i,k,j),C1(i-1,k,j),wa[idij-1],wa[idij],CH(i,k,j),CH(i-1,k,j))
      }
  }
 /* the real-pass helper macros are not needed past this point */
 #undef CC
 #undef CH
 #undef PM
 #undef MULPM
 /*----------------------------------------------------------------------
   cfftf1, cfftb1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
  ----------------------------------------------------------------------*/
 /* Driver for one complex transform of length n.
    c      : data, transformed in place
    ch     : scratch array of n cmplx values
    wa     : twiddle factors
    ifac   : factor table; ifac[1] is the number of factors, ifac[2..] the
             factors themselves
    isign  : >0 runs the passb* kernels, anything else the passf* kernels
    The passes alternate between c and ch; if the last pass ended in ch the
    result is copied back to c. */
 static void cfft1(size_t n, cmplx c[], cmplx ch[], const cmplx wa[],
  const size_t ifac[], int isign)
  {
  const size_t nfct=ifac[1];
  const int backward=(isign>0);
  cmplx *src=c, *dst=ch, *tmp;
  size_t stage, len1=1, woff=0;
  for (stage=0; stage<nfct; ++stage)
    {
    const size_t radix=ifac[stage+2];
    const size_t len2=radix*len1;
    const size_t ido=n/len2;
    const cmplx *tw=wa+woff;
    switch (radix)
      {
      case 4:
        if (backward) passb4(ido, len1, src, dst, tw);
        else          passf4(ido, len1, src, dst, tw);
        break;
      case 2:
        if (backward) passb2(ido, len1, src, dst, tw);
        else          passf2(ido, len1, src, dst, tw);
        break;
      case 3:
        if (backward) passb3(ido, len1, src, dst, tw);
        else          passf3(ido, len1, src, dst, tw);
        break;
      case 5:
        if (backward) passb5(ido, len1, src, dst, tw);
        else          passf5(ido, len1, src, dst, tw);
        break;
      case 6:
        if (backward) passb6(ido, len1, src, dst, tw);
        else          passf6(ido, len1, src, dst, tw);
        break;
      default:
        if (backward) passbg(ido, radix, len1, src, dst, tw);
        else          passfg(ido, radix, len1, src, dst, tw);
        break;
      }
    /* the output of this pass is the input of the next one */
    tmp=src; src=dst; dst=tmp;
    len1=len2;
    woff+=(radix-1)*ido;
    }
  if (src!=c)
    memcpy (c,src,n*sizeof(cmplx));
  }
 void cfftf(size_t n, double c[], double wsave[])
  {
  if (n!=1)
    cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
          (size_t*)(wsave+4*n),-1);
  }
 void cfftb(size_t n, double c[], double wsave[])
  {
  if (n!=1)
    cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
          (size_t*)(wsave+4*n),+1);
  }
 /* Split n into the factors used by the FFT passes.
    n    : transform length
    pf   : npf preferred trial divisors, tried in the given order; once they
           are exhausted the search continues with the last divisor tried
           plus 2, plus 4, ...
    ifac : output; ifac[0]=n, ifac[1]=number of factors, ifac[2..] the
           factors in the order they were found, except that a factor 2 found
           after other factors is moved to the front of the list.
    n==0 has no factorization: it is reported as zero factors instead of
    looping forever and writing past the end of ifac. n==1 likewise yields
    zero factors (the public wrappers never pass n==1). */
 static void factorize (size_t n, const size_t *pf, size_t npf, size_t *ifac)
  {
  size_t nl=n, nf=0, ntry=0, j=0, i;
  if (n==0)
    {
    ifac[0]=0;
    ifac[1]=0;
    return;
    }
  while (nl!=1)
    {
    /* pick the next trial divisor */
    ++j;
    ntry = (j<=npf) ? pf[j-1] : ntry+2;
    /* strip every occurrence of it from the remaining cofactor nl */
    while (nl%ntry==0)
      {
      ++nf;
      ifac[nf+1]=ntry;
      nl/=ntry;
      if ((ntry==2) && (nf!=1))
        {
        /* shift the factors found so far up by one and put the 2 first */
        for (i=nf+1; i>2; --i)
          ifac[i]=ifac[i-1];
        ifac[2]=2;
        }
      }
    }
  ifac[0]=n;
  ifac[1]=nf;
  }
 /* Set up a complex transform of length n: factorize n into ifac (trial
    divisors 4,6,3,2,5 first) and fill wa with the cos/sin twiddle factors,
    stored as consecutive (cos,sin) pairs.
    For every factor and every row 1..radix-1 the row starts with (1,0) and is
    followed by ido computed pairs; the write position is not advanced past
    the last pair, so the next row starts on top of it. For factors larger
    than 6 the leading (1,0) is replaced by the last pair of the row. */
 static void cffti1(size_t n, double wa[], size_t ifac[])
  {
  static const size_t trial[5]={4,6,3,2,5};
  static const double twopi=6.28318530717958647692;
  const double step=twopi/n;
  size_t pos=0, done=1, fct;
  factorize (n,trial,5,ifac);
  for (fct=1; fct<=ifac[1]; ++fct)
    {
    const size_t radix=ifac[fct+1];
    const size_t ido=n/(done*radix);
    size_t row;
    for (row=1; row<radix; ++row)
      {
      const size_t first=pos;
      const double base=row*done*step;
      size_t m=0;
      wa[pos  ]=1;
      wa[pos+1]=0;
      while (m<ido)
        {
        double ang;
        ++m;
        ang=m*base;
        pos+=2;
        wa[pos  ]=cos(ang);
        wa[pos+1]=sin(ang);
        }
      if (radix>6)
        {
        wa[first  ]=wa[pos  ];
        wa[first+1]=wa[pos+1];
        }
      }
    done*=radix;
    }
  }
 /* Initialise wsave for complex transforms of length n: twiddle factors go to
    offset 2*n, the factor table to offset 4*n. Nothing is done for n==1. */
 void cffti(size_t n, double wsave[])
  {
  if (n==1) return;
  cffti1(n, wsave+2*n, (size_t*)(wsave+4*n));
  }
 /*----------------------------------------------------------------------
   rfftf1, rfftb1, rfftf, rfftb, rffti1, rffti. Real FFTs.
  ----------------------------------------------------------------------*/
 /* Driver for one forward real transform of length n.
    c    : data, transformed in place
    ch   : scratch array of n doubles
    wa   : twiddle factors
    ifac : factor table (ifac[1] = number of factors, ifac[2..] = factors)
    The factors are processed from the last to the first, each pass reading
    from one buffer and writing to the other. The generic pass radfg leaves
    its result in its first array argument, which is why its pointer handling
    differs from the fixed-radix passes. */
 static void rfftf1(size_t n, double c[], double ch[], const double wa[],
  const size_t ifac[])
  {
  const size_t nfct=ifac[1];
  size_t idx, len=n, woff=n-1;
  double *in=ch, *out=c, *tmp;
  for (idx=nfct+1; idx>=2; --idx)
    {
    const size_t radix=ifac[idx];
    const size_t ido=n/len;
    len/=radix;
    woff-=(radix-1)*ido;
    tmp=in; in=out; out=tmp;
    switch (radix)
      {
      case 4:
        radf4(ido, len, in, out, wa+woff);
        break;
      case 2:
        radf2(ido, len, in, out, wa+woff);
        break;
      case 3:
        radf3(ido, len, in, out, wa+woff);
        break;
      case 5:
        radf5(ido, len, in, out, wa+woff);
        break;
      default:
        if (ido==1)
          /* radfg takes its input from the second array in this case */
          radfg(ido, radix, len, ido*len, out, in, wa+woff);
        else
          {
          radfg(ido, radix, len, ido*len, in, out, wa+woff);
          tmp=in; in=out; out=tmp;
          }
        break;
      }
    }
  if (in==c)
    memcpy (c,ch,n*sizeof(double));
  }
 /* Driver for one backward real transform of length n.
    c    : data, transformed in place
    ch   : scratch array of n doubles
    wa   : twiddle factors
    ifac : factor table (ifac[1] = number of factors, ifac[2..] = factors)
    The factors are processed first to last. After a fixed-radix pass the two
    buffers trade places; after the generic pass radbg they only do so when
    ido==1, because otherwise radbg has already put its result back into its
    first array argument. */
 static void rfftb1(size_t n, double c[], double ch[], const double wa[],
  const size_t ifac[])
  {
  const size_t nfct=ifac[1];
  size_t idx, len=1, woff=0;
  double *src=c, *dst=ch, *tmp;
  for (idx=2; idx<nfct+2; ++idx)
    {
    const size_t radix=ifac[idx];
    const size_t ido=n/(radix*len);
    int flip=1;
    switch (radix)
      {
      case 4:
        radb4(ido, len, src, dst, wa+woff);
        break;
      case 2:
        radb2(ido, len, src, dst, wa+woff);
        break;
      case 3:
        radb3(ido, len, src, dst, wa+woff);
        break;
      case 5:
        radb5(ido, len, src, dst, wa+woff);
        break;
      default:
        radbg(ido, radix, len, ido*len, src, dst, wa+woff);
        flip=(ido==1);
        break;
      }
    if (flip)
      { tmp=src; src=dst; dst=tmp; }
    len*=radix;
    woff+=(radix-1)*ido;
    }
  if (src!=c)
    memcpy (c,ch,n*sizeof(double));
  }
 /* Forward real transform of the n values in r. wsave is split into scratch
    space (offset 0), twiddle factors (offset n) and the factor table
    (offset 2*n). A length of 1 is left untouched. */
 void rfftf(size_t n, double r[], double wsave[])
  {
  if (n==1) return;
  rfftf1(n, r, wsave, wsave+n, (size_t*)(wsave+2*n));
  }
 /* Backward real transform of the n values in r. wsave is split into scratch
    space (offset 0), twiddle factors (offset n) and the factor table
    (offset 2*n). A length of 1 is left untouched. */
 void rfftb(size_t n, double r[], double wsave[])
  {
  if (n==1) return;
  rfftb1(n, r, wsave, wsave+n, (size_t*)(wsave+2*n));
  }
 /* Set up a real transform of length n: factorize n into ifac (trial divisors
    4,2,3,5 first) and fill wa with cos/sin twiddle factors.
    Only the factors before the last one get twiddle entries. For each such
    factor and each row 1..radix-1, (cos,sin) pairs are written starting at
    the row base while the position is <= ido+base-3; the base then advances
    by ido. */
 static void rffti1(size_t n, double wa[], size_t ifac[])
  {
  static const size_t trial[4]={4,2,3,5};
  static const double twopi=6.28318530717958647692;
  const double step=twopi/n;
  size_t base=0, done=1, fct;
  factorize (n,trial,4,ifac);
  for (fct=1; fct<ifac[1]; ++fct)
    {
    const size_t radix=ifac[fct+1];
    const size_t ido=n/(done*radix);
    size_t row;
    for (row=1; row<radix; ++row)
      {
      const double rowang=row*done*step;
      size_t pos=base, m=1;
      while (pos<=ido+base-3)
        {
        const double ang=m*rowang;
        wa[pos  ]=cos(ang);
        wa[pos+1]=sin(ang);
        pos+=2;
        ++m;
        }
      base+=ido;
      }
    done*=radix;
    }
  }
 /* Initialise wsave for real transforms of length n: twiddle factors go to
    offset n, the factor table to offset 2*n. Nothing is done for n==1. */
 void rffti(size_t n, double wsave[])
  {
  if (n==1) return;
  rffti1(n, wsave+n, (size_t*)(wsave+2*n));
  }
--- a/external/sharp/libfftpack/fftpack.h
+++ b/external/sharp/libfftpack/fftpack.h
@ -1,64 +0,0 @@
 /*
 *  This file is part of libfftpack.
 *
 *  libfftpack is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libfftpack is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libfftpack; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*
  fftpack.h : function declarations for fftpack.c
  Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
  (Version 4, 1985).
  Pekka Janhunen 23.2.1995
  (reformatted by joerg arndt)
  reformatted and slightly enhanced by Martin Reinecke (2004)
 */
 #ifndef PLANCK_FFTPACK_H
 #define PLANCK_FFTPACK_H
 #include "c_utils.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*! forward complex transform */
 void cfftf(size_t N, double complex_data[], double wrk[]);
 /*! backward complex transform */
 void cfftb(size_t N, double complex_data[], double wrk[]);
 /*! initializer for complex transforms */
 void cffti(size_t N, double wrk[]);
 /*! forward real transform */
 void rfftf(size_t N, double data[], double wrk[]);
 /*! backward real transform */
 void rfftb(size_t N, double data[], double wrk[]);
 /*! initializer for real transforms */
 void rffti(size_t N, double wrk[]);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/libfftpack/fftpack_inc.c
+++ b/external/sharp/libfftpack/fftpack_inc.c
@ -1,306 +0,0 @@
 /*
 *  This file is part of libfftpack.
 *
 *  libfftpack is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libfftpack is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libfftpack; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*
  fftpack.c : A set of FFT routines in C.
  Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
  (Version 4, 1985).
  C port by Martin Reinecke (2010)
 */
 /* Direction-dependent helpers. The including file defines BACKWARD for one
    of its two inclusions of this file; everything that differs between the
    two directions is expressed through these three macros.
    PSIGN      : sign applied to the sine-type constants
    PMSIGNC    : butterfly whose sum/difference outputs trade places between
                 the two directions
    MULPMSIGNC : multiplication by b (BACKWARD) or by conj(b) (otherwise) */
 #ifdef BACKWARD
 #define PSIGN +
 #define PMSIGNC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
 /* a = b*c */
 #define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; }
 #else
 #define PSIGN -
 #define PMSIGNC(a,b,c,d) { a.r=c.r-d.r; a.i=c.i-d.i; b.r=c.r+d.r; b.i=c.i+d.i; }
 /* a = conj(b)*c */
 #define MULPMSIGNC(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; }
 #endif
 /* Radix-2 complex pass; X(2) is named by the X macro of the including file.
    Reads cc, writes ch, takes twiddle factors from wa. */
 static void X(2) (size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
  const cmplx *wa)
  {
  const size_t cdim=2;
  size_t k,i;
  cmplx t;
  /* ido==1: pure butterfly, no twiddle multiplication */
  if (ido==1)
    for (k=0;k<l1;++k)
      PMC (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(0,1,k))
  else
    for (k=0;k<l1;++k)
      for (i=0;i<ido;++i)
        {
        /* the difference t is multiplied by the twiddle factor on output */
        PMC (CH(i,k,0),t,CC(i,0,k),CC(i,1,k))
        MULPMSIGNC (CH(i,k,1),WA(0,i),t)
        }
  }
 /* Radix-3 complex pass; X(3) is named by the X macro of the including file.
    Reads cc, writes ch, takes twiddle factors from wa. */
 static void X(3)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
  const cmplx *wa)
  {
  const size_t cdim=3;
  /* taur = -1/2; taui = +-sqrt(3)/2 with the sign given by PSIGN */
  static const double taur=-0.5, taui= PSIGN 0.86602540378443864676;
  size_t i, k;
  cmplx c2, c3, d2, d3, t2;
  /* ido==1: results go straight to ch, no twiddle multiplication */
  if (ido==1)
    for (k=0; k<l1; ++k)
      {
      PMC (t2,c3,CC(0,1,k),CC(0,2,k))
      ADDC (CH(0,k,0),t2,CC(0,0,k))
      SCALEC(t2,taur)
      ADDC(c2,CC(0,0,k),t2)
      SCALEC(c3,taui)
      CONJFLIPC(c3)
      PMC(CH(0,k,1),CH(0,k,2),c2,c3)
      }
  else
    for (k=0; k<l1; ++k)
      for (i=0; i<ido; ++i)
        {
        PMC (t2,c3,CC(i,1,k),CC(i,2,k))
        ADDC (CH(i,k,0),t2,CC(i,0,k))
        SCALEC(t2,taur)
        ADDC(c2,CC(i,0,k),t2)
        SCALEC(c3,taui)
        CONJFLIPC(c3)
        PMC(d2,d3,c2,c3)
        /* outputs 1 and 2 are multiplied by their twiddle factors */
        MULPMSIGNC(CH(i,k,1),WA(0,i),d2)
        MULPMSIGNC(CH(i,k,2),WA(1,i),d3)
        }
  }
 /* Radix-4 complex pass; X(4) is named by the X macro of the including file.
    Reads cc, writes ch, takes twiddle factors from wa. */
 static void X(4)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
  const cmplx *wa)
  {
  const size_t cdim=4;
  size_t i, k;
  cmplx c2, c3, c4, t1, t2, t3, t4;
  /* ido==1: results go straight to ch, no twiddle multiplication */
  if (ido==1)
    for (k=0; k<l1; ++k)
      {
      PMC(t2,t1,CC(0,0,k),CC(0,2,k))
      PMC(t3,t4,CC(0,1,k),CC(0,3,k))
      CONJFLIPC(t4)
      PMC(CH(0,k,0),CH(0,k,2),t2,t3)
      /* PMSIGNC is the direction-dependent butterfly */
      PMSIGNC (CH(0,k,1),CH(0,k,3),t1,t4)
      }
  else
    for (k=0; k<l1; ++k)
      for (i=0; i<ido; ++i)
        {
        PMC(t2,t1,CC(i,0,k),CC(i,2,k))
        PMC(t3,t4,CC(i,1,k),CC(i,3,k))
        CONJFLIPC(t4)
        PMC(CH(i,k,0),c3,t2,t3)
        PMSIGNC (c2,c4,t1,t4)
        /* outputs 1..3 are multiplied by their twiddle factors */
        MULPMSIGNC (CH(i,k,1),WA(0,i),c2)
        MULPMSIGNC (CH(i,k,2),WA(1,i),c3)
        MULPMSIGNC (CH(i,k,3),WA(2,i),c4)
        }
  }
 /* Radix-5 complex pass; X(5) is named by the X macro of the including file.
    Reads cc, writes ch, takes twiddle factors from wa. */
 static void X(5)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
  const cmplx *wa)
  {
  const size_t cdim=5;
  /* tr11/tr12: cosines of 72 and 144 degrees; ti11/ti12: the matching sines
     with the sign given by PSIGN */
  static const double tr11= 0.3090169943749474241,
                      ti11= PSIGN 0.95105651629515357212,
                      tr12=-0.8090169943749474241,
                      ti12= PSIGN 0.58778525229247312917;
  size_t i, k;
  cmplx c2, c3, c4, c5, d2, d3, d4, d5, t2, t3, t4, t5;
  /* ido==1: results go straight to ch, no twiddle multiplication */
  if (ido==1)
    for (k=0; k<l1; ++k)
      {
      PMC (t2,t5,CC(0,1,k),CC(0,4,k))
      PMC (t3,t4,CC(0,2,k),CC(0,3,k))
      CH(0,k,0).r=CC(0,0,k).r+t2.r+t3.r;
      CH(0,k,0).i=CC(0,0,k).i+t2.i+t3.i;
      c2.r=CC(0,0,k).r+tr11*t2.r+tr12*t3.r;
      c2.i=CC(0,0,k).i+tr11*t2.i+tr12*t3.i;
      c3.r=CC(0,0,k).r+tr12*t2.r+tr11*t3.r;
      c3.i=CC(0,0,k).i+tr12*t2.i+tr11*t3.i;
      c5.r=ti11*t5.r+ti12*t4.r;
      c5.i=ti11*t5.i+ti12*t4.i;
      c4.r=ti12*t5.r-ti11*t4.r;
      c4.i=ti12*t5.i-ti11*t4.i;
      CONJFLIPC(c5)
      PMC(CH(0,k,1),CH(0,k,4),c2,c5)
      CONJFLIPC(c4)
      PMC(CH(0,k,2),CH(0,k,3),c3,c4)
      }
  else
    for (k=0; k<l1; ++k)
      for (i=0; i<ido; ++i)
        {
        PMC (t2,t5,CC(i,1,k),CC(i,4,k))
        PMC (t3,t4,CC(i,2,k),CC(i,3,k))
        CH(i,k,0).r=CC(i,0,k).r+t2.r+t3.r;
        CH(i,k,0).i=CC(i,0,k).i+t2.i+t3.i;
        c2.r=CC(i,0,k).r+tr11*t2.r+tr12*t3.r;
        c2.i=CC(i,0,k).i+tr11*t2.i+tr12*t3.i;
        c3.r=CC(i,0,k).r+tr12*t2.r+tr11*t3.r;
        c3.i=CC(i,0,k).i+tr12*t2.i+tr11*t3.i;
        c5.r=ti11*t5.r+ti12*t4.r;
        c5.i=ti11*t5.i+ti12*t4.i;
        c4.r=ti12*t5.r-ti11*t4.r;
        c4.i=ti12*t5.i-ti11*t4.i;
        CONJFLIPC(c5)
        PMC(d2,d5,c2,c5)
        CONJFLIPC(c4)
        PMC(d3,d4,c3,c4)
        /* outputs 1..4 are multiplied by their twiddle factors */
        MULPMSIGNC (CH(i,k,1),WA(0,i),d2)
        MULPMSIGNC (CH(i,k,2),WA(1,i),d3)
        MULPMSIGNC (CH(i,k,3),WA(2,i),d4)
        MULPMSIGNC (CH(i,k,4),WA(3,i),d5)
        }
  }
 /* Radix-6 complex pass; X(6) is named by the X macro of the including file.
    Reads cc, writes ch, takes twiddle factors from wa. Inputs 0,2,4 and
    3,5,1 are first combined in two groups of three (a0..a2, b0..b2), which
    are then merged pairwise. */
 static void X(6)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
  const cmplx *wa)
  {
  const size_t cdim=6;
  /* +-sqrt(3)/2 with the sign given by PSIGN */
  static const double taui= PSIGN 0.86602540378443864676;
  cmplx ta1,ta2,ta3,a0,a1,a2,tb1,tb2,tb3,b0,b1,b2,d1,d2,d3,d4,d5;
  size_t i, k;
  /* ido==1: results go straight to ch, no twiddle multiplication */
  if (ido==1)
    for (k=0; k<l1; ++k)
      {
      PMC(ta1,ta3,CC(0,2,k),CC(0,4,k))
      ta2.r = CC(0,0,k).r - .5*ta1.r;
      ta2.i = CC(0,0,k).i - .5*ta1.i;
      SCALEC(ta3,taui)
      ADDC(a0,CC(0,0,k),ta1)
      CONJFLIPC(ta3)
      PMC(a1,a2,ta2,ta3)
      PMC(tb1,tb3,CC(0,5,k),CC(0,1,k))
      tb2.r = CC(0,3,k).r - .5*tb1.r;
      tb2.i = CC(0,3,k).i - .5*tb1.i;
      SCALEC(tb3,taui)
      ADDC(b0,CC(0,3,k),tb1)
      CONJFLIPC(tb3)
      PMC(b1,b2,tb2,tb3)
      PMC(CH(0,k,0),CH(0,k,3),a0,b0)
      PMC(CH(0,k,4),CH(0,k,1),a1,b1)
      PMC(CH(0,k,2),CH(0,k,5),a2,b2)
      }
  else
    for (k=0; k<l1; ++k)
      for (i=0; i<ido; ++i)
        {
        PMC(ta1,ta3,CC(i,2,k),CC(i,4,k))
        ta2.r = CC(i,0,k).r - .5*ta1.r;
        ta2.i = CC(i,0,k).i - .5*ta1.i;
        SCALEC(ta3,taui)
        ADDC(a0,CC(i,0,k),ta1)
        CONJFLIPC(ta3)
        PMC(a1,a2,ta2,ta3)
        PMC(tb1,tb3,CC(i,5,k),CC(i,1,k))
        tb2.r = CC(i,3,k).r - .5*tb1.r;
        tb2.i = CC(i,3,k).i - .5*tb1.i;
        SCALEC(tb3,taui)
        ADDC(b0,CC(i,3,k),tb1)
        CONJFLIPC(tb3)
        PMC(b1,b2,tb2,tb3)
        PMC(CH(i,k,0),d3,a0,b0)
        PMC(d4,d1,a1,b1)
        PMC(d2,d5,a2,b2)
        /* outputs 1..5 are multiplied by their twiddle factors */
        MULPMSIGNC (CH(i,k,1),WA(0,i),d1)
        MULPMSIGNC (CH(i,k,2),WA(1,i),d2)
        MULPMSIGNC (CH(i,k,3),WA(2,i),d3)
        MULPMSIGNC (CH(i,k,4),WA(3,i),d4)
        MULPMSIGNC (CH(i,k,5),WA(4,i),d5)
        }
  }
 /* Generic-radix complex pass for a factor ip; X(g) is named by the X macro
    of the including file. Reads cc, writes ch, takes twiddle factors from wa.
    A temporary array of 2*ip cmplx values is allocated with RALLOC and
    released with DEALLOC: the first half (ccl) holds the pre-combined inputs
    of the current (i,k) column, the second half (wal) holds one twiddle value
    per row, copied from wa at a stride of ido. */
 static void X(g)(size_t ido, size_t ip, size_t l1, const cmplx *cc, cmplx *ch,
  const cmplx *wa)
  {
  const size_t cdim=ip;
  cmplx *tarr=RALLOC(cmplx,2*ip);
  cmplx *ccl=tarr, *wal=tarr+ip;
  size_t i,j,k,l,jc,lc;
  size_t ipph = (ip+1)/2;
  /* wal[0] is deliberately left unset; see the note on iang below */
  for (i=1; i<ip; ++i)
    wal[i]=wa[ido*(i-1)];
  for (k=0; k<l1; ++k)
    for (i=0; i<ido; ++i)
      {
      /* s accumulates the plain sum of all inputs (output 0) */
      cmplx s=CC(i,0,k);
      ccl[0] = CC(i,0,k);
      for(j=1,jc=ip-1; j<ipph; ++j,--jc)
        {
        PMC (ccl[j],ccl[jc],CC(i,j,k),CC(i,jc,k))
        ADDC (s,s,ccl[j])
        }
      CH(i,k,0) = s;
      /* NOTE(review): the bound is j<=ipph, so for odd ip the last iteration
         writes the pair (ipph, ip-ipph) that the previous iteration already
         wrote in the opposite roles - presumably harmless, confirm. */
      for (j=1, jc=ip-1; j<=ipph; ++j,--jc)
        {
        cmplx abr=ccl[0], abi={0.,0.};
        size_t iang=0;
        for (l=1,lc=ip-1; l<ipph; ++l,--lc)
          {
          /* iang is j*l reduced into the range of wal.
             NOTE(review): the test is iang>ip, so iang==ip would index one
             past the end of wal (and the equivalent wal[0] is unset). That
             needs j*l to be a multiple of ip, which cannot happen for prime
             ip - presumably all factors reaching this function are prime;
             confirm against factorize. */
          iang+=j;
          if (iang>ip) iang-=ip;
          abr.r += ccl[l ].r*wal[iang].r;
          abr.i += ccl[l ].i*wal[iang].r;
          abi.r += ccl[lc].r*wal[iang].i;
          abi.i += ccl[lc].i*wal[iang].i;
          }
 /* the other direction uses the negated sine-weighted sum */
 #ifndef BACKWARD
          { abi.i=-abi.i; abi.r=-abi.r; }
 #endif
        CONJFLIPC(abi)
        PMC(CH(i,k,j),CH(i,k,jc),abr,abi)
        }
      }
  DEALLOC(tarr);
  if (ido==1) return;
  /* multiply every element with i>=1 of rows 1..ip-1 by its twiddle factor */
  for (j=1; j<ip; ++j)
    for (k=0; k<l1; ++k)
      {
      size_t idij=(j-1)*ido+1;
      for(i=1; i<ido; ++i, ++idij)
        {
        cmplx t=CH(i,k,j);
        MULPMSIGNC (CH(i,k,j),wa[idij],t)
        }
      }
  }
 /* drop the direction-dependent macros so the file can be included again */
 #undef PSIGN
 #undef PMSIGNC
 #undef MULPMSIGNC
--- a/external/sharp/libfftpack/libfftpack.dox
+++ b/external/sharp/libfftpack/libfftpack.dox
@ -1,5 +0,0 @@
 /*! \mainpage Libfftpack documentation
  <ul>
  <li>\ref fftgroup "Programming interface"
  </ul>
 */
--- a/external/sharp/libfftpack/ls_fft.c
+++ b/external/sharp/libfftpack/ls_fft.c
@ -1,291 +0,0 @@
 /*
 *  This file is part of libfftpack.
 *
 *  libfftpack is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libfftpack is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libfftpack; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*
 *  Copyright (C) 2005 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #include <stdlib.h>
 #include <math.h>
 #include <string.h>
 #include "bluestein.h"
 #include "fftpack.h"
 #include "ls_fft.h"
 /* Creates a plan for complex transforms of the given length.
    Two heuristic figures are compared to choose between Bluestein's
    algorithm and the plain FFTPACK routines; the matching work array is
    set up accordingly. */
 complex_plan make_complex_plan (size_t length)
  {
  complex_plan result = RALLOC(complex_plan_i,1);
  const size_t factor_sum = prime_factor_sum(length);
  const double cost_direct = (double)(length*factor_sum);
  double cost_bluestein = 2*3*length*log(3.*length);
  /* fudge factor that appears to give good overall performance */
  cost_bluestein *= 3.;
  result->length = length;
  result->bluestein = (cost_bluestein<cost_direct);
  if (!result->bluestein)
    {
    /* plain FFTPACK: 4*length+15 doubles of work space */
    const size_t nwork = 4*length+15;
    result->worksize = nwork;
    result->work = RALLOC(double,nwork);
    cffti(length, result->work);
    return result;
    }
  /* Bluestein: the initialiser provides both work array and its size */
  bluestein_i (length,&(result->work),&(result->worksize));
  return result;
  }
 /* Returns an independent deep copy of \a plan (the work array is
    duplicated as well); a NULL plan yields NULL. */
 complex_plan copy_complex_plan (complex_plan plan)
  {
  complex_plan clone;
  if (plan==NULL) return NULL;
  clone = RALLOC(complex_plan_i,1);
  /* copy all scalar members first, then replace the shared work pointer */
  *clone = *plan;
  clone->work = RALLOC(double,clone->worksize);
  memcpy(clone->work,plan->work,sizeof(double)*clone->worksize);
  return clone;
  }
 /* Releases the work array of \a plan and then the plan object itself.
    A NULL \a plan is accepted and ignored, so that the result of
    copy_complex_plan(NULL) can be passed here without crashing. */
 void kill_complex_plan (complex_plan plan)
  {
  if (!plan) return;
  DEALLOC(plan->work);
  DEALLOC(plan);
  }
 /* Forward complex transform of \a data, dispatched on the algorithm
    recorded in \a plan. */
 void complex_plan_forward (complex_plan plan, double *data)
  {
  if (!plan->bluestein)
    {
    cfftf (plan->length, data, plan->work);
    return;
    }
  /* last argument -1 selects the forward direction */
  bluestein (plan->length, data, plan->work, -1);
  }
 /* Backward complex transform of \a data, dispatched on the algorithm
    recorded in \a plan. */
 void complex_plan_backward (complex_plan plan, double *data)
  {
  if (!plan->bluestein)
    {
    cfftb (plan->length, data, plan->work);
    return;
    }
  /* last argument 1 selects the backward direction */
  bluestein (plan->length, data, plan->work, 1);
  }
 /* Creates a plan for real transforms of the given length.
    Two heuristic figures are compared to choose between Bluestein's
    algorithm and the plain FFTPACK routines; the matching work array is
    set up accordingly. */
 real_plan make_real_plan (size_t length)
  {
  real_plan result = RALLOC(real_plan_i,1);
  const size_t factor_sum = prime_factor_sum(length);
  const double cost_direct = .5*length*factor_sum;
  double cost_bluestein = 2*3*length*log(3.*length);
  /* fudge factor that appears to give good overall performance */
  cost_bluestein *= 3;
  result->length = length;
  result->bluestein = (cost_bluestein<cost_direct);
  if (!result->bluestein)
    {
    /* plain FFTPACK: 2*length+15 doubles of work space */
    const size_t nwork = 2*length+15;
    result->worksize = nwork;
    result->work = RALLOC(double,nwork);
    rffti(length, result->work);
    return result;
    }
  /* Bluestein: the initialiser provides both work array and its size */
  bluestein_i (length,&(result->work),&(result->worksize));
  return result;
  }
 /* Returns an independent deep copy of \a plan (the work array is
    duplicated as well); a NULL plan yields NULL. */
 real_plan copy_real_plan (real_plan plan)
  {
  real_plan clone;
  if (plan==NULL) return NULL;
  clone = RALLOC(real_plan_i,1);
  /* copy all scalar members first, then replace the shared work pointer */
  *clone = *plan;
  clone->work = RALLOC(double,clone->worksize);
  memcpy(clone->work,plan->work,sizeof(double)*clone->worksize);
  return clone;
  }
 /* Releases the work array of \a plan and then the plan object itself.
    A NULL \a plan is accepted and ignored, so that the result of
    copy_real_plan(NULL) can be passed here without crashing. */
 void kill_real_plan (real_plan plan)
  {
  if (!plan) return;
  DEALLOC(plan->work);
  DEALLOC(plan);
  }
 /* Forward real transform of \a data in place.
    Without Bluestein the work is delegated to rfftf(); with Bluestein the
    input is widened into a temporary interleaved complex array, transformed,
    and the leading values are copied back. */
 void real_plan_forward_fftpack (real_plan plan, double *data)
  {
  size_t idx, len;
  double *cbuf;
  if (!plan->bluestein)
    {
    rfftf (plan->length, data, plan->work);
    return;
    }
  len = plan->length;
  cbuf = RALLOC(double,2*len);
  /* real parts from the input, imaginary parts zero */
  for (idx=0; idx<len; ++idx)
    {
    cbuf[2*idx] = data[idx];
    cbuf[2*idx+1] = 0.;
    }
  bluestein(len,cbuf,plan->work,-1);
  /* cbuf[1] is skipped when copying the result back */
  data[0] = cbuf[0];
  memcpy (data+1, cbuf+2, (len-1)*sizeof(double));
  DEALLOC(cbuf);
  }
 /* Reorders \a data (n values) in place: for every k in [1,(n+1)/2) the
    pair stored at 2k-1 / 2k is moved to positions k and n-k; for even n the
    last value moves to position n/2.  A temporary array of n doubles is
    used. */
 static void fftpack2halfcomplex (double *data, size_t n)
  {
  size_t k;
  const size_t npairs = (n+1)/2;
  double *buf = RALLOC(double,n);
  buf[0]=data[0];
  for (k=1; k<npairs; ++k)
    {
    buf[k]=data[2*k-1];
    buf[n-k]=data[2*k];
    }
  if ((n&1)==0)
    buf[n/2]=data[n-1];
  memcpy (data,buf,n*sizeof(double));
  DEALLOC(buf);
  }
 /* Reorders \a data (n values) in place: for every k in [1,(n+1)/2) the
    values at positions k and n-k are moved to 2k-1 and 2k; for even n the
    value at n/2 moves to the last position.  A temporary array of n doubles
    is used. */
 static void halfcomplex2fftpack (double *data, size_t n)
  {
  size_t k;
  const size_t npairs = (n+1)/2;
  double *buf = RALLOC(double,n);
  buf[0]=data[0];
  for (k=1; k<npairs; ++k)
    {
    buf[2*k-1]=data[k];
    buf[2*k]=data[n-k];
    }
  if ((n&1)==0)
    buf[n-1]=data[n/2];
  memcpy (data,buf,n*sizeof(double));
  DEALLOC(buf);
  }
 /* Forward real transform: runs the FFTPACK-layout transform and then
    reorders the result with fftpack2halfcomplex(). */
 void real_plan_forward_fftw (real_plan plan, double *data)
  {
  const size_t len = plan->length;
  real_plan_forward_fftpack (plan, data);
  fftpack2halfcomplex (data,len);
  }
 /* Backward real transform of \a data in place.
    Without Bluestein the work is delegated to rfftb(); with Bluestein the
    input is expanded into a temporary interleaved complex array (upper half
    filled with the sign-flipped mirror of the lower half), transformed, and
    every second value is copied back. */
 void real_plan_backward_fftpack (real_plan plan, double *data)
  {
  size_t idx, len;
  double *cbuf;
  if (!plan->bluestein)
    {
    rfftb (plan->length, data, plan->work);
    return;
    }
  len = plan->length;
  cbuf = RALLOC(double,2*len);
  cbuf[0]=data[0];
  cbuf[1]=0.;
  memcpy (cbuf+2,data+1, (len-1)*sizeof(double));
  if (!(len&1)) cbuf[len+1]=0.;
  /* mirror each pair at idx to 2*len-idx, negating the second component */
  for (idx=2; idx<len; idx+=2)
    {
    double *mirror = cbuf+(2*len-idx);
    mirror[0] =  cbuf[idx];
    mirror[1] = -cbuf[idx+1];
    }
  bluestein (len, cbuf, plan->work, 1);
  for (idx=0; idx<len; ++idx)
    data[idx] = cbuf[2*idx];
  DEALLOC(cbuf);
  }
 /* Backward real transform: reorders the input with halfcomplex2fftpack()
    and then runs the FFTPACK-layout transform. */
 void real_plan_backward_fftw (real_plan plan, double *data)
  {
  const size_t len = plan->length;
  halfcomplex2fftpack (data,len);
  real_plan_backward_fftpack (plan, data);
  }
 /* Forward real transform on interleaved (value, ignored) input, producing
    interleaved complex output in place.
    Non-Bluestein: the values at even positions are packed to data[1..n],
    transformed with rfftf(), and the upper half is filled with the
    sign-flipped mirror of the lower half.
    Bluestein: odd positions are zeroed, the complex transform is run, and
    the result is symmetrised by averaging mirrored entries. */
 void real_plan_forward_c (real_plan plan, double *data)
  {
  const size_t len=plan->length;
  size_t idx;
  if (!plan->bluestein)
    {
    /* "idx+idx" is used on purpose instead of "2*idx": this avoids a nasty
       bug in Intel's compiler */
    for (idx=0; idx<len; ++idx) data[idx+1] = data[idx+idx];
    rfftf (len, data+1, plan->work);
    data[0] = data[1];
    data[1] = 0;
    for (idx=2; idx<len; idx+=2)
      {
      double *mirror = data+(2*len-idx);
      mirror[0] =  data[idx];
      mirror[1] = -data[idx+1];
      }
    if (!(len&1)) data[len+1] = 0.;
    return;
    }
  for (idx=1; idx<2*len; idx+=2)
    data[idx]=0;
  bluestein (plan->length, data, plan->work, -1);
  data[1]=0;
  for (idx=2; idx<len; idx+=2)
    {
    double *lo = data+idx, *hi = data+(2*len-idx);
    double mean;
    /* first components: both entries receive the mean */
    mean = 0.5*(hi[0]+lo[0]);
    hi[0] = lo[0] = mean;
    /* second components: half the difference, with opposite signs */
    mean = 0.5*(hi[1]-lo[1]);
    hi[1] = mean;
    lo[1] = -mean;
    }
  if (!(len&1)) data[len+1] = 0.;
  }
 /* Backward real transform on interleaved complex input, producing
    interleaved (value, 0) output in place.
    Non-Bluestein: data[0] is moved to data[1], rfftb() runs on data+1, and
    the results are spread back out to the even positions from the top down.
    Bluestein: the input is symmetrised by averaging mirrored entries, the
    complex transform is run, and all odd positions are zeroed. */
 void real_plan_backward_c (real_plan plan, double *data)
  {
  const size_t len=plan->length;
  size_t idx;
  ptrdiff_t pos;
  if (!plan->bluestein)
    {
    data[1] = data[0];
    rfftb (len, data+1, plan->work);
    /* descending order, so no value is overwritten before it is read */
    for (pos=len-1; pos>=0; --pos)
      {
      data[2*pos]   = data[pos+1];
      data[2*pos+1] = 0.;
      }
    return;
    }
  data[1]=0;
  for (idx=2; idx<len; idx+=2)
    {
    double *lo = data+idx, *hi = data+(2*len-idx);
    double mean;
    /* first components: both entries receive the mean */
    mean = 0.5*(hi[0]+lo[0]);
    hi[0] = lo[0] = mean;
    /* second components: half the difference, with opposite signs */
    mean = 0.5*(hi[1]-lo[1]);
    hi[1] = mean;
    lo[1] = -mean;
    }
  if (!(len&1)) data[len+1] = 0.;
  bluestein (plan->length, data, plan->work, 1);
  for (idx=1; idx<2*len; idx+=2)
    data[idx]=0;
  }
--- a/external/sharp/libfftpack/ls_fft.h
+++ b/external/sharp/libfftpack/ls_fft.h
@ -1,161 +0,0 @@
 /*
 *  This file is part of libfftpack.
 *
 *  libfftpack is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libfftpack is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libfftpack; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file ls_fft.h
 *  Interface for the LevelS FFT package.
 *
 *  Copyright (C) 2004 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef PLANCK_LS_FFT_H
 #define PLANCK_LS_FFT_H
 #include "c_utils.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*!\defgroup fftgroup FFT interface
 This package is intended to calculate one-dimensional real or complex FFTs
 with high accuracy and good efficiency even for lengths containing large
 prime factors.
 The code is written in C, but a Fortran wrapper exists as well.
 Before any FFT is executed, a plan must be generated for it. Plan creation
 is designed to be fast, so that there is no significant overhead if the
 plan is only used once or a few times.
 The main component of the code is based on Paul N. Swarztrauber's FFTPACK in the
 double precision incarnation by Hugh C. Pumphrey
 (http://www.netlib.org/fftpack/dp.tgz).
 I replaced the iterative sine and cosine calculations in radfg() and radbg()
 by an exact calculation, which slightly improves the transform accuracy for
 real FFTs with lengths containing large prime factors.
 Since FFTPACK becomes quite slow for FFT lengths with large prime factors
 (in the worst case of prime lengths it reaches \f$\mathcal{O}(n^2)\f$
 complexity), I implemented Bluestein's algorithm, which computes a FFT of length
 \f$n\f$ by several FFTs of length \f$n_2\ge 2n-1\f$ and a convolution. Since
 \f$n_2\f$ can be chosen to be highly composite, this algorithm is more efficient
 if \f$n\f$ has large prime factors. The longer FFTs themselves are then computed
 using the FFTPACK routines.
 Bluestein's algorithm was implemented according to the description on Wikipedia
 (<a href="http://en.wikipedia.org/wiki/Bluestein%27s_FFT_algorithm">
 http://en.wikipedia.org/wiki/Bluestein%27s_FFT_algorithm</a>).
 \b Thread-safety:
 All routines can be called concurrently; all information needed by
 <tt>ls_fft</tt> is stored in the plan variable. However, using the same plan
 variable on multiple threads simultaneously is not supported and will lead to
 data corruption.
 */
 /*! \{ */
 /* Plan record for complex FFTs; handled through the complex_plan pointer
    type. */
 typedef struct
  {
  /* heap-allocated work array, owned by the plan */
  double *work;
  /* length: number of elements of the transform;
     worksize: number of doubles stored in work */
  size_t length, worksize;
  /* nonzero if the plan uses Bluestein's algorithm */
  int bluestein;
  } complex_plan_i;
 /*! The opaque handle type for complex-FFT plans. */
 typedef complex_plan_i * complex_plan;
 /*! Returns a plan for a complex FFT with \a length elements. */
 complex_plan make_complex_plan (size_t length);
 /*! Constructs a copy of \a plan. */
 complex_plan copy_complex_plan (complex_plan plan);
 /*! Destroys a plan for a complex FFT. */
 void kill_complex_plan (complex_plan plan);
 /*! Computes a complex forward FFT on \a data, using \a plan.
    \a Data has the form <tt>r0, i0, r1, i1, ...,
    r[length-1], i[length-1]</tt>. */
 void complex_plan_forward (complex_plan plan, double *data);
 /*! Computes a complex backward FFT on \a data, using \a plan.
    \a Data has the form <tt>r0, i0, r1, i1, ...,
    r[length-1], i[length-1]</tt>. */
 void complex_plan_backward (complex_plan plan, double *data);
 /* Plan record for real FFTs; handled through the real_plan pointer type. */
 typedef struct
  {
  /* heap-allocated work array, owned by the plan */
  double *work;
  /* length: number of elements of the transform;
     worksize: number of doubles stored in work */
  size_t length, worksize;
  /* nonzero if the plan uses Bluestein's algorithm */
  int bluestein;
  } real_plan_i;
 /*! The opaque handle type for real-FFT plans. */
 typedef real_plan_i * real_plan;
 /*! Returns a plan for a real FFT with \a length elements. */
 real_plan make_real_plan (size_t length);
 /*! Constructs a copy of \a plan. */
 real_plan copy_real_plan (real_plan plan);
 /*! Destroys a plan for a real FFT. */
 void kill_real_plan (real_plan plan);
 /*! Computes a real forward FFT on \a data, using \a plan
    and assuming the FFTPACK storage scheme:
    - on entry, \a data has the form <tt>r0, r1, ..., r[length-1]</tt>;
    - on exit, it has the form <tt>r0, r1, i1, r2, i2, ...</tt>
      (a total of \a length values). */
 void real_plan_forward_fftpack (real_plan plan, double *data);
 /*! Computes a real backward FFT on \a data, using \a plan
    and assuming the FFTPACK storage scheme:
    - on entry, \a data has the form <tt>r0, r1, i1, r2, i2, ...</tt>
    (a total of \a length values);
    - on exit, it has the form <tt>r0, r1, ..., r[length-1]</tt>. */
 void real_plan_backward_fftpack (real_plan plan, double *data);
 /*! Computes a real forward FFT on \a data, using \a plan
    and assuming the FFTW halfcomplex storage scheme:
    - on entry, \a data has the form <tt>r0, r1, ..., r[length-1]</tt>;
    - on exit, it has the form <tt>r0, r1, r2, ..., i2, i1</tt>. */
 void real_plan_forward_fftw (real_plan plan, double *data);
 /*! Computes a real backward FFT on \a data, using \a plan
    and assuming the FFTW halfcomplex storage scheme:
    - on entry, \a data has the form <tt>r0, r1, r2, ..., i2, i1</tt>.
    - on exit, it has the form <tt>r0, r1, ..., r[length-1]</tt>. */
 void real_plan_backward_fftw (real_plan plan, double *data);
 /*! Computes a real forward FFT on \a data, using \a plan
    and assuming a full-complex storage scheme:
    - on entry, \a data has the form <tt>r0, [ignored], r1, [ignored], ...,
      r[length-1], [ignored]</tt>;
    - on exit, it has the form <tt>r0, i0, r1, i1, ...,
      r[length-1], i[length-1]</tt>. */
 void real_plan_forward_c (real_plan plan, double *data);
 /*! Computes a real backward FFT on \a data, using \a plan
    and assuming a full-complex storage scheme:
    - on entry, \a data has the form <tt>r0, i0, r1, i1, ...,
      r[length-1], i[length-1]</tt>;
    - on exit, it has the form <tt>r0, 0, r1, 0, ..., r[length-1], 0</tt>. */
 void real_plan_backward_c (real_plan plan, double *data);
 /*! \} */
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/libfftpack/planck.make
+++ b/external/sharp/libfftpack/planck.make
@ -1,21 +0,0 @@
 # Build fragment for the libfftpack package.
 # NOTE(review): SRCROOT, BLDROOT, LIBDIR and HDR_c_utils are not set in this
 # fragment; presumably the including makefile provides them -- confirm.
 PKG:=libfftpack
 # Source directory and object directory of this package.
 SD:=$(SRCROOT)/$(PKG)
 OD:=$(BLDROOT)/$(PKG)
 # Make the package headers visible to all compilations.
 FULL_INCLUDE+= -I$(SD)
 HDR_$(PKG):=$(SD)/*.h
 LIB_$(PKG):=$(LIBDIR)/libfftpack.a
 # Object files of the library, then prefixed with the object directory.
 OBJ:=fftpack.o bluestein.o ls_fft.o
 OBJ:=$(OBJ:%=$(OD)/%)
 # Every object depends on this package's headers and those of c_utils.
 ODEP:=$(HDR_$(PKG)) $(HDR_c_utils)
 # fftpack.o has fftpack_inc.c as an additional prerequisite.
 $(OD)/fftpack.o: $(SD)/fftpack_inc.c
 # The object directory is an order-only prerequisite.
 $(OBJ): $(ODEP) | $(OD)_mkdir
 $(LIB_$(PKG)): $(OBJ)
 # Register headers and library in the global lists.
 all_hdr+=$(HDR_$(PKG))
 all_lib+=$(LIB_$(PKG))
--- a/external/sharp/libsharp/libsharp.dox
+++ b/external/sharp/libsharp/libsharp.dox
@ -1,85 +0,0 @@
 /*! \mainpage libsharp documentation
  <ul>
  <li>\ref introduction "Introduction"
  <li><a href="modules.html">Programming interface</a>
  </ul>
 */
 /*! \page introduction Introduction to libsharp
  "SHARP" is an acronym for <i>Spherical HARmonic Package</i>.
  All user-visible data types and functions in this library start with
  the prefix "sharp_" to avoid pollution of the global C namespace.
  <i>libsharp</i>'s main functionality is the conversion between <i>maps</i>
  on the sphere and <i>spherical harmonic coefficients</i> (or <i>a_lm</i>).
  A map is defined as a set of <i>rings</i>, which in turn consist of
  individual pixels that
  <ul>
  <li>all have the same colatitude and</li>
  <li>are uniformly spaced in azimuthal direction.</li>
  </ul>
  Consequently, a ring is completely defined by
  <ul>
  <li>its colatitude (in radians)</li>
  <li>the number of pixels it contains</li>
  <li>the azimuth (in radians) of the first pixel in the ring</li>
  <li>the weight that must be multiplied to every pixel during a map
      analysis (typically the solid angle of a pixel in the ring) </li>
  <li>the offset of the first ring pixel in the <i>map array</i></li>
  <li>the stride between consecutive pixels in the ring.</li>
  </ul>
  The map array is a one-dimensional array of type <i>float</i> or
  <i>double</i>, which contains the values of all map pixels. It is assumed
  that the pixels of every ring are stored inside this array in order of
  increasing azimuth and with the specified stride. Note however that the rings
  themselves can be stored in any order inside the array.
  The a_lm array is a one-dimensional array of type <i>complex float</i> or
  <i>complex double</i>, which contains all spherical harmonic coefficients
  for a full or partial set of m quantum numbers with 0<=m<=mmax and m<=l<=lmax.
  There is only one constraint on the internal structure of the array, which is:
  <code>Index[a_l+1,m] = Index[a_l,m] + stride</code>
  That means that coefficients with identical <i>m</i> but different <i>l</i>
  can be interpreted as a one-dimensional array in <i>l</i> with a unique
  stride.
  Several functions are provided for efficient index computation in this array;
  they are documented \ref almgroup "here".
  Information about a pixelisation of the sphere is stored in objects of
  type sharp_geom_info. It is possible to create such an object for any
  supported pixelisation by using the function sharp_make_geometry_info();
  however, several easier-to-use functions are \ref geominfogroup "supplied"
  for generating often-used pixelisations like ECP grids, Gaussian grids,
  and Healpix grids.
  Currently, libsharp supports the following kinds of transforms:
  <ul>
  <li>scalar a_lm to map</li>
  <li>scalar map to a_lm</li>
 <!--  <li>polarised a_lm to map</li>
  <li>polarised map to a_lm</li> !-->
  <li>spin a_lm to map</li>
  <li>spin map to a_lm</li>
  <li>scalar a_lm to maps of first derivatives</li>
  </ul>
  libsharp supports shared-memory parallelisation via OpenMP; this feature will
  be automatically enabled if the compiler supports it.
  Libsharp will also make use of SSE2 and AVX instructions when compiled for a
  platform known to support them.
  Support for MPI-parallel transforms is also available; in this mode,
  every MPI task must provide a unique subset of the map and a_lm coefficients.
  The spherical harmonic transforms can be executed on double-precision and
  single-precision maps and a_lm, but for accuracy reasons the computations
  will always be performed in double precision. As a consequence,
  single-precision transforms will most likely not be faster than their
  double-precision counterparts, but they will require significantly less
  memory.
 */
--- a/external/sharp/libsharp/planck.make
+++ b/external/sharp/libsharp/planck.make
@ -1,29 +0,0 @@
 # Build fragment for the libsharp package.
 # NOTE(review): SRCROOT, BLDROOT, LIBDIR, BINDIR and the HDR_*/LIB_* variables
 # of libfftpack and c_utils are not set in this fragment; presumably the
 # including makefile and the sibling fragments provide them -- confirm.
 PKG:=libsharp
 # Source directory and object directory of this package.
 SD:=$(SRCROOT)/$(PKG)
 OD:=$(BLDROOT)/$(PKG)
 # Make the package headers visible to all compilations.
 FULL_INCLUDE+= -I$(SD)
 HDR_$(PKG):=$(SD)/*.h
 LIB_$(PKG):=$(LIBDIR)/libsharp.a
 # Executable built from this package.
 BIN:=sharp_testsuite
 # LIBOBJ: objects that go into the library; ALLOBJ adds the test suite object.
 LIBOBJ:=sharp_ylmgen_c.o sharp.o sharp_announce.o sharp_geomhelpers.o sharp_almhelpers.o sharp_core.o sharp_legendre.o sharp_legendre_roots.o
 ALLOBJ:=$(LIBOBJ) sharp_testsuite.o
 # Prefix both lists with the object directory.
 LIBOBJ:=$(LIBOBJ:%=$(OD)/%)
 ALLOBJ:=$(ALLOBJ:%=$(OD)/%)
 # Every object depends on the headers of libsharp, libfftpack and c_utils.
 ODEP:=$(HDR_$(PKG)) $(HDR_libfftpack) $(HDR_c_utils)
 # Additional .c prerequisites of individual objects.
 $(OD)/sharp_core.o: $(SD)/sharp_core_inchelper.c $(SD)/sharp_core_inc.c $(SD)/sharp_core_inc2.c
 $(OD)/sharp.o: $(SD)/sharp_mpi.c
 # Libraries the executable depends on.
 BDEP:=$(LIB_$(PKG)) $(LIB_libfftpack) $(LIB_c_utils)
 $(LIB_$(PKG)): $(LIBOBJ)
 # The object directory is an order-only prerequisite.
 $(ALLOBJ): $(ODEP) | $(OD)_mkdir
 # Prefix the executable with the binary directory; static pattern rule maps
 # each binary to its object file plus the libraries in BDEP.
 BIN:=$(BIN:%=$(BINDIR)/%)
 $(BIN): $(BINDIR)/% : $(OD)/%.o $(BDEP)
 # Register headers, library and binary in the global lists.
 all_hdr+=$(HDR_$(PKG))
 all_lib+=$(LIB_$(PKG))
 all_cbin+=$(BIN)
--- a/external/sharp/libsharp/sharp.c
+++ b/external/sharp/libsharp/sharp.c
--- a/external/sharp/libsharp/sharp.h
+++ b/external/sharp/libsharp/sharp.h
@ -1,45 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp.h
 *  Interface for the spherical transform library.
 *
 *  Copyright (C) 2006-2012 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef PLANCK_SHARP_H
 #define PLANCK_SHARP_H
 #ifdef __cplusplus
 #error This header file cannot be included from C++, only from C
 #endif
 #include <complex.h>
 #include "sharp_lowlevel.h"
 #include "sharp_legendre.h"
 #include "sharp_legendre_roots.h"
 #endif
--- a/external/sharp/libsharp/sharp_almhelpers.c
+++ b/external/sharp/libsharp/sharp_almhelpers.c
@ -1,94 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_almhelpers.c
 *  Spherical transform library
 *
 *  Copyright (C) 2008-2013 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #include "sharp_almhelpers.h"
 #include "c_utils.h"
 /* Allocates a sharp_alm_info covering m=0..mmax and stores it in
    *alm_info.  mval[m] is m itself and mvstart[m] is
    stride*((m*(2*lmax+1-m))>>1); flags is 0. */
 void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
  sharp_alm_info **alm_info)
  {
  sharp_alm_info *res = RALLOC(sharp_alm_info,1);
  int *mvals = RALLOC(int,mmax+1);
  ptrdiff_t *starts = RALLOC(ptrdiff_t,mmax+1);
  const ptrdiff_t twolp1 = 2*lmax+1;
  ptrdiff_t m;
  for (m=0; m<=mmax; ++m)
    {
    mvals[m] = m;
    starts[m] = stride*((m*(twolp1-m))>>1);
    }
  res->lmax = lmax;
  res->nm = mmax+1;
  res->mval = mvals;
  res->mvstart = starts;
  res->stride = stride;
  res->flags = 0;
  *alm_info = res;
  }
 /* Allocates a sharp_alm_info covering m=0..mmax and stores it in
    *alm_info.  mval[m] is m itself and mvstart[m] is stride*m*(lmax+1);
    flags is 0. */
 void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
  sharp_alm_info **alm_info)
  {
  sharp_alm_info *res = RALLOC(sharp_alm_info,1);
  int *mvals = RALLOC(int,mmax+1);
  ptrdiff_t *starts = RALLOC(ptrdiff_t,mmax+1);
  ptrdiff_t m;
  for (m=0; m<=mmax; ++m)
    {
    mvals[m] = m;
    starts[m] = stride*m*(lmax+1);
    }
  res->lmax = lmax;
  res->nm = mmax+1;
  res->mval = mvals;
  res->mvstart = starts;
  res->stride = stride;
  res->flags = 0;
  *alm_info = res;
  }
 /* Allocates a sharp_alm_info with nm entries, flagged
    SHARP_PACKED | SHARP_REAL_HARMONICS, and stores it in *alm_info.
    Entry im uses m=ms[im], or m=im when ms is NULL.  Each m occupies
    f*(lmax+1-m) elements, where f is 1 for m==0 and 2 otherwise; mvstart is
    the running element count corrected by f*m and scaled by stride. */
 void sharp_make_mmajor_real_packed_alm_info (int lmax, int stride,
  int nm, const int *ms, sharp_alm_info **alm_info)
  {
  sharp_alm_info *res = RALLOC(sharp_alm_info,1);
  int *mvals = RALLOC(int,nm);
  ptrdiff_t *starts = RALLOC(ptrdiff_t,nm);
  /* number of elements consumed by the m values handled so far */
  ptrdiff_t used = 0;
  int im;
  for (im=0; im!=nm; ++im)
    {
    int m, mult;
    if (ms==NULL)
      m = im;
    else
      m = ms[im];
    if (m==0)
      mult = 1;
    else
      mult = 2;
    mvals[im] = m;
    starts[im] = stride * (used - mult * m);
    used += mult * (lmax + 1 - m);
    }
  res->lmax = lmax;
  res->nm = nm;
  res->mval = mvals;
  res->mvstart = starts;
  res->stride = stride;
  res->flags = SHARP_PACKED | SHARP_REAL_HARMONICS;
  *alm_info = res;
  }
--- a/external/sharp/libsharp/sharp_almhelpers.h
+++ b/external/sharp/libsharp/sharp_almhelpers.h
@ -1,65 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_almhelpers.h
 *  SHARP helper function for the creation of a_lm data structures
 *
 *  Copyright (C) 2008-2011 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef PLANCK_SHARP_ALMHELPERS_H
 #define PLANCK_SHARP_ALMHELPERS_H
 #include "sharp_lowlevel.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*! Initialises an a_lm data structure according to the scheme used by
    Healpix_cxx.
    \ingroup almgroup */
 void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
  sharp_alm_info **alm_info);
 /*! Initialises an a_lm data structure according to the scheme used by
    Fortran Healpix
    \ingroup almgroup */
 void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
  sharp_alm_info **alm_info);
 /*! Initialises alm_info for mmajor, real, packed spherical harmonics.
    Pass \a mmax + 1 to nm and NULL to \a ms in order to use everything;
    otherwise you can pick a subset of m to process (should only be used
    for MPI parallelization).
    \ingroup almgroup */
 void sharp_make_mmajor_real_packed_alm_info (int lmax, int stride,
  int nm, const int *ms, sharp_alm_info **alm_info);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/libsharp/sharp_announce.c
+++ b/external/sharp/libsharp/sharp_announce.c
@ -1,98 +0,0 @@
 /*
 *  This file is part of libc_utils.
 *
 *  libc_utils is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libc_utils is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libc_utils; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_announce.c
 *  Banner for module startup
 *
 *  Copyright (C) 2012 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #ifdef _OPENMP
 #include <omp.h>
 #endif
 #ifdef USE_MPI
 #include <mpi.h>
 #endif
 #include "sharp_announce.h"
 #include "sharp_vecutil.h"
 /* Prints one line to stdout describing OpenMP availability and, when
    compiled with OpenMP, the maximum thread count. */
 static void OpenMP_status(void)
  {
 #ifdef _OPENMP
  const int nthreads = omp_get_max_threads();
  if (nthreads<=1)
    printf("OpenMP active, but running with 1 thread only.\n");
  else
    printf("OpenMP active: max. %d threads.\n",nthreads);
 #else
  printf("OpenMP: not supported by this binary\n");
 #endif
  }
 /* Prints one line to stdout describing MPI availability and, when compiled
    with USE_MPI, the size of MPI_COMM_WORLD. */
 static void MPI_status(void)
  {
 #ifdef USE_MPI
  int ntasks;
  MPI_Comm_size(MPI_COMM_WORLD,&ntasks);
  if (ntasks<=1)
    printf("MPI active, but running with 1 task only.\n");
  else
    printf("MPI active with %d tasks.\n",ntasks);
 #else
  printf("MPI: not supported by this binary\n");
 #endif
  }
 /* Prints the compile-time vector length VLEN to stdout. */
 static void vecmath_status(void)
  {
  const int veclen = VLEN;
  printf("Supported vector length: %d\n",veclen);
  }
 /* Prints \a name inside an ASCII box to stdout, followed by the vector
    length, OpenMP and MPI status lines and a blank line. */
 void sharp_announce (const char *name)
  {
  const size_t width = strlen(name);
  size_t left;
  /* top border */
  printf("\n+-");
  for (left=width; left>0; --left) printf("-");
  printf("-+\n");
  /* boxed name */
  printf("| %s |\n", name);
  /* bottom border */
  printf("+-");
  for (left=width; left>0; --left) printf("-");
  printf("-+\n\n");
  vecmath_status();
  OpenMP_status();
  MPI_status();
  printf("\n");
  }
 /* Optionally prints the startup banner, then checks the argument count.
    If \a argc differs from \a argc_expected, a usage line is written to
    stderr (only when \a verbose is set) and the process exits with
    status 1. */
 void sharp_module_startup (const char *name, int argc, int argc_expected,
  const char *argv_expected, int verbose)
  {
  if (verbose) sharp_announce (name);
  if (argc!=argc_expected)
    {
    if (verbose) fprintf(stderr, "Usage: %s %s\n", name, argv_expected);
    exit(1);
    }
  }
--- a/external/sharp/libsharp/sharp_announce.h
+++ b/external/sharp/libsharp/sharp_announce.h
@ -1,39 +0,0 @@
 /*
 *  This file is part of libc_utils.
 *
 *  libc_utils is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libc_utils is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libc_utils; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_announce.h
 *  Banner for module startup
 *
 *  Copyright (C) 2012 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef SHARP_ANNOUNCE_H
 #define SHARP_ANNOUNCE_H
 void sharp_announce (const char *name);
 void sharp_module_startup (const char *name, int argc, int argc_expected,
  const char *argv_expected, int verbose);
 #endif
--- a/external/sharp/libsharp/sharp_complex_hacks.h
+++ b/external/sharp/libsharp/sharp_complex_hacks.h
@ -1,149 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*  \file sharp_complex_hacks.h
 *  support for converting vector types and complex numbers
 *
 *  Copyright (C) 2012,2013 Max-Planck-Society
 *  Author: Martin Reinecke
 */
 #ifndef SHARP_COMPLEX_HACKS_H
 #define SHARP_COMPLEX_HACKS_H
 #ifdef __cplusplus
 #error This header file cannot be included from C++, only from C
 #endif
 #include <math.h>
 #include <complex.h>
 #include "sharp_vecsupport.h"
 #define UNSAFE_CODE
 #if (VLEN==1)
 /* VLEN==1 variant: there is nothing to sum horizontally, so the result is
  * simply the complex number with real part a and imaginary part b. */
 static inline complex double vhsum_cmplx(Tv a, Tv b)
  {
  complex double z = a+_Complex_I*b;
  return z;
  }
 /* VLEN==1 variant: adds (a + i*b) to *c1 and (c + i*d) to *c2. */
 static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
  complex double * restrict c1, complex double * restrict c2)
  {
  const complex double first = a+_Complex_I*b;
  const complex double second = c+_Complex_I*d;
  *c1 += first;
  *c2 += second;
  }
 #endif
 #if (VLEN==2)
 /* VLEN==2 variant: returns (a[0]+a[1]) + i*(b[0]+b[1]).
  * With __SSE3__ the horizontal add instruction is used directly; otherwise
  * the same two sums are formed from two shuffles and one vector add. */
 static inline complex double vhsum_cmplx (Tv a, Tv b)
  {
 #if defined(__SSE3__)
  Tv tmp = _mm_hadd_pd(a,b);
 #else
  /* first shuffle yields {a[1],b[0]}, second yields {a[0],b[1]} */
  Tv tmp = vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
                _mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)));
 #endif
  /* reinterpret the two doubles of the vector as (real, imag) via a union */
  union {Tv v; complex double c; } u;
  u.v=tmp; return u.c;
  }
 /* VLEN==2 variant: adds (sum(a) + i*sum(b)) to *c1 and (sum(c) + i*sum(d))
  * to *c2, where sum() is the sum over the two vector lanes.
  * With UNSAFE_CODE defined the accumulators are accessed through __m128d
  * pointer casts; otherwise the conversion goes through a union.
  * NOTE(review): the UNSAFE_CODE path dereferences c1/c2 as __m128d, so it
  * presumably relies on suitable alignment of the targets - confirm. */
 static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c,
  Tv d, complex double * restrict c1, complex double * restrict c2)
  {
 #ifdef UNSAFE_CODE
 #if defined(__SSE3__)
  vaddeq(*((__m128d *)c1),_mm_hadd_pd(a,b));
  vaddeq(*((__m128d *)c2),_mm_hadd_pd(c,d));
 #else
  /* shuffle pair {x[1],y[0]} + {x[0],y[1]} emulates the horizontal add */
  vaddeq(*((__m128d *)c1),vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
                               _mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0))));
  vaddeq(*((__m128d *)c2),vadd(_mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)),
                               _mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0))));
 #endif
 #else
  /* portable path: build both sums in unions, then add as complex values */
  union {Tv v; complex double c; } u1, u2;
 #if defined(__SSE3__)
  u1.v = _mm_hadd_pd(a,b); u2.v=_mm_hadd_pd(c,d);
 #else
  u1.v = vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
              _mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)));
  u2.v = vadd(_mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)),
              _mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0)));
 #endif
  *c1+=u1.c; *c2+=u2.c;
 #endif
  }
 #endif
 #if (VLEN==4)
 /* VLEN==4 variant: returns (sum of the 4 lanes of a) + i*(sum of the 4 lanes
  * of b). */
 static inline complex double vhsum_cmplx (Tv a, Tv b)
  {
  /* pairwise sums inside each 128-bit half: {a0+a1, b0+b1, a2+a3, b2+b3} */
  Tv tmp=_mm256_hadd_pd(a,b);
  /* swap the two 128-bit halves and add, so every half holds the full sums */
  Tv tmp2=_mm256_permute2f128_pd(tmp,tmp,1);
  tmp=_mm256_add_pd(tmp,tmp2);
 #ifdef UNSAFE_CODE
  /* store the low 128 bits straight into the complex result via a cast */
  complex double ret;
  *((__m128d *)&ret)=_mm256_extractf128_pd(tmp, 0);
  return ret;
 #else
  union {Tv v; complex double c[2]; } u;
  u.v=tmp; return u.c[0];
 #endif
  }
 /* VLEN==4 variant: adds (sum(a) + i*sum(b)) to *c1 and (sum(c) + i*sum(d))
  * to *c2, where sum() runs over the four vector lanes.
  * NOTE(review): the UNSAFE_CODE path accesses c1/c2 through __m128d pointer
  * casts and presumably relies on suitable alignment - confirm. */
 static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
  complex double * restrict c1, complex double * restrict c2)
  {
  /* per-half pairwise sums of (a,b) and of (c,d) */
  Tv tmp1=_mm256_hadd_pd(a,b), tmp2=_mm256_hadd_pd(c,d);
  /* tmp3 = {high half of tmp1, high half of tmp2},
     tmp4 = {low half of tmp1,  low half of tmp2} */
  Tv tmp3=_mm256_permute2f128_pd(tmp1,tmp2,49),
     tmp4=_mm256_permute2f128_pd(tmp1,tmp2,32);
  /* low half now holds the (a,b) totals, high half the (c,d) totals */
  tmp1=vadd(tmp3,tmp4);
 #ifdef UNSAFE_CODE
  *((__m128d *)c1)=_mm_add_pd(*((__m128d *)c1),_mm256_extractf128_pd(tmp1, 0));
  *((__m128d *)c2)=_mm_add_pd(*((__m128d *)c2),_mm256_extractf128_pd(tmp1, 1));
 #else
  union {Tv v; complex double c[2]; } u;
  u.v=tmp1;
  *c1+=u.c[0]; *c2+=u.c[1];
 #endif
  }
 #endif
 #if (VLEN==8)
 /* VLEN==8 variant: reduces all lanes of a into the real part and all lanes
  * of b into the imaginary part of the returned complex number. */
 static inline complex double vhsum_cmplx(Tv a, Tv b)
  {
  const double re=_mm512_reduce_add_pd(a);
  const double im=_mm512_reduce_add_pd(b);
  return re+_Complex_I*im;
  }
 /* VLEN==8 variant: adds (sum(a) + i*sum(b)) to *c1 and (sum(c) + i*sum(d))
  * to *c2, where sum() is the reduction over all vector lanes. */
 static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
  complex double * restrict c1, complex double * restrict c2)
  {
  const double re1=_mm512_reduce_add_pd(a), im1=_mm512_reduce_add_pd(b);
  *c1 += re1+_Complex_I*im1;
  const double re2=_mm512_reduce_add_pd(c), im2=_mm512_reduce_add_pd(d);
  *c2 += re2+_Complex_I*im2;
  }
 #endif
 #endif
--- a/external/sharp/libsharp/sharp_core.c
+++ b/external/sharp/libsharp/sharp_core.c
@ -1,240 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_core.c
 *  Computational core
 *
 *  Copyright (C) 2012-2013 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #include <complex.h>
 #include <math.h>
 #include <string.h>
 #include "sharp_vecsupport.h"
 #include "sharp_complex_hacks.h"
 #include "sharp_ylmgen_c.h"
 #include "sharp.h"
 #include "sharp_core.h"
 #include "c_utils.h"
 /* shorthand for the double-precision complex type used in this file */
 typedef complex double dcmplx;
 /* Largest transform count that gets its own group of case labels in
    inner_loop(); larger counts take the generic branch there. */
 // must be in the range [0;6]
 #define MAXJOB_SPECIAL 2
 /* Token-pasting helpers.  The CONCATn wrappers let their arguments be
    macro-expanded first and then join them with underscores:
    CONCAT2(a,b) -> a_b, CONCAT3(a,b,c) -> a_b_c. */
 #define XCONCAT2(a,b) a##_##b
 #define CONCAT2(a,b) XCONCAT2(a,b)
 #define XCONCAT3(a,b,c) a##_##b##_##c
 #define CONCAT3(a,b,c) XCONCAT3(a,b,c)
 /* The helper file is included once for every value of nvec from 1 to 6;
    presumably each inclusion instantiates the kernels for that vector count
    (the helper itself is not part of this file). */
 #define nvec 1
 #include "sharp_core_inchelper.c"
 #undef nvec
 #define nvec 2
 #include "sharp_core_inchelper.c"
 #undef nvec
 #define nvec 3
 #include "sharp_core_inchelper.c"
 #undef nvec
 #define nvec 4
 #include "sharp_core_inchelper.c"
 #undef nvec
 #define nvec 5
 #include "sharp_core_inchelper.c"
 #undef nvec
 #define nvec 6
 #include "sharp_core_inchelper.c"
 #undef nvec
 /* Dispatcher for the computational core.
  * Reads the transform count (job->ntrans) and the vector count
  * (job->flags&SHARP_NVMAX) and forwards all arguments unchanged to the
  * matching kernel instance:
  *  - ntrans<=MAXJOB_SPECIAL: the function inner_loop_<nv>_<ntrans>, provided
  *    that transform count is enabled by MAXJOB_SPECIAL and SHARP_MAXTRANS;
  *  - otherwise (branch compiled only if SHARP_MAXTRANS>MAXJOB_SPECIAL): the
  *    function inner_loop_<nv>, which additionally receives job->ntrans.
  * Vector counts 1..6 are supported.  Every combination that has no matching
  * case ends in UTIL_FAIL. */
 void inner_loop (sharp_job *job, const int *ispair,const double *cth,
  const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
  const int *mlim)
  {
 /* one case of the specialised table: vector count nv_, transform count nj_ */
 #define SHARP_CALL_SPECIAL(nv_,nj_) \
      case 16*(nj_)+(nv_): \
        CONCAT3(inner_loop,nv_,nj_) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim); \
        return;
 /* all six vector counts for a fixed transform count nj_ */
 #define SHARP_CALL_SPECIAL_ALLNV(nj_) \
      SHARP_CALL_SPECIAL(1,nj_) \
      SHARP_CALL_SPECIAL(2,nj_) \
      SHARP_CALL_SPECIAL(3,nj_) \
      SHARP_CALL_SPECIAL(4,nj_) \
      SHARP_CALL_SPECIAL(5,nj_) \
      SHARP_CALL_SPECIAL(6,nj_)
 /* one case of the generic table: transform count is passed at run time */
 #define SHARP_CALL_GENERIC(nv_) \
      case nv_: \
        CONCAT2(inner_loop,nv_) \
          (job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans); \
        return;

  const int ntrans=job->ntrans;
  const int nvecs=job->flags&SHARP_NVMAX;
  if (ntrans<=MAXJOB_SPECIAL)
    {
    /* key: high nibble = transform count, low nibble = vector count */
    switch (ntrans*16+nvecs)
      {
 #if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
      SHARP_CALL_SPECIAL_ALLNV(1)
 #endif
 #if ((MAXJOB_SPECIAL>=2)&&(SHARP_MAXTRANS>=2))
      SHARP_CALL_SPECIAL_ALLNV(2)
 #endif
 #if ((MAXJOB_SPECIAL>=3)&&(SHARP_MAXTRANS>=3))
      SHARP_CALL_SPECIAL_ALLNV(3)
 #endif
 #if ((MAXJOB_SPECIAL>=4)&&(SHARP_MAXTRANS>=4))
      SHARP_CALL_SPECIAL_ALLNV(4)
 #endif
 #if ((MAXJOB_SPECIAL>=5)&&(SHARP_MAXTRANS>=5))
      SHARP_CALL_SPECIAL_ALLNV(5)
 #endif
 #if ((MAXJOB_SPECIAL>=6)&&(SHARP_MAXTRANS>=6))
      SHARP_CALL_SPECIAL_ALLNV(6)
 #endif
      }
    }
 #if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
  else
    {
    switch (nvecs)
      {
      SHARP_CALL_GENERIC(1)
      SHARP_CALL_GENERIC(2)
      SHARP_CALL_GENERIC(3)
      SHARP_CALL_GENERIC(4)
      SHARP_CALL_GENERIC(5)
      SHARP_CALL_GENERIC(6)
      }
    }
 #endif
  /* reached only when no case above matched */
  UTIL_FAIL("Incorrect vector parameters");
 #undef SHARP_CALL_GENERIC
 #undef SHARP_CALL_SPECIAL_ALLNV
 #undef SHARP_CALL_SPECIAL
  }
--- a/external/sharp/libsharp/sharp_core.h
+++ b/external/sharp/libsharp/sharp_core.h
@ -1,50 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_core.h
 *  Interface for the computational core
 *
 *  Copyright (C) 2012-2013 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef PLANCK_SHARP_CORE_H
 #define PLANCK_SHARP_CORE_H
 #include "sharp_internal.h"
 #include "sharp_ylmgen_c.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*! Entry point of the computational core.  Selects a kernel instance from
     the transform count and vector count stored in \a job and passes all
     remaining arguments through to it unchanged. */
 void inner_loop (sharp_job *job, const int *ispair,const double *cth,
  const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
  const int *mlim);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/libsharp/sharp_core_inc.c
+++ b/external/sharp/libsharp/sharp_core_inc.c
@ -1,293 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_core_inc.c
 *  Type-dependent code for the computational core
 *
 *  Copyright (C) 2012 Max-Planck-Society
 *  \author Martin Reinecke
 */
 /* Tb: a block of nvec vectors of type Tv.
    NOTE(review): nvec, VLEN, Tv and the name-decorating macro Y() come from
    the including file; Y() presumably makes the names unique per nvec. */
 typedef struct
  { Tv v[nvec]; } Tb;
 /* Tbu: a block overlaid with its VLEN*nvec individual doubles */
 typedef union
  { Tb b; double s[VLEN*nvec]; } Y(Tbu);
 /* Tbri: pair of blocks named r and i */
 typedef struct
  { Tb r, i; } Y(Tbri);
 /* Tbqu: four blocks named qr, qi, ur and ui */
 typedef struct
  { Tb qr, qi, ur, ui; } Y(Tbqu);
 /* Tsri: same layout as Tbri, expressed as plain double arrays */
 typedef struct
  { double r[VLEN*nvec], i[VLEN*nvec]; } Y(Tsri);
 /* Tsqu: same layout as Tbqu, expressed as plain double arrays */
 typedef struct
  { double qr[VLEN*nvec],qi[VLEN*nvec],ur[VLEN*nvec],ui[VLEN*nvec]; } Y(Tsqu);
 /* Tburi: overlay of the vector view (b) and the scalar view (s) of r/i */
 typedef union
  { Y(Tbri) b; Y(Tsri)s; } Y(Tburi);
 /* Tbuqu: overlay of the vector view (b) and the scalar view (s) of q/u */
 typedef union
  { Y(Tbqu) b; Y(Tsqu)s; } Y(Tbuqu);
 /* Returns a block whose nvec vectors are all set to vload(val). */
 static inline Tb Y(Tbconst)(double val)
  {
  Tb filled;
  const Tv splat=vload(val);
  int k=0;
  while (k<nvec)
    {
    filled.v[k]=splat;
    ++k;
    }
  return filled;
  }
 /* In-place scaling: applies vmuleq with vload(b) to every vector of *a. */
 static inline void Y(Tbmuleq1)(Tb * restrict a, double b)
  {
  const Tv factor=vload(b);
  for (int k=0; k<nvec; ++k)
    vmuleq(a->v[k],factor);
  }
 /* Returns the block whose k-th vector is vmul(a.v[k],b.v[k]). */
 static inline Tb Y(Tbprod)(Tb a, Tb b)
  {
  Tb prod;
  for (int k=0; k<nvec; ++k)
    prod.v[k]=vmul(a.v[k],b.v[k]);
  return prod;
  }
 /* In-place product: applies vmuleq to each vector of *a with the
  * corresponding vector of b. */
 static inline void Y(Tbmuleq)(Tb * restrict a, Tb b)
  {
  int k=0;
  while (k<nvec)
    {
    vmuleq(a->v[k],b.v[k]);
    ++k;
    }
  }
 /* Brings the magnitudes in *val into a bounded range while recording the
  * applied shifts in *scale.  For each vector:
  *  - while any lane has |val| > maxval, the flagged lanes are multiplied by
  *    sharp_fsmall and their scale is increased by one;
  *  - then, while any nonzero lane has |val| < sharp_fsmall*maxval, the
  *    flagged lanes are multiplied by sharp_fbig and their scale is
  *    decreased by one.
  * NOTE(review): the *_mask macros presumably touch only lanes selected by
  * the mask - confirm in sharp_vecsupport.h. */
 static void Y(Tbnormalize) (Tb * restrict val, Tb * restrict scale,
  double maxval)
  {
  const Tv vfsmall=vload(sharp_fsmall), vfbig=vload(sharp_fbig);
  const Tv vfmin=vload(sharp_fsmall*maxval), vfmax=vload(maxval);
  for (int i=0;i<nvec; ++i)
    {
    /* scale down lanes that are too large */
    Tm mask = vgt(vabs(val->v[i]),vfmax);
    while (vanyTrue(mask))
      {
      vmuleq_mask(mask,val->v[i],vfsmall);
      vaddeq_mask(mask,scale->v[i],vone);
      mask = vgt(vabs(val->v[i]),vfmax);
      }
    /* scale up lanes that are too small; exact zeros are excluded because
       no amount of scaling would lift them above the threshold */
    mask = vand_mask(vlt(vabs(val->v[i]),vfmin),vne(val->v[i],vzero));
    while (vanyTrue(mask))
      {
      vmuleq_mask(mask,val->v[i],vfbig);
      vsubeq_mask(mask,scale->v[i],vone);
      mask = vand_mask(vlt(vabs(val->v[i]),vfmin),vne(val->v[i],vzero));
      }
    }
  }
 /* Raises every lane of val to the integer power npow by repeated squaring,
  * keeping the intermediate values normalized (see Tbnormalize) so that they
  * stay within range.  The normalized result is stored in *resd and the
  * accumulated scale counters in *ress.
  * The loop body runs at least once, so for npow==0 the result is 1 with
  * scale 0. */
 static void Y(mypow) (Tb val, int npow, Tb * restrict resd,
  Tb * restrict ress)
  {
  /* scale: scale of res; scaleint: scale of the running power held in val */
  Tb scale=Y(Tbconst)(0.), scaleint=Y(Tbconst)(0.), res=Y(Tbconst)(1.);
  Y(Tbnormalize)(&val,&scaleint,sharp_fbighalf);
  do
    {
    if (npow&1)
      {
      /* current bit set: multiply the running power into the result;
         scales of the two factors add up */
      for (int i=0; i<nvec; ++i)
        {
        vmuleq(res.v[i],val.v[i]);
        vaddeq(scale.v[i],scaleint.v[i]);
        }
      Y(Tbnormalize)(&res,&scale,sharp_fbighalf);
      }
    /* square the running power; its scale doubles */
    for (int i=0; i<nvec; ++i)
      {
      vmuleq(val.v[i],val.v[i]);
      vaddeq(scaleint.v[i],scaleint.v[i]);
      }
    Y(Tbnormalize)(&val,&scaleint,sharp_fbighalf);
    }
  while(npow>>=1);
  *resd=res;
  *ress=scale;
  }
 /* Checks every vector of *lam2 for lanes whose magnitude exceeds sharp_ftol.
  * For each vector with at least one such lane, the flagged lanes of *lam1
  * and *lam2 are multiplied by sharp_fsmall and the matching lanes of *scale
  * are increased by one (through the *_mask macros).
  * Returns 1 if any vector was rescaled, 0 otherwise. */
 static inline int Y(rescale) (Tb * restrict lam1, Tb * restrict lam2,
  Tb * restrict scale)
  {
  int rescaled=0;
  for (int k=0; k<nvec; ++k)
    {
    Tm too_big = vgt(vabs(lam2->v[k]),vload(sharp_ftol));
    if (!(vanyTrue(too_big))) continue;
    rescaled=1;
    vmuleq_mask(too_big,lam1->v[k],vload(sharp_fsmall));
    vmuleq_mask(too_big,lam2->v[k],vload(sharp_fsmall));
    vaddeq_mask(too_big,scale->v[k],vone);
    }
  return rescaled;
  }
 /* Returns the result of vallTrue() on the combined "a < b" masks of all
  * nvec vectors, i.e. nonzero only if every lane of a is less than b. */
 static inline int Y(TballLt)(Tb a,double b)
  {
  const Tv bound=vload(b);
  Tm all_lt=vlt(a.v[0],bound);
  int k=1;
  while (k<nvec)
    {
    all_lt=vand_mask(all_lt,vlt(a.v[k],bound));
    ++k;
    }
  return vallTrue(all_lt);
  }
 /* Returns the result of vallTrue() on the combined "a > b" masks of all
  * nvec vectors, i.e. nonzero only if every lane of a is greater than b. */
 static inline int Y(TballGt)(Tb a,double b)
  {
  const Tv bound=vload(b);
  Tm all_gt=vgt(a.v[0],bound);
  int k=1;
  while (k<nvec)
    {
    all_gt=vand_mask(all_gt,vgt(a.v[k],bound));
    ++k;
    }
  return vallTrue(all_gt);
  }
 /* Returns the result of vallTrue() on the combined "a >= b" masks of all
  * nvec vectors, i.e. nonzero only if every lane of a is at least b. */
 static inline int Y(TballGe)(Tb a,double b)
  {
  const Tv bound=vload(b);
  Tm all_ge=vge(a.v[0],bound);
  int k=1;
  while (k<nvec)
    {
    all_ge=vand_mask(all_ge,vge(a.v[k],bound));
    ++k;
    }
  return vallTrue(all_ge);
  }
 /* Translates per-lane scale counters into correction factors.
  * Each lane of scale is read as a double; lanes below sharp_minscale get the
  * factor 0, all others get cf[(int)lane - sharp_minscale].  The resulting
  * block is written to *corfac. */
 static void Y(getCorfac)(Tb scale, Tb * restrict corfac,
  const double * restrict cf)
  {
  Y(Tbu) in, out;
  in.b=scale;
  for (int k=0; k<VLEN*nvec; ++k)
    {
    if (in.s[k]<sharp_minscale)
      out.s[k] = 0.;
    else
      out.s[k] = cf[(int)(in.s[k])-sharp_minscale];
    }
  *corfac=out.b;
  }
 /* Starts the two-term recurrence at l=gen->m and advances it in steps of two
  * until at least one lane's scale counter is no longer below
  * sharp_limscale.
  * On return *l_ holds the reached l and *lam_1_, *lam_2_, *scale_ the
  * recurrence state.  If gen->lmax would be exceeded first, only *l_ is
  * written (set to gen->lmax+1) and the other outputs are left untouched, so
  * callers must check *l_ before using them. */
 static void Y(iter_to_ieee) (const Tb sth, Tb cth, int *l_,
  Tb * restrict lam_1_, Tb * restrict lam_2_, Tb * restrict scale_,
  const sharp_Ylmgen_C * restrict gen)
  {
  int l=gen->m;
  Tb lam_1=Y(Tbconst)(0.), lam_2, scale;
  /* starting value: sth^m times mfac[m], negated for odd m */
  Y(mypow) (sth,l,&lam_2,&scale);
  Y(Tbmuleq1) (&lam_2,(gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m]);
  Y(Tbnormalize)(&lam_2,&scale,sharp_ftol);
  int below_limit = Y(TballLt)(scale,sharp_limscale);
  while (below_limit)
    {
    if (l+2>gen->lmax) {*l_=gen->lmax+1;return;}
    /* two recurrence steps: new = cth*prev*r0 - prevprev*r1 */
    Tv r0=vload(gen->rf[l].f[0]),r1=vload(gen->rf[l].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
    r0=vload(gen->rf[l+1].f[0]); r1=vload(gen->rf[l+1].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
    /* the scale can only change when a rescale happened, so the limit test
       is repeated only then */
    if (Y(rescale)(&lam_1,&lam_2,&scale))
      below_limit = Y(TballLt)(scale,sharp_limscale);
    l+=2;
    }
  *l_=l; *lam_1_=lam_1; *lam_2_=lam_2; *scale_=scale;
  }
 /* One step of the three-coefficient recurrence, applied to two sequences at
  * once.  With the coefficients fx.f[0..2] each lane is updated as
  *   rxp = (cth - f1) * f0 * ryp - f2 * rxp
  *   rxm = (cth + f1) * f0 * rym - f2 * rxm
  * so rxp/rxm are overwritten in place while ryp/rym are only read. */
 static inline void Y(rec_step) (Tb * restrict rxp, Tb * restrict rxm,
  Tb * restrict ryp, Tb * restrict rym, const Tb cth,
  const sharp_ylmgen_dbl3 fx)
  {
  Tv fx0=vload(fx.f[0]),fx1=vload(fx.f[1]),fx2=vload(fx.f[2]);
  for (int i=0; i<nvec; ++i)
    {
    rxp->v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,ryp->v[i])),
                vmul(fx2,rxp->v[i]));
    rxm->v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rym->v[i])),
                vmul(fx2,rxm->v[i]));
    }
  }
 /* Spin variant of iter_to_ieee: builds the starting values of the "p" and
  * "m" recurrences and advances both in steps of two (starting at l=gen->mhi)
  * until the scale counters of the two sequences are no longer all below
  * sharp_limscale.
  * On return *l_ holds the reached l and the rec1p/rec1m/rec2p/rec2m/scalep/
  * scalem outputs the recurrence state.  If gen->lmax would be exceeded
  * first, only *l_ is written (set to gen->lmax+1) and the other outputs are
  * left untouched, so callers must check *l_ before using them. */
 static void Y(iter_to_ieee_spin) (const Tb cth, const Tb sth, int *l_,
  Tb * rec1p_, Tb * rec1m_, Tb * rec2p_, Tb * rec2m_,
  Tb * scalep_, Tb * scalem_, const sharp_Ylmgen_C * restrict gen)
  {
  const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
  Tb cth2, sth2;
  for (int i=0; i<nvec; ++i)
    {
    /* cth2 = sqrt((1+cth)/2), sth2 = sqrt((1-cth)/2), both clamped from
       below at 1e-15 (presumably cosine/sine of half the angle - confirm) */
    cth2.v[i]=vsqrt(vmul(vadd(vone,cth.v[i]),vload(0.5)));
    cth2.v[i]=vmax(cth2.v[i],vload(1e-15));
    sth2.v[i]=vsqrt(vmul(vsub(vone,cth.v[i]),vload(0.5)));
    sth2.v[i]=vmax(sth2.v[i],vload(1e-15));
    /* for lanes with sth<0: negate cth2 where cth<0, sth2 where cth>0 */
    Tm mask=vlt(sth.v[i],vzero);
    Tm cmask=vand_mask(mask,vlt(cth.v[i],vzero));
    vmuleq_mask(cmask,cth2.v[i],vload(-1.));
    Tm smask=vand_mask(mask,vgt(cth.v[i],vzero));
    vmuleq_mask(smask,sth2.v[i],vload(-1.));
    }
  /* the four scaled powers needed for the starting values:
     cth2^cosPow, sth2^sinPow, cth2^sinPow, sth2^cosPow (value + scale each) */
  Tb ccp, ccps, ssp, ssps, csp, csps, scp, scps;
  Y(mypow)(cth2,gen->cosPow,&ccp,&ccps); Y(mypow)(sth2,gen->sinPow,&ssp,&ssps);
  Y(mypow)(cth2,gen->sinPow,&csp,&csps); Y(mypow)(sth2,gen->cosPow,&scp,&scps);
  Tb rec2p, rec2m, scalep, scalem;
  Tb rec1p=Y(Tbconst)(0.), rec1m=Y(Tbconst)(0.);
  Tv prefac=vload(gen->prefac[gen->m]),
     prescale=vload(gen->fscale[gen->m]);
  /* first factor: prefactor times the cth2 power; scales are summed */
  for (int i=0; i<nvec; ++i)
    {
    rec2p.v[i]=vmul(prefac,ccp.v[i]);
    scalep.v[i]=vadd(prescale,ccps.v[i]);
    rec2m.v[i]=vmul(prefac,csp.v[i]);
    scalem.v[i]=vadd(prescale,csps.v[i]);
    }
  /* normalize before the second multiplication to keep values in range */
  Y(Tbnormalize)(&rec2m,&scalem,sharp_fbighalf);
  Y(Tbnormalize)(&rec2p,&scalep,sharp_fbighalf);
  /* second factor: the sth2 power, followed by the sign corrections */
  for (int i=0; i<nvec; ++i)
    {
    rec2p.v[i]=vmul(rec2p.v[i],ssp.v[i]);
    scalep.v[i]=vadd(scalep.v[i],ssps.v[i]);
    rec2m.v[i]=vmul(rec2m.v[i],scp.v[i]);
    scalem.v[i]=vadd(scalem.v[i],scps.v[i]);
    if (gen->preMinus_p)
      rec2p.v[i]=vneg(rec2p.v[i]);
    if (gen->preMinus_m)
      rec2m.v[i]=vneg(rec2m.v[i]);
    if (gen->s&1)
      rec2p.v[i]=vneg(rec2p.v[i]);
    }
  Y(Tbnormalize)(&rec2m,&scalem,sharp_ftol);
  Y(Tbnormalize)(&rec2p,&scalep,sharp_ftol);
  int l=gen->mhi;
  int below_limit = Y(TballLt)(scalep,sharp_limscale)
                 && Y(TballLt)(scalem,sharp_limscale);
  while (below_limit)
    {
    if (l+2>gen->lmax) {*l_=gen->lmax+1;return;}
    Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l+1]);
    Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l+2]);
    /* bitwise | (not ||) so that both sequences are always rescaled */
    if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
      below_limit = Y(TballLt)(scalep,sharp_limscale)
                 && Y(TballLt)(scalem,sharp_limscale);
    l+=2;
    }
  *l_=l;
  *rec1p_=rec1p; *rec2p_=rec2p; *scalep_=scalep;
  *rec1m_=rec1m; *rec2m_=rec2m; *scalem_=scalem;
  }
--- a/external/sharp/libsharp/sharp_core_inc2.c
+++ b/external/sharp/libsharp/sharp_core_inc2.c
@ -1,803 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_core_inc2.c
 *  Type-dependent code for the computational core
 *
 *  Copyright (C) 2012-2013 Max-Planck-Society
 *  \author Martin Reinecke
 */
 /* Main alm->map loop.  Continues the two-term recurrence
  *   lam_next = cth*lam_cur*rf[l].f[0] - lam_prev*rf[l].f[1]
  * from l up to lmax and accumulates lam(l)*alm[njobs*l+j] into the real and
  * imaginary blocks of p1[j] for l, l+2, ... and of p2[j] for l+1, l+3, ...
  * (lam_2 is the value for the current l, lam_1 the previous one).
  * NOTE(review): Z(), NJ1 and njobs come from the including file; NJ1
  * presumably adds an njobs parameter when it is not a compile-time
  * constant.  vfmaeq(a,b,c) is presumably a+=b*c and vfmaaeq(a,b,c,d,e)
  * presumably a+=b*c+d*e - confirm in sharp_vecsupport.h. */
 static void Z(alm2map_kernel) (const Tb cth, Y(Tbri) * restrict p1,
  Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
  const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
  int l, int lmax NJ1)
  {
 /* unrolled variant handling four l values per iteration; used only when
    more than one transform is processed */
 if (njobs>1)
  {
  while (l<lmax-2)
    {
    Tb lam_3, lam_4;
    /* three recurrence steps: lam_3 (l+1), lam_4 (l+2), lam_1 (l+3) */
    Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_3.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
    r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_4.v[i] = vsub(vmul(vmul(cth.v[i],lam_3.v[i]),r0),vmul(lam_2.v[i],r1));
    r0=vload(rf[l+2].f[0]);r1=vload(rf[l+2].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_4.v[i]),r0),vmul(lam_3.v[i],r1));
    for (int j=0; j<njobs; ++j)
      {
      /* contributions of l and l+2 go to p1 */
      Tv ar2=vload(creal(alm[njobs*l+j])),
         ai2=vload(cimag(alm[njobs*l+j])),
         ar4=vload(creal(alm[njobs*(l+2)+j])),
         ai4=vload(cimag(alm[njobs*(l+2)+j]));
      for (int i=0; i<nvec; ++i)
        {
        vfmaaeq(p1[j].r.v[i],lam_2.v[i],ar2,lam_4.v[i],ar4);
        vfmaaeq(p1[j].i.v[i],lam_2.v[i],ai2,lam_4.v[i],ai4);
        }
      /* contributions of l+1 and l+3 go to p2 */
      Tv ar3=vload(creal(alm[njobs*(l+1)+j])),
         ai3=vload(cimag(alm[njobs*(l+1)+j])),
         ar1=vload(creal(alm[njobs*(l+3)+j])),
         ai1=vload(cimag(alm[njobs*(l+3)+j]));
      for (int i=0; i<nvec; ++i)
        {
        vfmaaeq(p2[j].r.v[i],lam_3.v[i],ar3,lam_1.v[i],ar1);
        vfmaaeq(p2[j].i.v[i],lam_3.v[i],ai3,lam_1.v[i],ai1);
        }
      }
    /* fourth step restores the (lam_1,lam_2) convention for l+4 */
    r0=vload(rf[l+3].f[0]);r1=vload(rf[l+3].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_4.v[i],r1));
    l+=4;
    }
  }
  /* two l values per iteration: l -> p1, l+1 -> p2 */
  while (l<lmax)
    {
    Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
    for (int j=0; j<njobs; ++j)
      {
      Tv ar=vload(creal(alm[njobs*l+j])),
         ai=vload(cimag(alm[njobs*l+j]));
      for (int i=0; i<nvec; ++i)
        {
        vfmaeq(p1[j].r.v[i],lam_2.v[i],ar);
        vfmaeq(p1[j].i.v[i],lam_2.v[i],ai);
        }
      ar=vload(creal(alm[njobs*(l+1)+j]));
      ai=vload(cimag(alm[njobs*(l+1)+j]));
      for (int i=0; i<nvec; ++i)
        {
        vfmaeq(p2[j].r.v[i],lam_1.v[i],ar);
        vfmaeq(p2[j].i.v[i],lam_1.v[i],ai);
        }
      }
    r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
    l+=2;
    }
  /* a single remaining l (== lmax) contributes to p1 only */
  if (l==lmax)
    {
    for (int j=0; j<njobs; ++j)
      {
      Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
      for (int i=0; i<nvec; ++i)
        {
        vfmaeq(p1[j].r.v[i],lam_2.v[i],ar);
        vfmaeq(p1[j].i.v[i],lam_2.v[i],ai);
        }
      }
    }
  }
 /* Main map->alm loop.  Continues the two-term recurrence
  *   lam_next = cth*lam_cur*rf[l].f[0] - lam_prev*rf[l].f[1]
  * from l up to lmax.  For every l the products of lam(l) with the blocks of
  * p1[j] (for l, l+2, ...) or p2[j] (for l+1, l+3, ...) are summed over all
  * vectors and lanes and added to alm[l*njobs+j].
  * NOTE(review): Z(), NJ1 and njobs come from the including file;
  * vfmaeq(a,b,c) is presumably a+=b*c - confirm in sharp_vecsupport.h. */
 static void Z(map2alm_kernel) (const Tb cth, const Y(Tbri) * restrict p1,
  const Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
  const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax
  NJ1)
  {
  /* two l values per iteration */
  while (l<lmax)
    {
    Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
    for (int j=0; j<njobs; ++j)
      {
      /* tr1/ti1: partial sums for l (lam_2, p1);
         tr2/ti2: partial sums for l+1 (lam_1, p2) */
      Tv tr1=vzero, ti1=vzero, tr2=vzero, ti2=vzero;
      for (int i=0; i<nvec; ++i)
        {
        vfmaeq(tr1,lam_2.v[i],p1[j].r.v[i]);
        vfmaeq(ti1,lam_2.v[i],p1[j].i.v[i]);
        }
      for (int i=0; i<nvec; ++i)
        {
        vfmaeq(tr2,lam_1.v[i],p2[j].r.v[i]);
        vfmaeq(ti2,lam_1.v[i],p2[j].i.v[i]);
        }
      /* reduce over lanes and add to the two alm entries at once */
      vhsum_cmplx2(tr1,ti1,tr2,ti2,&alm[l*njobs+j],&alm[(l+1)*njobs+j]);
      }
    r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
    l+=2;
    }
  /* a single remaining l (== lmax) uses p1 only */
  if (l==lmax)
    {
    for (int j=0; j<njobs; ++j)
      {
      Tv tre=vzero, tim=vzero;
      for (int i=0; i<nvec; ++i)
        {
        vfmaeq(tre,lam_2.v[i],p1[j].r.v[i]);
        vfmaeq(tim,lam_2.v[i],p1[j].i.v[i]);
        }
      alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
      }
    }
  }
 /* alm->map driver for one block of directions.
  * 1. iter_to_ieee() advances the recurrence to the first l worth handling;
  *    nothing is accumulated if that l already exceeds lmax.
  * 2. While not all scale counters have reached sharp_minscale, values are
  *    accumulated one l at a time with the correction factor from
  *    getCorfac() applied (first l of each pair into p1, second into p2).
  * 3. Once all lanes are at or above sharp_minscale, lam_1/lam_2 are
  *    multiplied by the correction factor once and the remaining l range is
  *    handed to alm2map_kernel().
  * job->opcnt is increased by operation-count estimates along the way; the
  * coefficients are read from job->almtmp.
  * NOTE(review): Z(), NJ1/NJ2 and njobs come from the including file. */
 static void Z(calc_alm2map) (const Tb cth, const Tb sth,
  const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbri) * restrict p1,
  Y(Tbri) * restrict p2 NJ1)
  {
  int l,lmax=gen->lmax;
  Tb lam_1,lam_2,scale;
  Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
  job->opcnt += (l-gen->m) * 4*VLEN*nvec;
  /* lam_1/lam_2/scale are not set by iter_to_ieee in this case */
  if (l>lmax) return;
  job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
  Tb corfac;
  Y(getCorfac)(scale,&corfac,gen->cf);
  const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
  const dcmplx * restrict alm=job->almtmp;
  int full_ieee = Y(TballGe)(scale,sharp_minscale);
  while (!full_ieee)
    {
    /* current l: lam_2 (corrected) times alm goes to p1 */
    for (int j=0; j<njobs; ++j)
      {
      Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
      for (int i=0; i<nvec; ++i)
        {
        Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
        vfmaeq(p1[j].r.v[i],tmp,ar);
        vfmaeq(p1[j].i.v[i],tmp,ai);
        }
      }
    if (++l>lmax) break;
    Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
    /* next l: lam_1 (corrected) times alm goes to p2 */
    for (int j=0; j<njobs; ++j)
      {
      Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
      for (int i=0; i<nvec; ++i)
        {
        Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
        vfmaeq(p2[j].r.v[i],tmp,ar);
        vfmaeq(p2[j].i.v[i],tmp,ai);
        }
      }
    if (++l>lmax) break;
    r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
    /* correction factors only change when a rescale happened */
    if (Y(rescale)(&lam_1,&lam_2,&scale))
      {
      Y(getCorfac)(scale,&corfac,gen->cf);
      full_ieee = Y(TballGe)(scale,sharp_minscale);
      }
    }
  if (l>lmax) return;
  /* fold the correction factor into the recurrence values once and switch
     to the kernel that needs no per-step correction */
  Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
  Z(alm2map_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax NJ2);
  }
 /* map->alm driver for one block of directions; mirrors calc_alm2map.
  * 1. iter_to_ieee() advances the recurrence to the first l worth handling;
  *    nothing is accumulated if that l already exceeds lmax.
  * 2. While not all scale counters have reached sharp_minscale, the
  *    contributions are computed one l at a time with the correction factor
  *    from getCorfac() applied (first l of each pair from p1, second from
  *    p2) and added to job->almtmp.
  * 3. Afterwards lam_1/lam_2 are multiplied by the correction factor once
  *    and the remaining l range is handed to map2alm_kernel().
  * job->opcnt is increased by operation-count estimates along the way.
  * NOTE(review): Z(), NJ1/NJ2 and njobs come from the including file. */
 static void Z(calc_map2alm) (const Tb cth, const Tb sth,
  const sharp_Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1,
  const Y(Tbri) * restrict p2 NJ1)
  {
  int lmax=gen->lmax;
  Tb lam_1,lam_2,scale;
  /* l is overwritten by iter_to_ieee on every path */
  int l=gen->m;
  Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
  job->opcnt += (l-gen->m) * 4*VLEN*nvec;
  /* lam_1/lam_2/scale are not set by iter_to_ieee in this case */
  if (l>lmax) return;
  job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
  const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
  Tb corfac;
  Y(getCorfac)(scale,&corfac,gen->cf);
  dcmplx * restrict alm=job->almtmp;
  int full_ieee = Y(TballGe)(scale,sharp_minscale);
  while (!full_ieee)
    {
    /* current l: corrected lam_2 times p1, reduced over vectors and lanes */
    for (int j=0; j<njobs; ++j)
      {
      Tv tre=vzero, tim=vzero;
      for (int i=0; i<nvec; ++i)
        {
        Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
        vfmaeq(tre,tmp,p1[j].r.v[i]);
        vfmaeq(tim,tmp,p1[j].i.v[i]);
        }
      alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
      }
    if (++l>lmax) return;
    Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
    /* next l: corrected lam_1 times p2 */
    for (int j=0; j<njobs; ++j)
      {
      Tv tre=vzero, tim=vzero;
      for (int i=0; i<nvec; ++i)
        {
        Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
        vfmaeq(tre,tmp,p2[j].r.v[i]);
        vfmaeq(tim,tmp,p2[j].i.v[i]);
        }
      alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
      }
    if (++l>lmax) return;
    r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
    for (int i=0; i<nvec; ++i)
      lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
    /* correction factors only change when a rescale happened */
    if (Y(rescale)(&lam_1,&lam_2,&scale))
      {
      Y(getCorfac)(scale,&corfac,gen->cf);
      full_ieee = Y(TballGe)(scale,sharp_minscale);
      }
    }
  /* fold the correction factor into the recurrence values once and switch
     to the kernel that needs no per-step correction */
  Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
  Z(map2alm_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax NJ2);
  }
 /* Accumulation step for one l using a pair of coefficients per job:
  * alm[2*j] supplies (agr,agi) and alm[2*j+1] supplies (acr,aci).
  * With lw = rxp+rxm and lx = rxm-rxp per lane:
  *   px.qr += agr*lw   px.qi += agi*lw   px.ur += acr*lw   px.ui += aci*lw
  *   py.qr -= aci*lx   py.qi += acr*lx   py.ur += agi*lx   py.ui -= agr*lx
  * NOTE(review): the += / -= reading assumes vfmaeq(a,b,c) is a+=b*c and
  * vfmseq(a,b,c) is a-=b*c - confirm in sharp_vecsupport.h.  Z(), NJ1 and
  * njobs come from the including file. */
 static inline void Z(saddstep) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
  const Tb rxp, const Tb rxm, const dcmplx * restrict alm NJ1)
  {
  for (int j=0; j<njobs; ++j)
    {
    Tv agr=vload(creal(alm[2*j])), agi=vload(cimag(alm[2*j])),
       acr=vload(creal(alm[2*j+1])), aci=vload(cimag(alm[2*j+1]));
    for (int i=0; i<nvec; ++i)
      {
      Tv lw=vadd(rxp.v[i],rxm.v[i]);
      vfmaeq(px[j].qr.v[i],agr,lw);
      vfmaeq(px[j].qi.v[i],agi,lw);
      vfmaeq(px[j].ur.v[i],acr,lw);
      vfmaeq(px[j].ui.v[i],aci,lw);
      }
    for (int i=0; i<nvec; ++i)
      {
      Tv lx=vsub(rxm.v[i],rxp.v[i]);
      vfmseq(py[j].qr.v[i],aci,lx);
      vfmaeq(py[j].qi.v[i],acr,lx);
      vfmaeq(py[j].ur.v[i],agi,lx);
      vfmseq(py[j].ui.v[i],agr,lx);
      }
    }
  }
 /* Fused variant of two consecutive accumulation steps: the coefficient pairs
    alm1[2*j], alm1[2*j+1] and alm2[2*j], alm2[2*j+1] are applied to p1 and p2
    in a single pass, using the sums and differences of (r1p,r1m) and
    (r2p,r2m). */
 static inline void Z(saddstepb) (Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2,
  const Tb r1p, const Tb r1m, const Tb r2p, const Tb r2m,
  const dcmplx * restrict alm1, const dcmplx * restrict alm2 NJ1)
  {
  for (int jb=0; jb<njobs; ++jb)
    {
    const dcmplx a1x=alm1[2*jb], a1y=alm1[2*jb+1];
    const dcmplx a2x=alm2[2*jb], a2y=alm2[2*jb+1];
    Tv x1re=vload(creal(a1x)), x1im=vload(cimag(a1x)),
       y1re=vload(creal(a1y)), y1im=vload(cimag(a1y));
    Tv x2re=vload(creal(a2x)), x2im=vload(cimag(a2x)),
       y2re=vload(creal(a2y)), y2im=vload(cimag(a2y));
    for (int k=0; k<nvec; ++k)
      {
      Tv sum2=vadd(r2p.v[k],r2m.v[k]);
      Tv dif1=vsub(r1m.v[k],r1p.v[k]);
      Tv dif2=vsub(r2m.v[k],r2p.v[k]);
      Tv sum1=vadd(r1p.v[k],r1m.v[k]);
      /* p1: alm1 weighted by the r2 sum, alm2 by the r1 difference */
      vfmaseq(p1[jb].qr.v[k],x1re,sum2,y2im,dif1);
      vfmaaeq(p1[jb].qi.v[k],x1im,sum2,y2re,dif1);
      vfmaaeq(p1[jb].ur.v[k],y1re,sum2,x2im,dif1);
      vfmaseq(p1[jb].ui.v[k],y1im,sum2,x2re,dif1);
      /* p2: alm2 weighted by the r1 sum, alm1 by the r2 difference */
      vfmaseq(p2[jb].qr.v[k],x2re,sum1,y1im,dif2);
      vfmaaeq(p2[jb].qi.v[k],x2im,sum1,y1re,dif2);
      vfmaaeq(p2[jb].ur.v[k],y2re,sum1,x1im,dif2);
      vfmaseq(p2[jb].ui.v[k],y2im,sum1,x1re,dif2);
      }
    }
  }
 /* Reduction counterpart of the accumulation step: for every job, the
    px fields weighted by (rxp+rxm) and the py fields weighted by (rxm-rxp)
    are summed over all vector elements and handed to vhsum_cmplx2 together
    with the addresses of alm[2*j] and alm[2*j+1]. */
 static inline void Z(saddstep2) (const Y(Tbqu) * restrict px,
  const Y(Tbqu) * restrict py, const Tb * restrict rxp,
  const Tb * restrict rxm, dcmplx * restrict alm NJ1)
  {
  for (int jb=0; jb<njobs; ++jb)
    {
    Tv acc0re=vzero, acc0im=vzero, acc1re=vzero, acc1im=vzero;
    /* The two passes are kept separate on purpose: fusing them would change
       the order in which terms are added to the accumulators. */
    for (int k=0; k<nvec; ++k)
      {
      Tv sum=vadd(rxp->v[k],rxm->v[k]);
      vfmaeq(acc0re,px[jb].qr.v[k],sum);
      vfmaeq(acc0im,px[jb].qi.v[k],sum);
      vfmaeq(acc1re,px[jb].ur.v[k],sum);
      vfmaeq(acc1im,px[jb].ui.v[k],sum);
      }
    for (int k=0; k<nvec; ++k)
      {
      Tv dif=vsub(rxm->v[k],rxp->v[k]);
      vfmseq(acc0re,py[jb].ui.v[k],dif);
      vfmaeq(acc0im,py[jb].ur.v[k],dif);
      vfmaeq(acc1re,py[jb].qi.v[k],dif);
      vfmseq(acc1im,py[jb].qr.v[k],dif);
      }
    vhsum_cmplx2(acc0re,acc0im,acc1re,acc1im,&alm[2*jb],&alm[2*jb+1]);
    }
  }
 /* Main spin alm->map loop. Starting at multipole l, the recursion values
    (rec1p/rec1m and rec2p/rec2m) are advanced two multipoles per iteration
    with the coefficients in fx, and the matching alm entries are accumulated
    into p1/p2. A final single step handles l==lmax. */
 static void Z(alm2map_spin_kernel) (Tb cth, Y(Tbqu) * restrict p1,
  Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
  const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
  int lmax NJ1)
  {
  for (; l<lmax; l+=2)
    {
    /* advance rec1 to multipole l+1 */
    Tv a0=vload(fx[l+1].f[0]), a1=vload(fx[l+1].f[1]),
       a2=vload(fx[l+1].f[2]);
    for (int k=0; k<nvec; ++k)
      {
      rec1p.v[k] = vsub(vmul(vsub(cth.v[k],a1),vmul(a0,rec2p.v[k])),
                        vmul(a2,rec1p.v[k]));
      rec1m.v[k] = vsub(vmul(vadd(cth.v[k],a1),vmul(a0,rec2m.v[k])),
                        vmul(a2,rec1m.v[k]));
      }
    /* accumulate multipoles l and l+1 in one fused step */
    Z(saddstepb)(p1,p2,rec1p,rec1m,rec2p,rec2m,&alm[2*njobs*l],
      &alm[2*njobs*(l+1)] NJ2);
    /* advance rec2 to multipole l+2 */
    Tv b0=vload(fx[l+2].f[0]), b1=vload(fx[l+2].f[1]),
       b2=vload(fx[l+2].f[2]);
    for (int k=0; k<nvec; ++k)
      {
      rec2p.v[k] = vsub(vmul(vsub(cth.v[k],b1),vmul(b0,rec1p.v[k])),
                        vmul(b2,rec2p.v[k]));
      rec2m.v[k] = vsub(vmul(vadd(cth.v[k],b1),vmul(b0,rec1m.v[k])),
                        vmul(b2,rec2m.v[k]));
      }
    }
  if (l==lmax)
    Z(saddstep)(p1, p2, rec2p, rec2m, &alm[2*njobs*l] NJ2);
  }
 /* Main spin map->alm loop. Starting at multipole l, the recursion values are
    advanced two multipoles per iteration with the coefficients in fx, and the
    reductions of p1/p2 are added to the matching alm entries. A final single
    step handles l==lmax. */
 static void Z(map2alm_spin_kernel) (Tb cth, const Y(Tbqu) * restrict p1,
  const Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
  const sharp_ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax
  NJ1)
  {
  for (; l<lmax; l+=2)
    {
    /* advance rec1 to multipole l+1 */
    Tv a0=vload(fx[l+1].f[0]), a1=vload(fx[l+1].f[1]),
       a2=vload(fx[l+1].f[2]);
    for (int k=0; k<nvec; ++k)
      {
      rec1p.v[k] = vsub(vmul(vsub(cth.v[k],a1),vmul(a0,rec2p.v[k])),
                        vmul(a2,rec1p.v[k]));
      rec1m.v[k] = vsub(vmul(vadd(cth.v[k],a1),vmul(a0,rec2m.v[k])),
                        vmul(a2,rec1m.v[k]));
      }
    /* multipole l uses rec2 with (p1,p2); multipole l+1 uses rec1 with the
       roles of p1 and p2 swapped */
    Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l] NJ2);
    Z(saddstep2)(p2, p1, &rec1p, &rec1m, &alm[2*njobs*(l+1)] NJ2);
    /* advance rec2 to multipole l+2 */
    Tv b0=vload(fx[l+2].f[0]), b1=vload(fx[l+2].f[1]),
       b2=vload(fx[l+2].f[2]);
    for (int k=0; k<nvec; ++k)
      {
      rec2p.v[k] = vsub(vmul(vsub(cth.v[k],b1),vmul(b0,rec1p.v[k])),
                        vmul(b2,rec2p.v[k]));
      rec2m.v[k] = vsub(vmul(vadd(cth.v[k],b1),vmul(b0,rec1m.v[k])),
                        vmul(b2,rec2m.v[k]));
      }
    }
  if (l==lmax)
    Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l] NJ2);
  }
 /* Driver for the spin alm->map transform of one batch of rings.
    It obtains the starting multipole and recursion values from
    iter_to_ieee_spin, then steps through multipoles with explicit scale
    correction factors until both scale blocks pass the sharp_minscale test,
    and finally hands the remaining multipoles to alm2map_spin_kernel.
    Results are accumulated into p1 and p2; job->opcnt is increased as
    operation-count bookkeeping. */
 static void Z(calc_alm2map_spin) (const Tb cth, const Tb sth,
  const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbqu) * restrict p1,
  Y(Tbqu) * restrict p2 NJ1)
  {
  int l, lmax=gen->lmax;
  Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
  /* fills in l, the four recursion blocks and the two scale blocks */
  Y(iter_to_ieee_spin)
    (cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
  job->opcnt += (l-gen->m) * 10*VLEN*nvec;
  /* nothing to accumulate if the start multipole is already beyond lmax */
  if (l>lmax) return;
  job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
  const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
  Tb corfacp,corfacm;
  Y(getCorfac)(scalep,&corfacp,gen->cf);
  Y(getCorfac)(scalem,&corfacm,gen->cf);
  const dcmplx * restrict alm=job->almtmp;
  int full_ieee = Y(TballGe)(scalep,sharp_minscale)
               && Y(TballGe)(scalem,sharp_minscale);
  /* Scaled phase: recursion values are multiplied by the correction factors
     on the fly before every accumulation step. */
  while (!full_ieee)
    {
    Z(saddstep)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
      &alm[2*njobs*l] NJ2);
    if (++l>lmax) break;
    Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
    /* odd step: roles of p1 and p2 are swapped */
    Z(saddstep)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm),
      &alm[2*njobs*l] NJ2);
    if (++l>lmax) break;
    Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
    /* bitwise '|' (not '||') so that both rescale calls are always executed */
    if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
      {
      /* a scale changed: refresh correction factors and the exit condition */
      Y(getCorfac)(scalep,&corfacp,gen->cf);
      Y(getCorfac)(scalem,&corfacm,gen->cf);
      full_ieee = Y(TballGe)(scalep,sharp_minscale)
               && Y(TballGe)(scalem,sharp_minscale);
      }
    }
  /* the loop above may also have ended because lmax was exceeded */
  if (l>lmax) return;
  /* fold the correction factors into the recursion values once, then run the
     kernel that applies no further scaling */
  Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
  Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
  Z(alm2map_spin_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
    lmax NJ2);
  }
 /* Driver for the spin map->alm transform of one batch of rings.
    It obtains the starting multipole and recursion values from
    iter_to_ieee_spin, then steps through multipoles with explicit scale
    correction factors until both scale blocks pass the sharp_minscale test,
    and finally hands the remaining multipoles to map2alm_spin_kernel.
    Results are added to job->almtmp; job->opcnt is increased as
    operation-count bookkeeping. */
 static void Z(calc_map2alm_spin) (Tb cth, Tb sth,
  const sharp_Ylmgen_C * restrict gen, sharp_job *job,
  const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2 NJ1)
  {
  int l, lmax=gen->lmax;
  Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
  /* fills in l, the four recursion blocks and the two scale blocks */
  Y(iter_to_ieee_spin)
    (cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
  job->opcnt += (l-gen->m) * 10*VLEN*nvec;
  /* nothing to accumulate if the start multipole is already beyond lmax */
  if (l>lmax) return;
  job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
  const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
  Tb corfacp,corfacm;
  Y(getCorfac)(scalep,&corfacp,gen->cf);
  Y(getCorfac)(scalem,&corfacm,gen->cf);
  dcmplx * restrict alm=job->almtmp;
  int full_ieee = Y(TballGe)(scalep,sharp_minscale)
               && Y(TballGe)(scalem,sharp_minscale);
  /* Scaled phase: corrected copies (t1,t2) of the recursion values are built
     before every reduction step. */
  while (!full_ieee)
    {
    Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
    Z(saddstep2)(p1, p2, &t1, &t2, &alm[2*njobs*l] NJ2);
    if (++l>lmax) return;
    Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
    t1=Y(Tbprod)(rec1p,corfacp); t2=Y(Tbprod)(rec1m,corfacm);
    /* odd step: roles of p1 and p2 are swapped */
    Z(saddstep2)(p2, p1, &t1, &t2, &alm[2*njobs*l] NJ2);
    if (++l>lmax) return;
    Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
    /* bitwise '|' (not '||') so that both rescale calls are always executed */
    if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
      {
      /* a scale changed: refresh correction factors and the exit condition */
      Y(getCorfac)(scalep,&corfacp,gen->cf);
      Y(getCorfac)(scalem,&corfacm,gen->cf);
      full_ieee = Y(TballGe)(scalep,sharp_minscale)
               && Y(TballGe)(scalem,sharp_minscale);
      }
    }
  /* fold the correction factors into the recursion values once, then run the
     kernel that applies no further scaling */
  Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
  Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
  Z(map2alm_spin_kernel)(cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax NJ2);
  }
 /* Accumulation step of the first-derivative transform. One coefficient
    alm[j] per job is used: its real/imaginary parts scaled by (rxp+rxm) go
    into px.qr/px.qi, and scaled by (rxm-rxp) into py.ur (imaginary part,
    added) and py.ui (real part, subtracted). */
 static inline void Z(saddstep_d) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
  const Tb rxp, const Tb rxm, const dcmplx * restrict alm NJ1)
  {
  for (int jb=0; jb<njobs; ++jb)
    {
    const dcmplx coef=alm[jb];
    Tv cre=vload(creal(coef)), cim=vload(cimag(coef));
    for (int k=0; k<nvec; ++k)
      {
      /* the px and py updates touch different fields, so one pass suffices */
      Tv sum=vadd(rxp.v[k],rxm.v[k]);
      Tv dif=vsub(rxm.v[k],rxp.v[k]);
      vfmaeq(px[jb].qr.v[k],cre,sum);
      vfmaeq(px[jb].qi.v[k],cim,sum);
      vfmaeq(py[jb].ur.v[k],cim,dif);
      vfmseq(py[jb].ui.v[k],cre,dif);
      }
    }
  }
 /* Main loop of the first-derivative alm->map transform. Starting at
    multipole l, the recursion values are advanced two multipoles per
    iteration with the coefficients in fx, and one alm entry per job and
    multipole is accumulated into p1/p2. A final single step handles
    l==lmax. */
 static void Z(alm2map_deriv1_kernel) (Tb cth, Y(Tbqu) * restrict p1,
  Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
  const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
  int lmax NJ1)
  {
  for (; l<lmax; l+=2)
    {
    /* advance rec1 to multipole l+1 */
    Tv a0=vload(fx[l+1].f[0]), a1=vload(fx[l+1].f[1]),
       a2=vload(fx[l+1].f[2]);
    for (int k=0; k<nvec; ++k)
      {
      rec1p.v[k] = vsub(vmul(vsub(cth.v[k],a1),vmul(a0,rec2p.v[k])),
                        vmul(a2,rec1p.v[k]));
      rec1m.v[k] = vsub(vmul(vadd(cth.v[k],a1),vmul(a0,rec2m.v[k])),
                        vmul(a2,rec1m.v[k]));
      }
    /* multipole l uses rec2 with (p1,p2); multipole l+1 uses rec1 with the
       roles of p1 and p2 swapped */
    Z(saddstep_d)(p1,p2,rec2p,rec2m,&alm[njobs*l] NJ2);
    Z(saddstep_d)(p2,p1,rec1p,rec1m,&alm[njobs*(l+1)] NJ2);
    /* advance rec2 to multipole l+2 */
    Tv b0=vload(fx[l+2].f[0]), b1=vload(fx[l+2].f[1]),
       b2=vload(fx[l+2].f[2]);
    for (int k=0; k<nvec; ++k)
      {
      rec2p.v[k] = vsub(vmul(vsub(cth.v[k],b1),vmul(b0,rec1p.v[k])),
                        vmul(b2,rec2p.v[k]));
      rec2m.v[k] = vsub(vmul(vadd(cth.v[k],b1),vmul(b0,rec1m.v[k])),
                        vmul(b2,rec2m.v[k]));
      }
    }
  if (l==lmax)
    Z(saddstep_d)(p1, p2, rec2p, rec2m, &alm[njobs*l] NJ2);
  }
 /* Driver for the first-derivative alm->map transform of one batch of rings.
    Same structure as the spin alm->map driver, but one alm entry per job and
    multipole is consumed (index njobs*l) and saddstep_d /
    alm2map_deriv1_kernel are used. Results are accumulated into p1 and p2;
    job->opcnt is increased as operation-count bookkeeping. */
 static void Z(calc_alm2map_deriv1) (const Tb cth, const Tb sth,
  const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbqu) * restrict p1,
  Y(Tbqu) * restrict p2 NJ1)
  {
  int l, lmax=gen->lmax;
  Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
  /* fills in l, the four recursion blocks and the two scale blocks */
  Y(iter_to_ieee_spin)
    (cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
  job->opcnt += (l-gen->m) * 10*VLEN*nvec;
  /* nothing to accumulate if the start multipole is already beyond lmax */
  if (l>lmax) return;
  job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
  const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
  Tb corfacp,corfacm;
  Y(getCorfac)(scalep,&corfacp,gen->cf);
  Y(getCorfac)(scalem,&corfacm,gen->cf);
  const dcmplx * restrict alm=job->almtmp;
  int full_ieee = Y(TballGe)(scalep,sharp_minscale)
               && Y(TballGe)(scalem,sharp_minscale);
  /* Scaled phase: recursion values are multiplied by the correction factors
     on the fly before every accumulation step. */
  while (!full_ieee)
    {
    Z(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
      &alm[njobs*l] NJ2);
    if (++l>lmax) break;
    Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
    /* odd step: roles of p1 and p2 are swapped */
    Z(saddstep_d)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm),
      &alm[njobs*l] NJ2);
    if (++l>lmax) break;
    Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
    /* bitwise '|' (not '||') so that both rescale calls are always executed */
    if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
      {
      /* a scale changed: refresh correction factors and the exit condition */
      Y(getCorfac)(scalep,&corfacp,gen->cf);
      Y(getCorfac)(scalem,&corfacm,gen->cf);
      full_ieee = Y(TballGe)(scalep,sharp_minscale)
               && Y(TballGe)(scalem,sharp_minscale);
      }
    }
  /* the loop above may also have ended because lmax was exceeded */
  if (l>lmax) return;
  /* fold the correction factors into the recursion values once, then run the
     kernel that applies no further scaling */
  Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
  Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
  Z(alm2map_deriv1_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
    lmax NJ2);
  }
 /* Zero-fills an object (here: the local accumulator arrays) via memset. */
 #define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
 /* Per-m dispatcher: for the m value selected by mi, walks over the rings
    [llim,ulim) in chunks of nvec*VLEN values, packs their cos/sin(theta)
    values (cth_/sth_) into vector blocks, calls the calc_* routine matching
    job->type and job->spin, and moves data between job->phase and the local
    accumulators p1/p2.
    Entries of job->phase are addressed as itot*job->s_th + mi*job->s_m plus
    2*j (spin 0) or 4*j (spin != 0) for job j.
    NOTE(review): ispair[itot] presumably marks rings that have a partner ring
    whose data sits in the odd phase slots -- confirm against the caller. */
 static void Z(inner_loop) (sharp_job *job, const int *ispair,
  const double *cth_, const double *sth_, int llim, int ulim,
  sharp_Ylmgen_C *gen, int mi, const int *mlim NJ1)
  {
  /* number of scalar ring values handled per chunk */
  const int nval=nvec*VLEN;
  const int m = job->ainfo->mval[mi];
  sharp_Ylmgen_prepare (gen, m);
  switch (job->type)
    {
    case SHARP_ALM2MAP:
    case SHARP_ALM2MAP_DERIV1:
      {
      if (job->spin==0)
        {
        for (int ith=0; ith<ulim-llim; ith+=nval)
          {
          Y(Tburi) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
          Y(Tbu) cth, sth;
          /* stays 1 only if no ring of this chunk has mlim>=m */
          int skip=1;
          for (int i=0; i<nval; ++i)
            {
            int itot=i+ith;
            /* pad an incomplete last chunk by repeating the last ring */
            if (itot>=ulim-llim) itot=ulim-llim-1;
            if (mlim[itot]>=m) skip=0;
            cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
            }
          if (!skip)
            Z(calc_alm2map) (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
          /* store results; padded lanes (itot beyond the range) are dropped */
          for (int i=0; i<nval; ++i)
            {
            int itot=i+ith;
            if (itot<ulim-llim)
              {
              for (int j=0; j<njobs; ++j)
                {
                int phas_idx = itot*job->s_th + mi*job->s_m + 2*j;
                complex double r1 = p1[j].s.r[i] + p1[j].s.i[i]*_Complex_I,
                               r2 = p2[j].s.r[i] + p2[j].s.i[i]*_Complex_I;
                /* sum goes to the even slot, difference to the odd slot */
                job->phase[phas_idx] = r1+r2;
                if (ispair[itot])
                  job->phase[phas_idx+1] = r1-r2;
                }
              }
            }
          }
        }
      else
        {
        for (int ith=0; ith<ulim-llim; ith+=nval)
          {
          Y(Tbuqu) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
          Y(Tbu) cth, sth;
          /* stays 1 only if no ring of this chunk has mlim>=m */
          int skip=1;
          for (int i=0; i<nval; ++i)
            {
            int itot=i+ith;
            /* pad an incomplete last chunk by repeating the last ring */
            if (itot>=ulim-llim) itot=ulim-llim-1;
            if (mlim[itot]>=m) skip=0;
            cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
            }
          /* spin case: plain alm2map and deriv1 share the accumulator layout */
          if (!skip)
            (job->type==SHARP_ALM2MAP) ?
              Z(calc_alm2map_spin  )
                (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2) :
              Z(calc_alm2map_deriv1)
                (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
          for (int i=0; i<nval; ++i)
            {
            int itot=i+ith;
            if (itot<ulim-llim)
              {
              for (int j=0; j<njobs; ++j)
                {
                int phas_idx = itot*job->s_th + mi*job->s_m + 4*j;
                complex double q1 = p1[j].s.qr[i] + p1[j].s.qi[i]*_Complex_I,
                               q2 = p2[j].s.qr[i] + p2[j].s.qi[i]*_Complex_I,
                               u1 = p1[j].s.ur[i] + p1[j].s.ui[i]*_Complex_I,
                               u2 = p2[j].s.ur[i] + p2[j].s.ui[i]*_Complex_I;
                /* slots +0/+2 hold the sums, +1/+3 the differences */
                job->phase[phas_idx] = q1+q2;
                job->phase[phas_idx+2] = u1+u2;
                if (ispair[itot])
                  {
                  dcmplx *phQ = &(job->phase[phas_idx+1]),
                         *phU = &(job->phase[phas_idx+3]);
                  *phQ = q1-q2;
                  *phU = u1-u2;
                  /* the differences change sign when mhi-m+s is odd */
                  if ((gen->mhi-gen->m+gen->s)&1)
                    { *phQ=-(*phQ); *phU=-(*phU); }
                  }
                }
              }
            }
          }
        }
      break;
      }
    case SHARP_MAP2ALM:
      {
      if (job->spin==0)
        {
        for (int ith=0; ith<ulim-llim; ith+=nval)
          {
          Y(Tburi) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
          Y(Tbu) cth, sth;
          /* stays 1 only if no ring of this chunk has mlim>=m */
          int skip=1;
          for (int i=0; i<nval; ++i)
            {
            int itot=i+ith;
            /* pad an incomplete last chunk by repeating the last ring */
            if (itot>=ulim-llim) itot=ulim-llim-1;
            if (mlim[itot]>=m) skip=0;
            cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
            /* load phases only for real (non-padded) rings with mlim>=m;
               all other lanes keep the zeros written by VZERO */
            if ((i+ith<ulim-llim)&&(mlim[itot]>=m))
              {
              for (int j=0; j<njobs; ++j)
                {
                int phas_idx = itot*job->s_th + mi*job->s_m + 2*j;
                dcmplx ph1=job->phase[phas_idx];
                dcmplx ph2=ispair[itot] ? job->phase[phas_idx+1] : 0.;
                p1[j].s.r[i]=creal(ph1+ph2); p1[j].s.i[i]=cimag(ph1+ph2);
                p2[j].s.r[i]=creal(ph1-ph2); p2[j].s.i[i]=cimag(ph1-ph2);
                }
              }
            }
          if (!skip)
            Z(calc_map2alm)(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
          }
        }
      else
        {
        for (int ith=0; ith<ulim-llim; ith+=nval)
          {
          Y(Tbuqu) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
          Y(Tbu) cth, sth;
          /* stays 1 only if no ring of this chunk has mlim>=m */
          int skip=1;
          for (int i=0; i<nval; ++i)
            {
            int itot=i+ith;
            /* pad an incomplete last chunk by repeating the last ring */
            if (itot>=ulim-llim) itot=ulim-llim-1;
            if (mlim[itot]>=m) skip=0;
            cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
            /* unlike the spin-0 branch, phases are loaded for every
               non-padded ring, without the mlim test */
            if (i+ith<ulim-llim)
              {
              for (int j=0; j<njobs; ++j)
                {
                int phas_idx = itot*job->s_th + mi*job->s_m + 4*j;
                dcmplx p1Q=job->phase[phas_idx],
                       p1U=job->phase[phas_idx+2],
                       p2Q=ispair[itot] ? job->phase[phas_idx+1]:0.,
                       p2U=ispair[itot] ? job->phase[phas_idx+3]:0.;
                /* same parity-dependent sign flip as in the alm2map branch */
                if ((gen->mhi-gen->m+gen->s)&1)
                  { p2Q=-p2Q; p2U=-p2U; }
                p1[j].s.qr[i]=creal(p1Q+p2Q); p1[j].s.qi[i]=cimag(p1Q+p2Q);
                p1[j].s.ur[i]=creal(p1U+p2U); p1[j].s.ui[i]=cimag(p1U+p2U);
                p2[j].s.qr[i]=creal(p1Q-p2Q); p2[j].s.qi[i]=cimag(p1Q-p2Q);
                p2[j].s.ur[i]=creal(p1U-p2U); p2[j].s.ui[i]=cimag(p1U-p2U);
                }
              }
            }
          if (!skip)
            Z(calc_map2alm_spin) (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
          }
        }
      break;
      }
    default:
      {
      /* any other job type is a programming error */
      UTIL_FAIL("must not happen");
      break;
      }
    }
  }
 #undef VZERO
--- a/external/sharp/libsharp/sharp_core_inchelper.c
+++ b/external/sharp/libsharp/sharp_core_inchelper.c
@ -1,70 +0,0 @@
 /* Instantiation helper: expects the macro nvec to be defined by the includer
    and expands the templated sources sharp_core_inc.c / sharp_core_inc2.c
    for that value. Tb and Y() append nvec to a name; Z() additionally
    appends njobs for the fixed-job-count variants. */
 #define Tb CONCAT2(Tb,nvec)
 #define Y(arg) CONCAT2(arg,nvec)
 #include "sharp_core_inc.c"
 /* Generic variant: njobs is passed as a run-time function argument (NJ1 is
    the extra parameter declaration, NJ2 the extra call argument). Only
    generated when more simultaneous transforms are allowed than there are
    specialised variants. */
 #if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
 #define NJ1 , int njobs
 #define NJ2 , njobs
 #define Z(arg) CONCAT2(arg,nvec)
 #include "sharp_core_inc2.c"
 #undef Z
 #undef NJ1
 #undef NJ2
 #endif
 /* Specialised variants: njobs becomes a compile-time constant macro, so no
    extra parameter/argument is needed and NJ1/NJ2 expand to nothing. */
 #define NJ1
 #define NJ2
 /* njobs == 1 */
 #if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
 #define njobs 1
 #define Z(arg) CONCAT3(arg,nvec,njobs)
 #include "sharp_core_inc2.c"
 #undef Z
 #undef njobs
 #endif
 /* njobs == 2 */
 #if ((MAXJOB_SPECIAL>=2)&&(SHARP_MAXTRANS>=2))
 #define njobs 2
 #define Z(arg) CONCAT3(arg,nvec,njobs)
 #include "sharp_core_inc2.c"
 #undef Z
 #undef njobs
 #endif
 /* njobs == 3 */
 #if ((MAXJOB_SPECIAL>=3)&&(SHARP_MAXTRANS>=3))
 #define njobs 3
 #define Z(arg) CONCAT3(arg,nvec,njobs)
 #include "sharp_core_inc2.c"
 #undef Z
 #undef njobs
 #endif
 /* njobs == 4 */
 #if ((MAXJOB_SPECIAL>=4)&&(SHARP_MAXTRANS>=4))
 #define njobs 4
 #define Z(arg) CONCAT3(arg,nvec,njobs)
 #include "sharp_core_inc2.c"
 #undef Z
 #undef njobs
 #endif
 /* njobs == 5 */
 #if ((MAXJOB_SPECIAL>=5)&&(SHARP_MAXTRANS>=5))
 #define njobs 5
 #define Z(arg) CONCAT3(arg,nvec,njobs)
 #include "sharp_core_inc2.c"
 #undef Z
 #undef njobs
 #endif
 /* njobs == 6 */
 #if ((MAXJOB_SPECIAL>=6)&&(SHARP_MAXTRANS>=6))
 #define njobs 6
 #define Z(arg) CONCAT3(arg,nvec,njobs)
 #include "sharp_core_inc2.c"
 #undef Z
 #undef njobs
 #endif
 /* clean up so the helper can be included again with a different nvec */
 #undef NJ1
 #undef NJ2
 #undef Y
 #undef Tb
--- a/external/sharp/libsharp/sharp_cxx.h
+++ b/external/sharp/libsharp/sharp_cxx.h
@ -1,154 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_cxx.h
 *  Spherical transform library
 *
 *  Copyright (C) 2012-2015 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef PLANCK_SHARP_CXX_H
 #define PLANCK_SHARP_CXX_H
 #include "sharp_lowlevel.h"
 #include "sharp_geomhelpers.h"
 #include "sharp_almhelpers.h"
 /*! Owner of one libsharp geometry description and one a_lm layout
     description. Both start out unset (null); the set_* members (re)create
     them and the destructor releases whatever is currently held.
     \note The class holds raw owning pointers and defines no copy
     operations, so copying an instance would make two objects release the
     same descriptors. Do not copy instances. */
 class sharp_base
  {
  protected:
    sharp_alm_info *ainfo;
    sharp_geom_info *ginfo;
  private:
    /*! Releases the geometry descriptor, if any, and marks it as unset. */
    void release_geom()
      {
      if (ginfo) sharp_destroy_geom_info(ginfo);
      ginfo=0;
      }
    /*! Releases the a_lm descriptor, if any, and marks it as unset. */
    void release_alm()
      {
      if (ainfo) sharp_destroy_alm_info(ainfo);
      ainfo=0;
      }
  public:
    /*! Creates an object with neither geometry nor a_lm layout set. */
    sharp_base()
      : ainfo(0), ginfo(0) {}
    /*! Releases the held descriptors. Unset (null) descriptors are skipped,
        exactly as the setters do, so destroying an object on which no
        set_* member was ever called never hands a null pointer to the
        C destroy functions. */
    ~sharp_base()
      {
      release_geom();
      release_alm();
      }
    /*! Replaces the geometry by one built from explicit per-ring arrays;
        all arguments are forwarded unchanged to sharp_make_geom_info. */
    void set_general_geometry (int nrings, const int *nph, const ptrdiff_t *ofs,
      const int *stride, const double *phi0, const double *theta,
      const double *wgt)
      {
      release_geom();
      sharp_make_geom_info (nrings, nph, ofs, stride, phi0, theta, wgt, &ginfo);
      }
    /*! Replaces the geometry by the result of sharp_make_ecp_geom_info with
        phi0=0, longitude stride 1 and latitude stride \a nphi. */
    void set_ECP_geometry (int nrings, int nphi)
      {
      release_geom();
      sharp_make_ecp_geom_info (nrings, nphi, 0., 1, nphi, &ginfo);
      }
    /*! Replaces the geometry by the result of sharp_make_gauss_geom_info with
        phi0=0, longitude stride 1 and latitude stride \a nphi. */
    void set_Gauss_geometry (int nrings, int nphi)
      {
      release_geom();
      sharp_make_gauss_geom_info (nrings, nphi, 0., 1, nphi, &ginfo);
      }
    /*! Replaces the geometry by a HEALPix grid with parameter \a nside and
        stride 1. */
    void set_Healpix_geometry (int nside)
      {
      release_geom();
      sharp_make_healpix_geom_info (nside, 1, &ginfo);
      }
    /*! Replaces the geometry by a HEALPix grid with parameter \a nside,
        stride 1 and the ring weights given in \a weight. */
    void set_weighted_Healpix_geometry (int nside, const double *weight)
      {
      release_geom();
      sharp_make_weighted_healpix_geom_info (nside, 1, weight, &ginfo);
      }
    /*! Replaces the a_lm layout by the result of
        sharp_make_triangular_alm_info with stride 1. */
    void set_triangular_alm_info (int lmax, int mmax)
      {
      release_alm();
      sharp_make_triangular_alm_info (lmax, mmax, 1, &ainfo);
      }
    /*! Returns the current geometry descriptor (null if none was set). */
    const sharp_geom_info* get_geom_info() const { return ginfo; }
    /*! Returns the current a_lm descriptor (null if none was set). */
    const sharp_alm_info* get_alm_info() const { return ainfo; }
  };
 /*! Maps a floating-point element type to the precision flag passed to
     sharp_execute: SHARP_DP for double, 0 for float. The primary template is
     empty, so any other element type fails to compile where \a val is used. */
 template<typename T> struct cxxjobhelper__
  {
  };
 template<> struct cxxjobhelper__<double>
  {
  enum { val=SHARP_DP };
  };
 template<> struct cxxjobhelper__<float>
  {
  enum { val=0 };
  };
 /*! Typed C++ front end to sharp_execute for element type \a T. Every member
     forwards to sharp_execute with ntrans=1, the geometry and a_lm layout
     held by the base class, and null pointers for the timing and
     operation-count outputs. */
 template<typename T> class sharp_cxxjob: public sharp_base
  {
  private:
    /*! Type-erases a mutable buffer pointer for the C interface. */
    static void *conv (T *ptr)
      { return static_cast<void *>(ptr); }
    /*! Type-erases a read-only buffer pointer; constness is dropped because
        the C interface takes plain void pointers. */
    static void *conv (const T *ptr)
      { return const_cast<void *>(static_cast<const void *>(ptr)); }
    /*! Precision flag for \a T, combined with SHARP_ADD when \a add is set. */
    static int job_flags (bool add)
      {
      int flags=cxxjobhelper__<T>::val;
      if (add) flags|=SHARP_ADD;
      return flags;
      }
  public:
    /*! Spin-0 SHARP_ALM2MAP from \a alm to \a map. */
    void alm2map (const T *alm, T *map, bool add)
      {
      void *alms[1]={ conv(alm) };
      void *maps[1]={ conv(map) };
      sharp_execute (SHARP_ALM2MAP, 0, alms, maps, ginfo, ainfo, 1,
        job_flags(add), 0, 0);
      }
    /*! SHARP_ALM2MAP with the given \a spin, two a_lm sets and two maps. */
    void alm2map_spin (const T *alm1, const T *alm2, T *map1, T *map2,
      int spin, bool add)
      {
      void *alms[2]={ conv(alm1), conv(alm2) };
      void *maps[2]={ conv(map1), conv(map2) };
      sharp_execute (SHARP_ALM2MAP, spin, alms, maps, ginfo, ainfo, 1,
        job_flags(add), 0, 0);
      }
    /*! SHARP_ALM2MAP_DERIV1 (called with spin 1) from one a_lm set to two
        maps. */
    void alm2map_der1 (const T *alm, T *map1, T *map2, bool add)
      {
      void *alms[1]={ conv(alm) };
      void *maps[2]={ conv(map1), conv(map2) };
      sharp_execute (SHARP_ALM2MAP_DERIV1, 1, alms, maps, ginfo, ainfo, 1,
        job_flags(add), 0, 0);
      }
    /*! Spin-0 SHARP_MAP2ALM from \a map to \a alm. */
    void map2alm (const T *map, T *alm, bool add)
      {
      void *alms[1]={ conv(alm) };
      void *maps[1]={ conv(map) };
      sharp_execute (SHARP_MAP2ALM, 0, alms, maps, ginfo, ainfo, 1,
        job_flags(add), 0, 0);
      }
    /*! SHARP_MAP2ALM with the given \a spin, two maps and two a_lm sets. */
    void map2alm_spin (const T *map1, const T *map2, T *alm1, T *alm2,
      int spin, bool add)
      {
      void *alms[2]={ conv(alm1), conv(alm2) };
      void *maps[2]={ conv(map1), conv(map2) };
      sharp_execute (SHARP_MAP2ALM, spin, alms, maps, ginfo, ainfo, 1,
        job_flags(add), 0, 0);
      }
  };
 #endif
--- a/external/sharp/libsharp/sharp_geomhelpers.c
+++ b/external/sharp/libsharp/sharp_geomhelpers.c
@ -1,317 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_geomhelpers.c
 *  Spherical transform library
 *
 *  Copyright (C) 2006-2012 Max-Planck-Society<br>
 *  Copyright (C) 2007-2008 Pavel Holoborodko (for gauss_legendre_tbl)
 *  \author Martin Reinecke \author Pavel Holoborodko
 */
 #include <math.h>
 #include "sharp_geomhelpers.h"
 #include "sharp_legendre_roots.h"
 #include "c_utils.h"
 #include "ls_fft.h"
 #include <stdio.h>
 /* Builds the geometry description for nrings rings of a HEALPix grid with
    parameter nside and stores it in *geom_info.
    rings  : 1-based ring indices (1 = northernmost); if NULL, rings 1..nrings
             are used.
    weight : per-ring weights, indexed by (northern ring index - 1); if NULL
             every weight is 1.
    stride : distance between consecutive pixels of a ring in the map array.
    Rings are laid out back to back in the order given; each ring occupies
    nph*stride array slots, so the running offset advances by that amount.
    For the sequential case (rings==NULL) the running offset is cross-checked
    against the closed-form HEALPix ring start, which is also scaled by
    stride. */
 void sharp_make_subset_healpix_geom_info (int nside, int stride, int nrings,
  const int *rings, const double *weight, sharp_geom_info **geom_info)
  {
  const double pi=3.141592653589793238462643383279502884197;
  ptrdiff_t npix=(ptrdiff_t)nside*nside*12;
  ptrdiff_t ncap=2*(ptrdiff_t)nside*(nside-1);
  double *theta=RALLOC(double,nrings);
  double *weight_=RALLOC(double,nrings);
  int *nph=RALLOC(int,nrings);
  double *phi0=RALLOC(double,nrings);
  ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
  int *stride_=RALLOC(int,nrings);
  ptrdiff_t curofs=0, checkofs; /* checkofs used for assertion introduced when adding rings arg */
  for (int m=0; m<nrings; ++m)
    {
    int ring = (rings==NULL)? (m+1) : rings[m];
    /* mirror southern rings onto their northern counterpart */
    ptrdiff_t northring = (ring>2*nside) ? 4*nside-ring : ring;
    stride_[m] = stride;
    if (northring < nside)
      {
      /* polar cap: ring r has 4*r pixels */
      theta[m] = 2*asin(northring/(sqrt(6.)*nside));
      nph[m] = 4*northring;
      phi0[m] = pi/nph[m];
      checkofs = 2*northring*(northring-1)*stride;
      }
    else
      {
      /* equatorial belt: every ring has 4*nside pixels */
      double fact1 = (8.*nside)/npix;
      double costheta = (2*nside-northring)*fact1;
      theta[m] = acos(costheta);
      nph[m] = 4*nside;
      if ((northring-nside) & 1)
        phi0[m] = 0;
      else
        phi0[m] = pi/nph[m];
      checkofs = (ncap + (northring-nside)*nph[m])*stride;
      }
    if (northring != ring) /* southern hemisphere */
      {
      theta[m] = pi-theta[m];
      checkofs = (npix - nph[m])*stride - checkofs;
      }
    weight_[m]=4.*pi/npix*((weight==NULL) ? 1. : weight[northring-1]);
    if (rings==NULL) {
        UTIL_ASSERT(curofs==checkofs, "Bug in computing ofs[m]");
    }
    ofs[m] = curofs;
    /* a ring spans nph[m] pixels that are 'stride' slots apart; advancing by
       nph[m] alone would make rings overlap (and trip the assertion above)
       whenever stride != 1 */
    curofs+=(ptrdiff_t)nph[m]*stride;
    }
  sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, weight_,
    geom_info);
  DEALLOC(theta);
  DEALLOC(weight_);
  DEALLOC(nph);
  DEALLOC(phi0);
  DEALLOC(ofs);
  DEALLOC(stride_);
  }
 /* Full HEALPix grid with ring weights: delegates to the subset builder,
    requesting all 4*nside-1 rings in sequential order (rings==NULL). */
 void sharp_make_weighted_healpix_geom_info (int nside, int stride,
  const double *weight, sharp_geom_info **geom_info)
  {
  const int nrings_full = 4 * nside - 1;
  sharp_make_subset_healpix_geom_info(nside, stride, nrings_full, NULL,
    weight, geom_info);
  }
 /* Builds a geometry with nrings rings of nphi pixels each, placed at the
    nodes returned by sharp_legendre_roots. Ring r starts at array offset
    r*stride_lat, its pixels are stride_lon apart, and its first pixel is at
    azimuth phi0. The node weights are scaled by 2*pi/nphi. */
 void sharp_make_gauss_geom_info (int nrings, int nphi, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info)
  {
  const double pi=3.141592653589793238462643383279502884197;
  double *colat=RALLOC(double,nrings);
  double *ringwgt=RALLOC(double,nrings);
  int *npix_ring=RALLOC(int,nrings);
  double *azimuth0=RALLOC(double,nrings);
  ptrdiff_t *ring_ofs=RALLOC(ptrdiff_t,nrings);
  int *pix_stride=RALLOC(int,nrings);
  /* colat temporarily holds the roots, ringwgt the quadrature weights */
  sharp_legendre_roots(nrings,colat,ringwgt);
  const double wscale=2*pi/nphi;
  for (int r=0; r<nrings; ++r)
    {
    colat[r] = acos(-colat[r]);
    ringwgt[r]*=wscale;
    npix_ring[r]=nphi;
    azimuth0[r]=phi0;
    pix_stride[r]=stride_lon;
    ring_ofs[r]=(ptrdiff_t)r*stride_lat;
    }
  sharp_make_geom_info (nrings, npix_ring, ring_ofs, pix_stride, azimuth0,
    colat, ringwgt, geom_info);
  DEALLOC(colat);
  DEALLOC(ringwgt);
  DEALLOC(npix_ring);
  DEALLOC(azimuth0);
  DEALLOC(ring_ofs);
  DEALLOC(pix_stride);
  }
 /* Weights from Waldvogel 2006: BIT Numerical Mathematics 46, p. 195 */
 /* Builds a geometry with nrings equidistant rings at colatitudes
    pi*(m+0.5)/nrings, each with ppring pixels starting at azimuth phi0.
    Ring m starts at array offset m*stride_lat and its pixels are stride_lon
    apart. The ring weights are obtained from a backward real FFT of the
    coefficient sequence set up below and are then scaled by
    2*pi/(nrings*ppring). */
 void sharp_make_fejer1_geom_info (int nrings, int ppring, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info)
  {
  const double pi=3.141592653589793238462643383279502884197;
  double *theta=RALLOC(double,nrings);
  double *weight=RALLOC(double,nrings);
  int *nph=RALLOC(int,nrings);
  double *phi0_=RALLOC(double,nrings);
  ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
  int *stride_=RALLOC(int,nrings);
  weight[0]=2.;
  for (int k=1; k<=(nrings-1)/2; ++k)
    {
    weight[2*k-1]=2./(1.-4.*k*k)*cos((k*pi)/nrings);
    weight[2*k  ]=2./(1.-4.*k*k)*sin((k*pi)/nrings);
    }
  if ((nrings&1)==0) weight[nrings-1]=0.;
  real_plan plan = make_real_plan(nrings);
  real_plan_backward_fftpack(plan,weight);
  kill_real_plan(plan);
  /* fill the northern half and mirror every entry to the southern ring */
  for (int m=0; m<(nrings+1)/2; ++m)
    {
    theta[m]=pi*(m+0.5)/nrings;
    theta[nrings-1-m]=pi-theta[m];
    nph[m]=nph[nrings-1-m]=ppring;
    phi0_[m]=phi0_[nrings-1-m]=phi0;
    ofs[m]=(ptrdiff_t)m*stride_lat;
    /* widen before multiplying: the int product can overflow on large grids */
    ofs[nrings-1-m]=(ptrdiff_t)(nrings-1-m)*stride_lat;
    stride_[m]=stride_[nrings-1-m]=stride_lon;
    /* total pixel count formed in double for the same reason */
    weight[m]=weight[nrings-1-m]=weight[m]*2*pi/((double)nrings*nph[m]);
    }
  sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, weight,
    geom_info);
  DEALLOC(theta);
  DEALLOC(weight);
  DEALLOC(nph);
  DEALLOC(phi0_);
  DEALLOC(ofs);
  DEALLOC(stride_);
  }
 /* Weights from Waldvogel 2006: BIT Numerical Mathematics 46, p. 195 */
 /* Builds a geometry with nrings equidistant rings at colatitudes
    pi*m/(nrings-1) (the first one clamped to at least 1e-15, its mirror image
    correspondingly below pi), each with ppring pixels starting at azimuth
    phi0. Ring m starts at array offset m*stride_lat and its pixels are
    stride_lon apart. The ring weights are obtained from a backward real FFT
    of length n=nrings-1 and are then scaled by 2*pi/(n*ppring).
    NOTE(review): for nrings<=2 the index 2*(n/2)-1 below is negative; the
    function appears to require nrings>=3 -- confirm with callers. */
 void sharp_make_cc_geom_info (int nrings, int ppring, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info)
  {
  const double pi=3.141592653589793238462643383279502884197;
  double *theta=RALLOC(double,nrings);
  double *weight=RALLOC(double,nrings);
  int *nph=RALLOC(int,nrings);
  double *phi0_=RALLOC(double,nrings);
  ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
  int *stride_=RALLOC(int,nrings);
  int n=nrings-1;
  SET_ARRAY(weight,0,nrings,0.);
  double dw=-1./(n*n-1.+(n&1));
  weight[0]=2.+dw;
  for (int k=1; k<=(n/2-1); ++k)
    weight[2*k-1]=2./(1.-4.*k*k) + dw;
  weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1. -dw*((2-(n&1))*n-1);
  real_plan plan = make_real_plan(n);
  real_plan_backward_fftpack(plan,weight);
  kill_real_plan(plan);
  /* the FFT yields n values; the last ring reuses the first ring's weight */
  weight[n]=weight[0];
  /* fill the northern half and mirror every entry to the southern ring */
  for (int m=0; m<(nrings+1)/2; ++m)
    {
    theta[m]=pi*m/(nrings-1.);
    if (theta[m]<1e-15) theta[m]=1e-15;
    theta[nrings-1-m]=pi-theta[m];
    nph[m]=nph[nrings-1-m]=ppring;
    phi0_[m]=phi0_[nrings-1-m]=phi0;
    ofs[m]=(ptrdiff_t)m*stride_lat;
    /* widen before multiplying: the int product can overflow on large grids */
    ofs[nrings-1-m]=(ptrdiff_t)(nrings-1-m)*stride_lat;
    stride_[m]=stride_[nrings-1-m]=stride_lon;
    /* pixel-count product formed in double for the same reason */
    weight[m]=weight[nrings-1-m]=weight[m]*2*pi/((double)n*nph[m]);
    }
  sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, weight,
    geom_info);
  DEALLOC(theta);
  DEALLOC(weight);
  DEALLOC(nph);
  DEALLOC(phi0_);
  DEALLOC(ofs);
  DEALLOC(stride_);
  }
 /* Weights from Waldvogel 2006: BIT Numerical Mathematics 46, p. 195 */
 /* Builds a geometry with nrings equidistant rings at colatitudes
    pi*(m+1)/(nrings+1), each with ppring pixels starting at azimuth phi0.
    Ring m starts at array offset m*stride_lat and its pixels are stride_lon
    apart. The ring weights are obtained from a backward real FFT of length
    n=nrings+1 (whose first output value is discarded) and are then scaled by
    2*pi/(n*ppring). */
 void sharp_make_fejer2_geom_info (int nrings, int ppring, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info)
  {
  const double pi=3.141592653589793238462643383279502884197;
  double *theta=RALLOC(double,nrings);
  /* one extra slot: the FFT below works on n=nrings+1 values */
  double *weight=RALLOC(double,nrings+1);
  int *nph=RALLOC(int,nrings);
  double *phi0_=RALLOC(double,nrings);
  ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
  int *stride_=RALLOC(int,nrings);
  int n=nrings+1;
  SET_ARRAY(weight,0,n,0.);
  weight[0]=2.;
  for (int k=1; k<=(n/2-1); ++k)
    weight[2*k-1]=2./(1.-4.*k*k);
  weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1.;
  real_plan plan = make_real_plan(n);
  real_plan_backward_fftpack(plan,weight);
  kill_real_plan(plan);
  /* drop the first FFT output: shift the remaining nrings values down */
  for (int m=0; m<nrings; ++m)
    weight[m]=weight[m+1];
  /* fill the northern half and mirror every entry to the southern ring */
  for (int m=0; m<(nrings+1)/2; ++m)
    {
    theta[m]=pi*(m+1)/(nrings+1.);
    theta[nrings-1-m]=pi-theta[m];
    nph[m]=nph[nrings-1-m]=ppring;
    phi0_[m]=phi0_[nrings-1-m]=phi0;
    ofs[m]=(ptrdiff_t)m*stride_lat;
    /* widen before multiplying: the int product can overflow on large grids */
    ofs[nrings-1-m]=(ptrdiff_t)(nrings-1-m)*stride_lat;
    stride_[m]=stride_[nrings-1-m]=stride_lon;
    /* pixel-count product formed in double for the same reason */
    weight[m]=weight[nrings-1-m]=weight[m]*2*pi/((double)n*nph[m]);
    }
  sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, weight,
    geom_info);
  DEALLOC(theta);
  DEALLOC(weight);
  DEALLOC(nph);
  DEALLOC(phi0_);
  DEALLOC(ofs);
  DEALLOC(stride_);
  }
 /* Builds a geometry with nrings rings at colatitudes
    pi*(2m+1)/(2*nrings-1), clamped to at most pi-1e-15, each with ppring
    pixels starting at azimuth phi0. Ring m starts at array offset
    m*stride_lat and its pixels are stride_lon apart. No ring weights are
    supplied (NULL is passed to sharp_make_geom_info). */
 void sharp_make_mw_geom_info (int nrings, int ppring, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info)
  {
  const double pi=3.141592653589793238462643383279502884197;
  const double theta_max=pi-1e-15;
  double *colat=RALLOC(double,nrings);
  int *npix_ring=RALLOC(int,nrings);
  double *azimuth0=RALLOC(double,nrings);
  ptrdiff_t *ring_ofs=RALLOC(ptrdiff_t,nrings);
  int *pix_stride=RALLOC(int,nrings);
  for (int r=0; r<nrings; ++r)
    {
    double th=pi*(2.*r+1.)/(2.*nrings-1.);
    colat[r]=(th>theta_max) ? theta_max : th;
    npix_ring[r]=ppring;
    azimuth0[r]=phi0;
    pix_stride[r]=stride_lon;
    ring_ofs[r]=(ptrdiff_t)r*stride_lat;
    }
  sharp_make_geom_info (nrings, npix_ring, ring_ofs, pix_stride, azimuth0,
    colat, NULL, geom_info);
  DEALLOC(colat);
  DEALLOC(npix_ring);
  DEALLOC(azimuth0);
  DEALLOC(ring_ofs);
  DEALLOC(pix_stride);
  }
--- a/external/sharp/libsharp/sharp_geomhelpers.h
+++ b/external/sharp/libsharp/sharp_geomhelpers.h
@ -1,153 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_geomhelpers.h
 *  SHARP helper function for the creation of grid geometries
 *
 *  Copyright (C) 2006-2013 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef PLANCK_SHARP_GEOMHELPERS_H
 #define PLANCK_SHARP_GEOMHELPERS_H
 #include "sharp_lowlevel.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*! Creates a geometry information describing a HEALPix map with an
    Nside parameter \a nside. \a weight contains the relative ring
    weights and must have \a 2*nside entries. The rings array contains
    the indices of the rings, with 1 being the first ring at the north
    pole; if NULL then we take them to be sequential. Pass 4 * nside - 1
    as nrings and NULL to rings to get the full HEALPix grid.
    \note if \a weight is a null pointer, all weights are assumed to be 1.
    \note if \a rings is a null pointer, take all rings
    \ingroup geominfogroup */
 void sharp_make_subset_healpix_geom_info (int nside, int stride, int nrings,
  const int *rings, const double *weight, sharp_geom_info **geom_info);
 /*! Creates a geometry information describing a HEALPix map with an
    Nside parameter \a nside. \a weight contains the relative ring
    weights and must have \a 2*nside entries.
    \note if \a weight is a null pointer, all weights are assumed to be 1.
    \ingroup geominfogroup */
 void sharp_make_weighted_healpix_geom_info (int nside, int stride,
  const double *weight, sharp_geom_info **geom_info);
 /*! Creates a geometry information describing a HEALPix map with an
    Nside parameter \a nside.
    \ingroup geominfogroup */
 static inline void sharp_make_healpix_geom_info (int nside, int stride,
  sharp_geom_info **geom_info)
  {
  /* Unweighted variant: forward with a null weight array. */
  const double *no_weights = NULL;
  sharp_make_weighted_healpix_geom_info (nside, stride, no_weights, geom_info);
  }
 /*! Creates a geometry information describing a Gaussian map with \a nrings
    iso-latitude rings and \a nphi pixels per ring. The azimuth of the first
    pixel in each ring is \a phi0 (in radians). The index difference between
    two adjacent pixels in an iso-latitude ring is \a stride_lon, the index
    difference between the two start pixels in consecutive iso-latitude rings
    is \a stride_lat.
    \ingroup geominfogroup */
 void sharp_make_gauss_geom_info (int nrings, int nphi, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info);
 /*! Creates a geometry information describing an ECP map with \a nrings
    iso-latitude rings and \a nphi pixels per ring. The azimuth of the first
    pixel in each ring is \a phi0 (in radians). The index difference between
    two adjacent pixels in an iso-latitude ring is \a stride_lon, the index
    difference between the two start pixels in consecutive iso-latitude rings
    is \a stride_lat.
    \note The spacing of pixel centers is equidistant in colatitude and
      longitude.
    \note The sphere is pixelized in a way that the colatitude of the first ring
      is \a 0.5*(pi/nrings) and the colatitude of the last ring is
      \a pi-0.5*(pi/nrings). There are no pixel centers at the poles.
    \note This grid corresponds to Fejer's first rule.
    \ingroup geominfogroup */
 void sharp_make_fejer1_geom_info (int nrings, int nphi, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info);
 /*! Old name for sharp_make_fejer1_geom_info()
    \ingroup geominfogroup */
 static inline void sharp_make_ecp_geom_info (int nrings, int nphi, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info)
  {
  /* Pure alias: every argument is passed through unchanged. */
  sharp_make_fejer1_geom_info (nrings, nphi, phi0,
    stride_lon, stride_lat,
    geom_info);
  }
 /*! Creates a geometry information describing an ECP map with \a nrings
    iso-latitude rings and \a ppring pixels per ring. The azimuth of the first
    pixel in each ring is \a phi0 (in radians). The index difference between
    two adjacent pixels in an iso-latitude ring is \a stride_lon, the index
    difference between the two start pixels in consecutive iso-latitude rings
    is \a stride_lat.
    \note The spacing of pixel centers is equidistant in colatitude and
      longitude.
    \note The sphere is pixelized in a way that the colatitude of the first ring
      is \a 0 and that of the last ring is \a pi.
    \note This grid corresponds to Clenshaw-Curtis integration.
    \ingroup geominfogroup */
 void sharp_make_cc_geom_info (int nrings, int ppring, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info);
 /*! Creates a geometry information describing an ECP map with \a nrings
    iso-latitude rings and \a ppring pixels per ring. The azimuth of the first
    pixel in each ring is \a phi0 (in radians). The index difference between
    two adjacent pixels in an iso-latitude ring is \a stride_lon, the index
    difference between the two start pixels in consecutive iso-latitude rings
    is \a stride_lat.
    \note The spacing of pixel centers is equidistant in colatitude and
      longitude.
    \note The sphere is pixelized in a way that the colatitude of the first ring
      is \a pi/(nrings+1) and that of the last ring is \a pi-pi/(nrings+1).
    \note This grid corresponds to Fejer's second rule.
    \ingroup geominfogroup */
 void sharp_make_fejer2_geom_info (int nrings, int ppring, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info);
 /*! Creates a geometry information describing a map with \a nrings
    iso-latitude rings and \a ppring pixels per ring. The azimuth of the first
    pixel in each ring is \a phi0 (in radians). The index difference between
    two adjacent pixels in an iso-latitude ring is \a stride_lon, the index
    difference between the two start pixels in consecutive iso-latitude rings
    is \a stride_lat.
    \note The spacing of pixel centers is equidistant in colatitude and
      longitude.
    \note The sphere is pixelized in a way that the colatitude of the first ring
      is \a pi/(2*nrings-1) and that of the last ring is \a pi.
    \note This is the grid introduced by McEwen & Wiaux 2011.
    \note This function does \e not define any quadrature weights.
    \ingroup geominfogroup */
 void sharp_make_mw_geom_info (int nrings, int ppring, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/libsharp/sharp_internal.h
+++ b/external/sharp/libsharp/sharp_internal.h
@ -1,66 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_internal.h
 *  Internally used functionality for the spherical transform library.
 *
 *  Copyright (C) 2006-2013 Max-Planck-Society
 *  \author Martin Reinecke \author Dag Sverre Seljebotn
 */
 #ifndef PLANCK_SHARP_INTERNAL_H
 #define PLANCK_SHARP_INTERNAL_H
 #ifdef __cplusplus
 #error This header file cannot be included from C++, only from C
 #endif
 #include "sharp.h"
 #define SHARP_MAXTRANS 100
 /* Internal record holding the state of one transform job.
    NOTE(review): the field descriptions below are inferred from field names
    and types only; confirm them against the code that fills this struct. */
 typedef struct
  {
  sharp_jobtype type; /* requested transform kind */
  int spin;           /* presumably the spin of the transformed quantities */
  int nmaps, nalm;    /* presumably the number of entries in map[] / alm[] */
  int flags;          /* presumably a combination of sharp_jobflags bits */
  void **map;         /* untyped array of map pointers */
  void **alm;         /* untyped array of a_lm pointers */
  int s_m, s_th; // strides in m and theta direction
  complex double *phase;  /* NOTE(review): presumably indexed using s_m/s_th */
  double *norm_l;
  complex double *almtmp; /* presumably scratch storage for a_lm values */
  const sharp_geom_info *ginfo; /* map geometry (not owned: const pointer) */
  const sharp_alm_info *ainfo;  /* a_lm layout (not owned: const pointer) */
  double time;        /* presumably elapsed time of the job -- confirm units */
  int ntrans;         /* presumably the number of simultaneous transforms */
  unsigned long long opcnt; /* presumably an operation-count estimate */
  } sharp_job;
 int sharp_get_nv_max (void);
 int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans);
 int sharp_get_mlim (int lmax, int spin, double sth, double cth);
 #endif
--- a/external/sharp/libsharp/sharp_legendre.c
+++ b/external/sharp/libsharp/sharp_legendre.c
--- a/external/sharp/libsharp/sharp_legendre.c.in
+++ b/external/sharp/libsharp/sharp_legendre.c.in
@ -1,176 +0,0 @@
 /*
    NOTE NOTE NOTE
    This file is edited in sharp_legendre.c.in which is then preprocessed.
    Do not make manual  modifications to sharp_legendre.c.
    NOTE NOTE NOTE
 */
 /*
 *  This file is part of libsharp.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 * 
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 /*! \file sharp_legendre.c.in
 *
 *  Copyright (C) 2015 University of Oslo
 *  \author Dag Sverre Seljebotn
 */
 #ifndef NO_LEGENDRE
 #if (VLEN==8)
 #error This code is not tested with MIC; please compile with -DNO_LEGENDRE
 /* ...or test it (it probably works) and remove this check */
 #endif
 #ifndef SHARP_LEGENDRE_CS
 #define SHARP_LEGENDRE_CS 4
 #endif
 #define MAX_CS 6
 #if (SHARP_LEGENDRE_CS > MAX_CS)
 #error (SHARP_LEGENDRE_CS > MAX_CS)
 #endif
 #include "sharp_legendre.h"
 #include "sharp_vecsupport.h"
 #include <malloc.h>
 /*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
 /*{ for cs in range(1, 7) }*/
 /* Template for the fixed-width kernels: evaluates sum_l bl[l] * P_l(x) for
    one chunk of cs vector registers' worth of x values read from xarr, and
    stores the sums to out.  One set of P/Pm1/Pm2/x/y variables is generated
    per register by the template loops.  The polynomials are advanced with
      P_l = x*P_(l-1) + recfacs[l] * (x*P_(l-1) - P_(l-2)).
    NOTE(review): bl[0] and bl[1] are read unconditionally before the loop,
    so lmax >= 1 appears to be required -- confirm with callers. */
 static void legendre_transform_vec{{cs}}{{T}}({{scalar}} *recfacs, {{scalar}} *bl, ptrdiff_t lmax,
                                              {{scalar}} xarr[({{cs}}) * VLEN{{T}}],
                                              {{scalar}} out[({{cs}}) * VLEN{{T}}]) {
    /*{ for i in range(cs) }*/
    Tv{{T}} P_{{i}}, Pm1_{{i}}, Pm2_{{i}}, x{{i}}, y{{i}};
    /*{ endfor }*/
    Tv{{T}} W1, W2, b, R;
    ptrdiff_t l;
    /* l = 0 term: P_0 = 1, so y starts as bl[0]; P is primed with P_1 = x */
    /*{ for i in range(cs) }*/
    x{{i}} = vloadu{{T}}(xarr + {{i}} * VLEN{{T}});
    Pm1_{{i}} = vload{{T}}(1.0);
    P_{{i}} = x{{i}};
    b = vload{{T}}(*bl);
    y{{i}} = vmul{{T}}(Pm1_{{i}}, b);
    /*{ endfor }*/
    /* l = 1 term: accumulate P_1 * bl[1] */
    b = vload{{T}}(*(bl + 1));
    /*{ for i in range(cs) }*/
    vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
    /*{ endfor }*/
    for (l = 2; l <= lmax; ++l) {
        b = vload{{T}}(*(bl + l));
        R = vload{{T}}(*(recfacs + l));
        /* 
           P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
        */
        /*{ for i in range(cs) }*/
        Pm2_{{i}} = Pm1_{{i}}; Pm1_{{i}} = P_{{i}};
        W1 = vmul{{T}}(x{{i}}, Pm1_{{i}});
        W2 = W1;
        W2 = vsub{{T}}(W2, Pm2_{{i}});
        P_{{i}} = W1;
        vfmaeq{{T}}(P_{{i}}, W2, R);
        vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
        /*{ endfor }*/
    }
    /* write the accumulated sums back, one register at a time */
    /*{ for i in range(cs) }*/
    vstoreu{{T}}(out + {{i}} * VLEN{{T}}, y{{i}});
    /*{ endfor }*/
 }
 /*{ endfor }*/
 /*{ endfor }*/
 /*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
 /* Fills r[0..lmax] with the recurrence factors of the Legendre transform:
    r[0] = 0, r[1] = 1 and r[l] = (l - 1) / l for l >= 2.
    r must provide room for lmax + 1 entries.  Entries beyond lmax are never
    written, so a one-element buffer with lmax == 0 is not overrun. */
 void sharp_legendre_transform_recfac{{T}}({{scalar}} *r, ptrdiff_t lmax) {
    ptrdiff_t l;
    /* guard the two fixed entries so short tables stay within bounds */
    if (lmax >= 0) r[0] = 0;
    if (lmax >= 1) r[1] = 1;
    /* (l - 1) / l, for l >= 2 */
    for (l = 2; l <= lmax; ++l) {
        r[l] = ({{scalar}})(l - 1) / ({{scalar}})l;
    }
 }
 /*{ endfor }*/
 /*
  Compute sum_l b_l P_l(x_i) for all i. 
 */
 #define LEN (SHARP_LEGENDRE_CS * VLEN)
 #define LEN_s (SHARP_LEGENDRE_CS * VLEN_s)
 /*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
 /* Computes out[i] = sum_l bl[l] * P_l(x[i]) for i in [0, nx), processing x
    in chunks of LEN values through the fixed-width vector kernels.
    recfac may be NULL; in that case a table of lmax + 1 recurrence factors
    is allocated, filled and freed inside this call. */
 void sharp_legendre_transform{{T}}({{scalar}} *bl,
                                   {{scalar}} *recfac,
                                   ptrdiff_t lmax,
                                   {{scalar}} *x, {{scalar}} *out, ptrdiff_t nx) {
    /* NOTE(review): outchunk is sized MAX_CS * LEN although at most LEN
       entries are copied out below -- looks larger than needed. */
    {{scalar}} xchunk[MAX_CS * VLEN{{T}}], outchunk[MAX_CS * LEN{{T}}];
    int compute_recfac;
    ptrdiff_t i, j, len;
    compute_recfac = (recfac == NULL);
    if (compute_recfac) {
        /* NOTE(review): the malloc result is used without a NULL check. */
        recfac = malloc(sizeof({{scalar}}) * (lmax + 1));
        sharp_legendre_transform_recfac{{T}}(recfac, lmax);
    }
    /* zero the staging buffer so a short first chunk has defined padding */
    for (j = 0; j != LEN{{T}}; ++j) xchunk[j] = 0;
    for (i = 0; i < nx; i += LEN{{T}}) {
        /* full chunk, or whatever remains at the tail of x */
        len = (i + (LEN{{T}}) <= nx) ? (LEN{{T}}) : (nx - i);
        /* entries at index >= len keep whatever an earlier chunk left there;
           their results are computed but never copied to out */
        for (j = 0; j != len; ++j) xchunk[j] = x[i + j];
        /* pick the kernel by the number of vector registers needed:
           len divided by VLEN, rounded up */
        switch ((len + VLEN{{T}} - 1) / VLEN{{T}}) {
          case 6: legendre_transform_vec6{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
          case 5: legendre_transform_vec5{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
          case 4: legendre_transform_vec4{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
          case 3: legendre_transform_vec3{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
          case 2: legendre_transform_vec2{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
          case 1:
          case 0:
              legendre_transform_vec1{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
        }
        for (j = 0; j != len; ++j) out[i + j] = outchunk[j];
    }
    /* only release the table if it was allocated here */
    if (compute_recfac) {
        free(recfac);
    }
 }
 /*{ endfor }*/
 #endif
--- a/external/sharp/libsharp/sharp_legendre.h
+++ b/external/sharp/libsharp/sharp_legendre.h
@ -1,62 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 * 
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 /*! \file sharp_legendre.h
 *  Interface for the Legendre transform parts of the spherical transform library.
 *
 *  Copyright (C) 2015 University of Oslo
 *  \author Dag Sverre Seljebotn
 */
 #ifndef SHARP_LEGENDRE_H
 #define SHARP_LEGENDRE_H
 #include <stddef.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 #ifndef NO_LEGENDRE
 void sharp_legendre_transform(double *bl, double *recfac, ptrdiff_t lmax, double *x,
                              double *out, ptrdiff_t nx);
 void sharp_legendre_transform_s(float *bl, float *recfac, ptrdiff_t lmax, float *x,
                                float *out, ptrdiff_t nx);
 void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax);
 void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/libsharp/sharp_legendre_roots.c
+++ b/external/sharp/libsharp/sharp_legendre_roots.c
@ -1,67 +0,0 @@
 /* Function adapted from GNU GSL file glfixed.c
   Original author: Pavel Holoborodko (http://www.holoborodko.com)
   Adjustments by M. Reinecke
    - adjusted interface (keep epsilon internal, return full number of points)
    - removed precomputed tables
    - tweaked Newton iteration to obtain higher accuracy */
 #include <math.h>
 #include "sharp_legendre_roots.h"
 #include "c_utils.h"
 /* Returns 1 - x*x.  The factored form (1+x)*(1-x) is used when |x| > 0.1,
    the direct form otherwise (presumably chosen for accuracy -- confirm). */
 static inline double one_minus_x2 (double x)
  {
  if (fabs(x)>0.1)
    return (1.+x)*(1.-x);
  return 1.-x*x;
  }
 /* Computes the roots of the Legendre polynomial of degree n and the
    matching quadrature weights by Newton iteration.
    x and w each receive n values.  Only the first (n+1)/2 roots are
    iterated; each result x0 is stored as x[i-1] = -x0 and x[n-i] = x0, with
    the same weight written to both positions. */
 void sharp_legendre_roots(int n, double *x, double *w)
  {
  const double pi = 3.141592653589793238462643383279502884197;
  const double eps = 3e-14; /* Newton step size below which iteration stops */
  int m = (n+1)>>1;         /* number of roots that are actually iterated */
  /* constants of the closed-form starting guess used below */
  double t0 = 1 - (1-1./n) / (8.*n*n);
  double t1 = 1./(4.*n+2.);
 #pragma omp parallel
 {
  int i;
 #pragma omp for schedule(dynamic,100)
  for (i=1; i<=m; ++i)
    {
    /* starting guess for the i-th root */
    double x0 = cos(pi * ((i<<2)-1) * t1) * t0;
    int dobreak=0;
    int j=0;
    double dpdx;
    while(1)
      {
      /* evaluate P_n(x0) by the three-term recurrence; on exit P0 holds
         P_n and P_1 holds P_(n-1) */
      double P_1 = 1.0;
      double P0 = x0;
      double dx, x1;
      for (int k=2; k<=n; k++)
        {
        double P_2 = P_1;
        P_1 = P0;
 //        P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k;
        P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2);
        }
      /* derivative of P_n at x0, from P_n and P_(n-1) */
      dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0);
      /* Newton step */
      x1 = x0 - P0/dpdx;
      dx = x0-x1;
      x0 = x1;
      /* once a step is within eps, exactly one further step is taken
         before leaving the loop */
      if (dobreak) break;
      if (fabs(dx)<=eps) dobreak=1;
      /* NOTE(review): the iteration counter is incremented inside the macro
         argument; this relies on UTIL_ASSERT always evaluating it. */
      UTIL_ASSERT(++j<100,"convergence problem");
      }
    x[i-1] = -x0;
    x[n-i] = x0;
    /* dpdx here is the value from the last pass through the loop, i.e.
       evaluated before the final Newton update of x0 */
    w[i-1] = w[n-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx);
    }
 } // end of parallel region
  }
--- a/external/sharp/libsharp/sharp_legendre_roots.h
+++ b/external/sharp/libsharp/sharp_legendre_roots.h
@ -1,50 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_legendre_roots.h
 *
 *  Copyright (C) 2006-2012 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef SHARP_LEGENDRE_ROOTS_H
 #define SHARP_LEGENDRE_ROOTS_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*! Computes roots and Gaussian quadrature weights for Legendre polynomial
    of degree \a n.
    \param n Order of Legendre polynomial
    \param x Array of length \a n for output (root position)
    \param w Array of length \a n for output (weight for Gaussian quadrature)
 */
 void sharp_legendre_roots(int n, double *x, double *w);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/libsharp/sharp_lowlevel.h
+++ b/external/sharp/libsharp/sharp_lowlevel.h
@ -1,272 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_lowlevel.h
 *  Low-level, portable interface for the spherical transform library.
 *
 *  Copyright (C) 2012-2013 Max-Planck-Society
 *  \author Martin Reinecke \author Dag Sverre Seljebotn
 */
 #ifndef PLANCK_SHARP_LOWLEVEL_H
 #define PLANCK_SHARP_LOWLEVEL_H
 #include <stddef.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*! \internal
    Helper type containing information about a single ring. */
 typedef struct
  {
  /* NOTE(review): field meanings are inferred from the parameter names of
     sharp_make_geom_info(); cth and sth are presumably cos(theta) and
     sin(theta) -- confirm against the implementation. */
  double theta, phi0, weight, cth, sth; /* colatitude, first-pixel azimuth, ring weight, ... */
  ptrdiff_t ofs;   /* index of the ring's first pixel in the map array */
  int nph, stride; /* pixel count of the ring; stride between consecutive pixels */
  } sharp_ringinfo;
 /*! \internal
    Helper type containing information about a pair of rings with colatitudes
    symmetric around the equator. */
 typedef struct
  {
  /* the two member rings; NOTE(review): which of r1/r2 is the northern ring,
     and how an unpaired ring is represented, is not visible here */
  sharp_ringinfo r1,r2;
  } sharp_ringpair;
 /*! \internal
    Type holding all required information about a map geometry. */
 typedef struct
  {
  sharp_ringpair *pair; /* array of ring pairs (presumably npairs entries) */
  /* npairs: number of ring pairs; nphmax: presumably the largest nph over
     all rings -- TODO confirm */
  int npairs, nphmax;
  } sharp_geom_info;
 /*! \defgroup almgroup Helpers for dealing with a_lm */
 /*! \{ */
 /*! \internal
    Helper type for index calculation in a_lm arrays. */
 typedef struct
  {
  /*! Maximum \a l index of the array */
  int lmax;
  /*! Number of different \a m values in this object */
  int nm;
  /*! Array with \a nm entries containing the individual m values */
  int *mval;
  /*! Combination of flags from sharp_almflags */
  int flags;
  /*! Array with \a nm entries containing the (hypothetical) indices of
      the coefficients with quantum numbers 0,\a mval[i].
      With SHARP_PACKED set in \a flags the unit is real values for all m. */
  ptrdiff_t *mvstart;
  /*! Stride between a_lm and a_(l+1),m.
      With SHARP_PACKED set in \a flags the unit is reals for m=0 and
      complex values for m!=0. */
  ptrdiff_t stride;
  } sharp_alm_info;
 /*! alm_info flags */
 /* Bit flags (1 and 1<<6); sharp_alm_info.flags holds a combination of them. */
 typedef enum { SHARP_PACKED = 1,
               /*!< m=0-coefficients are packed so that the (zero) imaginary part is
                    not present. mvstart is in units of *real* float/double for all
                    m; stride is in units of reals for m=0 and complex for m!=0 */
               SHARP_REAL_HARMONICS  = 1<<6
               /*!< Use the real spherical harmonic convention. For
                    m==0, the alm are treated exactly the same as in
                    the complex case.  For m!=0, alm[i] represent a
                    pair (+abs(m), -abs(m)) instead of (real, imag),
                    and the coefficients are scaled by a factor of
                    sqrt(2) relative to the complex case.  In other
                    words, (sqrt(.5) * alm[i]) recovers the
                    corresponding complex coefficient (when accessed
                    as complex).
                */
             } sharp_almflags;
 /*! Creates an a_lm data structure from the following parameters:
    \param lmax maximum \a l quantum number (>=0)
    \param mmax maximum \a m quantum number (0<= \a mmax <= \a lmax)
    \param stride the stride between entries with identical \a m, and \a l
      differing by 1.
    \param mstart the index of the (hypothetical) coefficient with the
      quantum numbers 0,\a m. Must have \a mmax+1 entries.
    \param alm_info will hold a pointer to the newly created data structure
 */
 void sharp_make_alm_info (int lmax, int mmax, int stride,
  const ptrdiff_t *mstart, sharp_alm_info **alm_info);
 /*! Creates an a_lm data structure from the following parameters:
    \param lmax maximum \a l quantum number (\a >=0)
    \param nm number of different \a m (\a 0<=nm<=lmax+1)
    \param stride the stride between entries with identical \a m, and \a l
      differing by 1.
    \param mval array with \a nm entries containing the individual m values
    \param mvstart array with \a nm entries containing the (hypothetical)
      indices of the coefficients with the quantum numbers 0,\a mval[i]
    \param flags a combination of sharp_almflags (pass 0 unless you know you need this)
    \param alm_info will hold a pointer to the newly created data structure
 */
 void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
  const ptrdiff_t *mvstart, int flags, sharp_alm_info **alm_info);
 /*! Returns the index of the coefficient with quantum numbers \a l,
    \a mval[mi].
    \note for a \a sharp_alm_info generated by sharp_make_alm_info() this is
    the index for the coefficient with the quantum numbers \a l, \a mi. */
 ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi);
 /*! Returns the number of alm coefficients described by \a self. If the SHARP_PACKED
    flag is set, this is the number of "real" coefficients (for m < 0 and m >= 0),
    otherwise it is the number of complex coefficients (with m>=0). */
 ptrdiff_t sharp_alm_count(const sharp_alm_info *self);
 /*! Deallocates the a_lm info object. */
 void sharp_destroy_alm_info (sharp_alm_info *info);
 /*! \} */
 /*! \defgroup geominfogroup Functions for dealing with geometry information */
 /*! \{ */
 /*! Creates a geometry information from a set of ring descriptions.
    All arrays passed to this function must have \a nrings elements.
    \param nrings the number of rings in the map
    \param nph the number of pixels in each ring
    \param ofs the index of the first pixel in each ring in the map array
    \param stride the stride between consecutive pixels
    \param phi0 the azimuth (in radians) of the first pixel in each ring
    \param theta the colatitude (in radians) of each ring
    \param wgt the pixel weight to be used for the ring in map2alm
      and adjoint map2alm transforms.
      Pass NULL to use 1.0 as weight for all rings.
    \param geom_info will hold a pointer to the newly created data structure
 */
 void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
  const int *stride, const double *phi0, const double *theta,
  const double *wgt, sharp_geom_info **geom_info);
 /*! Counts the number of grid points needed for (the local part of) a map described
    by \a info.
 */
 ptrdiff_t sharp_map_size(const sharp_geom_info *info);
 /*! Deallocates the geometry information in \a info. */
 void sharp_destroy_geom_info (sharp_geom_info *info);
 /*! \} */
 /*! \defgroup lowlevelgroup Low-level libsharp SHT interface */
 /*! \{ */
 /*! Enumeration of SHARP job types. */
 /* SHARP_MAP2ALM and SHARP_ALM2MAP are aliases: they share the numeric
    values of SHARP_YtW (0) and SHARP_Y (1) respectively. */
 typedef enum { SHARP_YtW=0,               /*!< analysis */
               SHARP_MAP2ALM=SHARP_YtW,   /*!< analysis */
               SHARP_Y=1,                 /*!< synthesis */
               SHARP_ALM2MAP=SHARP_Y,     /*!< synthesis */
               SHARP_Yt=2,                /*!< adjoint synthesis */
               SHARP_WY=3,                /*!< adjoint analysis */
               SHARP_ALM2MAP_DERIV1=4     /*!< synthesis of first derivatives */
             } sharp_jobtype;
 /*! Job flags */
 typedef enum { SHARP_DP              = 1<<4,
               /*!< map and a_lm are in double precision */
               SHARP_ADD             = 1<<5,
               /*!< results are added to the output arrays, instead of
                    overwriting them */
               /* NOTE: SHARP_REAL_HARMONICS, 1<<6, is also available in sharp_jobflags,
                  but its use here is deprecated in favor of having it in the sharp_alm_info */
               SHARP_NO_FFT          = 1<<7,
               /* NOTE(review): SHARP_NO_FFT carries no description here;
                  presumably it disables the FFT stage -- confirm */
               SHARP_USE_WEIGHTS     = 1<<20,    /* internal use only */
               SHARP_NO_OPENMP       = 1<<21,    /* internal use only */
               SHARP_NVMAX           = (1<<4)-1 /* internal use only */
               /* (1<<4)-1 == 15: all four bits below SHARP_DP set, i.e. not a
                  single-bit flag like the others */
             } sharp_jobflags;
 /*! Performs a libsharp SHT job. The interface deliberately does not use
  the C99 "complex" data type, in order to be callable from C89 and C++.
  \param type the type of SHT
  \param spin the spin of the quantities to be transformed
  \param alm contains pointers to the a_lm coefficients. If \a spin==0,
    alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
    etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
    alm[2] and alm[3] to those of the second, etc. The exact data type of \a alm
    depends on whether the SHARP_DP flag is set.
  \param map contains pointers to the maps. If \a spin==0,
    map[0] points to the map of the first SHT, map[1] to that of the second
    etc. If \a spin>0, or \a type is SHARP_ALM2MAP_DERIV1, map[0] and map[1]
    point to the maps of the first SHT, map[2] and map[3] to those of the
    second, etc. The exact data type of \a map depends on whether the SHARP_DP
    flag is set.
  \param geom_info A \c sharp_geom_info object compatible with the provided
    \a map arrays.
  \param alm_info A \c sharp_alm_info object compatible with the provided
    \a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
    exactly once.
  \param ntrans the number of simultaneous SHTs
  \param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
    \a alm is expected to have the type "complex double **" and \a map is
    expected to have the type "double **"; otherwise, the expected
    types are "complex float **" and "float **", respectively.
  \param time If not NULL, the wall clock time required for this SHT
    (in seconds) will be written here.
  \param opcnt If not NULL, a conservative estimate of the total floating point
    operation count for this SHT will be written here. */
 void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
  const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
  int flags, double *time, unsigned long long *opcnt);
 void sharp_set_chunksize_min(int new_chunksize_min);
 void sharp_set_nchunks_max(int new_nchunks_max);
 /*! Error codes returned by libsharp entry points that report failure through
    their return value (0 means success).  No trailing comma after the last
    enumerator, so the header also parses as C89 and C++98. */
 typedef enum { SHARP_ERROR_NO_MPI = 1
               /*!< libsharp not compiled with MPI support */
              } sharp_errors;
 /*! Works like sharp_execute_mpi, but is always present whether or not libsharp
    is compiled with USE_MPI. This is primarily useful for wrapper code etc.
    Note that \a pcomm has the type MPI_Comm*, except we declare void* to avoid
    pulling in MPI headers. I.e., the comm argument of sharp_execute_mpi
    is *(MPI_Comm*)pcomm.
    Other parameters are the same as sharp_execute_mpi.
    Returns 0 if successful, or SHARP_ERROR_NO_MPI if MPI is not available
    (in which case nothing is done).
 */
 int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
  void *alm, void *map, const sharp_geom_info *geom_info,
  const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
  unsigned long long *opcnt);
 /*! \} */
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/libsharp/sharp_mpi.c
+++ b/external/sharp/libsharp/sharp_mpi.c
@ -1,345 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_mpi.c
 *  Functionality only needed for MPI-parallel transforms
 *
 *  Copyright (C) 2012-2013 Max-Planck-Society
 *  \author Martin Reinecke \author Dag Sverre Seljebotn
 */
 #ifdef USE_MPI
 #include "sharp_mpi.h"
 /* Bookkeeping for one MPI-parallel SHT job: records how the m values and the
    ring pairs are distributed over the tasks of the communicator, and holds
    the count/displacement arrays used for the all-to-all exchange of the
    phase array. Filled by sharp_make_mpi_info(), released by
    sharp_destroy_mpi_info(). */
 typedef struct
  {
  int ntasks;     /* number of tasks */
  int mytask;     /* own task number */
  MPI_Comm comm;  /* communicator to use */
  int *nm;        /* number of m values on every task */
  int *ofs_m;     /* accumulated nm */
  int nmtotal;    /* total number of m values (must be mmax+1) */
  int *mval;      /* array containing all m values of task 0, task 1 etc. */
  int mmax;       /* result of sharp_get_mmax() over all gathered m values */
  int nph;        /* 2*nmaps*ntrans: dcmplx entries per (ring pair, m) slot */
  int *npair;     /* number of ring pairs on every task */
  int *ofs_pair;  /* accumulated npair */
  int npairtotal; /* total number of ring pairs */
  double *theta;  /* theta of first ring of every pair on task 0, task 1 etc. */
  int *ispair;    /* is this really a pair? */
  int *almcount, *almdisp, *mapcount, *mapdisp; /* for all2all communication */
  } sharp_mpi_info;
 /* Fills minfo with the global distribution of m values and ring pairs.
    Every task of comm contributes its local nm/mval and its local
    npairs/theta/"is a real pair" data through MPI_Allgather(v), so that all
    tasks end up with identical global tables. Finally the counts and
    displacements for the MPI_Alltoallv calls in sharp_communicate_alm2map()
    and sharp_communicate_map2alm() are computed (in units of MPI_DOUBLE).
    This is a collective operation: all tasks of comm must call it.
    Every array allocated here is released by sharp_destroy_mpi_info(). */
 static void sharp_make_mpi_info (MPI_Comm comm, const sharp_job *job,
  sharp_mpi_info *minfo)
  {
  minfo->comm = comm;
  MPI_Comm_size (comm, &minfo->ntasks);
  MPI_Comm_rank (comm, &minfo->mytask);
  /* gather the number of m values held by every task */
  minfo->nm=RALLOC(int,minfo->ntasks);
  MPI_Allgather ((int *)(&job->ainfo->nm),1,MPI_INT,minfo->nm,1,MPI_INT,comm);
  /* prefix sum of nm: ofs_m[t] is the start of task t's block in mval */
  minfo->ofs_m=RALLOC(int,minfo->ntasks+1);
  minfo->ofs_m[0]=0;
  for (int i=1; i<=minfo->ntasks; ++i)
    minfo->ofs_m[i] = minfo->ofs_m[i-1]+minfo->nm[i-1];
  minfo->nmtotal=minfo->ofs_m[minfo->ntasks];
  /* gather the m values themselves, task after task */
  minfo->mval=RALLOC(int,minfo->nmtotal);
  MPI_Allgatherv(job->ainfo->mval, job->ainfo->nm, MPI_INT, minfo->mval,
    minfo->nm, minfo->ofs_m, MPI_INT, comm);
  minfo->mmax=sharp_get_mmax(minfo->mval,minfo->nmtotal);
  /* same procedure for the ring pairs: counts first, then prefix sums */
  minfo->npair=RALLOC(int,minfo->ntasks);
  MPI_Allgather ((int *)(&job->ginfo->npairs), 1, MPI_INT, minfo->npair, 1,
    MPI_INT, comm);
  minfo->ofs_pair=RALLOC(int,minfo->ntasks+1);
  minfo->ofs_pair[0]=0;
  for (int i=1; i<=minfo->ntasks; ++i)
    minfo->ofs_pair[i] = minfo->ofs_pair[i-1]+minfo->npair[i-1];
  minfo->npairtotal=minfo->ofs_pair[minfo->ntasks];
  /* local per-pair data: theta of the first ring, and whether the second
     ring of the pair exists (r2.nph>0) */
  double *theta_tmp=RALLOC(double,job->ginfo->npairs);
  int *ispair_tmp=RALLOC(int,job->ginfo->npairs);
  for (int i=0; i<job->ginfo->npairs; ++i)
    {
    theta_tmp[i]=job->ginfo->pair[i].r1.theta;
    ispair_tmp[i]=job->ginfo->pair[i].r2.nph>0;
    }
  minfo->theta=RALLOC(double,minfo->npairtotal);
  minfo->ispair=RALLOC(int,minfo->npairtotal);
  MPI_Allgatherv(theta_tmp, job->ginfo->npairs, MPI_DOUBLE, minfo->theta,
    minfo->npair, minfo->ofs_pair, MPI_DOUBLE, comm);
  MPI_Allgatherv(ispair_tmp, job->ginfo->npairs, MPI_INT, minfo->ispair,
    minfo->npair, minfo->ofs_pair, MPI_INT, comm);
  DEALLOC(theta_tmp);
  DEALLOC(ispair_tmp);
  minfo->nph=2*job->nmaps*job->ntrans;
  minfo->almcount=RALLOC(int,minfo->ntasks);
  minfo->almdisp=RALLOC(int,minfo->ntasks+1);
  minfo->mapcount=RALLOC(int,minfo->ntasks);
  minfo->mapdisp=RALLOC(int,minfo->ntasks+1);
  minfo->almdisp[0]=minfo->mapdisp[0]=0;
  for (int i=0; i<minfo->ntasks; ++i)
    {
    /* the leading factor 2 converts dcmplx entries into MPI_DOUBLE units */
    /* "alm side": own m values combined with the ring pairs of task i */
    minfo->almcount[i] = 2*minfo->nph*minfo->nm[minfo->mytask]*minfo->npair[i];
    minfo->almdisp[i+1] = minfo->almdisp[i]+minfo->almcount[i];
    /* "map side": m values of task i combined with the own ring pairs */
    minfo->mapcount[i] = 2*minfo->nph*minfo->nm[i]*minfo->npair[minfo->mytask];
    minfo->mapdisp[i+1] = minfo->mapdisp[i]+minfo->mapcount[i];
    }
  }
 /* Releases every array that sharp_make_mpi_info() attached to minfo.
    The structure itself is owned by the caller and is not freed. */
 static void sharp_destroy_mpi_info (sharp_mpi_info *minfo)
  {
  /* counts and displacements of the all2all exchange */
  DEALLOC(minfo->mapdisp);
  DEALLOC(minfo->mapcount);
  DEALLOC(minfo->almdisp);
  DEALLOC(minfo->almcount);
  /* global ring pair tables */
  DEALLOC(minfo->ispair);
  DEALLOC(minfo->theta);
  DEALLOC(minfo->ofs_pair);
  DEALLOC(minfo->npair);
  /* global m value tables */
  DEALLOC(minfo->mval);
  DEALLOC(minfo->ofs_m);
  DEALLOC(minfo->nm);
  }
 /* Redistributes the phase array for the a_lm -> map direction.
    On entry *ph is sent with the alm-side counts/displacements (almcount,
    almdisp); on exit *ph has been replaced by a newly allocated array of
    nph*npair[mytask]*nmtotal entries that holds all m values for the own
    ring pairs. The old *ph buffer is freed. Collective over minfo->comm. */
 static void sharp_communicate_alm2map (const sharp_mpi_info *minfo, dcmplx **ph)
  {
  /* mapdisp is counted in doubles, hence the division by 2 for dcmplx */
  dcmplx *phas_tmp = RALLOC(dcmplx,minfo->mapdisp[minfo->ntasks]/2);
  MPI_Alltoallv (*ph,minfo->almcount,minfo->almdisp,MPI_DOUBLE,phas_tmp,
    minfo->mapcount,minfo->mapdisp,MPI_DOUBLE,minfo->comm);
  DEALLOC(*ph);
  ALLOC(*ph,dcmplx,minfo->nph*minfo->npair[minfo->mytask]*minfo->nmtotal);
  /* reorder the received per-task blocks so that the data is indexed by
     (ring pair, actual m value) */
  for (int task=0; task<minfo->ntasks; ++task)
    for (int th=0; th<minfo->npair[minfo->mytask]; ++th)
      for (int mi=0; mi<minfo->nm[task]; ++mi)
        {
        int m = minfo->mval[mi+minfo->ofs_m[task]];
        /* o1: slot in the result array; o2: slot in task's receive block */
        int o1 = minfo->nph*(th*(minfo->mmax+1) + m);
        int o2 = minfo->mapdisp[task]/2+minfo->nph*(mi+th*minfo->nm[task]);
        for (int i=0; i<minfo->nph; ++i)
          (*ph)[o1+i] = phas_tmp[o2+i];
        }
  DEALLOC(phas_tmp);
  }
 /* Redistributes the phase array for the map -> a_lm direction; this is the
    inverse of sharp_communicate_alm2map().
    The entries of *ph, indexed by (own ring pair, m value), are first packed
    into one contiguous block per destination task and then exchanged with
    MPI_Alltoallv. On exit *ph has been replaced by a newly allocated array
    of nph*nm[mytask]*npairtotal entries; the old buffer is freed.
    Collective over minfo->comm. */
 static void sharp_communicate_map2alm (const sharp_mpi_info *minfo, dcmplx **ph)
  {
  /* mapdisp is counted in doubles, hence the division by 2 for dcmplx */
  dcmplx *phas_tmp = RALLOC(dcmplx,minfo->mapdisp[minfo->ntasks]/2);
  for (int task=0; task<minfo->ntasks; ++task)
    for (int th=0; th<minfo->npair[minfo->mytask]; ++th)
      for (int mi=0; mi<minfo->nm[task]; ++mi)
        {
        int m = minfo->mval[mi+minfo->ofs_m[task]];
        /* o1: slot in task's send block; o2: slot in the current *ph */
        int o1 = minfo->mapdisp[task]/2+minfo->nph*(mi+th*minfo->nm[task]);
        int o2 = minfo->nph*(th*(minfo->mmax+1) + m);
        for (int i=0; i<minfo->nph; ++i)
          phas_tmp[o1+i] = (*ph)[o2+i];
        }
  DEALLOC(*ph);
  ALLOC(*ph,dcmplx,minfo->nph*minfo->nm[minfo->mytask]*minfo->npairtotal);
  MPI_Alltoallv (phas_tmp,minfo->mapcount,minfo->mapdisp,MPI_DOUBLE,
    *ph,minfo->almcount,minfo->almdisp,MPI_DOUBLE,minfo->comm);
  DEALLOC(phas_tmp);
  }
 /* Allocates job->phase and sets the strides job->s_m and job->s_th for the
    first stage of an MPI job.
    For SHARP_MAP2ALM the array covers all m values (nmfull) on the local
    ring pairs (ntheta); for every other job type it covers the local m
    values (nm) on all ring pairs (nthetafull). */
 static void alloc_phase_mpi (sharp_job *job, int nm, int ntheta,
  int nmfull, int nthetafull)
  {
  ptrdiff_t phase_size;
  int m_extent;
  if (job->type==SHARP_MAP2ALM)
    {
    phase_size = (ptrdiff_t)(nmfull)*ntheta;
    m_extent = nmfull;
    }
  else
    {
    phase_size = (ptrdiff_t)(nm)*nthetafull;
    m_extent = nm;
    }
  job->phase=RALLOC(dcmplx,2*job->ntrans*job->nmaps*phase_size);
  job->s_m=2*job->ntrans*job->nmaps;
  job->s_th = job->s_m * m_extent;
  }
 /* For every job type except SHARP_MAP2ALM: exchanges the phase array
    between the tasks and adjusts the theta stride to the full m range. */
 static void alm2map_comm (sharp_job *job, const sharp_mpi_info *minfo)
  {
  if (job->type == SHARP_MAP2ALM) return; /* nothing to do in this direction */
  sharp_communicate_alm2map (minfo,&job->phase);
  job->s_th=job->s_m*minfo->nmtotal;
  }
 /* For SHARP_MAP2ALM jobs only: exchanges the phase array between the tasks
    and adjusts the theta stride to the number of locally held m values. */
 static void map2alm_comm (sharp_job *job, const sharp_mpi_info *minfo)
  {
  if (job->type != SHARP_MAP2ALM) return; /* nothing to do in this direction */
  sharp_communicate_map2alm (minfo,&job->phase);
  job->s_th=job->s_m*minfo->nm[minfo->mytask];
  }
 /* Executes one SHT job distributed over the tasks of comm.
    - With a single task the call is forwarded to sharp_execute_job().
    - If the total number of ring pairs exceeds 300 per task, the geometry is
      split into nsub interleaved subsets (aiming at roughly 200 ring pairs
      per task and sub-job) and the function calls itself once per subset.
    - Otherwise the transform is carried out directly: map->phase, phase
      exchange (map2alm direction), the per-m inner loop inside an OpenMP
      parallel region, phase exchange (alm2map direction), phase->map.
    On return job->time holds the elapsed wall clock time and job->opcnt the
    accumulated operation count.
    Collective over comm: all tasks must call this function. */
 static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
  {
  int ntasks;
  MPI_Comm_size(comm, &ntasks);
  if (ntasks==1) /* fall back to scalar implementation */
    { sharp_execute_job (job); return; }
  /* synchronize the tasks before starting the timer */
  MPI_Barrier(comm);
  double timer=wallTime();
  job->opcnt=0;
  sharp_mpi_info minfo;
  sharp_make_mpi_info(comm, job, &minfo);
  if (minfo.npairtotal>minfo.ntasks*300)
    {
    /* number of sub-jobs: ceil(npairtotal/(200*ntasks)) */
    int nsub=(minfo.npairtotal+minfo.ntasks*200-1)/(minfo.ntasks*200);
    for (int isub=0; isub<nsub; ++isub)
      {
      sharp_job ljob=*job;
      // When creating a_lm, every sub-job produces a complete set of
      // coefficients; they need to be added up.
      if ((isub>0)&&(job->type==SHARP_MAP2ALM)) ljob.flags|=SHARP_ADD;
      sharp_geom_info lginfo;
      lginfo.pair=RALLOC(sharp_ringpair,(job->ginfo->npairs/nsub)+1);
      lginfo.npairs=0;
      lginfo.nphmax = job->ginfo->nphmax;
      /* sub-job isub receives the local ring pairs isub, isub+nsub, ... */
      while (lginfo.npairs*nsub+isub<job->ginfo->npairs)
        {
        lginfo.pair[lginfo.npairs]=job->ginfo->pair[lginfo.npairs*nsub+isub];
        ++lginfo.npairs;
        }
      ljob.ginfo=&lginfo;
      sharp_execute_job_mpi (&ljob,comm);
      job->opcnt+=ljob.opcnt;
      DEALLOC(lginfo.pair);
      }
    }
  else
    {
    int lmax = job->ainfo->lmax;
    job->norm_l = sharp_Ylmgen_get_norm (lmax, job->spin);
    /* clear output arrays if requested */
    init_output (job);
    alloc_phase_mpi (job,job->ainfo->nm,job->ginfo->npairs,minfo.mmax+1,
      minfo.npairtotal);
    /* cos(theta), sin(theta) and the m limit for every ring pair of the
       global geometry */
    double *cth = RALLOC(double,minfo.npairtotal),
          *sth = RALLOC(double,minfo.npairtotal);
    int *mlim = RALLOC(int,minfo.npairtotal);
    for (int i=0; i<minfo.npairtotal; ++i)
      {
      cth[i] = cos(minfo.theta[i]);
      sth[i] = sin(minfo.theta[i]);
      mlim[i] = sharp_get_mlim(lmax, job->spin, sth[i], cth[i]);
      }
    /* map->phase where necessary */
    map2phase (job, minfo.mmax, 0, job->ginfo->npairs);
    map2alm_comm (job, &minfo);
 #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
 {
    /* every thread works on a private copy of the job, with its own
       generator and a_lm scratch space */
    sharp_job ljob = *job;
    sharp_Ylmgen_C generator;
    sharp_Ylmgen_init (&generator,lmax,minfo.mmax,ljob.spin);
    alloc_almtmp(&ljob,lmax);
 #pragma omp for schedule(dynamic,1)
    for (int mi=0; mi<job->ainfo->nm; ++mi)
      {
  /* alm->alm_tmp where necessary */
      alm2almtmp (&ljob, lmax, mi);
  /* inner conversion loop */
      inner_loop (&ljob, minfo.ispair, cth, sth, 0, minfo.npairtotal,
        &generator, mi, mlim);
  /* alm_tmp->alm where necessary */
      almtmp2alm (&ljob, lmax, mi);
      }
    sharp_Ylmgen_destroy(&generator);
    dealloc_almtmp(&ljob);
    /* merge the per-thread operation counts into the shared job */
 #pragma omp critical
    job->opcnt+=ljob.opcnt;
 } /* end of parallel region */
    alm2map_comm (job, &minfo);
  /* phase->map where necessary */
    phase2map (job, minfo.mmax, 0, job->ginfo->npairs);
    DEALLOC(mlim);
    DEALLOC(cth);
    DEALLOC(sth);
    DEALLOC(job->norm_l);
    dealloc_phase (job);
    }
  sharp_destroy_mpi_info(&minfo);
  job->time=wallTime()-timer;
  }
 /* Public MPI entry point: builds a job description from the arguments, runs
    it on comm and reports the timing and operation count through the
    optional output pointers time and opcnt (either may be NULL). */
 void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
  void *alm, void *map, const sharp_geom_info *geom_info,
  const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
  unsigned long long *opcnt)
  {
  sharp_job mpijob;

  sharp_build_job_common (&mpijob, type, spin, alm, map, geom_info, alm_info,
    ntrans, flags);
  sharp_execute_job_mpi (&mpijob, comm);

  if (time)
    *time = mpijob.time;
  if (opcnt)
    *opcnt = mpijob.opcnt;
  }
 /* We declare this only in C file to make symbol available for Fortran wrappers;
   without declaring it in C header as it should not be available to C code */
 void sharp_execute_mpi_fortran(MPI_Fint comm, sharp_jobtype type, int spin,
  void *alm, void *map, const sharp_geom_info *geom_info,
  const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
  unsigned long long *opcnt);
 /* Variant of sharp_execute_mpi() that takes a Fortran communicator handle:
    comm is converted with MPI_Comm_f2c(), all other arguments are passed
    through unchanged. */
 void sharp_execute_mpi_fortran(MPI_Fint comm, sharp_jobtype type, int spin,
  void *alm, void *map, const sharp_geom_info *geom_info,
  const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
  unsigned long long *opcnt)
  {
  sharp_execute_mpi(MPI_Comm_f2c(comm), type, spin, alm, map, geom_info,
                    alm_info, ntrans, flags, time, opcnt);
  }
 #endif
--- a/external/sharp/libsharp/sharp_mpi.h
+++ b/external/sharp/libsharp/sharp_mpi.h
@ -1,83 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_mpi.h
 *  Interface for the spherical transform library with MPI support.
 *
 *  Copyright (C) 2011,2012 Max-Planck-Society
 *  \author Martin Reinecke \author Dag Sverre Seljebotn
 */
 #ifndef PLANCK_SHARP_MPI_H
 #define PLANCK_SHARP_MPI_H
 #include <mpi.h>
 #include "sharp_lowlevel.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*! Performs an MPI parallel libsharp SHT job. The interface deliberately does
  not use the C99 "complex" data type, so that this header can also be used
  from code that lacks that type (e.g. C++, see the extern "C" guard).
  This is a collective operation over \a comm.
  \param comm the MPI communicator to be used for this SHT
  \param type the type of SHT
  \param spin the spin of the quantities to be transformed
  \param alm contains pointers to the a_lm coefficients. If \a spin==0,
    alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
    etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
    alm[2] and alm[3] to those of the second, etc. The exact data type of \a alm
    depends on whether the SHARP_DP flag is set.
  \param map contains pointers to the maps. If \a spin==0,
    map[0] points to the map of the first SHT, map[1] to that of the second
    etc. If \a spin>0, or \a type is SHARP_ALM2MAP_DERIV1, map[0] and map[1]
    point to the maps of the first SHT, map[2] and map[3] to those of the
    second, etc. The exact data type of \a map depends on whether the SHARP_DP
    flag is set.
  \param geom_info A \c sharp_geom_info object compatible with the provided
    \a map arrays. The total map geometry is the union of all \a geom_info
    objects over the participating MPI tasks.
  \param alm_info A \c sharp_alm_info object compatible with the provided
    \a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
    exactly once in the union of all \a alm_info objects over the participating
    MPI tasks.
  \param ntrans the number of simultaneous SHTs
  \param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
    \a alm is expected to have the type "complex double **" and \a map is
    expected to have the type "double **"; otherwise, the expected
    types are "complex float **" and "float **", respectively.
  \param time If not NULL, the wall clock time required for this SHT
    (in seconds) will be written here.
  \param opcnt If not NULL, a conservative estimate of the total floating point
    operation count for this SHT will be written here. */
 void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
  void *alm, void *map, const sharp_geom_info *geom_info,
  const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
  unsigned long long *opcnt);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/libsharp/sharp_testsuite.c
+++ b/external/sharp/libsharp/sharp_testsuite.c
@ -1,708 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*  \file sharp_testsuite.c
 * 
 *  Copyright (C) 2012-2013 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #include <stdio.h>
 #include <string.h>
 #ifdef USE_MPI
 #include "mpi.h"
 #include "sharp_mpi.h"
 #endif
 #ifdef _OPENMP
 #include <omp.h>
 #endif
 #include "sharp.h"
 #include "sharp_internal.h"
 #include "sharp_geomhelpers.h"
 #include "sharp_almhelpers.h"
 #include "c_utils.h"
 #include "sharp_announce.h"
 #include "memusage.h"
 #include "sharp_vecsupport.h"
 /* shorthand for the double precision complex type used for a_lm values */
 typedef complex double dcmplx;
 /* Number of MPI tasks and rank of this task; both are set in main()
    (1 and 0, respectively, when built without USE_MPI). */
 int ntasks, mytask;
 /* Small linear congruential pseudo-random number generator.
    Advances *state to the next 31-bit value and returns a number in the
    half-open interval [min, max) derived from it.
    \param min lower bound of the returned value (inclusive)
    \param max upper bound of the returned value (exclusive)
    \param state generator state; read and updated in place */
 static double drand (double min, double max, int *state)
  {
  /* The state update is carried out in unsigned arithmetic: the product
     exceeds the range of a signed int for most states, and signed overflow
     is undefined behaviour in C. Unsigned wrap-around is well defined and
     yields the same low 31 bits. */
  unsigned int next = (unsigned int)(*state);
  next = (next*1103515245u + 12345u) & 0x7fffffffu;
  *state = (int)next;
  return min + (max-min)*(*state)/(0x7fffffff+1.0);
  }
 /* Fills alm with pseudo-random coefficients for all (l,m) described by
    helper. Real and imaginary parts are drawn with drand(-1,1,...); the
    imaginary part is 0 for m==0, and coefficients with both l<spin and
    m<spin are set to 0. The generator is re-seeded for every m from cnt and
    m, so the result does not depend on how the loop is scheduled. */
 static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin, int cnt)
  {
 #pragma omp parallel
 {
 #pragma omp for schedule (dynamic,100)
  for (int mi=0; mi<helper->nm; ++mi)
    {
    const int m=helper->mval[mi];
    int state=1234567*cnt+8912*m; // random seed
    for (int l=m; l<=helper->lmax; ++l)
      {
      double re=0., im=0.;
      if (!((l<spin)&&(m<spin)))
        {
        /* keep the order of the draws: real part first, then imaginary */
        re = drand(-1,1,&state);
        if (m!=0) im = drand(-1,1,&state);
        }
      alm[sharp_alm_index(helper,l,mi)] = re+_Complex_I*im;
      }
    }
 } // end of parallel region
  }
 /* Returns the sum of val over all tasks of MPI_COMM_WORLD; without USE_MPI
    the value is returned unchanged. */
 static unsigned long long totalops (unsigned long long val)
  {
  unsigned long long total = val;
 #ifdef USE_MPI
  MPI_Allreduce (&val, &total,1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
 #endif
  return total;
  }
 /* Returns the maximum of val over all tasks of MPI_COMM_WORLD; without
    USE_MPI the value is returned unchanged. */
 static double maxTime (double val)
  {
  double slowest = val;
 #ifdef USE_MPI
  MPI_Allreduce (&val, &slowest,1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
 #endif
  return slowest;
  }
 /* Returns the sum of val over all tasks of MPI_COMM_WORLD; without USE_MPI
    the value is returned unchanged. */
 static double allreduceSumDouble (double val)
  {
  double total = val;
 #ifdef USE_MPI
  MPI_Allreduce (&val, &total,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 #endif
  return total;
  }
 /* Returns the value of VmHWM() summed over all tasks of MPI_COMM_WORLD;
    without USE_MPI the local value is returned. */
 static double totalMem()
  {
  double local=VmHWM();
 #ifdef USE_MPI
  double total;
  MPI_Allreduce (&local, &total,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  return total;
 #else
  return local;
 #endif
  }
 #ifdef USE_MPI
 /* Restricts ainfo in place to the m values owned by this task: entries
    mytask, mytask+ntasks, mytask+2*ntasks, ... are kept and compacted to the
    front, and mvstart is recomputed so that the kept m values are stored
    back to back. */
 static void reduce_alm_info(sharp_alm_info *ainfo)
  {
  int kept=0;
  ptrdiff_t ofs = 0;
  for (int src=mytask; src<ainfo->nm; src+=ntasks)
    {
    ainfo->mval[kept]=ainfo->mval[src];
    /* mvstart is biased by -m, so that mvstart+l addresses coefficient l */
    ainfo->mvstart[kept]=ofs-ainfo->mval[kept];
    ofs+=ainfo->lmax-ainfo->mval[kept]+1;
    ++kept;
    }
  ainfo->nm=kept;
  }
 /* Restricts ginfo in place to the ring pairs owned by this task: pairs
    mytask, mytask+ntasks, mytask+2*ntasks, ... are kept and compacted to the
    front, and the ring offsets are recomputed so that the kept rings are
    stored back to back. */
 static void reduce_geom_info(sharp_geom_info *ginfo)
  {
  int kept=0;
  ptrdiff_t ofs = 0;
  for (int src=mytask; src<ginfo->npairs; src+=ntasks)
    {
    sharp_ringpair *dst=&ginfo->pair[kept];
    *dst=ginfo->pair[src];
    dst->r1.ofs=ofs;
    ofs+=dst->r1.nph;
    dst->r2.ofs=ofs;
    /* the second ring only takes up space if it actually exists */
    if (dst->r2.nph>0) ofs+=dst->r2.nph;
    ++kept;
    }
  ginfo->npairs=kept;
  }
 #endif
 /* Returns the number of a_lm coefficients described by ainfo, i.e. the sum
    of (lmax-m+1) over all m values it contains. */
 static ptrdiff_t get_nalms(const sharp_alm_info *ainfo)
  {
  ptrdiff_t total=0;
  int mi=0;
  while (mi<ainfo->nm)
    {
    total += ainfo->lmax-ainfo->mval[mi]+1;
    ++mi;
    }
  return total;
  }
 static ptrdiff_t get_npix(const sharp_geom_info *ginfo)
  {
  ptrdiff_t res=0;
  for (int i=0; i<ginfo->npairs; ++i)
    {
    res += ginfo->pair[i].r1.nph;
    if (ginfo->pair[i].r2.nph>0) res += ginfo->pair[i].r2.nph;
    }
  return res;
  }
 /* For each of the ncomp coefficient arrays, computes the sum of |a_lm|^2
    over its nalms entries and negates every coefficient in place.
    Returns a newly allocated array with the ncomp sums; the caller releases
    it with DEALLOC. */
 static double *get_sqsum_and_invert (dcmplx **alm, ptrdiff_t nalms, int ncomp)
  {
  double *sqsum=RALLOC(double,ncomp);
  for (int c=0; c<ncomp; ++c)
    {
    dcmplx *row=alm[c];
    sqsum[c]=0;
    for (ptrdiff_t k=0; k<nalms; ++k)
      {
      sqsum[c]+=creal(row[k])*creal(row[k])
               +cimag(row[k])*cimag(row[k]);
      row[k]=-row[k];
      }
    }
  return sqsum;
  }
 /* Computes error measures for each of the ncomp coefficient arrays in alm,
    which are expected to hold residuals (the caller adds the recovered
    coefficients onto the negated originals).
    \param alm residual coefficients, ncomp arrays of nalms entries each
    \param nalms number of coefficients held by this task
    \param ncomp number of components
    \param sqsum per-component sum of squares of the original coefficients
      held by this task
    \param err_abs receives a newly allocated array with the largest absolute
      residual of every component (maximum over all tasks)
    \param err_rel receives a newly allocated array with the ratio of the
      residual rms to the original rms for every component
    With USE_MPI all sums and maxima are reduced over MPI_COMM_WORLD, so the
    function is collective. The caller releases both arrays with DEALLOC. */
 static void get_errors (dcmplx **alm, ptrdiff_t nalms, int ncomp, double *sqsum,
  double **err_abs, double **err_rel)
  {
  /* The reduction below is declared as MPI_LONG, so it must operate on real
     "long" objects: ptrdiff_t and long differ in size on LLP64 platforms. */
  long nalms_loc=(long)nalms;
  long nalms_tot=nalms_loc;
 #ifdef USE_MPI
  MPI_Allreduce(&nalms_loc,&nalms_tot,1,MPI_LONG,MPI_SUM,MPI_COMM_WORLD);
 #endif
  *err_abs=RALLOC(double,ncomp);
  *err_rel=RALLOC(double,ncomp);
  for (int i=0; i<ncomp; ++i)
    {
    double sum=0, maxdiff=0, sumtot, sqsumtot, maxdifftot;
    for (ptrdiff_t j=0; j<nalms; ++j)
      {
      double sqr=creal(alm[i][j])*creal(alm[i][j])
                +cimag(alm[i][j])*cimag(alm[i][j]);
      sum+=sqr;
      if (sqr>maxdiff) maxdiff=sqr;
      }
    /* maxdiff was tracked as a squared modulus up to here */
    maxdiff=sqrt(maxdiff);
 #ifdef USE_MPI
    MPI_Allreduce(&sum,&sumtot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
    MPI_Allreduce(&sqsum[i],&sqsumtot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
    MPI_Allreduce(&maxdiff,&maxdifftot,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
 #else
    sumtot=sum;
    sqsumtot=sqsum[i];
    maxdifftot=maxdiff;
 #endif
    /* rms of the residuals and of the original coefficients */
    sumtot=sqrt(sumtot/nalms_tot);
    sqsumtot=sqrt(sqsumtot/nalms_tot);
    (*err_abs)[i]=maxdifftot;
    (*err_rel)[i]=sumtot/sqsumtot;
    }
  }
 /* Returns the smallest integer >= n that has no prime factors other than
    2, 3 and 5. Values n<=6 are returned unchanged. */
 static int good_fft_size(int n)
  {
  if (n<=6) return n;
  /* 2*n is a safe upper bound: some power of 2 always lies in [n, 2n) */
  int best=2*n;
  int p2=1;
  while (p2<best)
    {
    int p23=p2;
    while (p23<best)
      {
      int p235=p23;
      while (p235<best)
        {
        if (p235>=n) best=p235;
        p235*=5;
        }
      p23*=3;
      }
    p2*=2;
    }
  return best;
  }
 /* Creates the a_lm layout and the map geometry for a test run.
    \param gname grid name: "healpix", "gauss", "fejer1", "fejer2", "cc" or
      "smallgauss"; any other name aborts via UTIL_FAIL
    \param lmax maximum l of the transform; must not be negative
    \param mmax in/out: maximum m; a negative value is replaced by lmax
    \param gpar1 in/out: first grid parameter (nside for "healpix", number of
      latitudes otherwise); values <1 are replaced by a default that
      depends on the grid type
    \param gpar2 in/out: second grid parameter (number of longitudes; not
      used for "healpix"); values <1 are replaced by 2*mmax+1
    \param ginfo receives the newly created geometry
    \param ainfo receives the newly created triangular a_lm layout
    With USE_MPI both objects are afterwards reduced to the share of this
    task. Task 0 prints a summary of the chosen parameters. */
 static void get_infos (const char *gname, int lmax, int *mmax, int *gpar1,
  int *gpar2, sharp_geom_info **ginfo, sharp_alm_info **ainfo)
  {
  UTIL_ASSERT(lmax>=0,"lmax must not be negative");
  if (*mmax<0) *mmax=lmax;
  UTIL_ASSERT(*mmax<=lmax,"mmax larger than lmax");
  if (mytask==0) printf ("lmax: %d, mmax: %d\n",lmax,*mmax);
  sharp_make_triangular_alm_info(lmax,*mmax,1,ainfo);
 #ifdef USE_MPI
  reduce_alm_info(*ainfo);
 #endif
  if (strcmp(gname,"healpix")==0)
    {
    if (*gpar1<1) *gpar1=lmax/2;
    /* lmax/2 is 0 for lmax<2; nside must be at least 1 */
    if (*gpar1==0) ++(*gpar1);
    sharp_make_healpix_geom_info (*gpar1, 1, ginfo);
    if (mytask==0) printf ("HEALPix grid, nside=%d\n",*gpar1);
    }
  else if (strcmp(gname,"gauss")==0)
    {
    if (*gpar1<1) *gpar1=lmax+1;
    if (*gpar2<1) *gpar2=2*(*mmax)+1;
    sharp_make_gauss_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
    if (mytask==0)
      printf ("Gauss-Legendre grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
    }
  else if (strcmp(gname,"fejer1")==0)
    {
    if (*gpar1<1) *gpar1=2*lmax+1;
    if (*gpar2<1) *gpar2=2*(*mmax)+1;
    sharp_make_fejer1_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
    if (mytask==0) printf ("Fejer1 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
    }
  else if (strcmp(gname,"fejer2")==0)
    {
    if (*gpar1<1) *gpar1=2*lmax+1;
    if (*gpar2<1) *gpar2=2*(*mmax)+1;
    sharp_make_fejer2_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
    if (mytask==0) printf ("Fejer2 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
    }
  else if (strcmp(gname,"cc")==0)
    {
    if (*gpar1<1) *gpar1=2*lmax+1;
    if (*gpar2<1) *gpar2=2*(*mmax)+1;
    sharp_make_cc_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
    if (mytask==0)
      printf("Clenshaw-Curtis grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
    }
  else if (strcmp(gname,"smallgauss")==0)
    {
    /* Gauss-Legendre grid in which every ring is shortened to a pixel count
       derived from sharp_get_mlim() (capped at nlon and rounded up to an
       FFT-friendly size); the ring weights are rescaled accordingly */
    int nlat=*gpar1, nlon=*gpar2;
    if (nlat<1) nlat=lmax+1;
    if (nlon<1) nlon=2*(*mmax)+1;
    *gpar1=nlat; *gpar2=nlon;
    sharp_make_gauss_geom_info (nlat, nlon, 0., 1, nlon, ginfo);
    /* pixel count of the full grid, for the "savings" report below */
    ptrdiff_t npix_o=get_npix(*ginfo);
    size_t ofs=0;
    for (int i=0; i<(*ginfo)->npairs; ++i)
      {
      sharp_ringpair *pair=&((*ginfo)->pair[i]);
      int pring=1+2*sharp_get_mlim(lmax,0,pair->r1.sth,pair->r1.cth);
      if (pring>nlon) pring=nlon;
      pring=good_fft_size(pring);
      pair->r1.nph=pring;
      pair->r1.weight*=nlon*1./pring;
      pair->r1.ofs=ofs;
      ofs+=pring;
      if (pair->r2.nph>0)
        {
        pair->r2.nph=pring;
        pair->r2.weight*=nlon*1./pring;
        pair->r2.ofs=ofs;
        ofs+=pring;
        }
      }
    if (mytask==0)
      {
      ptrdiff_t npix=get_npix(*ginfo);
      printf("Small Gauss grid, nlat=%d, npix=%ld, savings=%.2f%%\n",
        nlat,(long)npix,(npix_o-npix)*100./npix_o);
      }
    }
  else
    UTIL_FAIL("unknown grid geometry");
 #ifdef USE_MPI
  reduce_geom_info(*ginfo);
 #endif
  }
 /* Regression test for signs and scaling of the synthesis transforms.
    On a Gauss-Legendre grid with lmax=mmax=50 (theta flipped to pi-theta),
    all a_lm are set to 1+i and SHARP_ALM2MAP is run for spin 0, 1 and 2,
    followed by SHARP_ALM2MAP_DERIV1, for ntrans=1..9. After every transform
    the first, middle and last pixel of each map are compared with stored
    reference values using FAPPROX; any mismatch aborts via UTIL_ASSERT.
    Uses sharp_execute() only, i.e. it runs locally on every task. */
 static void check_sign_scale(void)
  {
  int lmax=50;
  int mmax=lmax;
  sharp_geom_info *tinfo;
  int nrings=lmax+1;
  int ppring=2*lmax+2;
  ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
  sharp_make_gauss_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
  /* flip theta to emulate the "old" Gaussian grid geometry */
  for (int i=0; i<tinfo->npairs; ++i)
    {
    const double pi=3.141592653589793238462643383279502884197;
    tinfo->pair[i].r1.cth=-tinfo->pair[i].r1.cth;
    tinfo->pair[i].r2.cth=-tinfo->pair[i].r2.cth;
    tinfo->pair[i].r1.theta=pi-tinfo->pair[i].r1.theta;
    tinfo->pair[i].r2.theta=pi-tinfo->pair[i].r2.theta;
    }
  sharp_alm_info *alms;
  sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
  /* number of coefficients in a triangular layout up to (lmax,mmax) */
  ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
  for (int ntrans=1; ntrans<10; ++ntrans)
    {
    /* 2*ntrans components are enough for the spin>0 and derivative cases;
       the spin 0 transform only uses the first ntrans of them */
    double **map;
    ALLOC2D(map,double,2*ntrans,npix);
    dcmplx **alm;
    ALLOC2D(alm,dcmplx,2*ntrans,nalms);
    for (int i=0; i<2*ntrans; ++i)
      for (int j=0; j<nalms; ++j)
        alm[i][j]=1.+_Complex_I;
    /* spin 0 */
    sharp_execute(SHARP_ALM2MAP,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
      NULL,NULL);
    for (int it=0; it<ntrans; ++it)
      {
      UTIL_ASSERT(FAPPROX(map[it][0     ], 3.588246976618616912e+00,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[it][npix/2], 4.042209792157496651e+01,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[it][npix-1],-1.234675107554816442e+01,1e-12),
        "error");
      }
    /* spin 1 */
    sharp_execute(SHARP_ALM2MAP,1,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
      NULL,NULL);
    for (int it=0; it<ntrans; ++it)
      {
      UTIL_ASSERT(FAPPROX(map[2*it  ][0     ], 2.750897760535633285e+00,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it  ][npix/2], 3.137704477368562905e+01,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it  ][npix-1],-8.405730859837063917e+01,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it+1][0     ],-2.398026536095463346e+00,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-4.961140548331700728e+01,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1],-1.412765834230440021e+01,1e-12),
        "error");
      }
    /* spin 2 */
    sharp_execute(SHARP_ALM2MAP,2,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
      NULL,NULL);
    for (int it=0; it<ntrans; ++it)
      {
      UTIL_ASSERT(FAPPROX(map[2*it  ][0     ],-1.398186224727334448e+00,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it  ][npix/2],-2.456676000884031197e+01,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it  ][npix-1],-1.516249174408820863e+02,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it+1][0     ],-3.173406200299964119e+00,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-5.831327404513146462e+01,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1],-1.863257892248353897e+01,1e-12),
        "error");
      }
    /* first derivatives; note the looser tolerance on the first check */
    sharp_execute(SHARP_ALM2MAP_DERIV1,1,&alm[0],&map[0],tinfo,alms,ntrans,
      SHARP_DP,NULL,NULL);
    for (int it=0; it<ntrans; ++it)
      {
      UTIL_ASSERT(FAPPROX(map[2*it  ][0     ],-6.859393905369091105e-01,1e-11),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it  ][npix/2],-2.103947835973212364e+02,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it  ][npix-1],-1.092463246472086439e+03,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it+1][0     ],-1.411433220713928165e+02,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-1.146122859381925082e+03,1e-12),
        "error");
      UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1], 7.821618677689795049e+02,1e-12),
        "error");
      }
    DEALLOC2D(map);
    DEALLOC2D(alm);
    }
  sharp_destroy_alm_info(alms);
  sharp_destroy_geom_info(tinfo);
  }
 /* Runs one round trip a_lm -> map -> a_lm and reports its accuracy.
    Random a_lm are synthesised into zero-initialised maps; the a_lm are then
    negated in place and the analysis result is added onto them with
    SHARP_ADD, so that the arrays finally hold the round trip residuals,
    which are evaluated by get_errors().
    \param ginfo map geometry (the share of this task under MPI)
    \param ainfo a_lm layout (the share of this task under MPI)
    \param spin spin of the transform; 2*ntrans components are used for
      spin!=0, ntrans components for spin==0
    \param ntrans number of simultaneous transforms
    \param nv value OR-ed into the flags of both sharp_execute calls
    \param err_abs,err_rel receive newly allocated per-component error
      arrays; the caller releases them with DEALLOC
    \param t_a2m,t_m2a if not NULL, receive the wall clock time of the
      respective transform (maximum over all tasks)
    \param op_a2m,op_m2a if not NULL, receive the operation count of the
      respective transform (sum over all tasks) */
 static void do_sht (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
  int spin, int ntrans, int nv, double **err_abs, double **err_rel,
  double *t_a2m, double *t_m2a, unsigned long long *op_a2m,
  unsigned long long *op_m2a)
  {
  ptrdiff_t nalms = get_nalms(ainfo);
  int ncomp = ntrans*((spin==0) ? 1 : 2);
  size_t npix = get_npix(ginfo);
  double **map;
  ALLOC2D(map,double,ncomp,npix);
  for (int i=0; i<ncomp; ++i)
    SET_ARRAY(map[i],0,(int)npix,0);
  dcmplx **alm;
  ALLOC2D(alm,dcmplx,ncomp,nalms);
  /* component i is seeded with i+1 */
  for (int i=0; i<ncomp; ++i)
    random_alm(alm[i],ainfo,spin,i+1);
  /* NOTE(review): the MPI path passes SHARP_ADD for the synthesis while the
     non-MPI path does not; the maps were zeroed above in both cases. */
 #ifdef USE_MPI
  sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,&alm[0],&map[0],ginfo,
    ainfo,ntrans, SHARP_DP|SHARP_ADD|nv,t_a2m,op_a2m);
 #else
  sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],ginfo,ainfo,ntrans,
    SHARP_DP|nv,t_a2m,op_a2m);
 #endif
  if (t_a2m!=NULL) *t_a2m=maxTime(*t_a2m);
  if (op_a2m!=NULL) *op_a2m=totalops(*op_a2m);
  /* remember the power of the input and negate it in place */
  double *sqsum=get_sqsum_and_invert(alm,nalms,ncomp);
  /* add the analysis result onto the negated input */
 #ifdef USE_MPI
  sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,&alm[0],&map[0],ginfo,
    ainfo,ntrans,SHARP_DP|SHARP_ADD|nv,t_m2a,op_m2a);
 #else
  sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map[0],ginfo,ainfo,ntrans,
    SHARP_DP|SHARP_ADD|nv,t_m2a,op_m2a);
 #endif
  if (t_m2a!=NULL) *t_m2a=maxTime(*t_m2a);
  if (op_m2a!=NULL) *op_m2a=totalops(*op_m2a);
  get_errors(alm, nalms, ncomp, sqsum, err_abs, err_rel);
  DEALLOC(sqsum);
  DEALLOC2D(map);
  DEALLOC2D(alm);
  }
 /* Runs one round trip with do_sht() and aborts via UTIL_ASSERT unless both
    the relative and the absolute error of every component are below 1e-10.
    Timings and operation counts are not requested. */
 static void check_accuracy (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
  int spin, int ntrans, int nv)
  {
  double *err_abs, *err_rel;
  do_sht (ginfo, ainfo, spin, ntrans, nv, &err_abs, &err_rel, NULL, NULL,
    NULL, NULL);
  /* spin 0 uses one component per transform, every other spin uses two */
  const int ncomp = (spin==0) ? ntrans : ntrans*2;
  int i=0;
  while (i<ncomp)
    {
    UTIL_ASSERT((err_rel[i]<1e-10) && (err_abs[i]<1e-10),"error");
    ++i;
    }
  DEALLOC(err_rel);
  DEALLOC(err_abs);
  }
 /* "acctest" sub-command: first checks signs and scales with
    check_sign_scale(), then verifies the round trip accuracy on a
    Gauss-Legendre grid (lmax=mmax=127, 128x256) for nv=1..6, ntrans=1..6 and
    spins 0, 1, 2, 3 and 30. Progress messages are printed by task 0 only. */
 static void sharp_acctest(void)
  {
  static const int spins[] = { 0, 1, 2, 3, 30 };
  const int nspins = (int)(sizeof(spins)/sizeof(spins[0]));

  if (mytask==0)
    {
    sharp_module_startup("sharp_acctest",1,1,"",1);
    printf("Checking signs and scales.\n");
    }
  check_sign_scale();
  if (mytask==0)
    {
    printf("Passed.\n\n");
    printf("Testing map analysis accuracy.\n");
    }

  sharp_geom_info *ginfo;
  sharp_alm_info *ainfo;
  int lmax=127, mmax=127, nlat=128, nlon=256;
  get_infos ("gauss", lmax, &mmax, &nlat, &nlon, &ginfo, &ainfo);
  for (int nv=1; nv<=6; ++nv)
    for (int ntrans=1; ntrans<=6; ++ntrans)
      for (int is=0; is<nspins; ++is)
        check_accuracy(ginfo,ainfo,spins[is],ntrans,nv);
  sharp_destroy_alm_info(ainfo);
  sharp_destroy_geom_info(ginfo);

  if (mytask==0) printf("Passed.\n\n");
  }
 /* "test" sub-command: repeats the round trip of do_sht() until the
    accumulated time exceeds 2 seconds, then reports (on task 0) the best
    timings, the resulting performance, the per-component errors, the memory
    consumption and a one-line machine readable summary.
    \param argc number of command line arguments; at least 9 are required
    \param argv argv[2] is the grid name, argv[3..8] are lmax, mmax, the two
      grid parameters, spin and ntrans */
 static void sharp_test (int argc, const char **argv)
  {
  if (mytask==0) sharp_announce("sharp_test");
  UTIL_ASSERT(argc>=9,"usage: grid lmax mmax geom1 geom2 spin ntrans");
  int lmax=atoi(argv[3]);
  int mmax=atoi(argv[4]);
  int gpar1=atoi(argv[5]);
  int gpar2=atoi(argv[6]);
  int spin=atoi(argv[7]);
  int ntrans=atoi(argv[8]);
  if (mytask==0) printf("Testing map analysis accuracy.\n");
  if (mytask==0) printf("spin=%d, ntrans=%d\n", spin, ntrans);
  sharp_geom_info *ginfo;
  sharp_alm_info *ainfo;
  get_infos (argv[2], lmax, &mmax, &gpar1, &gpar2, &ginfo, &ainfo);
  int ncomp = ntrans*((spin==0) ? 1 : 2);
  double t_a2m=1e30, t_m2a=1e30;
  unsigned long long op_a2m, op_m2a;
  double *err_abs,*err_rel;
  double t_acc=0;
  int nrpt=0;
  while(1)
    {
    ++nrpt;
    double ta2m2, tm2a2;
    do_sht (ginfo, ainfo, spin, ntrans, 0, &err_abs, &err_rel, &ta2m2, &tm2a2,
      &op_a2m, &op_m2a);
    /* keep the best time of each direction */
    if (ta2m2<t_a2m) t_a2m=ta2m2;
    if (tm2a2<t_m2a) t_m2a=tm2a2;
    t_acc+=t_a2m+t_m2a;
    if (t_acc>2.)
      {
      if (mytask==0) printf("Best of %d runs\n",nrpt);
      /* the error arrays of the last run are still needed below */
      break;
      }
    DEALLOC(err_abs);
    DEALLOC(err_rel);
    }
  if (mytask==0) printf("wall time for alm2map: %fs\n",t_a2m);
  if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*op_a2m/t_a2m);
  if (mytask==0) printf("wall time for map2alm: %fs\n",t_m2a);
  if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*op_m2a/t_m2a);
  if (mytask==0)
    for (int i=0; i<ncomp; ++i)
      printf("component %i: rms %e, maxerr %e\n",i,err_rel[i], err_abs[i]);
  /* size of the input/output arrays: 16 bytes per a_lm, 8 bytes per pixel */
  double iosize = ncomp*(16.*get_nalms(ainfo) + 8.*get_npix(ginfo));
  iosize = allreduceSumDouble(iosize);
  sharp_destroy_alm_info(ainfo);
  sharp_destroy_geom_info(ginfo);
  double tmem=totalMem();
  if (mytask==0)
    printf("\nMemory high water mark: %.2f MB\n",tmem/(1<<20));
  if (mytask==0)
    printf("Memory overhead: %.2f MB (%.2f%% of working set)\n",
      (tmem-iosize)/(1<<20),100.*(1.-iosize/tmem));
 #ifdef _OPENMP
  int nomp=omp_get_max_threads();
 #else
  int nomp=1;
 #endif
  double maxerel=0., maxeabs=0.;
  for (int i=0; i<ncomp; ++i)
    {
    if (maxerel<err_rel[i]) maxerel=err_rel[i];
    if (maxeabs<err_abs[i]) maxeabs=err_abs[i];
    }
  /* getenv() returns NULL when HOST is not part of the environment, and
     passing NULL to a "%s" conversion is undefined behaviour. */
  const char *host=getenv("HOST");
  if (host==NULL) host="unknown";
  if (mytask==0)
    printf("%-12s %-10s %2d %d %2d %3d %6d %6d %6d %6d %2d %.2e %7.2f %.2e %7.2f"
           " %9.2f %6.2f %.2e %.2e\n",
      host,argv[2],spin,VLEN,nomp,ntasks,lmax,mmax,gpar1,gpar2,
      ntrans,t_a2m,1e-9*op_a2m/t_a2m,t_m2a,1e-9*op_m2a/t_m2a,tmem/(1<<20),
      100.*(1.-iosize/tmem),maxerel,maxeabs);
  DEALLOC(err_abs);
  DEALLOC(err_rel);
  }
 /* "bench" sub-command: times the round trip of do_sht() for nv=0..6 and
    reports, separately for both transform directions, the explicit nv value
    (1..6) with the shortest time, that time, its gain relative to the nv=0
    run and the resulting performance. Every nv value is run at least twice
    and until 3 seconds have been accumulated.
    \param argc number of command line arguments; at least 9 are required
    \param argv argv[2] is the grid name, argv[3..8] are lmax, mmax, the two
      grid parameters, spin and ntrans */
 static void sharp_bench (int argc, const char **argv)
  {
  if (mytask==0) sharp_announce("sharp_bench");
  UTIL_ASSERT(argc>=9,"usage: grid lmax mmax geom1 geom2 spin ntrans");
  int lmax=atoi(argv[3]);
  int mmax=atoi(argv[4]);
  int gpar1=atoi(argv[5]);
  int gpar2=atoi(argv[6]);
  int spin=atoi(argv[7]);
  int ntrans=atoi(argv[8]);
  /* NOTE(review): this banner is the same as in sharp_test and looks
     copy-pasted; the function measures run times, not accuracy. */
  if (mytask==0) printf("Testing map analysis accuracy.\n");
  if (mytask==0) printf("spin=%d, ntrans=%d\n", spin, ntrans);
  sharp_geom_info *ginfo;
  sharp_alm_info *ainfo;
  get_infos (argv[2], lmax, &mmax, &gpar1, &gpar2, &ginfo, &ainfo);
  /* *_auto: best times with nv==0; *_min: best times over nv=1..6 */
  double ta2m_auto=1e30, tm2a_auto=1e30, ta2m_min=1e30, tm2a_min=1e30;
  unsigned long long opa2m_min=0, opm2a_min=0;
  int nvmin_a2m=-1, nvmin_m2a=-1;
  for (int nv=0; nv<=6; ++nv)
    {
    int ntries=0;
    double tacc=0;
    do
      {
      double t_a2m, t_m2a;
      unsigned long long op_a2m, op_m2a;
      double *err_abs,*err_rel;
      do_sht (ginfo, ainfo, spin, ntrans, nv, &err_abs, &err_rel,
        &t_a2m, &t_m2a, &op_a2m, &op_m2a);
      /* the errors are not evaluated here */
      DEALLOC(err_abs);
      DEALLOC(err_rel);
      tacc+=t_a2m+t_m2a;
      ++ntries;
      if (nv==0)
        {
        if (t_a2m<ta2m_auto) ta2m_auto=t_a2m;
        if (t_m2a<tm2a_auto) tm2a_auto=t_m2a;
        }
      else
        {
        if (t_a2m<ta2m_min) { nvmin_a2m=nv; ta2m_min=t_a2m; opa2m_min=op_a2m; }
        if (t_m2a<tm2a_min) { nvmin_m2a=nv; tm2a_min=t_m2a; opm2a_min=op_m2a; }
        }
      } while((ntries<2)||(tacc<3.));
    }
  if (mytask==0)
    {
    printf("a2m: nvmin=%d tmin=%fs speedup=%.2f%% perf=%.2fGFlops/s\n",
      nvmin_a2m,ta2m_min,100.*(ta2m_auto-ta2m_min)/ta2m_auto,
      1e-9*opa2m_min/ta2m_min);
    printf("m2a: nvmin=%d tmin=%fs speedup=%.2f%% perf=%.2fGFlops/s\n",
      nvmin_m2a,tm2a_min,100.*(tm2a_auto-tm2a_min)/tm2a_auto,
      1e-9*opm2a_min/tm2a_min);
    }
  sharp_destroy_alm_info(ainfo);
  sharp_destroy_geom_info(ginfo);
  }
 /* Program entry point. Initialises MPI (if compiled in) and the globals
    ntasks and mytask, then dispatches on the first command line argument:
    "acctest", "test" or "bench". Any other command aborts via UTIL_FAIL. */
 int main(int argc, const char **argv)
  {
 #ifdef USE_MPI
  MPI_Init(NULL,NULL);
  MPI_Comm_size(MPI_COMM_WORLD,&ntasks);
  MPI_Comm_rank(MPI_COMM_WORLD,&mytask);
 #else
  mytask=0; ntasks=1;
 #endif

  UTIL_ASSERT(argc>=2,"need at least one command line argument");
  const char *command=argv[1];
  if (!strcmp(command,"acctest"))
    sharp_acctest();
  else if (!strcmp(command,"test"))
    sharp_test(argc,argv);
  else if (!strcmp(command,"bench"))
    sharp_bench(argc,argv);
  else
    UTIL_FAIL("unknown command");

 #ifdef USE_MPI
  MPI_Finalize();
 #endif
  return 0;
  }
--- a/external/sharp/libsharp/sharp_vecsupport.h
+++ b/external/sharp/libsharp/sharp_vecsupport.h
@ -1,255 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_vecsupport.h
 *  Convenience functions for vector arithmetics
 *
 *  Copyright (C) 2012,2013 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef SHARP_VECSUPPORT_H
 #define SHARP_VECSUPPORT_H
 #include <math.h>
 #include "sharp_vecutil.h"
 typedef double Ts;
 #if (VLEN==1)
 /* Scalar fallback: a "vector" (Tv) is a single double, a single-precision
    vector (Tv_s) a single float, and a mask (Tm) a plain int truth value, so
    every operation maps directly onto ordinary C arithmetic. */
 typedef double Tv;
 typedef float Tv_s;
 typedef int Tm;
 #define vadd(a,b) ((a)+(b))
 #define vadd_s(a,b) ((a)+(b))
 #define vaddeq(a,b) ((a)+=(b))
 /* The masked updates are statement macros. They are wrapped in do/while(0)
    and carry no trailing semicolon, so that "vaddeq_mask(m,a,b);" expands to
    exactly one statement (safe inside an unbraced if/else), matching how the
    SIMD variants of these macros must be invoked. */
 #define vaddeq_mask(mask,a,b) do { if (mask) (a)+=(b); } while(0)
 #define vsub(a,b) ((a)-(b))
 #define vsub_s(a,b) ((a)-(b))
 #define vsubeq(a,b) ((a)-=(b))
 #define vsubeq_mask(mask,a,b) do { if (mask) (a)-=(b); } while(0)
 #define vmul(a,b) ((a)*(b))
 #define vmul_s(a,b) ((a)*(b))
 #define vmuleq(a,b) ((a)*=(b))
 #define vmuleq_mask(mask,a,b) do { if (mask) (a)*=(b); } while(0)
 /* fused-style helpers: a += b*c, a -= b*c, a += b*c + d*e, a += b*c - d*e */
 #define vfmaeq(a,b,c) ((a)+=(b)*(c))
 #define vfmaeq_s(a,b,c) ((a)+=(b)*(c))
 #define vfmseq(a,b,c) ((a)-=(b)*(c))
 #define vfmaaeq(a,b,c,d,e) ((a)+=(b)*(c)+(d)*(e))
 #define vfmaseq(a,b,c,d,e) ((a)+=(b)*(c)-(d)*(e))
 #define vneg(a) (-(a))
 /* vload broadcasts a scalar; in the scalar case that is the value itself */
 #define vload(a) (a)
 #define vload_s(a) (a)
 #define vloadu(p) (*(p))
 #define vloadu_s(p) (*(p))
 #define vabs(a) fabs(a)
 #define vsqrt(a) sqrt(a)
 #define vlt(a,b) ((a)<(b))
 #define vgt(a,b) ((a)>(b))
 #define vge(a,b) ((a)>=(b))
 #define vne(a,b) ((a)!=(b))
 #define vand_mask(a,b) ((a)&&(b))
 /* the stored value is parenthesised so that expression arguments such as
    "c ? x : y" cannot be split by the assignment */
 #define vstoreu(p, a) (*(p)=(a))
 #define vstoreu_s(p, a) (*(p)=(a))
 static inline Tv vmin (Tv a, Tv b) { return (a<b) ? a : b; }
 static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
 #define vanyTrue(a) (a)
 #define vallTrue(a) (a)
 #define vzero 0.
 #define vone 1.
 #endif
 #if (VLEN==2)
 /* SSE2 backend: Tv holds two doubles, Tv_s four floats; masks are __m128d
    values as produced by the packed comparison intrinsics. */
 #include <emmintrin.h>
 #if defined (__SSE3__)
 #include <pmmintrin.h>
 #endif
 #if defined (__SSE4_1__)
 #include <smmintrin.h>
 #endif
 typedef __m128d Tv;
 typedef __m128 Tv_s;
 typedef __m128d Tm;
 /* vblend__(m,a,b): select a where the mask m is set, b elsewhere */
 #if defined(__SSE4_1__)
 #define vblend__(m,a,b) _mm_blendv_pd(b,a,m)
 #else
 static inline Tv vblend__(Tv m, Tv a, Tv b)
  { return _mm_or_pd(_mm_and_pd(a,m),_mm_andnot_pd(m,b)); }
 #endif
 #define vzero _mm_setzero_pd()
 #define vone _mm_set1_pd(1.)
 #define vadd(a,b) _mm_add_pd(a,b)
 #define vadd_s(a,b) _mm_add_ps(a,b)
 #define vaddeq(a,b) a=_mm_add_pd(a,b)
 /* masked updates blend in the neutral element (0 for +/-, 1 for *) on
    lanes where the mask is clear */
 #define vaddeq_mask(mask,a,b) a=_mm_add_pd(a,vblend__(mask,b,vzero))
 #define vsub(a,b) _mm_sub_pd(a,b)
 #define vsub_s(a,b) _mm_sub_ps(a,b)
 #define vsubeq(a,b) a=_mm_sub_pd(a,b)
 #define vsubeq_mask(mask,a,b) a=_mm_sub_pd(a,vblend__(mask,b,vzero))
 #define vmul(a,b) _mm_mul_pd(a,b)
 #define vmul_s(a,b) _mm_mul_ps(a,b)
 #define vmuleq(a,b) a=_mm_mul_pd(a,b)
 #define vmuleq_mask(mask,a,b) a=_mm_mul_pd(a,vblend__(mask,b,vone))
 #define vfmaeq(a,b,c) a=_mm_add_pd(a,_mm_mul_pd(b,c))
 #define vfmaeq_s(a,b,c) a=_mm_add_ps(a,_mm_mul_ps(b,c))
 #define vfmseq(a,b,c) a=_mm_sub_pd(a,_mm_mul_pd(b,c))
 #define vfmaaeq(a,b,c,d,e) \
  a=_mm_add_pd(a,_mm_add_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
 #define vfmaseq(a,b,c,d,e) \
  a=_mm_add_pd(a,_mm_sub_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
 /* sign handling works on the IEEE sign bit: -0. has only that bit set */
 #define vneg(a) _mm_xor_pd(_mm_set1_pd(-0.),a)
 #define vload(a) _mm_set1_pd(a)
 #define vload_s(a) _mm_set1_ps(a)
 #define vabs(a) _mm_andnot_pd(_mm_set1_pd(-0.),a)
 #define vsqrt(a) _mm_sqrt_pd(a)
 #define vlt(a,b) _mm_cmplt_pd(a,b)
 #define vgt(a,b) _mm_cmpgt_pd(a,b)
 #define vge(a,b) _mm_cmpge_pd(a,b)
 #define vne(a,b) _mm_cmpneq_pd(a,b)
 #define vand_mask(a,b) _mm_and_pd(a,b)
 #define vmin(a,b) _mm_min_pd(a,b)
 /* vmax is an expression macro and must not end in a semicolon; otherwise it
    cannot be nested in another call or used as an operand (the other
    backends define it without one). */
 #define vmax(a,b) _mm_max_pd(a,b)
 /* movemask packs the two lane sign bits into bits 0..1, hence ==3 for "all" */
 #define vanyTrue(a) (_mm_movemask_pd(a)!=0)
 #define vallTrue(a) (_mm_movemask_pd(a)==3)
 #define vloadu(p) _mm_loadu_pd(p)
 #define vloadu_s(p) _mm_loadu_ps(p)
 #define vstoreu(p, v) _mm_storeu_pd(p, v)
 #define vstoreu_s(p, v) _mm_storeu_ps(p, v)
 #endif
 #if (VLEN==4)
 /* AVX backend: Tv holds four doubles, Tv_s eight floats; masks are __m256d
    values as produced by _mm256_cmp_pd. */
 #include <immintrin.h>
 #if (USE_FMA4)
 #include <x86intrin.h>
 #endif
 typedef __m256d Tv;
 typedef __m256 Tv_s;
 typedef __m256d Tm;
 /* vblend__(m,a,b): select a where the mask m is set, b elsewhere */
 #define vblend__(m,a,b) _mm256_blendv_pd(b,a,m)
 #define vzero _mm256_setzero_pd()
 #define vone _mm256_set1_pd(1.)
 #define vadd(a,b) _mm256_add_pd(a,b)
 #define vadd_s(a,b) _mm256_add_ps(a,b)
 #define vaddeq(a,b) a=_mm256_add_pd(a,b)
 /* masked updates blend in the neutral element (0 for +/-, 1 for *) on
    lanes where the mask is clear */
 #define vaddeq_mask(mask,a,b) a=_mm256_add_pd(a,vblend__(mask,b,vzero))
 #define vsub(a,b) _mm256_sub_pd(a,b)
 #define vsub_s(a,b) _mm256_sub_ps(a,b)
 #define vsubeq(a,b) a=_mm256_sub_pd(a,b)
 #define vsubeq_mask(mask,a,b) a=_mm256_sub_pd(a,vblend__(mask,b,vzero))
 #define vmul(a,b) _mm256_mul_pd(a,b)
 #define vmul_s(a,b) _mm256_mul_ps(a,b)
 #define vmuleq(a,b) a=_mm256_mul_pd(a,b)
 #define vmuleq_mask(mask,a,b) a=_mm256_mul_pd(a,vblend__(mask,b,vone))
 /* multiply-add helpers: real FMA4 instructions when available, otherwise
    separate multiply and add */
 #if (USE_FMA4)
 #define vfmaeq(a,b,c) a=_mm256_macc_pd(b,c,a)
 #define vfmaeq_s(a,b,c) a=_mm256_macc_ps(b,c,a)
 #define vfmseq(a,b,c) a=_mm256_nmacc_pd(b,c,a)
 #define vfmaaeq(a,b,c,d,e) a=_mm256_macc_pd(d,e,_mm256_macc_pd(b,c,a))
 #define vfmaseq(a,b,c,d,e) a=_mm256_nmacc_pd(d,e,_mm256_macc_pd(b,c,a))
 #else
 #define vfmaeq(a,b,c) a=_mm256_add_pd(a,_mm256_mul_pd(b,c))
 #define vfmaeq_s(a,b,c) a=_mm256_add_ps(a,_mm256_mul_ps(b,c))
 #define vfmseq(a,b,c) a=_mm256_sub_pd(a,_mm256_mul_pd(b,c))
 #define vfmaaeq(a,b,c,d,e) \
  a=_mm256_add_pd(a,_mm256_add_pd(_mm256_mul_pd(b,c),_mm256_mul_pd(d,e)))
 #define vfmaseq(a,b,c,d,e) \
  a=_mm256_add_pd(a,_mm256_sub_pd(_mm256_mul_pd(b,c),_mm256_mul_pd(d,e)))
 #endif
 /* sign handling works on the IEEE sign bit: -0. has only that bit set */
 #define vneg(a) _mm256_xor_pd(_mm256_set1_pd(-0.),a)
 #define vload(a) _mm256_set1_pd(a)
 #define vload_s(a) _mm256_set1_ps(a)
 #define vabs(a) _mm256_andnot_pd(_mm256_set1_pd(-0.),a)
 #define vsqrt(a) _mm256_sqrt_pd(a)
 #define vlt(a,b) _mm256_cmp_pd(a,b,_CMP_LT_OQ)
 #define vgt(a,b) _mm256_cmp_pd(a,b,_CMP_GT_OQ)
 #define vge(a,b) _mm256_cmp_pd(a,b,_CMP_GE_OQ)
 /* "not equal" uses the unordered predicate so that a NaN lane compares as
    different, exactly like the scalar != and the SSE2 _mm_cmpneq_pd used by
    the other backends (the ordered _CMP_NEQ_OQ would report false on NaN). */
 #define vne(a,b) _mm256_cmp_pd(a,b,_CMP_NEQ_UQ)
 #define vand_mask(a,b) _mm256_and_pd(a,b)
 #define vmin(a,b) _mm256_min_pd(a,b)
 #define vmax(a,b) _mm256_max_pd(a,b)
 /* movemask packs the four lane sign bits into bits 0..3, hence ==15 */
 #define vanyTrue(a) (_mm256_movemask_pd(a)!=0)
 #define vallTrue(a) (_mm256_movemask_pd(a)==15)
 #define vloadu(p) _mm256_loadu_pd(p)
 #define vloadu_s(p) _mm256_loadu_ps(p)
 #define vstoreu(p, v) _mm256_storeu_pd(p, v)
 #define vstoreu_s(p, v) _mm256_storeu_ps(p, v)
 #endif
 #if (VLEN==8)
 /* 512-bit backend: Tv holds eight doubles and masks are 8-bit __mmask8
    values, one bit per lane.
    NOTE(review): unlike the narrower backends this section defines no
    single-precision (Tv_s, *_s) or unaligned load/store macros. */
 #include <immintrin.h>
 typedef __m512d Tv;
 typedef __mmask8 Tm;
 #define vadd(a,b) _mm512_add_pd(a,b)
 #define vaddeq(a,b) a=_mm512_add_pd(a,b)
 /* The masked updates carry no trailing semicolon (the caller supplies it,
    as for every other backend), so they stay a single statement inside an
    unbraced if/else. */
 #define vaddeq_mask(mask,a,b) a=_mm512_mask_add_pd(a,mask,a,b)
 #define vsub(a,b) _mm512_sub_pd(a,b)
 #define vsubeq(a,b) a=_mm512_sub_pd(a,b)
 #define vsubeq_mask(mask,a,b) a=_mm512_mask_sub_pd(a,mask,a,b)
 #define vmul(a,b) _mm512_mul_pd(a,b)
 #define vmuleq(a,b) a=_mm512_mul_pd(a,b)
 #define vmuleq_mask(mask,a,b) a=_mm512_mask_mul_pd(a,mask,a,b)
 #define vfmaeq(a,b,c) a=_mm512_fmadd_pd(b,c,a)
 #define vfmseq(a,b,c) a=_mm512_fnmadd_pd(b,c,a)
 #define vfmaaeq(a,b,c,d,e) a=_mm512_fmadd_pd(d,e,_mm512_fmadd_pd(b,c,a))
 #define vfmaseq(a,b,c,d,e) a=_mm512_fnmadd_pd(d,e,_mm512_fmadd_pd(b,c,a))
 #define vneg(a) _mm512_mul_pd(a,_mm512_set1_pd(-1.))
 #define vload(a) _mm512_set1_pd(a)
 /* clear the sign bit through the integer domain; the argument is
    parenthesised so the cast applies to the whole expression */
 #define vabs(a) (__m512d)_mm512_andnot_epi64((__m512i)_mm512_set1_pd(-0.),(__m512i)(a))
 #define vsqrt(a) _mm512_sqrt_pd(a)
 #define vlt(a,b) _mm512_cmplt_pd_mask(a,b)
 /* NOTE(review): "greater" is spelled as not-less-or-equal and "greater or
    equal" as not-less-than; for NaN operands these presumably differ from the
    ordered comparisons used by the other backends - confirm if NaNs matter. */
 #define vgt(a,b) _mm512_cmpnle_pd_mask(a,b)
 #define vge(a,b) _mm512_cmpnlt_pd_mask(a,b)
 #define vne(a,b) _mm512_cmpneq_pd_mask(a,b)
 #define vand_mask(a,b) ((a)&(b))
 #define vmin(a,b) _mm512_min_pd(a,b)
 #define vmax(a,b) _mm512_max_pd(a,b)
 /* eight lanes -> all bits set is 255; arguments parenthesised so that mask
    expressions such as "m1&m2" are compared as a whole */
 #define vanyTrue(a) ((a)!=0)
 #define vallTrue(a) ((a)==255)
 #define vzero _mm512_setzero_pd()
 #define vone _mm512_set1_pd(1.)
 #endif
 #endif
--- a/external/sharp/libsharp/sharp_vecutil.h
+++ b/external/sharp/libsharp/sharp_vecutil.h
@ -1,63 +0,0 @@
 /*
 *  This file is part of libc_utils.
 *
 *  libc_utils is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libc_utils is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libc_utils; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_vecutil.h
 *  Functionality related to vector instruction support
 *
 *  Copyright (C) 2012,2013 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef SHARP_VECUTIL_H
 #define SHARP_VECUTIL_H
 /* Vector length in doubles. A value predefined by the build is respected;
    otherwise it is derived from the instruction-set macros of the compiler:
    8 for __MIC__, 4 for __AVX__, 2 for __SSE2__, else 1 (scalar). */
 #if !defined(VLEN)
 #if defined(__MIC__)
 #define VLEN 8
 #elif defined(__AVX__)
 #define VLEN 4
 #elif defined(__SSE2__)
 #define VLEN 2
 #else
 #define VLEN 1
 #endif
 #endif
 /* Single-precision vector length: twice VLEN, except in the scalar case. */
 #if (VLEN!=1)
 #define VLEN_s (2*VLEN)
 #else
 #define VLEN_s 1
 #endif
 /* USE_FMA4 is 1 when the compiler advertises __FMA4__, else 0; a value
    predefined by the build is respected. */
 #if !defined(USE_FMA4)
 #if defined(__FMA4__)
 #define USE_FMA4 1
 #else
 #define USE_FMA4 0
 #endif
 #endif
 #endif
--- a/external/sharp/libsharp/sharp_ylmgen_c.c
+++ b/external/sharp/libsharp/sharp_ylmgen_c.c
@ -1,232 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*
 *  Helper code for efficient calculation of Y_lm(theta,phi=0)
 *
 *  Copyright (C) 2005-2014 Max-Planck-Society
 *  Author: Martin Reinecke
 */
 #include <math.h>
 #include <stdlib.h>
 #include "sharp_ylmgen_c.h"
 #include "c_utils.h"
 /* Rescales *val by powers of sharp_fsmall / sharp_fbig until its magnitude
    lies in [xfmax*sharp_fsmall, xfmax], adjusting *scale by +1 for every
    multiplication with sharp_fsmall and by -1 for every multiplication with
    sharp_fbig. A value of exactly zero is never scaled up. */
 static inline void normalize (double *val, int *scale, double xfmax)
  {
  /* too large: shrink */
  for (; fabs(*val)>xfmax; ++*scale)
    *val*=sharp_fsmall;
  if (*val==0.) return;
  /* too small (and nonzero): grow */
  for (; fabs(*val)<xfmax*sharp_fsmall; --*scale)
    *val*=sharp_fbig;
  }
 /* Initialises the generator *gen for multipoles up to l_max, orders up to
    m_max and the given (nonnegative) spin. Allocates all lookup tables needed
    later by sharp_Ylmgen_prepare(); which tables are allocated depends on
    whether spin is zero. The tables must be released with
    sharp_Ylmgen_destroy(). Aborts via UTIL_ASSERT on inconsistent arguments. */
 void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
  {
  const double inv_sqrt4pi = 0.2820947917738781434740397257803862929220;
  gen->lmax = l_max;
  gen->mmax = m_max;
  UTIL_ASSERT(spin>=0,"incorrect spin: must be nonnegative");
  UTIL_ASSERT(l_max>=spin,"incorrect l_max: must be >= spin");
  UTIL_ASSERT(l_max>=m_max,"incorrect l_max: must be >= m_max");
  gen->s = spin;
  UTIL_ASSERT((sharp_minscale<=0)&&(sharp_maxscale>0),
    "bad value for min/maxscale");
  /* cf[] is a table of scale factors, one per admissible scale value:
     the entry at index -sharp_minscale is 1, each step down multiplies by
     sharp_fsmall and each step up by sharp_fbig. */
  gen->cf=RALLOC(double,sharp_maxscale-sharp_minscale+1);
  gen->cf[-sharp_minscale]=1.;
  for (int m=-sharp_minscale-1; m>=0; --m)
    gen->cf[m]=gen->cf[m+1]*sharp_fsmall;
  for (int m=-sharp_minscale+1; m<(sharp_maxscale-sharp_minscale+1); ++m)
    gen->cf[m]=gen->cf[m-1]*sharp_fbig;
  /* no m prepared yet */
  gen->m = -1;
  if (spin==0)
    {
    /* rf is only allocated here; it is filled per m in sharp_Ylmgen_prepare */
    gen->rf = RALLOC(sharp_ylmgen_dbl2,gen->lmax+1);
    /* mfac[m] = 1/sqrt(4 pi) * prod_{k=1..m} sqrt((2k+1)/(2k)) */
    gen->mfac = RALLOC(double,gen->mmax+1);
    gen->mfac[0] = inv_sqrt4pi;
    for (int m=1; m<=gen->mmax; ++m)
      gen->mfac[m] = gen->mfac[m-1]*sqrt((2*m+1.)/(2*m));
    /* root[i]=sqrt(i), iroot[i]=1/sqrt(i) (with iroot[0] defined as 0) */
    gen->root = RALLOC(double,2*gen->lmax+5);
    gen->iroot = RALLOC(double,2*gen->lmax+5);
    for (int m=0; m<2*gen->lmax+5; ++m)
      {
      gen->root[m] = sqrt(m);
      gen->iroot[m] = (m==0) ? 0. : 1./gen->root[m];
      }
    }
  else
    {
    /* sentinel values that can never equal a real m, so that the first
       sharp_Ylmgen_prepare() call always recomputes everything */
    gen->m=gen->mlo=gen->mhi=-1234567890;
    ALLOC(gen->fx,sharp_ylmgen_dbl3,gen->lmax+2);
    for (int m=0; m<gen->lmax+2; ++m)
      gen->fx[m].f[0]=gen->fx[m].f[1]=gen->fx[m].f[2]=0.;
    /* inv[i]=1/i (with inv[0] defined as 0) */
    ALLOC(gen->inv,double,gen->lmax+1);
    gen->inv[0]=0;
    for (int m=1; m<gen->lmax+1; ++m) gen->inv[m]=1./m;
    /* flm1[i]=sqrt(1/(i+1)), flm2[i]=sqrt(i/(i+1)) */
    ALLOC(gen->flm1,double,2*gen->lmax+1);
    ALLOC(gen->flm2,double,2*gen->lmax+1);
    for (int m=0; m<2*gen->lmax+1; ++m)
      {
      gen->flm1[m] = sqrt(1./(m+1.));
      gen->flm2[m] = sqrt(m/(m+1.));
      }
    ALLOC(gen->prefac,double,gen->mmax+1);
    ALLOC(gen->fscale,int,gen->mmax+1);
    /* temporary table of sqrt(i!) kept as (mantissa, scale) pairs: after
       every multiplication the mantissa is renormalised against
       sharp_fbighalf and the number of rescalings is tracked in facscale */
    double *fac = RALLOC(double,2*gen->lmax+1);
    int *facscale = RALLOC(int,2*gen->lmax+1);
    fac[0]=1; facscale[0]=0;
    for (int m=1; m<2*gen->lmax+1; ++m)
      {
      fac[m]=fac[m-1]*sqrt(m);
      facscale[m]=facscale[m-1];
      normalize(&fac[m],&facscale[m],sharp_fbighalf);
      }
    /* prefac[m]/fscale[m]: mantissa and scale of
       fac[2*mhi] / (fac[mhi+mlo]*fac[mhi-mlo]),
       where mhi/mlo are the larger/smaller of m and the spin */
    for (int m=0; m<=gen->mmax; ++m)
      {
      int mlo=gen->s, mhi=m;
      if (mhi<mlo) SWAP(mhi,mlo,int);
      double tfac=fac[2*mhi]/fac[mhi+mlo];
      int tscale=facscale[2*mhi]-facscale[mhi+mlo];
      normalize(&tfac,&tscale,sharp_fbighalf);
      tfac/=fac[mhi-mlo];
      tscale-=facscale[mhi-mlo];
      normalize(&tfac,&tscale,sharp_fbighalf);
      gen->prefac[m]=tfac;
      gen->fscale[m]=tscale;
      }
    DEALLOC(fac);
    DEALLOC(facscale);
    }
  }
 /* Releases every table owned by *gen. Which tables exist depends on the
    spin stored in the generator, mirroring sharp_Ylmgen_init(). */
 void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen)
  {
  if (gen->s!=0)
    {
    /* tables used for nonzero spin */
    DEALLOC(gen->inv);
    DEALLOC(gen->flm2);
    DEALLOC(gen->flm1);
    DEALLOC(gen->fscale);
    DEALLOC(gen->prefac);
    DEALLOC(gen->fx);
    }
  else
    {
    /* tables used for spin 0 */
    DEALLOC(gen->iroot);
    DEALLOC(gen->root);
    DEALLOC(gen->mfac);
    DEALLOC(gen->rf);
    }
  /* the scale-factor table exists for every spin */
  DEALLOC(gen->cf);
  }
 /* Prepares *gen for calculations at order m (m>=0). Does nothing if the
    generator is already prepared for this m. For spin 0 the per-l
    coefficient pairs rf[] are recomputed; for nonzero spin the coefficient
    triples fx[] and the sinPow/cosPow/preMinus_* fields are updated. */
 void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m)
  {
  if (m==gen->m) return;
  UTIL_ASSERT(m>=0,"incorrect m");
  gen->m = m;
  if (gen->s==0)
    {
    /* first entry (l==m) has no second coefficient */
    gen->rf[m].f[0] = gen->root[2*m+3];
    gen->rf[m].f[1] = 0.;
    for (int l=m+1; l<=gen->lmax; ++l)
      {
      /* tmp = sqrt((2l+3) / ((l+1+m)*(l+1-m))), built from the sqrt tables */
      double tmp=gen->root[2*l+3]*gen->iroot[l+1+m]*gen->iroot[l+1-m];
      gen->rf[l].f[0] = tmp*gen->root[2*l+1];
      gen->rf[l].f[1] = tmp*gen->root[l+m]*gen->root[l-m]*gen->iroot[2*l-1];
      }
    }
  else
    {
    /* mlo_/mhi_: the smaller/larger of m and the spin */
    int mlo_=m, mhi_=gen->s;
    if (mhi_<mlo_) SWAP(mhi_,mlo_,int);
    /* the fx[] expressions below are symmetric in m and s, so the table
       only needs recomputing when the (mlo, mhi) pair has changed */
    int ms_similar = ((gen->mhi==mhi_) && (gen->mlo==mlo_));
    gen->mlo = mlo_; gen->mhi = mhi_;
    if (!ms_similar)
      {
      for (int l=gen->mhi; l<gen->lmax; ++l)
        {
        double t = gen->flm1[l+gen->m]*gen->flm1[l-gen->m]
                  *gen->flm1[l+gen->s]*gen->flm1[l-gen->s];
        double lt = 2*l+1;
        double l1 = l+1;
        gen->fx[l+1].f[0]=l1*lt*t;
        gen->fx[l+1].f[1]=gen->m*gen->s*gen->inv[l]*gen->inv[l+1];
        t = gen->flm2[l+gen->m]*gen->flm2[l-gen->m]
           *gen->flm2[l+gen->s]*gen->flm2[l-gen->s];
        gen->fx[l+1].f[2]=t*l1*gen->inv[l];
        }
      }
    /* exponents and sign flags; which of m and s is the larger one decides
       how they are formed */
    gen->preMinus_p = gen->preMinus_m = 0;
    if (gen->mhi==gen->m)
      {
      gen->cosPow = gen->mhi+gen->s; gen->sinPow = gen->mhi-gen->s;
      /* both flags carry the parity of (mhi-s) */
      gen->preMinus_p = gen->preMinus_m = ((gen->mhi-gen->s)&1);
      }
    else
      {
      gen->cosPow = gen->mhi+gen->m; gen->sinPow = gen->mhi-gen->m;
      /* only preMinus_m is set, to the parity of (mhi+m) */
      gen->preMinus_m = ((gen->mhi+gen->m)&1);
      }
    }
  }
 /* Returns a newly allocated array of lmax+1 normalisation factors for the
    given spin. For spin 0 every entry is 1. Otherwise entries with l<spin are
    0 and the remaining ones are sign*0.5*sqrt((2l+1)/(4 pi)), where the sign
    is -1 for positive spin (sign convention for H=1, LensPix paper) and is
    flipped once more for odd spin. The caller owns the array. */
 double *sharp_Ylmgen_get_norm (int lmax, int spin)
  {
  double *res=RALLOC(double,lmax+1);
  if (spin==0)
    {
    int l=0;
    while (l<=lmax)
      res[l++]=1.;
    return res;
    }
  const double pi = 3.141592653589793238462643383279502884197;
  double spinsign = (spin>0) ? -1.0 : 1.0;
  if (spin&1)
    spinsign = -spinsign;
  for (int l=0; l<=lmax; ++l)
    {
    if (l<spin)
      res[l] = 0.;
    else
      res[l] = spinsign*0.5*sqrt((2*l+1)/(4*pi));
    }
  return res;
  }
 /* Returns a newly allocated array of lmax+1 normalisation factors for first
    derivatives: entry 0 is 0, entry l>=1 is
    0.5*sqrt(l*(l+1)*(2l+1)/(4 pi)). The caller owns the array. */
 double *sharp_Ylmgen_get_d1norm (int lmax)
  {
  const double pi = 3.141592653589793238462643383279502884197;
  double *res=RALLOC(double,lmax+1);
  for (int l=0; l<=lmax; ++l)
    {
    if (l<1)
      {
      res[l] = 0.;
      continue;
      }
    res[l] = 0.5*sqrt(l*(l+1.)*(2*l+1.)/(4*pi));
    }
  return res;
  }
--- a/external/sharp/libsharp/sharp_ylmgen_c.h
+++ b/external/sharp/libsharp/sharp_ylmgen_c.h
@ -1,100 +0,0 @@
 /*
 *  This file is part of libsharp.
 *
 *  libsharp is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  libsharp is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with libsharp; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 /*
 *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
 *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
 *  (DLR).
 */
 /*! \file sharp_ylmgen_c.h
 *  Code for efficient calculation of Y_lm(phi=0,theta)
 *
 *  Copyright (C) 2005-2012 Max-Planck-Society
 *  \author Martin Reinecke
 */
 #ifndef SHARP_YLMGEN_C_H
 #define SHARP_YLMGEN_C_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* Range of the integer scale counters that accompany rescaled values
    (sharp_minscale..sharp_maxscale). NOTE(review): the role of
    sharp_limscale is not visible in this header - confirm against its users. */
 enum { sharp_minscale=0, sharp_limscale=1, sharp_maxscale=1 };
 /* Rescaling factors: 2^800 and its reciprocal 2^-800. */
 static const double sharp_fbig=0x1p+800,sharp_fsmall=0x1p-800;
 /* Tolerance constant, 2^-60. */
 static const double sharp_ftol=0x1p-60;
 /* Square root of sharp_fbig, 2^400. */
 static const double sharp_fbighalf=0x1p+400;
 /* Pair / triple of per-l coefficients. */
 typedef struct { double f[2]; } sharp_ylmgen_dbl2;
 typedef struct { double f[3]; } sharp_ylmgen_dbl3;
 /* State of a Y_lm helper-data generator; set up by sharp_Ylmgen_init(),
    updated by sharp_Ylmgen_prepare(), released by sharp_Ylmgen_destroy(). */
 typedef struct
  {
 /* for public use; immutable during lifetime */
  /* maximum l, maximum m and spin, as passed to sharp_Ylmgen_init() */
  int lmax, mmax, s;
  /* scale-factor table with sharp_maxscale-sharp_minscale+1 entries; the
     entry at index -sharp_minscale is 1, neighbours differ by sharp_fbig */
  double *cf;
 /* for public use; will typically change after call to Ylmgen_prepare() */
  /* the m the generator is currently prepared for */
  int m;
 /* used if s==0 */
  /* mmax+1 per-m factors */
  double *mfac;
  /* lmax+1 coefficient pairs, refilled for every new m */
  sharp_ylmgen_dbl2 *rf;
 /* used if s!=0 */
  int sinPow, cosPow, preMinus_p, preMinus_m;
  /* mmax+1 entries each: mantissa and scale counter of a per-m prefactor */
  double *prefac;
  int *fscale;
  /* lmax+2 coefficient triples, refilled when (mlo,mhi) changes */
  sharp_ylmgen_dbl3 *fx;
 /* internal usage only */
 /* used if s==0 */
  /* 2*lmax+5 entries each: sqrt(i) and 1/sqrt(i) (iroot[0] is 0) */
  double *root, *iroot;
 /* used if s!=0 */
  /* flm1[i]=sqrt(1/(i+1)), flm2[i]=sqrt(i/(i+1)), 2*lmax+1 entries each;
     inv[i]=1/i with inv[0]=0, lmax+1 entries */
  double *flm1, *flm2, *inv;
  /* smaller / larger of the current m and the spin */
  int mlo, mhi;
  } sharp_Ylmgen_C;
 /*! Creates a generator which will calculate helper data for Y_lm calculation
    up to \a l=l_max and \a m=m_max. */
 void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin);
 /*! Deallocates a generator previously initialised by Ylmgen_init(). */
 void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen);
 /*! Prepares the object for the calculation at \a m. */
 void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m);
 /*! Returns a pointer to an array with \a lmax+1 entries containing
    normalisation factors that must be applied to Y_lm values computed for
    \a spin. The array must be deallocated (using free()) by the user. */
 double *sharp_Ylmgen_get_norm (int lmax, int spin);
 /*! Returns a pointer to an array with \a lmax+1 entries containing
    normalisation factors that must be applied to Y_lm values computed for
    first derivatives. The array must be deallocated (using free()) by the
    user. */
 double *sharp_Ylmgen_get_d1norm (int lmax);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/external/sharp/python/fake_pyrex/Pyrex/Distutils/init.py
+++ b/external/sharp/python/fake_pyrex/Pyrex/Distutils/init.py
@ -1 +0,0 @@
 # work around broken setuptools monkey patching
--- a/external/sharp/python/fake_pyrex/Pyrex/Distutils/build_ext.py
+++ b/external/sharp/python/fake_pyrex/Pyrex/Distutils/build_ext.py
@ -1 +0,0 @@
 # Placeholder attribute: this fake Pyrex package only needs to expose a
 # `build_ext` name so that setuptools believes Pyrex is installed (see the
 # README in the fake_pyrex directory). The value itself is never meaningful.
 build_ext = "yes, it's there!"
--- a/external/sharp/python/fake_pyrex/Pyrex/init.py
+++ b/external/sharp/python/fake_pyrex/Pyrex/init.py
@ -1 +0,0 @@
 # work around broken setuptools monkey patching
--- a/external/sharp/python/fake_pyrex/README
+++ b/external/sharp/python/fake_pyrex/README
@ -1,2 +0,0 @@
 This directory is here to fool setuptools into building .pyx files
 even if Pyrex is not installed. See ../setup.py.
--- a/external/sharp/python/libsharp/init.py
+++ b/external/sharp/python/libsharp/init.py
@ -1 +0,0 @@
 from .libsharp import *
--- a/external/sharp/python/libsharp/libsharp.pxd
+++ b/external/sharp/python/libsharp/libsharp.pxd
@ -1,79 +0,0 @@
 # C declarations made visible to Cython from libsharp's umbrella header.
 cdef extern from "sharp.h":
    # Approximate declaration; the exact C type comes from the header itself.
    ctypedef long ptrdiff_t
    # Legendre transform helpers (single- and double-precision variants).
    void sharp_legendre_transform_s(float *bl, float *recfac, ptrdiff_t lmax, float *x,
                                    float *out, ptrdiff_t nx)
    void sharp_legendre_transform(double *bl, double *recfac, ptrdiff_t lmax, double *x,
                                  double *out, ptrdiff_t nx)
    void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax)
    void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax)
    # Writes n values each into x and w.
    void sharp_legendre_roots(int n, double *x, double *w)
    # sharp_lowlevel.h
    # Opaque handle types: only ever used through pointers on the Cython side.
    ctypedef struct sharp_alm_info:
        pass
    ctypedef struct sharp_geom_info:
        pass
    # Constructors return the new object through the trailing ** argument.
    void sharp_make_alm_info (int lmax, int mmax, int stride,
                             ptrdiff_t *mvstart, sharp_alm_info **alm_info)
    void sharp_make_geom_info (int nrings, int *nph, ptrdiff_t *ofs,
                               int *stride, double *phi0, double *theta,
                               double *wgt, sharp_geom_info **geom_info)
    void sharp_destroy_alm_info(sharp_alm_info *info)
    void sharp_destroy_geom_info(sharp_geom_info *info)
    ptrdiff_t sharp_map_size(sharp_geom_info *info)
    ptrdiff_t sharp_alm_count(sharp_alm_info *self)
    # Job types selectable by name through JOBTYPE_TO_CONST in libsharp.pyx.
    ctypedef enum sharp_jobtype:
        SHARP_YtW
        SHARP_Yt
        SHARP_WY
        SHARP_Y
    # Bit flags that sht() in libsharp.pyx ORs together into `flags`.
    ctypedef enum:
        SHARP_DP
        SHARP_ADD
    # Declared nogil so that callers can release the GIL around the transform.
    void sharp_execute(sharp_jobtype type_,
                       int spin,
                       void *alm,
                       void *map,
                       sharp_geom_info *geom_info,
                       sharp_alm_info *alm_info,
                       int ntrans,
                       int flags,
                       double *time,
                       unsigned long long *opcnt) nogil
    # Return code that sht() translates into "MPI requested, but not available".
    ctypedef enum:
        SHARP_ERROR_NO_MPI
    # Variant of sharp_execute taking a pointer to an MPI communicator.
    int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
        void *alm, void *map, sharp_geom_info *geom_info,
        sharp_alm_info *alm_info, int ntrans, int flags, double *time,
        unsigned long long *opcnt) nogil
 # Geometry constructors; each returns the new sharp_geom_info through the
 # trailing ** argument.
 cdef extern from "sharp_geomhelpers.h":
    # healpix_grid in libsharp.pyx passes NULL for `rings` / `weight` when the
    # caller supplies none.
    void sharp_make_subset_healpix_geom_info(
        int nside, int stride, int nrings,
        int *rings, double *weight, sharp_geom_info **geom_info)
    void sharp_make_gauss_geom_info(
        int nrings, int nphi, double phi0,
        int stride_lon, int stride_lat, sharp_geom_info **geom_info)
 # a_lm layout constructors; each returns the new sharp_alm_info through the
 # trailing ** argument.
 cdef extern from "sharp_almhelpers.h":
    void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
        sharp_alm_info **alm_info)
    void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
        sharp_alm_info **alm_info)
    # packed_real_order in libsharp.pyx passes NULL for `ms` when the caller
    # supplies no explicit list of m values.
    void sharp_make_mmajor_real_packed_alm_info (int lmax, int stride,
        int nm, const int *ms, sharp_alm_info **alm_info)
--- a/external/sharp/python/libsharp/libsharp.pyx
+++ b/external/sharp/python/libsharp/libsharp.pyx
@ -1,256 +0,0 @@
 import numpy as np
 __all__ = ['legendre_transform', 'legendre_roots', 'sht', 'synthesis', 'adjoint_synthesis',
           'analysis', 'adjoint_analysis', 'healpix_grid', 'triangular_order', 'rectangular_order',
           'packed_real_order']
 def legendre_transform(x, bl, out=None):
    """Evaluate a Legendre series with coefficients `bl` at the points `x`.

    Dispatches on ``x.dtype`` to the double- or single-precision kernel,
    converting `bl` to the matching dtype when necessary. If `out` is not
    given, an array shaped like `x` is allocated. An empty `out` is returned
    unchanged; any dtype other than float64/float32 raises ValueError.
    """
    if out is None:
        out = np.empty_like(x)
    if out.shape[0] == 0:
        return out
    if x.dtype == np.float64:
        kernel, wanted = _legendre_transform, np.float64
    elif x.dtype == np.float32:
        kernel, wanted = _legendre_transform_s, np.float32
    else:
        raise ValueError("unsupported dtype")
    if bl.dtype != wanted:
        bl = bl.astype(wanted)
    return kernel(x, bl, out=out)
 def _legendre_transform(double[::1] x, double[::1] bl, double[::1] out):
    """Double-precision kernel: fills `out` via sharp_legendre_transform.

    `x` and `out` must have the same length; `bl` supplies lmax+1 coefficients.
    """
    cdef Py_ssize_t npoints = x.shape[0]
    if out.shape[0] != npoints:
        raise ValueError('x and out must have same shape')
    # NULL: no precomputed recursion factors are supplied
    sharp_legendre_transform(&bl[0], NULL, bl.shape[0] - 1, &x[0], &out[0], npoints)
    return np.asarray(out)
 def _legendre_transform_s(float[::1] x, float[::1] bl, float[::1] out):
    """Single-precision kernel: fills `out` via sharp_legendre_transform_s.

    `x` and `out` must have the same length; `bl` supplies lmax+1 coefficients.
    """
    cdef Py_ssize_t npoints = x.shape[0]
    if out.shape[0] != npoints:
        raise ValueError('x and out must have same shape')
    # NULL: no precomputed recursion factors are supplied
    sharp_legendre_transform_s(&bl[0], NULL, bl.shape[0] - 1, &x[0], &out[0], npoints)
    return np.asarray(out)
 def legendre_roots(n):
    """Return two length-`n` double arrays (x, w) filled by sharp_legendre_roots.

    For n == 0 the (empty) arrays are returned without calling into libsharp.
    """
    x = np.empty(n, np.double)
    w = np.empty(n, np.double)
    cdef double[::1] x_buf = x, w_buf = w
    # sanity check: both buffers really have n entries
    if x_buf.shape[0] != n or w_buf.shape[0] != n:
        raise AssertionError()
    if n <= 0:
        return x, w
    sharp_legendre_roots(n, &x_buf[0], &w_buf[0])
    return x, w
 # Maps the job-type names accepted by sht() to the libsharp enum constants.
 JOBTYPE_TO_CONST = dict(
    Y=SHARP_Y,
    Yt=SHARP_Yt,
    WY=SHARP_WY,
    YtW=SHARP_YtW,
 )
 def sht(jobtype, geom_info ginfo, alm_info ainfo, double[:, :, ::1] input,
        int spin=0, comm=None, add=False):
    """Run a spherical harmonic transform through libsharp.

    Parameters
    ----------
    jobtype : str
        One of the keys of JOBTYPE_TO_CONST ('Y', 'Yt', 'WY', 'YtW').
    ginfo, ainfo : geom_info, alm_info
        Map geometry and a_lm layout.
    input : double[:, :, ::1]
        Input of shape (ntrans, ncomp, n); ncomp must be 1 for spin == 0 and
        2 otherwise. For 'Y' and 'WY' the input is treated as a_lm and maps are
        produced; for the other job types it is the other way round.
    spin : int
    comm : mpi4py communicator or None
        If given, the MPI-aware entry point is used.
    add : bool
        Sets the SHARP_ADD flag.

    Returns
    -------
    A new float64 array of shape (ntrans, ncomp, local size of the output).

    Raises
    ------
    ValueError for an unknown jobtype or a wrong number of components,
    TypeError if comm is not an mpi4py communicator, Exception if MPI was
    requested but libsharp was built without it.
    """
    cdef void *comm_ptr
    cdef int flags = SHARP_DP | (SHARP_ADD if add else 0)
    cdef int r
    cdef sharp_jobtype jobtype_i
    cdef double[:, :, ::1] output_buf
    # libsharp's `ntrans` counts transforms, not component arrays: for
    # spin != 0 every transform consists of two arrays, so the number of
    # pointers (nptrs) is twice the number of transforms. Passing the pointer
    # count as ntrans would make libsharp read past the pointer tables.
    cdef int ntrans = input.shape[0]
    cdef int nptrs = input.shape[0] * input.shape[1]
    cdef int i, j
    if spin == 0 and input.shape[1] != 1:
        raise ValueError('For spin == 0, we need input.shape[1] == 1')
    elif spin != 0 and input.shape[1] != 2:
        raise ValueError('For spin != 0, we need input.shape[1] == 2')
    # one buffer holds both pointer tables: a_lm pointers first, then maps
    cdef size_t[::1] ptrbuf = np.empty(2 * nptrs, dtype=np.uintp)
    cdef double **alm_ptrs = <double**>&ptrbuf[0]
    cdef double **map_ptrs = <double**>&ptrbuf[nptrs]
    try:
        jobtype_i = JOBTYPE_TO_CONST[jobtype]
    except KeyError:
        raise ValueError('jobtype must be one of: %s' % ', '.join(sorted(JOBTYPE_TO_CONST.keys())))
    if jobtype_i == SHARP_Y or jobtype_i == SHARP_WY:
        # a_lm -> map: the input holds a_lm, the output holds maps
        output = np.empty((input.shape[0], input.shape[1], ginfo.local_size()), dtype=np.float64)
        output_buf = output
        for i in range(input.shape[0]):
            for j in range(input.shape[1]):
                alm_ptrs[i * input.shape[1] + j] = &input[i, j, 0]
                map_ptrs[i * input.shape[1] + j] = &output_buf[i, j, 0]
    else:
        # map -> a_lm: the input holds maps, the output holds a_lm
        output = np.empty((input.shape[0], input.shape[1], ainfo.local_size()), dtype=np.float64)
        output_buf = output
        for i in range(input.shape[0]):
            for j in range(input.shape[1]):
                alm_ptrs[i * input.shape[1] + j] = &output_buf[i, j, 0]
                map_ptrs[i * input.shape[1] + j] = &input[i, j, 0]
    if comm is None:
        with nogil:
            sharp_execute (
                jobtype_i,
                geom_info=ginfo.ginfo, alm_info=ainfo.ainfo,
                spin=spin, alm=alm_ptrs, map=map_ptrs,
                ntrans=ntrans, flags=flags, time=NULL, opcnt=NULL)
    else:
        from mpi4py import MPI
        if not isinstance(comm, MPI.Comm):
            raise TypeError('comm must be an mpi4py communicator')
        from .libsharp_mpi import _addressof
        comm_ptr = <void*><size_t>_addressof(comm)
        with nogil:
            r = sharp_execute_mpi_maybe (
                comm_ptr, jobtype_i,
                geom_info=ginfo.ginfo, alm_info=ainfo.ainfo,
                spin=spin, alm=alm_ptrs, map=map_ptrs,
                ntrans=ntrans, flags=flags, time=NULL, opcnt=NULL)
        if r == SHARP_ERROR_NO_MPI:
            raise Exception('MPI requested, but not available')
    return output
 def synthesis(*args, **kwargs):
    """Shorthand for ``sht('Y', ...)``; all arguments are forwarded to sht()."""
    return sht('Y', *args, **kwargs)
 def adjoint_synthesis(*args, **kwargs):
    """Shorthand for ``sht('Yt', ...)``; all arguments are forwarded to sht()."""
    return sht('Yt', *args, **kwargs)
 def analysis(*args, **kwargs):
    """Shorthand for ``sht('YtW', ...)``; all arguments are forwarded to sht()."""
    return sht('YtW', *args, **kwargs)
 def adjoint_analysis(*args, **kwargs):
    """Shorthand for ``sht('WY', ...)``; all arguments are forwarded to sht()."""
    return sht('WY', *args, **kwargs)
 #
 # geom_info
 #
 class NotInitializedError(Exception):
    """Raised when a geom_info / alm_info wrapper holds no libsharp object."""
 cdef class geom_info:
    """Owner of a libsharp sharp_geom_info pointer.

    The pointer starts out NULL; subclasses are expected to fill it in. It is
    destroyed when the Python object is deallocated.
    """
    cdef sharp_geom_info *ginfo
    def __cinit__(self, *args, **kwargs):
        self.ginfo = NULL
    def local_size(self):
        """Return sharp_map_size() of the wrapped geometry."""
        if self.ginfo != NULL:
            return sharp_map_size(self.ginfo)
        raise NotInitializedError()
    def __dealloc__(self):
        if self.ginfo == NULL:
            return
        sharp_destroy_geom_info(self.ginfo)
        self.ginfo = NULL
 cdef class healpix_grid(geom_info):
    """HEALPix geometry, optionally restricted to a subset of rings."""
    # Cache of ring weights read from file, keyed by (nside, 'T'/'Q'/'U').
    _weight_cache = {}
    def __init__(self, int nside, stride=1, int[::1] rings=None, double[::1] weights=None):
        cdef int nrings
        cdef int *rings_ptr = NULL
        cdef double *weights_ptr = NULL
        if weights is not None and weights.shape[0] != 2 * nside:
            raise ValueError('weights must have length 2 * nside')
        # without an explicit ring list, all 4*nside-1 rings are used
        if rings is None:
            nrings = 4 * nside - 1
        else:
            nrings = rings.shape[0]
            rings_ptr = &rings[0]
        if weights is not None:
            weights_ptr = &weights[0]
        sharp_make_subset_healpix_geom_info(nside, stride, nrings, rings_ptr,
                                            weights_ptr, &self.ginfo)
    @classmethod
    def load_ring_weights(cls, nside, fields):
        """
        Loads HEALPix ring weights from file. The environment variable
        HEALPIX should be set, and this routine will look in the `data`
        subdirectory.
        Parameters
        ----------
        nside: int
            HEALPix nside parameter
        fields: tuple of str
            Which weights to extract; pass ('T',) to only get scalar
            weights back, or ('T', 'Q', 'U') to get all the weights
        Returns
        -------
        List of NumPy arrays, according to fields parameter.
        """
        import os
        from astropy.io import fits
        data_path = os.path.join(os.environ['HEALPIX'], 'data')
        fits_field_names = {
            'T': 'TEMPERATURE WEIGHTS',
            'Q': 'Q-POLARISATION WEIGHTS',
            'U': 'U-POLARISATION WEIGHTS'}
        # only touch the file for fields that are not cached yet
        missing = [name for name in fields if (nside, name) not in cls._weight_cache]
        if missing:
            weight_file = os.path.join(data_path, 'weight_ring_n%05d.fits' % nside)
            hdulist = fits.open(weight_file)
            try:
                for name in missing:
                    column = hdulist[1].data.field(fits_field_names[name])
                    ring_weights = column.ravel().astype(np.double)
                    # the cached value is the file content plus one
                    ring_weights += 1
                    cls._weight_cache[nside, name] = ring_weights
            finally:
                hdulist.close()
        # hand out copies so callers cannot modify the cache
        result = []
        for name in fields:
            result.append(cls._weight_cache[(nside, name)].copy())
        return result
 #
 # alm_info
 #
 cdef class alm_info:
    """Owner of a libsharp sharp_alm_info pointer.

    The pointer starts out NULL; subclasses are expected to fill it in. It is
    destroyed when the Python object is deallocated.
    """
    cdef sharp_alm_info *ainfo
    def __cinit__(self, *args, **kwargs):
        self.ainfo = NULL
    def local_size(self):
        """Return sharp_alm_count() of the wrapped a_lm layout."""
        if self.ainfo != NULL:
            return sharp_alm_count(self.ainfo)
        raise NotInitializedError()
    def __dealloc__(self):
        if self.ainfo == NULL:
            return
        sharp_destroy_alm_info(self.ainfo)
        self.ainfo = NULL
 cdef class triangular_order(alm_info):
    """a_lm layout created by sharp_make_triangular_alm_info."""
    def __init__(self, int lmax, mmax=None, stride=1):
        # mmax defaults to lmax
        if mmax is None:
            mmax = lmax
        sharp_make_triangular_alm_info(lmax, mmax, stride, &self.ainfo)
 cdef class rectangular_order(alm_info):
    """a_lm layout created by sharp_make_rectangular_alm_info."""
    def __init__(self, int lmax, mmax=None, stride=1):
        # mmax defaults to lmax
        if mmax is None:
            mmax = lmax
        sharp_make_rectangular_alm_info(lmax, mmax, stride, &self.ainfo)
 cdef class packed_real_order(alm_info):
    """a_lm layout created by sharp_make_mmajor_real_packed_alm_info."""
    def __init__(self, int lmax, stride=1, int[::1] ms=None):
        cdef int nm
        cdef int *ms_ptr = NULL
        # without an explicit list of m values, lmax+1 values are requested
        if ms is None:
            nm = lmax + 1
        else:
            nm = ms.shape[0]
            ms_ptr = &ms[0]
        sharp_make_mmajor_real_packed_alm_info(lmax, stride, nm, ms_ptr, &self.ainfo)
--- a/external/sharp/python/libsharp/libsharp_mpi.pyx
+++ b/external/sharp/python/libsharp/libsharp_mpi.pyx
@ -1,17 +0,0 @@
 # MPI_Comm is declared here as an opaque pointer type coming from mpi.h.
 cdef extern from "mpi.h":
    ctypedef void *MPI_Comm
 # C-API helper used below to hand a raw pointer back to Python.
 cdef extern from "Python.h":
    object PyLong_FromVoidPtr(void*)
 # Declaration of mpi4py's Comm extension type (C struct PyMPICommObject) so
 # that its ob_mpi member can be accessed from this module.
 # NOTE(review): the member list presumably has to mirror mpi4py's own struct
 # layout -- confirm against the mpi4py version in use.
 cdef extern:
    ctypedef class mpi4py.MPI.Comm [object PyMPICommObject]:
        cdef MPI_Comm ob_mpi
        cdef unsigned flags
 # Compatibility shim for mpi4py <= 1.3.1; more recent releases
 # ship MPI._addressof, which could be used in its place.
 def _addressof(Comm comm):
    """Return the address of ``comm.ob_mpi`` as a Python integer."""
    cdef void *handle_addr = <void*>&comm.ob_mpi
    return PyLong_FromVoidPtr(handle_addr)
--- a/external/sharp/python/libsharp/tests/init.py
+++ b/external/sharp/python/libsharp/tests/init.py
@ -1 +0,0 @@
 # empty
--- a/external/sharp/python/libsharp/tests/test_legendre.py
+++ b/external/sharp/python/libsharp/tests/test_legendre.py
@ -1,59 +0,0 @@
 import numpy as np
 from scipy.special import legendre
 from scipy.special import p_roots
 import libsharp
 from numpy.testing import assert_allclose
 def check_legendre_transform(lmax, ntheta):
    """
    Compare libsharp.legendre_transform with a reference built from
    scipy.special.legendre, for coefficients 0..lmax on ``ntheta`` points.

    The reference is ``P @ bl`` where ``P[:, l]`` is the degree-l Legendre
    polynomial evaluated at ``cos(theta)``, theta evenly spaced on [0, pi].
    Both the double-precision and the single-precision results are checked.
    """
    ells = np.arange(lmax + 1)
    if lmax >= 1:
        # Coefficients decay to 1e-3 (before the 2l+1 factor) at l = lmax.
        sigma = -np.log(1e-3) / lmax / (lmax + 1)
        bl = np.exp(-sigma*ells*(ells+1))
        bl *= (2 * ells + 1)
    else:
        bl = np.asarray([1], dtype=np.double)
    theta = np.linspace(0, np.pi, ntheta, endpoint=True)
    x = np.cos(theta)
    # Compute truth using scipy.special.legendre
    P = np.zeros((ntheta, lmax + 1))
    # A separate loop name keeps the `ells` array from being overwritten.
    for ell in range(lmax + 1):
        P[:, ell] = legendre(ell)(x)
    y0 = np.dot(P, bl)
    # double-precision
    y = libsharp.legendre_transform(x, bl)
    assert_allclose(y, y0, rtol=1e-12, atol=1e-12)
    # single-precision: check the float32 result itself (the double-precision
    # result used to be re-tested here, leaving y32 unverified).
    # NOTE(review): this tolerance was never exercised before; loosen it if
    # float32 round-off turns out to exceed 1e-5.
    y32 = libsharp.legendre_transform(x.astype(np.float32), bl)
    assert_allclose(y32, y0, rtol=1e-5, atol=1e-5)
 def test_legendre_transform():
    """
    Yield check_legendre_transform cases over fixed and random grid sizes
    (ntheta) combined with fixed and random lmax values.
    """
    theta_counts = list(np.random.randint(500, size=20))
    theta_counts[0:0] = [0, 9, 17, 19]
    for n_theta in theta_counts:
        # A fresh set of random lmax values is drawn for every grid size.
        lmax_choices = [0, 1, 2, 3, 20]
        lmax_choices.extend(np.random.randint(50, size=4))
        for band_limit in lmax_choices:
            yield check_legendre_transform, band_limit, n_theta
 def check_legendre_roots(n):
    """Compare libsharp.legendre_roots(n) with SciPy's p_roots(n)."""
    if n == 0:
        # p_roots is not called for n == 0; empty lists are the reference.
        ref_nodes, ref_weights = [], []
    else:
        ref_nodes, ref_weights = p_roots(n) # from SciPy
    nodes, weights = libsharp.legendre_roots(n)
    for expected, actual in ((ref_nodes, nodes), (ref_weights, weights)):
        assert_allclose(expected, actual, rtol=1e-14, atol=1e-14)
 def test_legendre_roots():
    """
    Check libsharp's Legendre root finder against the SciPy implementation
    for a handful of orders.
    """
    for order in (0, 1, 32, 33, 128):
        yield check_legendre_roots, order
--- a/external/sharp/python/libsharp/tests/test_sht.py
+++ b/external/sharp/python/libsharp/tests/test_sht.py
@ -1,34 +0,0 @@
 import numpy as np
 import healpy
 from scipy.special import legendre
 from scipy.special import p_roots
 from numpy.testing import assert_allclose
 import libsharp
 from mpi4py import MPI
 def test_basic():
    """
    Synthesise three identical maps from one set of packed real alm split
    across MPI ranks by m, check the three maps agree, and display the first
    one on rank 0.
    """
    comm = MPI.COMM_WORLD
    lmax = 10
    nside = 8
    rank = comm.Get_rank()
    # Rank r owns m = r, r + size, r + 2*size, ...
    ms = np.arange(rank, lmax + 1, comm.Get_size(), dtype=np.int32)
    order = libsharp.packed_real_order(lmax, ms=ms)
    grid = libsharp.healpix_grid(nside)
    alm = np.zeros(order.local_size())
    if rank in (0, 1):
        alm[0] = 1
    stacked_alm = np.repeat(alm[None, None, :], 3, 0)
    sky = libsharp.synthesis(grid, order, stacked_alm, comm=comm)
    assert np.all(sky[2, :] == sky[1, :]) and np.all(sky[1, :] == sky[0, :])
    sky = sky[0, 0, :]
    if rank == 0:
        healpy.mollzoom(sky)
        from matplotlib.pyplot import show
        show()
--- a/external/sharp/python/setup.py
+++ b/external/sharp/python/setup.py
@ -1,77 +0,0 @@
 #! /usr/bin/env python
 # Long description shown on the package index page.
 descr   = """Spherical Harmonic transforms package
 Python API for the libsharp spherical harmonic transforms library
 """
 import os
 import sys
 # Package metadata handed to setup() further down. Each value is a plain
 # string: a trailing comma after a value would silently turn it into a
 # one-element tuple.
 DISTNAME            = 'libsharp'
 DESCRIPTION         = 'libsharp library for fast Spherical Harmonic Transforms'
 LONG_DESCRIPTION    = descr
 MAINTAINER          = 'Dag Sverre Seljebotn'
 MAINTAINER_EMAIL    = 'd.s.seljebotn@astro.uio.no'
 URL                 = 'http://sourceforge.net/projects/libsharp/'
 LICENSE             = 'GPL'
 DOWNLOAD_URL        = "http://sourceforge.net/projects/libsharp/"
 VERSION             = '0.1'
 # setuptools only agrees to compile .pyx sources when it can find Pyrex.
 # A stand-in Pyrex package lives in ./fake_pyrex; appending that directory
 # to the end of the module search path satisfies the check, and the later
 # import of Cython.Distutils makes Cython (rather than a possibly missing
 # Pyrex) do the actual compilation.
 project_path = os.path.dirname(__file__)
 sys.path.append(os.path.join(project_path, 'fake_pyrex'))
 from setuptools import setup, find_packages, Extension
 from Cython.Distutils import build_ext
 import numpy as np
 # Locate the libsharp installation from the environment.
 #   LIBSHARP          install prefix; its include/ and lib/ sub-directories
 #                     are used unless overridden below
 #   LIBSHARP_INCLUDE  explicit header directory
 #   LIBSHARP_LIB      explicit library directory
 libsharp = os.environ.get('LIBSHARP', None)
 libsharp_include = os.environ.get('LIBSHARP_INCLUDE', libsharp and os.path.join(libsharp, 'include'))
 libsharp_lib = os.environ.get('LIBSHARP_LIB', libsharp and os.path.join(libsharp, 'lib'))
 # Without both directories the extension cannot be built: explain and abort.
 if libsharp_include is None or libsharp_lib is None:
    sys.stderr.write('Please set LIBSHARP environment variable to the install directory of libsharp, '
                     'this script will refer to the lib and include sub-directories. Alternatively '
                     'set LIBSHARP_INCLUDE and LIBSHARP_LIB\n')
    sys.exit(1)
 if __name__ == "__main__":
    # The one compiled module: the Cython wrapper, linked against the
    # libsharp libraries from the include/lib directories resolved earlier.
    sharp_extension = Extension(
        "libsharp.libsharp",
        ["libsharp/libsharp.pyx"],
        libraries=["sharp", "fftpack", "c_utils"],
        include_dirs=[libsharp_include],
        library_dirs=[libsharp_lib],
        extra_link_args=["-fopenmp"],
    )
    trove_classifiers = [
        'Development Status :: 3 - Alpha',
        'Environment :: Console',
        'Intended Audience :: Developers',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: GNU General Public License (GPL)',
        'Topic :: Scientific/Engineering',
    ]
    setup(
        name=DISTNAME,
        version=VERSION,
        description=DESCRIPTION,
        long_description=LONG_DESCRIPTION,
        maintainer=MAINTAINER,
        maintainer_email=MAINTAINER_EMAIL,
        url=URL,
        download_url=DOWNLOAD_URL,
        license=LICENSE,
        classifiers=trove_classifiers,
        install_requires=['numpy'],
        packages=find_packages(),
        test_suite="nose.collector",
        # A zipped package would work in principle, but as everything is
        # compiled code it would only be unpacked again at runtime, so
        # zipping is disabled.
        zip_safe=False,
        cmdclass={"build_ext": build_ext},
        ext_modules=[sharp_extension],
    )
--- a/external/sharp/runjinja.py
+++ b/external/sharp/runjinja.py
@ -1,19 +0,0 @@
 #!/usr/bin/env python
 """
 Preprocesses foo.c.in to foo.c. Reads STDIN and writes STDOUT.
 """
 import sys
 import hashlib
 from jinja2 import Template, Environment
 # Jinja environment whose block delimiters are the C-comment style markers
 # /*{ ... }*/ ; variables keep the {{ ... }} form.
 env = Environment(block_start_string='/*{',
                  block_end_string='}*/',
                  variable_start_string='{{',
                  variable_end_string='}}')
 # Names made available to templates in addition to Jinja's own.
 extra_vars = dict(len=len)
 template_text = sys.stdin.read()
 # hashlib only accepts bytes. On Python 3 stdin yields text, so encode it
 # back with the stream's own encoding; a byte string is hashed as it is.
 if isinstance(template_text, bytes):
    digest_input = template_text
 else:
    digest_input = template_text.encode(getattr(sys.stdin, 'encoding', None) or 'utf-8')
 sys.stdout.write('/* DO NOT EDIT. md5sum of source: %s */' % hashlib.md5(digest_input).hexdigest())
 sys.stdout.write(env.from_string(template_text).render(**extra_vars))
--- a/src/symbol_visible.hpp
+++ b/src/symbol_visible.hpp
@ -0,0 +1,32 @@
 #ifndef __COSMOTOOL_SYMBOL_VISIBLE_HPP
 #define __COSMOTOOL_SYMBOL_VISIBLE_HPP
 /*
  * Symbol visibility macros.
  *
  *   CTOOL_DLL_PUBLIC  marks a symbol as exported (or imported, on Windows
  *                     when BUILDING_DLL is not defined).
  *   CTOOL_DLL_LOCAL   marks a symbol as hidden where the compiler supports
  *                     it; expands to nothing otherwise.
  */
 #if defined(_WIN32) || defined(__CYGWIN__)
   /* Windows / Cygwin: export while building the DLL, import when using it. */
   #if defined(BUILDING_DLL) && defined(__GNUC__)
     #define CTOOL_DLL_PUBLIC __attribute__ ((dllexport))
   #elif defined(BUILDING_DLL)
     /* Note: GCC appears to accept this syntax as well. */
     #define CTOOL_DLL_PUBLIC __declspec(dllexport)
   #elif defined(__GNUC__)
     #define CTOOL_DLL_PUBLIC __attribute__ ((dllimport))
   #else
     /* Note: GCC appears to accept this syntax as well. */
     #define CTOOL_DLL_PUBLIC __declspec(dllimport)
   #endif
   #define CTOOL_DLL_LOCAL
 #elif defined(__GNUC__) && __GNUC__ >= 4
   /* GCC 4 or newer: use the visibility attribute. */
   #define CTOOL_DLL_PUBLIC __attribute__ ((visibility ("default")))
   #define CTOOL_DLL_LOCAL  __attribute__ ((visibility ("hidden")))
 #else
   /* No visibility control available: both macros expand to nothing. */
   #define CTOOL_DLL_PUBLIC
   #define CTOOL_DLL_LOCAL
 #endif
 #endif
		`@ -1 +0,0 @@`
			`# work around broken setuptools monkey patching`
		`@ -1,2 +0,0 @@`
			`This directory is here to fool setuptools into building .pyx files`
			`even if Pyrex is not installed. See ../setup.py.`